fs-lawrisk/lawrisk_v2_service.py

140 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import json
from typing import Any, Dict, List
from licensing_repo import (
list_region_theme_options,
load_theme_payload,
)
from lawrisk_service import ChatClient
def _compose_prompt(payload: Dict[str, Any]) -> str:
"""Build a natural-language prompt snippet from structured payload."""
region = payload.get("region", {})
theme = payload.get("theme", {})
permits = payload.get("permits", [])
lines: List[str] = []
lines.append(f"地区:{region.get('name', '')}")
lines.append(f"主题事项:{theme.get('name', '')}")
for permit in permits:
pname = permit.get("name", "")
lines.append(f"许可事项:{pname}")
permit_scopes = permit.get("business_scopes", [])
if permit_scopes:
scope_text = "".join(
scope.get("description", "") for scope in permit_scopes if scope.get("description")
)
if scope_text:
lines.append(f" 经营范围:{scope_text}")
risks = permit.get("risks", [])
for idx, risk in enumerate(risks, start=1):
detail_parts = []
if risk.get("risk_content"):
detail_parts.append(f"风险提示:{risk['risk_content']}")
if risk.get("legal_basis"):
detail_parts.append(f"法律依据:{risk['legal_basis']}")
if risk.get("document_no"):
detail_parts.append(f"文号:{risk['document_no']}")
if risk.get("summary"):
detail_parts.append(f"摘要:{risk['summary']}")
if detail_parts:
lines.append(f" 风险{idx}" + "".join(detail_parts))
return "\n".join(lines)
def _select_theme_options(query: str, catalog: List[Dict[str, str]]) -> List[str]:
"""Use LLM to choose relevant region-theme option ids."""
if not catalog:
return []
lines = [f"{item['option_id']}\t{item['display_name']}" for item in catalog]
options_block = "\n".join(lines)
system_msg = (
"你是政务事项检索助手。根据用户提供的问题,"
"从给定的地区-主题列表中选择最相关的主题事项,返回其 option_id。"
"输出 JSON 数组,例如: [\"region_uuid:theme_uuid\"]."
)
user_msg = (
f"用户问题: {query}\n\n"
"候选主题列表 (option_id<tab>地区·主题):\n"
f"{options_block}\n\n"
"请仅输出 JSON 数组,内容为选择的 option_id。如果没有匹配请输出 []."
)
chat = ChatClient()
content = chat.chat(
[
{"role": "system", "content": system_msg},
{"role": "user", "content": user_msg},
]
)
raw = content.strip()
start = raw.find("[")
end = raw.rfind("]")
if start != -1 and end != -1 and end > start:
snippet = raw[start : end + 1]
else:
snippet = raw
selected: List[str] = []
try:
data = json.loads(snippet)
if isinstance(data, list):
for item in data:
if isinstance(item, str):
selected.append(item)
elif isinstance(item, dict) and isinstance(item.get("id"), str):
selected.append(item["id"])
except Exception:
selected = []
known_ids = {item["option_id"] for item in catalog}
uniq: List[str] = []
for option_id in selected:
if option_id in known_ids and option_id not in uniq:
uniq.append(option_id)
return uniq
def search_v2(query: str, return_debug: bool = False) -> Dict[str, Any]:
    """Look up the region-theme entries relevant to ``query``.

    The catalog comes from ``list_region_theme_options``; the relevant ids are
    chosen by ``_select_theme_options``; each chosen id of the form
    ``region_id:theme_id`` is expanded through ``load_theme_payload``.

    Args:
        query: The user's question.
        return_debug: When truthy, the ``debug`` entry of the result carries
            the catalog size and the selected option ids.

    Returns:
        A dict with ``risk_subject`` (one entry per loaded option) and
        ``debug`` (empty unless ``return_debug`` is truthy).
    """
    catalog = list_region_theme_options()
    selected_ids = _select_theme_options(query, catalog)
    entries_by_id = {entry["option_id"]: entry for entry in catalog}

    matches: List[Dict[str, Any]] = []
    for option_id in selected_ids:
        entry = entries_by_id[option_id]
        region_id, separator, theme_id = option_id.partition(":")
        if not separator:
            # Ids without a "region:theme" separator cannot be loaded.
            continue
        payload = load_theme_payload(region_id, theme_id)
        snippet = _compose_prompt(payload)
        matches.append(
            {
                "id": option_id,
                "display_name": entry["display_name"],
                "region": payload["region"],
                "theme": payload["theme"],
                "business_scopes": payload["business_scopes"],
                "permits": payload["permits"],
                "prompt_snippet": snippet,
            }
        )

    debug: Dict[str, Any] = (
        {"catalog_size": len(catalog), "selected_option_ids": selected_ids}
        if return_debug
        else {}
    )
    return {"risk_subject": matches, "debug": debug}