diff --git a/.gitignore b/.gitignore index 00ba7e8..1b6094f 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,4 @@ temp_repo.py excel_info.txt parsing_output.txt unmatched_permits_report.txt -analysis/ \ No newline at end of file +analysis/data/checkpoints/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..55c37e9 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# 市监局法律风险提示系统 (LawRisk Backend) + +智能法律风险检索与管理系统后端服务,基于 Flask 开发,提供行政许可事项的风险提示检索、数据管理及自动化备份功能。 + +## 🌟 核心功能 + +- **智能检索 (V2 API)**: 支持自然语言查询,结合向量嵌入和 LLM 技术,精准匹配许可事项与风险点。 +- **行政许可管理**: 提供完善的 Excel 导入机制,支持“以新盖旧”模式,并在覆盖前自动创建风险点数据快照。 +- **自动化备份 (Checkpoint)**: 内置数据库检查点系统,定期或手动记录数据库全量状态,支持一键恢复。 +- **权限管理**: 灵活的角色与层级控制,支持市级、区级及具体单位的细粒度数据隔离。 +- **组织架构**: 动态组织架构管理,支持拖拽调整层级关系。 + +## 🚀 快速开始 + +### 1. 环境准备 +```bash +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -r requirements.txt +``` + +### 2. 配置环境变量 +修改 `.env` 文件,配置数据库连接、DashScope API 密钥等。详细说明请参考 [配置指南](docs/guides/README.md#2-配置环境变量)。 + +### 3. 运行服务 +```bash +python app.py +``` + +## 📖 详细文档 + +- **[项目架构与指南](docs/guides/README.md)**: 了解技术栈、项目结构及开发流程。 +- **[API 文档 (V2)](docs/V2_API文档.md)**: 详细的 API 接口说明与示例。 +- **[文档索引](docs/README.md)**: 包含所有功能开发、测试报告及维护文档的详细列表。 + +## 🛠️ 最近更新 + +- **Checkpoint 系统增强**: 修复了二进制数据(Excel 原始文件)备份时的 JSON 序列化问题,增强了备份稳定性。 +- **导入机制确认**: 支持许可事项的智能覆盖模式,系统在覆盖前会自动执行风险点快照备份。 +- **数据清理**: 优化了检查点清理逻辑,支持手动触发全量备份并清空冗余历史记录。 + +--- +© 2025 市监局项目开发组 diff --git a/docs/guides/README.md b/docs/guides/README.md index 13d674c..32a4a51 100644 --- a/docs/guides/README.md +++ b/docs/guides/README.md @@ -109,10 +109,15 @@ curl -X POST "http://localhost:8000/fs-ai-asistant/api/workflow/lawrisk/v2" \ ## 身份认证 -- 设置 `FLASK_SECRET_KEY` 保护会话 Cookie,同时通过 `LAWRISK_ADMIN_USERNAME` 和 `LAWRISK_ADMIN_PASSWORD` 注入首个管理员账号(可选 `LAWRISK_ADMIN_ROLE`, `LAWRISK_ADMIN_GRADE` 和 `LAWRISK_ADMIN_DISPLAY_NAME`)。 +- 设置 `FLASK_SECRET_KEY` 保护会话 Cookie,同时通过 `LAWRISK_ADMIN_USERNAME` 和 `LAWRISK_ADMIN_PASSWORD` 注入首个管理员账号。 - 首次启动会自动创建 `auth_users` 表,用于存储用户名、哈希密码、角色(role)和级别(grade)。 -- 登录页位于 `http://localhost:8000/fs-ai-asistant/lawrisk/login`,表单提交至 `/auth/login`;成功后会在会话中写入当前用户信息。 -- API 客户端可以调用 `/auth/me` 获取当前登录信息,调用 `/auth/logout` 注销。 +- 登录页位于 `/fs-ai-asistant/lawrisk/login`,API 客户端可以调用 `/auth/me` 获取当前登录信息。 + +## 数据备份与恢复 (Checkpoint) + +- 系统提供 Checkpoint 功能,可对数据库全表进行 JSON 序列化备份。 +- 支持二进制数据(如 `bytes`, `memoryview`)的 Base64 自动转换,确保 Excel 原始文件等资产的完整备份。 +- 管理员可通过管理后台 `/db_admin` 手动创建、列出或恢复检查点。 ## API文档 diff --git a/lawrisk/services/licensing_repo.py b/lawrisk/services/licensing_repo.py index 555142d..fb059e3 100644 --- a/lawrisk/services/licensing_repo.py +++ b/lawrisk/services/licensing_repo.py @@ -93,10 +93,14 @@ _IMPORT_HEADER_ALIASES: Dict[str, Set[str]] = { "summary": { "风险说明", "摘要", - "备注", "风险摘要", "补充说明", }, + "remark": { + "备注", + "其他", + "备注项", + }, "permit_status": { "许可状态", "事项状态", @@ -159,7 +163,8 @@ _IMPORT_HEADER_KEYWORDS: List[Tuple[str, Tuple[str, ...]]] = [ ("risk_content", ("风险",)), ("legal_basis", ("依据",)), ("document_no", ("文号", "编号")), - ("summary", ("备注", "摘要")), + ("summary", ("摘要", "说明")), + ("remark", ("备注",)), ("responsible_contact", ("责任", "主管")), ("jurisdiction_scope", ("范围", "区域")), ] @@ -328,6 +333,9 @@ def _score_import_header(canonical: str, cell_text: str, col_idx: int) -> float: elif canonical == "summary": if "摘要" in text: score += 3 + elif canonical == "remark": + if "备注" in text: + score += 3 elif canonical == "serial_number": if "序号" in text: @@ -402,6 +410,7 @@ def _normalize_import_row( legal_basis = _clean_empty(raw_row.get("legal_basis")) document_no = _clean_empty(raw_row.get("document_no")) summary = _clean_empty(raw_row.get("summary")) + remark = _clean_empty(raw_row.get("remark")) permit_status = _clean_empty(raw_row.get("permit_status") or sheet_defaults.get("permit_status")) filler_name = _clean_empty(raw_row.get("filler_name") or sheet_defaults.get("filler_name")) unit_name = _clean_empty(raw_row.get("unit_name") or sheet_defaults.get("unit_name")) @@ -436,6 +445,7 @@ def _normalize_import_row( "legal_basis": legal_basis, "document_no": document_no, "summary": summary, + "remark": remark, "permit_status": permit_status, "responsible_contact": responsible_contact, "jurisdiction_scope": jurisdiction_scope, @@ -1025,17 +1035,18 @@ def _ensure_risk( legal_basis: Optional[str], document_no: Optional[str], summary: Optional[str], + remark: Optional[str] = None, ) -> str: cur = conn.cursor() cur.execute( """ - INSERT INTO risks (risk_content, legal_basis, document_no, summary) - VALUES (%s, %s, %s, %s) - ON CONFLICT (risk_content, legal_basis, document_no, summary) + INSERT INTO risks (risk_content, legal_basis, document_no, summary, remark) + VALUES (%s, %s, %s, %s, %s) + ON CONFLICT (risk_content, legal_basis, document_no, summary, remark) DO UPDATE SET risk_content = EXCLUDED.risk_content RETURNING id """, - (risk_content, legal_basis, document_no, summary), + (risk_content, legal_basis, document_no, summary, remark), ) risk_id = cur.fetchone()[0] return str(risk_id) @@ -1570,6 +1581,7 @@ def commit_permit_import_session( legal_basis=row.get("legal_basis"), document_no=row.get("document_no"), summary=row.get("summary"), + remark=row.get("remark"), ) cur.execute( """ @@ -2444,6 +2456,45 @@ def list_all_themes() -> List[Dict[str, Any]]: return items +def list_unbound_permits() -> List[Dict[str, Any]]: + """Return all permits that are in a region but not bound to any theme in that region.""" + sql = """ + SELECT + r.id AS region_id, + r.name AS region_name, + p.id AS permit_id, + p.name AS permit_name, + rpd.unit_name, + rpd.updated_at + FROM region_permit_details rpd + JOIN regions r ON r.id = rpd.region_id + JOIN permits p ON p.id = rpd.permit_id + LEFT JOIN region_theme_permits rtp + ON rtp.region_id = rpd.region_id + AND rtp.permit_id = rpd.permit_id + WHERE rtp.theme_id IS NULL + ORDER BY r.name, p.name + """ + items: List[Dict[str, Any]] = [] + try: + with _lic_pg_conn() as conn: + cur = conn.cursor() + cur.execute(sql) + rows = cur.fetchall() + columns = tuple(col[0] for col in cur.description) + for row in rows: + record = {columns[idx]: row[idx] for idx in range(len(columns))} + # Serialize UUIDs and timestamps + record["region_id"] = str(record["region_id"]) + record["permit_id"] = str(record["permit_id"]) + if record["updated_at"]: + record["updated_at"] = record["updated_at"].isoformat() + items.append(record) + except Exception as e: + logger.error(f"Error listing unbound permits: {e}") + return items + + def create_theme(name: str) -> Dict[str, Any]: normalized = _clean_text(name) if not normalized: @@ -3329,6 +3380,7 @@ def load_permits_and_risks( rk.legal_basis, rk.document_no, rk.summary, + rk.remark, rpr.serial_number, rpd.permit_status, rpd.subitem_summary, @@ -3336,21 +3388,19 @@ def load_permits_and_risks( rpd.jurisdiction_scope, rpd.filler_name, rpd.unit_name, - rpd.filler_name, - rpd.unit_name, rpd.source_update_date, rpd.contact_info - FROM region_theme_permits rtp - JOIN permits p ON p.id = rtp.permit_id + FROM region_permit_details rpd + JOIN permits p ON p.id = rpd.permit_id + LEFT JOIN region_theme_permits rtp + ON rtp.region_id = rpd.region_id + AND rtp.permit_id = rpd.permit_id LEFT JOIN themes t ON t.id = rtp.theme_id LEFT JOIN region_permit_risks rpr - ON rpr.region_id = rtp.region_id - AND rpr.permit_id = rtp.permit_id + ON rpr.region_id = rpd.region_id + AND rpr.permit_id = rpd.permit_id LEFT JOIN risks rk ON rk.id = rpr.risk_id - LEFT JOIN region_permit_details rpd - ON rpd.region_id = rtp.region_id - AND rpd.permit_id = rtp.permit_id - WHERE rtp.region_id = %s + WHERE rpd.region_id = %s """ params: List[Any] = [region_id] theme_filter = theme_id if (theme_id and not _is_all_theme_marker(theme_id)) else None @@ -3358,13 +3408,14 @@ def load_permits_and_risks( sql += " AND (rtp.theme_id = %s OR t.name = '所有主题事项')" params.append(theme_filter) if permit_id is not None: - sql += " AND rtp.permit_id = %s" + sql += " AND rpd.permit_id = %s" params.append(permit_id) sql += """ ORDER BY p.name, rk.risk_content """ permits: Dict[str, Dict[str, object]] = {} + risk_seen_map: Dict[str, Set[str]] = {} # pid -> set of risk_ids with _lic_pg_conn() as conn: _ensure_contact_info_column(conn) cur = conn.cursor() @@ -3380,6 +3431,7 @@ def load_permits_and_risks( legal_basis, document_no, summary, + remark, serial_number, permit_status, subitem_summary, @@ -3387,8 +3439,6 @@ def load_permits_and_risks( jurisdiction_scope, filler_name, unit_name, - filler_name, - unit_name, source_update_date, contact_info, ) = row @@ -3408,7 +3458,6 @@ def load_permits_and_risks( "jurisdiction_scope": None, "filler_name": None, "unit_name": None, - "unit_name": None, "source_update_date": None, "contact_info": None, "theme": { @@ -3463,17 +3512,24 @@ def load_permits_and_risks( if entry["contact_info"] is None and contact_info: entry["contact_info"] = contact_info.strip() or None if risk_id is not None: - summary_markdown = _format_summary_markdown(summary or "") - entry["risks"].append( - { - "id": str(risk_id), - "risk_content": risk_content or "", - "legal_basis": legal_basis or "", - "document_no": document_no or "", - "summary": summary_markdown, - "serial_number": serial_number, - } - ) + risk_id_str = str(risk_id) + # Avoid duplicates when a permit has multiple themes + seen_risk_ids = risk_seen_map.setdefault(pid, set()) + if risk_id_str not in seen_risk_ids: + seen_risk_ids.add(risk_id_str) + summary_markdown = _format_summary_markdown(summary or "") + remark_markdown = _format_summary_markdown(remark or "") + entry["risks"].append( + { + "id": risk_id_str, + "risk_content": risk_content or "", + "legal_basis": legal_basis or "", + "document_no": document_no or "", + "summary": summary_markdown, + "remark": remark_markdown, + "serial_number": serial_number, + } + ) permit_ids = list(permits.keys()) scope_map = _load_permit_scopes_for_region(conn, region_id, permit_ids) @@ -4133,6 +4189,11 @@ def create_checkpoint(description: str = "") -> Dict[str, Any]: except ImportError: pass + if isinstance(obj, (bytes, bytearray, memoryview)): + import base64 + # For data integrity in JSON, base64 is standard + return base64.b64encode(obj).decode('ascii') + if hasattr(obj, 'isoformat'): return str(obj) raise TypeError(f"Object of type {type(obj)} is not JSON serializable") @@ -5600,19 +5661,62 @@ def filter_permits_advanced( """ print(f"[DEBUG] filter_permits_advanced called with limit={limit}, offset={offset}") # Use subquery to avoid DISTINCT with window functions issue - sql = """ + # Subquery to get unique permits matching filters with pagination + # We use a CTE to ensure limit/offset apply to unique permits, not to rows (which can duplicate per theme) + base_where = " WHERE 1=1 " + base_params = [] + + if regions: + placeholders = ', '.join(['%s'] * len(regions)) + base_where += f" AND rpd.region_id IN ({placeholders})" + base_params.extend(regions) + + if themes: + placeholders = ', '.join(['%s'] * len(themes)) + base_where += f" AND rtp.theme_id IN ({placeholders})" + base_params.extend(themes) + + if departments: + placeholders = ', '.join(['%s'] * len(departments)) + base_where += f" AND (ps.uploader_department_id IN ({placeholders}) OR ps.bound_department_id IN ({placeholders}))" + base_params.extend(departments * 2) + + if search_text: + base_where += f" AND LOWER(p.name) LIKE LOWER(%s)" + base_params.append(f"%{search_text}%") + + sql = f""" + WITH filtered_p AS ( + SELECT rpd.permit_id, rpd.region_id + FROM region_permit_details rpd + JOIN permits p ON p.id = rpd.permit_id + LEFT JOIN region_theme_permits rtp + ON rtp.permit_id = rpd.permit_id + AND rtp.region_id = rpd.region_id + LEFT JOIN permit_sources ps + ON ps.permit_id = rpd.permit_id + AND ps.region_id = rpd.region_id + {base_where} + GROUP BY rpd.permit_id, rpd.region_id, p.name + ORDER BY LOWER(p.name) + LIMIT %s OFFSET %s + ) SELECT p.id AS permit_id, p.name AS permit_name, - rtp.region_id, + rpd.region_id, r.name AS region_name, rtp.theme_id, t.name AS theme_name, COALESCE(risk_counts.risk_count, 0) AS risk_count, COALESCE(theme_counts.theme_count, 0) AS theme_count - FROM region_theme_permits rtp - JOIN permits p ON p.id = rtp.permit_id - JOIN regions r ON r.id = rtp.region_id + FROM filtered_p fp + JOIN region_permit_details rpd ON rpd.permit_id = fp.permit_id AND rpd.region_id = fp.region_id + JOIN permits p ON p.id = rpd.permit_id + JOIN regions r ON r.id = rpd.region_id + LEFT JOIN region_theme_permits rtp + ON rtp.permit_id = rpd.permit_id + AND rtp.region_id = rpd.region_id LEFT JOIN themes t ON t.id = rtp.theme_id LEFT JOIN ( SELECT @@ -5621,8 +5725,8 @@ def filter_permits_advanced( COUNT(risk_id) AS risk_count FROM region_permit_risks GROUP BY permit_id, region_id - ) risk_counts ON risk_counts.permit_id = rtp.permit_id - AND risk_counts.region_id = rtp.region_id + ) risk_counts ON risk_counts.permit_id = rpd.permit_id + AND risk_counts.region_id = rpd.region_id LEFT JOIN ( SELECT permit_id, @@ -5630,51 +5734,11 @@ def filter_permits_advanced( COUNT(DISTINCT theme_id) AS theme_count FROM region_theme_permits GROUP BY permit_id, region_id - ) theme_counts ON theme_counts.permit_id = rtp.permit_id - AND theme_counts.region_id = rtp.region_id - LEFT JOIN permit_sources ps - ON ps.permit_id = rtp.permit_id - AND ps.region_id = rtp.region_id - WHERE 1=1 - """ - params = [] - param_count = 0 - - if regions: - placeholders = ', '.join(['%s'] * len(regions)) - param_count += len(regions) - sql += f" AND rtp.region_id IN ({placeholders})" - params.extend(regions) - - if themes: - placeholders = ', '.join(['%s'] * len(themes)) - param_count += len(themes) - sql += f" AND rtp.theme_id IN ({placeholders})" - params.extend(themes) - - if departments: - placeholders = ', '.join(['%s'] * len(departments)) - param_count += len(departments) - sql += f" AND (ps.uploader_department_id IN ({placeholders}) OR ps.bound_department_id IN ({placeholders}))" - params.extend(departments * 2) - - if search_text: - param_count += 1 - sql += f" AND LOWER(p.name) LIKE LOWER(%s)" - params.append(f"%{search_text}%") - - sql += """ + ) theme_counts ON theme_counts.permit_id = rpd.permit_id + AND theme_counts.region_id = rpd.region_id ORDER BY LOWER(p.name), LOWER(r.name), LOWER(COALESCE(t.name, '')) """ - - # Add pagination - param_count += 1 - sql += f" LIMIT %s" - params.append(limit) - - param_count += 1 - sql += f" OFFSET %s" - params.append(offset) + params = base_params + [limit, offset] permits_map = {} with _lic_pg_conn() as conn: @@ -5718,38 +5782,24 @@ def filter_permits_advanced( ): existing_themes.append(theme_payload) + # Use OrderedDict or sorted permits_list to maintain name order after dict values collection permits_list = list(permits_map.values()) + # Sort again by name to ensure order because dict.values() might not be stable depending on Python version/access + permits_list.sort(key=lambda x: x["name"].lower()) # Get total count for pagination - count_sql = """ - SELECT COUNT(DISTINCT rtp.permit_id || '_' || rtp.region_id) - FROM region_theme_permits rtp - JOIN permits p ON p.id = rtp.permit_id - LEFT JOIN permit_sources ps - ON ps.permit_id = rtp.permit_id - AND ps.region_id = rtp.region_id - WHERE 1=1 + count_sql = f""" + SELECT COUNT(DISTINCT rpd.permit_id || '_' || rpd.region_id) + FROM region_permit_details rpd + JOIN permits p ON p.id = rpd.permit_id + LEFT JOIN region_theme_permits rtp ON rtp.permit_id = rpd.permit_id AND rtp.region_id = rpd.region_id + LEFT JOIN permit_sources ps ON ps.permit_id = rpd.permit_id AND ps.region_id = rpd.region_id + {base_where} """ - count_params = [] - if regions: - placeholders = ', '.join(['%s'] * len(regions)) - count_sql += f" AND rtp.region_id IN ({placeholders})" - count_params.extend(regions) - if themes: - placeholders = ', '.join(['%s'] * len(themes)) - count_sql += f" AND rtp.theme_id IN ({placeholders})" - count_params.extend(themes) - if departments: - placeholders = ', '.join(['%s'] * len(departments)) - count_sql += f" AND (ps.uploader_department_id IN ({placeholders}) OR ps.bound_department_id IN ({placeholders}))" - count_params.extend(departments * 2) - if search_text: - count_sql += " AND LOWER(p.name) LIKE LOWER(%s)" - count_params.append(f"%{search_text}%") - + with _lic_pg_conn() as conn: cur = conn.cursor() - cur.execute(count_sql, count_params) + cur.execute(count_sql, base_params) total = cur.fetchone()[0] return { @@ -5762,6 +5812,7 @@ def filter_permits_advanced( }, } + def _ensure_contact_info_column(conn: pg.Connection) -> None: "Ensure that the contact_info column exists in region_permit_details." # This check is now redundant since schema fix script was run, but kept for safety