import pandas as pd import os def generate_summary(): file_path = '许可事项三位一体对比表_v2.xlsx' if not os.path.exists(file_path): print(f"Error: {file_path} not found.") return df = pd.read_excel(file_path) # Debug: Print the exact column names to handle any hidden chars or spaces cols = df.columns.tolist() status_col = [c for c in cols if '状态' in c][0] client_req_col = '是否客户要求' received_col = '是否已有物理文件' in_db_col = '是否已入库' name_col = '系统需求事项名称' total = len(df) is_client_req = (df[client_req_col] == '是').sum() has_file = (df[received_col] == '√').sum() is_in_db = (df[in_db_col] == '√').sum() summary = [] summary.append("=" * 50) summary.append("许可事项三位一体对比统计") summary.append("=" * 50) summary.append(f"总计处理事项: {total}") summary.append(f"客户要求事项: {is_client_req}") summary.append(f"已接收物理文件: {has_file}") summary.append(f"数据库已入库: {is_in_db}") summary.append("-" * 50) status_counts = df[status_col].value_counts() summary.append("状态分布:") for status, count in status_counts.items(): summary.append(f" - {status}: {count}") summary.append("-" * 50) # Detail some important groups missing_all = df[df[status_col] == "缺失 (无文件无数据)"] if not missing_all.empty: summary.append(f"\n缺失 (无文件无数据) - 前10项:") for name in missing_all[name_col].head(10): summary.append(f" * {name}") to_import = df[df[status_col] == "待导入 (已有文件)"] if not to_import.empty: summary.append(f"\n待导入 (已有文件) - 前10项:") for name in to_import[name_col].head(10): summary.append(f" * {name}") missing_source = df[df[status_col] == "已入库但缺源文件"] if not missing_source.empty: summary.append(f"\n已入库但缺源文件 - 前10项:") for name in missing_source[name_col].head(10): summary.append(f" * {name}") report_text = "\n".join(summary) print(report_text) with open('对比简报.txt', 'w', encoding='utf-8') as f: f.write(report_text) if __name__ == "__main__": generate_summary()