import pandas as pd
import os

def generate_summary():
    file_path = '许可事项三位一体对比表_v2.xlsx'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return

    df = pd.read_excel(file_path)
    
    # Debug: Print the exact column names to handle any hidden chars or spaces
    cols = df.columns.tolist()
    status_col = [c for c in cols if '状态' in c][0]
    client_req_col = '是否客户要求'
    received_col = '是否已有物理文件'
    in_db_col = '是否已入库'
    name_col = '系统需求事项名称'

    total = len(df)
    is_client_req = (df[client_req_col] == '是').sum()
    has_file = (df[received_col] == '√').sum()
    is_in_db = (df[in_db_col] == '√').sum()
    
    summary = []
    summary.append("=" * 50)
    summary.append("许可事项三位一体对比统计")
    summary.append("=" * 50)
    summary.append(f"总计处理事项: {total}")
    summary.append(f"客户要求事项: {is_client_req}")
    summary.append(f"已接收物理文件: {has_file}")
    summary.append(f"数据库已入库: {is_in_db}")
    summary.append("-" * 50)
    
    status_counts = df[status_col].value_counts()
    summary.append("状态分布:")
    for status, count in status_counts.items():
        summary.append(f"  - {status}: {count}")
    
    summary.append("-" * 50)
    
    # Detail some important groups
    missing_all = df[df[status_col] == "缺失 (无文件无数据)"]
    if not missing_all.empty:
        summary.append(f"\n缺失 (无文件无数据) - 前10项:")
        for name in missing_all[name_col].head(10):
            summary.append(f"  * {name}")
            
    to_import = df[df[status_col] == "待导入 (已有文件)"]
    if not to_import.empty:
        summary.append(f"\n待导入 (已有文件) - 前10项:")
        for name in to_import[name_col].head(10):
            summary.append(f"  * {name}")

    missing_source = df[df[status_col] == "已入库但缺源文件"]
    if not missing_source.empty:
        summary.append(f"\n已入库但缺源文件 - 前10项:")
        for name in missing_source[name_col].head(10):
            summary.append(f"  * {name}")

    report_text = "\n".join(summary)
    print(report_text)
    
    with open('对比简报.txt', 'w', encoding='utf-8') as f:
        f.write(report_text)

if __name__ == "__main__":
    generate_summary()