"""
Deep-inspect the certificate data embedded in PDF signatures.

For each signed signature field found in a PDF's AcroForm, this script dumps
the signature dictionary keys, a preview of the raw ``/Contents`` bytes, any
institution names or X.509-style ``O=`` / ``CN=`` fragments that can be
recognised in those bytes, and the ``/Reason`` and ``/Location`` entries.

Usage:
    python <this script> [PDF ...]

When no PDF paths are given on the command line, the built-in sample
documents listed in ``DEFAULT_TEST_PDFS`` are inspected.
"""
import re
import sys
import traceback
from itertools import islice
from pathlib import Path

import pikepdf

# Sample documents inspected when no explicit paths are supplied.
DEFAULT_TEST_PDFS = (
    "src/test/resources/data/pdfs/YDQ25_002294.pdf",
    "src/test/resources/data/pdfs/YDQ23_001838.pdf",
)

# Number of signature fields inspected per document unless overridden.
DEFAULT_MAX_SIGNATURES = 3

# Regular expressions searched for in the UTF-8 decoded signature contents.
_NAME_PATTERNS = (
    r'(广东产品质量监督检验研究院)',
    r'(广东省?产品质量监督检验)',
    r'(质量监督检验)',
    r'O=([^,\n]+)',  # X.509 Organization field
    r'CN=([^,\n]+)',  # X.509 Common Name field
)

# Institution names searched for as raw UTF-8 byte sequences.
_TARGET_INSTITUTIONS = (
    "广东产品质量监督检验研究院",
    "广东产品质量监督检验",
    "广东省产品质量监督检验研究院",
)


def _iter_signature_fields(pdf):
    """Yield top-level AcroForm fields that are signature fields with a value."""
    if '/AcroForm' not in pdf.Root:
        return
    acroform = pdf.Root.AcroForm
    if '/Fields' not in acroform:
        return
    for field in acroform.Fields:
        if '/FT' in field and field.FT == '/Sig' and '/V' in field:
            yield field


def _contents_to_bytes(contents):
    """Return ``bytes(contents)``, or ``None`` if it has no bytes conversion."""
    # Only call bytes() on objects that define the conversion themselves:
    # bytes() applied to e.g. a plain int would silently build a zero-filled
    # buffer instead of failing.
    if hasattr(contents, '__bytes__'):
        return bytes(contents)
    return None


def _report_text_patterns(contents_bytes):
    """Print a lossy UTF-8 view of the data and any known name patterns in it."""
    # Best effort: printing arbitrary decoded binary may fail on consoles
    # with a restricted output encoding, so a failure here is reported and
    # must not abort the rest of the inspection.
    try:
        decoded = contents_bytes.decode('utf-8', errors='ignore')
        print(f" UTF-8 decoded (first 500 chars): {decoded[:500]}")
        for pattern in _NAME_PATTERNS:
            matches = re.findall(pattern, decoded)
            if matches:
                print(f" Pattern '{pattern}' found: {matches}")
    except Exception as e:
        print(f" UTF-8 decode error: {e}")


def _report_target_institutions(contents_bytes):
    """Print each target institution whose UTF-8 encoding occurs in the data."""
    for inst in _TARGET_INSTITUTIONS:
        encoded = inst.encode('utf-8')
        position = contents_bytes.find(encoded)
        if position == -1:
            continue
        print(f" FOUND IN CERTIFICATE DATA: {inst}")
        print(f" Encoded bytes: {encoded.hex()}")
        print(f" Position: {position}")


def _inspect_contents(contents):
    """Dump the raw bytes of a signature's /Contents entry and scan them."""
    contents_bytes = _contents_to_bytes(contents)
    if contents_bytes is None:
        print(f" Contents is NOT bytes/bytearray, type: {type(contents)}")
        print(f" Contents value: {contents}")
        return

    print(f" Contents bytes type: {type(contents_bytes)}")
    print(f" Certificate data size: {len(contents_bytes)} bytes")
    print(f" Certificate data (first 200 bytes, hex): {contents_bytes[:200].hex()}")
    print(f" Certificate data (first 200 bytes, repr): {contents_bytes[:200]!r}")
    _report_text_patterns(contents_bytes)
    _report_target_institutions(contents_bytes)


def _print_text_entry(sig_value, key, label):
    """Print a textual signature entry (e.g. /Reason) and its UTF-8 bytes."""
    if key not in sig_value:
        return
    text = str(sig_value[key])
    print(f" {label}: '{text}' (length: {len(text)})")
    if not text:
        return
    try:
        print(f" {label} bytes: {text.encode('utf-8')}")
    except UnicodeError as e:
        # E.g. lone surrogates cannot be encoded; say so instead of hiding it.
        print(f" {label} bytes: <not encodable as UTF-8: {e}>")


def _inspect_signature(index, sig_value):
    """Print everything this script reports about one signature dictionary."""
    print(f"Signature #{index}:")
    print(f" Keys: {list(sig_value.keys())}")

    if '/Contents' in sig_value:
        contents = sig_value.Contents
        print(f" Contents type: {type(contents)}")
        # A failure while dumping the contents must not prevent the
        # /Reason and /Location entries below from being reported.
        try:
            _inspect_contents(contents)
        except Exception as e:
            print(f" ERROR converting Contents to bytes: {e}")
            traceback.print_exc()

    _print_text_entry(sig_value, '/Reason', 'Reason')
    _print_text_entry(sig_value, '/Location', 'Location')
    print()


def inspect_certificate_data(pdf_path, max_signatures=DEFAULT_MAX_SIGNATURES):
    """Print a diagnostic dump of the signatures found in one PDF.

    Only fields listed directly in the AcroForm's ``/Fields`` array are
    examined. Errors are reported on stdout (with a traceback on stderr)
    rather than raised, so one unreadable document does not stop a batch run.

    Args:
        pdf_path: Path of the PDF file to inspect.
        max_signatures: Maximum number of signature fields to inspect;
            ``None`` inspects all of them. Defaults to 3.

    Returns:
        None. All results are printed.
    """
    print(f"\n{'='*80}")
    print(f"INSPECTING: {Path(pdf_path).name}")
    print(f"{'='*80}\n")

    try:
        with pikepdf.open(pdf_path) as pdf:
            signed_fields = islice(_iter_signature_fields(pdf), max_signatures)
            # Signatures are numbered from 0 in the output.
            for index, field in enumerate(signed_fields):
                _inspect_signature(index, field.V)
    except Exception as e:
        print(f"ERROR: {e}")
        traceback.print_exc()


def main(pdf_paths=None):
    """Inspect each PDF in *pdf_paths* (default: ``DEFAULT_TEST_PDFS``)."""
    paths = DEFAULT_TEST_PDFS if pdf_paths is None else pdf_paths
    for pdf_path in paths:
        inspect_certificate_data(pdf_path)

    print("\n" + "="*80)
    print("INSPECTION COMPLETE")
    print("="*80)


if __name__ == "__main__":
    # Paths given on the command line override the built-in sample list.
    main(sys.argv[1:] or None)