# report-detect/archive/crt_tests/inspect_certificate_data.py
# 132 lines · 6.3 KiB · Python

"""
In-depth inspection of the certificate data embedded in PDF signatures.
"""
import pikepdf
import re
from pathlib import Path
# Regexes run against the lossily UTF-8-decoded /Contents bytes: three
# institution-name variants plus the O= / CN= distinguished-name fields.
_INSTITUTION_PATTERNS = (
    r'(广东产品质量监督检验研究院)',
    r'(广东省?产品质量监督检验)',
    r'(质量监督检验)',
    r'O=([^,\n]+)',  # X.509 Organization field
    r'CN=([^,\n]+)',  # X.509 Common Name field
)

# Institution names whose UTF-8 encoding is searched for in the raw bytes.
_TARGET_INSTITUTIONS = (
    "广东产品质量监督检验研究院",
    "广东产品质量监督检验",
    "广东省产品质量监督检验研究院",
)


def _report_pattern_matches(contents_bytes):
    """Decode the bytes as UTF-8 (dropping invalid sequences) and print regex hits."""
    try:
        decoded = contents_bytes.decode('utf-8', errors='ignore')
        print(f" UTF-8 decoded (first 500 chars): {decoded[:500]}")
        for pattern in _INSTITUTION_PATTERNS:
            matches = re.findall(pattern, decoded)
            if matches:
                print(f" Pattern '{pattern}' found: {matches}")
    except Exception as e:
        # Best-effort step: report the problem and let the raw-byte search run.
        print(f" UTF-8 decode error: {e}")


def _report_known_institutions(contents_bytes):
    """Print every target institution whose UTF-8 encoding occurs in the raw bytes."""
    for inst in _TARGET_INSTITUTIONS:
        encoded = inst.encode('utf-8')
        # A single find() answers both "is it present?" and "where?".
        position = contents_bytes.find(encoded)
        if position != -1:
            print(f" FOUND IN CERTIFICATE DATA: {inst}")
            print(f" Encoded bytes: {encoded.hex()}")
            print(f" Position: {position}")


def _dump_signature_contents(contents):
    """Print type, size, hex/repr previews and name matches for a /Contents value.

    Any exception raised while converting or scanning is caught and printed
    together with its traceback, so the caller can carry on with the
    remaining entries of the signature.
    """
    import traceback

    print(f" Contents type: {type(contents)}")
    try:
        # The pikepdf object has to be converted to bytes before inspection.
        if hasattr(contents, '__bytes__'):
            contents_bytes = bytes(contents)
        else:
            # Fallback: try direct access to the wrapped value.
            # NOTE(review): `_obj` is a private attribute; confirm it exists
            # on the pikepdf version in use.
            contents_bytes = contents._obj
        print(f" Contents bytes type: {type(contents_bytes)}")

        if not isinstance(contents_bytes, (bytes, bytearray)):
            print(f" Contents is NOT bytes/bytearray, type: {type(contents_bytes)}")
            print(f" Contents value: {contents_bytes}")
            return

        print(f" Certificate data size: {len(contents_bytes)} bytes")
        print(f" Certificate data (first 200 bytes, hex): {contents_bytes[:200].hex()}")
        print(f" Certificate data (first 200 bytes, repr): {repr(contents_bytes[:200])}")
        _report_pattern_matches(contents_bytes)
        _report_known_institutions(contents_bytes)
    except Exception as e:
        print(f" ERROR converting Contents to bytes: {e}")
        traceback.print_exc()


def _dump_text_entry(sig_value, key):
    """Print the string form, length and UTF-8 bytes of ``sig_value``'s ``/key`` entry."""
    if f'/{key}' not in sig_value:
        return
    text = str(getattr(sig_value, key))
    print(f" {key}: '{text}' (length: {len(text)})")
    if text:
        try:
            print(f" {key} bytes: {text.encode('utf-8')}")
        except UnicodeEncodeError as e:
            # Only encoding failures are expected here (e.g. lone surrogates);
            # say so instead of silently swallowing every exception.
            print(f" {key} bytes: <not encodable as UTF-8: {e}>")


def inspect_certificate_data(pdf_path, *, max_signatures=3):
    """Print a diagnostic dump of the signature fields found in a PDF.

    Opens ``pdf_path`` with pikepdf and walks ``/AcroForm`` -> ``/Fields``.
    For every field that has ``/FT == /Sig`` and a ``/V`` entry it prints the
    keys of the signature value, a dump of ``/Contents`` (size, hex/repr
    previews, institution-name matches) and the ``/Reason`` and ``/Location``
    entries when present.

    Args:
        pdf_path: Path of the PDF file to inspect.
        max_signatures: Maximum number of signature fields to dump; iteration
            stops once this many have been printed. Defaults to 3.

    Returns:
        None. All results are written to standard output. Any exception
        raised while opening or walking the PDF is caught and printed with
        its traceback; nothing is re-raised.
    """
    import traceback

    print(f"\n{'='*80}")
    print(f"INSPECTING: {Path(pdf_path).name}")
    print(f"{'='*80}\n")

    try:
        with pikepdf.open(pdf_path) as pdf:
            if '/AcroForm' not in pdf.Root:
                return
            acroform = pdf.Root.AcroForm
            if '/Fields' not in acroform:
                return

            sig_count = 0
            for field in acroform.Fields:
                if not ('/FT' in field and field.FT == '/Sig' and '/V' in field):
                    continue
                sig_count += 1
                if sig_count > max_signatures:  # only inspect the first few signatures
                    break

                sig_value = field.V
                # Labels are zero-based: the first signature prints as "#0".
                print(f"Signature #{sig_count - 1}:")
                print(f" Keys: {list(sig_value.keys())}")

                if '/Contents' in sig_value:
                    _dump_signature_contents(sig_value.Contents)
                _dump_text_entry(sig_value, 'Reason')
                _dump_text_entry(sig_value, 'Location')
                print()
    except Exception as e:
        print(f"ERROR: {e}")
        traceback.print_exc()
def main():
    """Inspect each hard-coded sample PDF in turn, then print a closing banner."""
    rule = "=" * 80
    sample_pdfs = (
        "src/test/resources/data/pdfs/YDQ25_002294.pdf",
        "src/test/resources/data/pdfs/YDQ23_001838.pdf",
    )

    for sample in sample_pdfs:
        inspect_certificate_data(sample)

    # Closing banner: blank line, rule, message, rule.
    print(f"\n{rule}")
    print("INSPECTION COMPLETE")
    print(rule)
# Script entry point: run the inspection only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()