132 lines
6.3 KiB
Python
132 lines
6.3 KiB
Python
"""
|
|
Deep-inspect the certificate data embedded in PDF signatures.
|
|
"""
|
|
import re
import traceback
from pathlib import Path

import pikepdf
|
|
|
|
# Regular expressions run against the UTF-8-decoded signature payload.
_NAME_PATTERNS = (
    r'(广东产品质量监督检验研究院)',
    r'(广东省?产品质量监督检验)',
    r'(质量监督检验)',
    r'O=([^,\n]+)',  # X.509 Organization field
    r'CN=([^,\n]+)',  # X.509 Common Name field
)

# Institution names whose UTF-8 encoding is searched for in the raw payload.
_TARGET_INSTITUTIONS = (
    "广东产品质量监督检验研究院",
    "广东产品质量监督检验",
    "广东省产品质量监督检验研究院",
)


def _report_payload(data):
    """Print size, a 200-byte preview and name matches for the raw payload."""
    head = data[:200]
    print(f" Certificate data size: {len(data)} bytes")
    print(f" Certificate data (first 200 bytes, hex): {head.hex()}")
    print(f" Certificate data (first 200 bytes, repr): {repr(head)}")

    try:
        # errors='ignore' drops undecodable bytes, so the decode itself does
        # not raise; the handler covers printing the text to a console whose
        # encoding cannot represent it.
        decoded = data.decode('utf-8', errors='ignore')
        print(f" UTF-8 decoded (first 500 chars): {decoded[:500]}")

        for pattern in _NAME_PATTERNS:
            matches = re.findall(pattern, decoded)
            if matches:
                print(f" Pattern '{pattern}' found: {matches}")
    except UnicodeError as e:
        print(f" UTF-8 decode error: {e}")

    # Byte-level search: independent of the lossy decode above.
    for inst in _TARGET_INSTITUTIONS:
        encoded = inst.encode('utf-8')
        position = data.find(encoded)
        if position != -1:
            print(f" FOUND IN CERTIFICATE DATA: {inst}")
            print(f" Encoded bytes: {encoded.hex()}")
            print(f" Position: {position}")


def _report_contents(contents):
    """Convert a signature's ``/Contents`` value to bytes and report on it."""
    print(f" Contents type: {type(contents)}")

    try:
        # Objects exposing __bytes__ are converted; anything else is passed
        # through unchanged so the "NOT bytes" diagnostic below can describe
        # it instead of failing on a private attribute lookup.
        if hasattr(contents, '__bytes__'):
            data = bytes(contents)
        else:
            data = contents

        print(f" Contents bytes type: {type(data)}")

        if not isinstance(data, (bytes, bytearray)):
            print(f" Contents is NOT bytes/bytearray, type: {type(data)}")
            print(f" Contents value: {data}")
            return

        _report_payload(data)
    except Exception as e:
        # Best-effort diagnostics: report and carry on with the other entries.
        print(f" ERROR converting Contents to bytes: {e}")
        traceback.print_exc()


def _report_text_entry(label, text):
    """Print a textual signature entry, its length and its UTF-8 bytes."""
    print(f" {label}: '{text}' (length: {len(text)})")
    if not text:
        return
    try:
        print(f" {label} bytes: {text.encode('utf-8')}")
    except UnicodeEncodeError as e:
        # Previously swallowed silently; say why the bytes line is missing.
        print(f" {label} bytes: <not encodable as UTF-8: {e}>")


def _report_signature(sig_value, index):
    """Print the keys and the Contents/Reason/Location entries of one signature."""
    print(f"Signature #{index}:")
    print(f" Keys: {list(sig_value.keys())}")

    if '/Contents' in sig_value:
        _report_contents(sig_value.Contents)
    if '/Reason' in sig_value:
        _report_text_entry("Reason", str(sig_value.Reason))
    if '/Location' in sig_value:
        _report_text_entry("Location", str(sig_value.Location))

    print()


def inspect_certificate_data(pdf_path, max_signatures=3):
    """Print a diagnostic dump of the signature fields found in a PDF.

    Opens ``pdf_path`` with pikepdf, walks the top-level ``/AcroForm``
    ``/Fields`` array and, for each field whose ``/FT`` is ``/Sig`` and that
    has a ``/V`` value, prints the value's keys, a preview of its
    ``/Contents`` bytes (labelled "Certificate data" in the output), any
    institution-name matches, and the ``/Reason`` and ``/Location`` entries.

    Args:
        pdf_path: Path of the PDF file to inspect.
        max_signatures: Maximum number of signature fields to report;
            scanning stops once this many have been printed. Defaults to 3.

    Returns:
        None. All results are written to standard output.

    Any exception raised while opening or walking the document is caught,
    printed as ``ERROR: ...`` together with its traceback, and not re-raised.
    """
    banner = '=' * 80
    print(f"\n{banner}")
    print(f"INSPECTING: {Path(pdf_path).name}")
    print(f"{banner}\n")

    try:
        with pikepdf.open(pdf_path) as pdf:
            if '/AcroForm' not in pdf.Root:
                return
            acroform = pdf.Root.AcroForm
            if '/Fields' not in acroform:
                return

            sig_count = 0
            for field in acroform.Fields:
                is_signed = '/FT' in field and field.FT == '/Sig' and '/V' in field
                if not is_signed:
                    continue

                sig_count += 1
                if sig_count > max_signatures:  # only inspect the first few signatures
                    break

                # Signatures are labelled starting from 0 in the output.
                _report_signature(field.V, sig_count - 1)
    except Exception as e:
        # Top-level boundary of a diagnostic tool: report and keep going so
        # the caller can move on to the next file.
        print(f"ERROR: {e}")
        traceback.print_exc()
|
|
|
|
def main(pdf_paths=None):
    """Inspect each PDF in turn, then print a completion banner.

    Args:
        pdf_paths: Iterable of PDF paths to inspect. When omitted (``None``),
            the two bundled sample PDFs under ``src/test/resources/data/pdfs``
            are used, which is what the script did before this parameter
            existed.

    Returns:
        None. All results are written to standard output.
    """
    if pdf_paths is None:
        pdf_paths = [
            "src/test/resources/data/pdfs/YDQ25_002294.pdf",
            "src/test/resources/data/pdfs/YDQ23_001838.pdf",
        ]

    for pdf_path in pdf_paths:
        inspect_certificate_data(pdf_path)

    banner = "=" * 80
    print("\n" + banner)
    print("INSPECTION COMPLETE")
    print(banner)
|
|
|
|
# Run the inspection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|