# Professional Documents
# Culture Documents
import io
import os
import re
import pdfminer.high_level
from reportlab.pdfgen import canvas
import re
# Patterns are compiled once at import time instead of on every call.
_PHONE_RE = re.compile(r"\b\d{10}\b")
_EMAIL_RE = re.compile(r"\S+@\S+\.\S+")


def _compile_name_pattern(names):
    """Return a case-insensitive regex matching any of *names* as a whole name.

    Each name is escaped so that regex metacharacters in it are matched
    literally.  The surrounding lookarounds reject matches that are glued to
    other letters, so "Apple" does not fire inside "pineapple" and "Mary"
    does not fire inside "Maryland".  Digits, underscores and punctuation
    next to a name do not block the match ("John_Smith", "Jane's").
    """
    alternatives = "|".join(re.escape(name) for name in names)
    return re.compile(
        r"(?<![A-Za-z])(?:" + alternatives + r")(?![A-Za-z])",
        re.IGNORECASE,
    )


_ORG_NAME_RE = _compile_name_pattern(("Google", "Microsoft", "Apple"))
_EMP_NAME_RE = _compile_name_pattern(("Jane", "John", "Mary"))


def redact_pii(text):
    """Return *text* with known kinds of PII replaced by placeholders.

    Substitutions are applied in this order:

    1. Stand-alone runs of exactly 10 digits (phone numbers) -> ``"xxxxxx"``.
       Formatted numbers such as ``123-456-7890`` are not recognised.
    2. Email-like tokens (``something@something.something``) -> ``"yyyyyy"``.
    3. Organization names (Google, Microsoft, Apple; case-insensitive)
       -> ``"ORGANIZATION_NAME"``.
    4. Employee names (Jane, John, Mary; case-insensitive)
       -> ``"EMPLOYEE_NAME"``.

    Emails are redacted before names so that an address such as
    ``jane@google.com`` collapses to a single placeholder rather than being
    partially rewritten by the name rules.

    Args:
        text: The string to redact.

    Returns:
        The redacted string.
    """
    text = _PHONE_RE.sub("xxxxxx", text)
    text = _EMAIL_RE.sub("yyyyyy", text)
    text = _ORG_NAME_RE.sub("ORGANIZATION_NAME", text)
    return _EMP_NAME_RE.sub("EMPLOYEE_NAME", text)