You are on page 1 of 1

# import fitz # PyMuPDF

# import pandas as pd
#
# # Open the PDF file
# pdf_path = 'example.pdf'
# pdf_document = fitz.open(pdf_path)
#
# # Initialize an empty list to store extracted text
# text_data = []
#
# # Iterate through each page and extract text
# for page_number in range(len(pdf_document)):
#     page = pdf_document.load_page(page_number)
#     text = page.get_text()
#     # Split text into rows by newline characters and then split each row into
#     # columns by tab characters
#     rows = [row.strip().split('\t') for row in text.strip().split('\n')]
#     text_data.extend(rows)
#
# # Convert extracted text into a DataFrame
# df = pd.DataFrame(text_data)
#
# # Export DataFrame to Excel
# df.to_excel('output.xlsx', index=False, header=False)
#
# # Close the PDF document
# pdf_document.close()

You might also like