'Check/uncheck checkboxes in PDF with Python - pyPDF2 and with data from excel
I have written the code below that automatically fill in one of my pdf templates, getting data form an excel sheet and saves a copy of it in one of my folders. The full code is attached below. I have removed the folders and replaced the dictionary with example texts. And yes, I am a beginner:)
import os
import pandas as pd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import (
BooleanObject,
NameObject,
IndirectObject,
NumberObject,
)
def set_need_appearances_writer(writer: PdfFileWriter):
try:
catalog = writer._root_object
if "/AcroForm" not in catalog:
writer._root_object.update(
{
NameObject("/AcroForm"): IndirectObject(
len(writer._objects), 0, writer
)
}
)
need_appearances = NameObject("/NeedAppearances")
writer._root_object["/AcroForm"][need_appearances] = BooleanObject(
True
)
return writer
except Exception as e:
print("set_need_appearances_writer() catch : ", repr(e))
return writer
def ReadOnlyPDFOutput(page, fields):
for j in range(0, len(page["/Annots"])):
writer_annot = page["/Annots"][j].getObject()
for field in fields:
if writer_annot.get("/T") == field:
writer_annot.update(
{
NameObject("/Ff"): NumberObject(1),
NameObject("/Ff"): NameObject(1),
}
)
if __name__ == "__main__":
xl_filename = "Example_excel_list"
pdf_filename = "Example_template"
xlin = os.path.normpath(
os.path.join(os.getcwd(), r"C:\.....", xl_filename)
)
pdfin = os.path.normpath(
os.path.join(os.getcwd(), r"C:\.....", pdf_filename)
)
pdfout = os.path.normpath(os.path.join(os.getcwd(), r"C:\....."))
data = pd.read_excel(xlin)
pdf = PdfFileReader(open(pdfin, "rb"), strict=False)
if "/AcroForm" in pdf.trailer["/Root"]:
pdf.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)}
)
pdf_fields = [
str(x) for x in pdf.getFields().keys()
] # List of all pdf field names
excel_fields = data.columns.tolist()
i = 0 # Filename numerical prefix
for j, rows in data.iterrows():
i += 1
pdf2 = PdfFileWriter()
set_need_appearances_writer(pdf2)
if "/AcroForm" in pdf2._root_object:
pdf2._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)}
)
# Below you must define the field names as keys in this dictionary
# Field names found by running and printing line 15
# Key = pdf_field_name : Value = csv_field_value
field_dictionary_1 = {
"Example1": str(rows["Example1"]),
"Example2": rows["Example2"],
"Example3": rows["Example3"],
"Example4": rows["Example4"],
"Example5": rows["Example5"],
"Example6": rows["Example6"],
"Checkbox1": rows["Checkbox1"],
"Checkbox2": rows["Checkbox2"],
}
temp_out_dir = os.path.normpath(
os.path.join(pdfout, str(i) + "out.pdf")
)
pdf2.addPage(pdf.getPage(0)) # Makes a copy of pdf template page
pdf2.updatePageFormFieldValues(
pdf2.getPage(0), field_dictionary_1
) # Updates fields
# Makes the pdf output file READ-ONLY
ReadOnlyPDFOutput(pdf2.getPage(0), field_dictionary_1)
outputStream = open(temp_out_dir, "wb")
pdf2.write(outputStream) # Saves copy of enhanced template
outputStream.close()
print(f"Process Complete: {i} PDFs Processed!")
What I want to improve is the possibility to "check" the checkboxes instead of doing it with textboxes with the letter "x" which I have done in this code. For example, if I write "Yes" in a specific Excel cell, the checkbox "checks". If nothing is displayed the checkboxes stays empty. I probably have to make a dictionary for the checkboxes itself, but I am not sure how I should proceed with the rest.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
