'Check/uncheck checkboxes in PDF with Python - pyPDF2 and with data from excel

I have written the code below that automatically fill in one of my pdf templates, getting data form an excel sheet and saves a copy of it in one of my folders. The full code is attached below. I have removed the folders and replaced the dictionary with example texts. And yes, I am a beginner:)

import os
import pandas as pd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import (
    BooleanObject,
    NameObject,
    IndirectObject,
    NumberObject,
)


def set_need_appearances_writer(writer: PdfFileWriter):
    try:
        catalog = writer._root_object
        if "/AcroForm" not in catalog:
            writer._root_object.update(
                {
                    NameObject("/AcroForm"): IndirectObject(
                        len(writer._objects), 0, writer
                    )
                }
            )
        need_appearances = NameObject("/NeedAppearances")
        writer._root_object["/AcroForm"][need_appearances] = BooleanObject(
            True
        )
        return writer

    except Exception as e:
        print("set_need_appearances_writer() catch : ", repr(e))
        return writer


def ReadOnlyPDFOutput(page, fields):
    for j in range(0, len(page["/Annots"])):
        writer_annot = page["/Annots"][j].getObject()
        for field in fields:
            if writer_annot.get("/T") == field:
                writer_annot.update(
                    {
                        NameObject("/Ff"): NumberObject(1),
                        NameObject("/Ff"): NameObject(1),
                    }
                )


if __name__ == "__main__":
    xl_filename = "Example_excel_list"
    pdf_filename = "Example_template"

    xlin = os.path.normpath(
        os.path.join(os.getcwd(), r"C:\.....", xl_filename)
    )
    pdfin = os.path.normpath(
        os.path.join(os.getcwd(), r"C:\.....", pdf_filename)
    )
    pdfout = os.path.normpath(os.path.join(os.getcwd(), r"C:\....."))
    data = pd.read_excel(xlin)
    pdf = PdfFileReader(open(pdfin, "rb"), strict=False)

    if "/AcroForm" in pdf.trailer["/Root"]:
        pdf.trailer["/Root"]["/AcroForm"].update(
            {NameObject("/NeedAppearances"): BooleanObject(True)}
        )

    pdf_fields = [
        str(x) for x in pdf.getFields().keys()
    ]  # List of all pdf field names
    excel_fields = data.columns.tolist()

    i = 0  # Filename numerical prefix
    for j, rows in data.iterrows():
        i += 1
        pdf2 = PdfFileWriter()
        set_need_appearances_writer(pdf2)
        if "/AcroForm" in pdf2._root_object:
            pdf2._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)}
            )

        # Below you must define the field names as keys in this dictionary
        # Field names found by running and printing line 15
        # Key = pdf_field_name : Value = csv_field_value
        field_dictionary_1 = {
            "Example1": str(rows["Example1"]),
            "Example2": rows["Example2"],
            "Example3": rows["Example3"],
            "Example4": rows["Example4"],
            "Example5": rows["Example5"],
            "Example6": rows["Example6"],
            "Checkbox1": rows["Checkbox1"],
            "Checkbox2": rows["Checkbox2"],
        }

        temp_out_dir = os.path.normpath(
            os.path.join(pdfout, str(i) + "out.pdf")
        )

        pdf2.addPage(pdf.getPage(0))  # Makes a copy of pdf template page
        pdf2.updatePageFormFieldValues(
            pdf2.getPage(0), field_dictionary_1
        )  # Updates fields

        # Makes the pdf output file READ-ONLY
        ReadOnlyPDFOutput(pdf2.getPage(0), field_dictionary_1)

        outputStream = open(temp_out_dir, "wb")
        pdf2.write(outputStream)  # Saves copy of enhanced template
        outputStream.close()

    print(f"Process Complete: {i} PDFs Processed!")

What I want to improve is the possibility to "check" the checkboxes instead of doing it with textboxes with the letter "x" which I have done in this code. For example, if I write "Yes" in a specific Excel cell, the checkbox "checks". If nothing is displayed the checkboxes stays empty. I probably have to make a dictionary for the checkboxes itself, but I am not sure how I should proceed with the rest.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

'Check/uncheck checkboxes in PDF with Python - pyPDF2 and with data from excel

Sources

Related Questions