How does one write Python code that merges 2 files created using wkhtmltopdf into 1 PDF file using PyPDF2?

I have an application whose back-end is written in Python and converts HTML files to PDF files. To do this it uses wkhtmltopdf (https://wkhtmltopdf.org/). It currently works perfectly for creating a single PDF file from an HTML file and returning that to the user.

However, I need to be able to create multiple separate PDF files and then merge the files together into a single PDF.

I have been trying to do this using PyPDF2's PdfFileMerger class (https://pythonhosted.org/PyPDF2/PdfFileMerger.html) but haven't been able to get it to work. I keep getting the error: 'bytes' object has no attribute 'seek'

Here is my current code:

def multi_test_sheet(request, equipment_id):
    """Render two test-sheet templates for one equipment record and try to
    return them merged into a single PDF response.

    Args:
        request: The incoming request; ``request.user`` is checked for
            authentication and used to look up ``UserProperties``.
        equipment_id: Value used as the primary key in the ``Equipment`` lookup.

    Returns:
        The rendered ``jobs/login.html`` page when the user is not
        authenticated, otherwise an ``HttpResponse`` with content type
        ``application/pdf``.

    Raises:
        Http404: When the ``job`` check below is falsy.
    """
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    from io import BytesIO
    from PyPDF2 import PdfFileReader, PdfFileMerger
    # NOTE(review): this repeats the authentication check made at the top of the function.
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    equipment = Equipment.objects.filter(pk=equipment_id).first()
    # NOTE(review): `job` is not assigned anywhere in this function; presumably
    # `equipment` was meant -- confirm.
    if not job:
        raise Http404("test sheet error. Error code: get job failed")
    # NOTE(review): PdfFileWriter is not among the names imported above, and
    # pdf_write is not used again in this function.
    pdf_write = PdfFileWriter()
    user_properties=UserProperties.objects.get(user=request.user)
    # Template context shared by both sheet templates.
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": datetime.now().strftime("%b-%d-%Y %H:%M"),
        "now_date": datetime.now().date()
    }
    
    html_sheet = render_to_string('jobs/test_sheet_gear1.html', context)
    html_sheet2 = render_to_string('jobs/test_sheet_gear2.html', context)
    # No output path is given, so the PDF data is returned rather than written
    # to a file (a bytes object, judging by the reported error).
    pdf_content1 = pdfkit.from_string(html_sheet, None) 
    pdf_content2 = pdfkit.from_string(html_sheet2, None) 
    pdfadder = PdfFileMerger(strict=False)
    # The returned PDF data is handed to append() as-is; presumably this is
    # where "'bytes' object has no attribute 'seek'" is raised -- confirm.
    pdfadder.append(pdf_content1)
    pdfadder.append(pdf_content2)
    # NOTE(review): the merger was bound to `pdfadder`; `pdf_adder` (with an
    # underscore) is a different name that is never assigned in this function.
    pdf_adder.write("combined_sheets.pdf")

    response = HttpResponse(pdf_adder, content_type="application/pdf")
    
    response["Content-Disposition"] = f"filename={equipment.site_id}.pdf"

    return response


Solution 1:[1]

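I resolved this by hiring someone. The problem was that the raw bytes returned by pdfkit.from_string() were being passed straight to PyPDF2's PdfFileMerger, which did not recognize them as PDF objects: it expects a file path or a file-like object (something with read() and seek()), not a bytes object.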

To resolve that, save each PDF to disk (I place them in a folder called `interm`) by passing a file path as the second argument of pdfkit.from_string(), then open the newly created files with open() and assign them to separate variables, and finally merge those variables with PdfFileMerger.

def multi_test_sheet(request, equipment_id):
    """Return both test sheets for one piece of equipment as a single PDF.

    Each sheet template is rendered to HTML, converted by pdfkit into its own
    PDF file under ``interm/``, and the two files are then opened and merged
    with PyPDF2 into one document that is sent back in the response.

    Args:
        request: The incoming request; ``request.user`` must be authenticated.
        equipment_id: Primary key of the ``Equipment`` row to print.

    Returns:
        The rendered login page when the user is not authenticated, otherwise
        an ``application/pdf`` response holding the merged document.

    Raises:
        Http404: If no ``Equipment`` with ``equipment_id`` exists.
    """
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})

    from PyPDF2 import PdfFileMerger, PdfFileReader

    equipment = Equipment.objects.filter(pk=equipment_id).first()
    # The lookup above is what can come back empty, so test its result.
    if not equipment:
        raise Http404("test sheet error. Error code: get job failed")

    user_properties = UserProperties.objects.get(user=request.user)
    today = datetime.now()
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": today.strftime("%b-%d-%Y %H:%M"),
        "now_date": today.date(),
    }

    # Prefix every intermediate file with the user's pk so that two users
    # generating sheets at the same time do not overwrite each other's files.
    # NOTE(review): concurrent requests from the same user still share these
    # paths, and the ``interm`` folder is assumed to exist already.
    prefix = f"interm/{user_properties.pk}"
    sheet1_path = f"{prefix}test_sheet_gear1.pdf"
    sheet2_path = f"{prefix}test_sheet_gear2.pdf"
    combined_path = f"{prefix}combined_sheets.pdf"

    # Passing a path as the second argument makes pdfkit write the PDF to
    # disk, giving PyPDF2 real files to read instead of raw bytes.
    pdfkit.from_string(
        render_to_string('jobs/test_sheet_gear1.html', context), sheet1_path
    )
    pdfkit.from_string(
        render_to_string('jobs/test_sheet_gear2.html', context), sheet2_path
    )

    merger = PdfFileMerger(strict=False)
    try:
        # Keep both inputs open until the merged file has been written, then
        # let the context manager close them.
        with open(sheet1_path, 'rb') as sheet1, open(sheet2_path, 'rb') as sheet2:
            merger.append(PdfFileReader(sheet1), import_bookmarks=False)
            merger.append(PdfFileReader(sheet2), import_bookmarks=False)
            merger.write(combined_path)
    finally:
        merger.close()

    # Read back exactly the file that was just written and release the handle
    # before building the response.
    with open(combined_path, 'rb') as combined:
        response = HttpResponse(combined.read(), content_type="application/pdf")

    response["Content-Disposition"] = f"filename={equipment.site_id}.pdf"

    return response

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 albertrw