UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 63: invalid continuation byte

I am trying to download a batch of emails for a friend who set up a web form that sends survey responses directly to their email instead of into a spreadsheet. The problem is that I can download every response of one type (people signing up to act as mentors) but none of the other type (people who wish to be mentees). When I try to download any email from a mentee applicant, I get the following error:

$ ./Email_Get_0.1.py 
Do you want to search for Mentor or Mentee forms? (enter: Mentor / Mentee) 
mentee
Traceback (most recent call last):
  File "/Users/tmac/Programing/AccelAi/Mentor-Matching/Email Script/./Email_Get_0.1.py", line 75, in <module>
    download_emails(MENTEE)
  File "/Users/tmac/Programing/AccelAi/Mentor-Matching/Email Script/./Email_Get_0.1.py", line 64, in download_emails
    output_file.write("Date: %s\n\nBody: \n\n%s" %(local_message_date, body.decode('utf-8')))
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 63: invalid continuation byte

Any idea what is missing here, why it works for mentors but not mentees, or what I can do to get around this and get these emails downloaded? Thanks for the help!

#!/usr/bin/env python3
"""Download form-submission emails over IMAP and save each plain-text body to a .txt file."""
import email, datetime, imaplib, os, re, html2text

# Gmail account credentials used for the IMAP login.
# NOTE(review): hard-coded secrets; consider reading them from the environment
# or a config file that is kept out of version control.
EMAIL_UN = '######################'
EMAIL_PW = '#######################'
# Subject lines used as the IMAP SUBJECT search text for each kind of form.
MENTOR = 'Form Submission - Mentor Application'
MENTEE = 'Form Submission - Mentee Application'

# Working directory captured once, when the module is loaded.
cwd = os.getcwd()
# IMAP search criterion limiting results to messages sent on or after this
# date (start of the current applicant pool); adjust it for later runs.
sentSince = "SENTSINCE 01-Jan-2022"

def download_emails(SUBJECT):
    """Save the plain-text body of every matching email to a text file.

    Logs in to Gmail over IMAP, selects the "Mentoring Program" mailbox and
    searches it for messages that satisfy ``sentSince`` and whose subject
    contains ``SUBJECT``. For each ``text/plain`` part of each hit, a file
    named ``<subject>_<date>.txt`` is written (relative to the current
    working directory) containing the message date and the decoded body.

    Args:
        SUBJECT: Text to look for in the Subject header (IMAP SUBJECT search).

    Raises:
        Exception: If the server answers anything other than ``OK`` to the
            SELECT, SEARCH or FETCH commands.
        imaplib.IMAP4.error: On login failure or other protocol errors.
    """
    # Import the submodules explicitly: a bare ``import email`` does not
    # guarantee that ``email.header`` and ``email.utils`` are loaded.
    from email.header import decode_header, make_header
    from email.utils import mktime_tz, parsedate_tz

    url = 'imap.gmail.com'
    # Mailbox (Gmail label) to search; quoted because the name has a space.
    folder = '"Mentoring Program"'
    # See the IMAP SEARCH command (RFC 3501) for other usable criteria.
    complexSearch = sentSince + " Subject " + "\"" + SUBJECT + "\""

    m = imaplib.IMAP4_SSL(url, 993)
    try:
        m.login(EMAIL_UN, EMAIL_PW)

        resp, data = m.select(folder)
        if resp != 'OK':
            raise Exception("Error selecting folder {}: {}".format(folder, data))

        resp, items = m.search(None, complexSearch)
        if resp != 'OK':
            raise Exception("Error searching emails: {}".format(items))

        # The search result is one space-separated byte string of message ids.
        for emailid in items[0].split():
            # "(RFC822)" fetches the complete raw message.
            resp, data = m.fetch(emailid, "(RFC822)")
            if resp != 'OK':
                raise Exception("Error reading email: {}".format(data))

            # Parse the raw bytes directly. Decoding the whole message as
            # UTF-8 first fails on any message using another 8-bit charset.
            email_message = email.message_from_bytes(data[0][1])

            # Header details
            subject = str(make_header(decode_header(email_message.get('Subject', ''))))
            date_tuple = parsedate_tz(str(email_message.get('Date', '')))
            if date_tuple:
                local_date = datetime.datetime.fromtimestamp(mktime_tz(date_tuple))
                local_message_date = local_date.strftime("%d %b %Y %H:%M:%S")
            else:
                # Missing or unparseable Date header: fall back to the message
                # id so the file name is still defined and unique.
                local_message_date = "unknown-date-{}".format(emailid.decode('ascii'))

            file_name = subject + "_" + local_message_date + ".txt"
            # A path separator in the subject would make open() look for a
            # directory that does not exist.
            for sep in filter(None, (os.sep, os.altsep)):
                file_name = file_name.replace(sep, "_")

            # Body details
            for part in email_message.walk():
                if part.get_content_type() != "text/plain":
                    continue
                body = part.get_payload(decode=True)
                if body is None:
                    continue

                # Honour the charset declared by the part. If it is missing,
                # unknown or wrong, fall back to Latin-1, which maps every
                # byte to a character and therefore cannot fail.
                charset = part.get_content_charset() or 'utf-8'
                try:
                    text = body.decode(charset)
                except (LookupError, UnicodeDecodeError):
                    text = body.decode('latin-1')

                # NOTE: several text/plain parts in one message overwrite
                # each other, because they share a single file name.
                with open(file_name, 'w', encoding='utf-8') as output_file:
                    output_file.write("Date: %s\n\nBody: \n\n%s" % (local_message_date, text))
    finally:
        # Best-effort cleanup; a failed logout must not hide the real error.
        try:
            m.logout()
        except OSError:
            pass

if __name__ == '__main__':
    # Ask which kind of application to fetch; the answer is matched
    # without regard to letter case.
    choice = input("Do you want to search for Mentor or Mentee forms? (enter: Mentor / Mentee) \n").upper()
    if choice not in ('MENTOR', 'MENTEE'):
        # Anything else: show the usage hint and do nothing.
        print('Usage: ./download_emails.py @ prompt enter Mentor or Mentee:')
    else:
        download_emails(MENTOR if choice == 'MENTOR' else MENTEE)


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source