Downloading multiple files from an Amazon partners website
I run a Python script daily to download several files from an Amazon Partner server. These files contain information about sales "made under my own special link" (Amazon Affiliates).
The script usually runs perfectly, but it is not efficient: the Amazon website is an authenticated server, and I am authenticating on every single download (instead of authenticating once and then performing several downloads, as a person would do).
How can I adjust my script to make it smarter?
# Standard library
import gzip
from dataclasses import replace  # NOTE(review): unused in this script — confirm before removing
from datetime import date, datetime, timedelta
from os import makedirs, mkdir, path, remove

# Third party
from bs4 import BeautifulSoup
from requests import Session, get
from requests.auth import HTTPDigestAuth
#dados de conexão
url = 'https://assoc-datafeeds-na.amazon.com/datafeed/listReports'
domain = 'https://assoc-datafeeds-na.amazon.com/datafeed/'
user, pw = "XXXXXXX", "YYYYY"
auth = HTTPDigestAuth(user, pw)
ws = get(url, auth=auth)
soup = BeautifulSoup(ws.text, 'html.parser')
#Global variables
Basefolder = path.dirname(path.realpath(__file__))
DtAtualização = str(date.today()).replace('-','.')
Newfolder = 'Data Download ' + DtAtualização
fullfolder = path.join(Basefolder,Newfolder)
#Download folder check
if path.exists(fullfolder) == False :
mkdir (fullfolder)
#Website scrapping for links
for link in soup.find_all('a'):
url = link.get('href')
#Setup file name
nome_do_arquivo = url.replace('getReport?filename=XXXX','')
#Setup file data (bounty, orders e ernings)
tipo_de_arquivo = (nome_do_arquivo.split('-')[0]).lower()
#setup file format
comprimento_da_lista = len(nome_do_arquivo.split('-'))
formatotemp = nome_do_arquivo.split('-')[comprimento_da_lista-1]
formato = formatotemp.split('.')[1]
#Setup file date
datatemp = nome_do_arquivo.split('-')[comprimento_da_lista-1]
datatemp2 = datatemp.split('.')[0]
dia = int(datatemp2[-2:])
mês = int(datatemp2[4:6])
ano = int(datatemp2[:4])
data = date (ano,mês,dia)
datalimite = (date.today() - timedelta(days=61))
#Select save folder by data type (Bounty, Earnings ou Orders)
fullfolderAjustado = path.join(fullfolder, tipo_de_arquivo)
if path.exists(fullfolderAjustado) == False :
mkdir (fullfolderAjustado)
#Check if I will download such file
if ((tipo_de_arquivo in ['earnings','orders','bounty']) and (formato == 'xml' and (data >= datalimite)) ):
url = (domain+url)
Tamanho_nome = len(nome_do_arquivo)
nome_do_arquivo_ajustado = nome_do_arquivo[0:(Tamanho_nome-3)]
nomecompleto = path.join(fullfolderAjustado, nome_do_arquivo)
nomecompletoajustado = path.join(fullfolderAjustado, nome_do_arquivo_ajustado)
#Download and save
with open(nomecompleto, 'wb') as gzfile:
response = get(url, allow_redirects=True, auth=auth)
gzfile.write(response.content)
gzfile.close()
#Unzip file
with open(nomecompletoajustado, 'wb') as xmlfile:
unziped = gzip.open(nomecompleto,'rb').read()
xmlfile.write(unziped)
xmlfile.close()
print (nomecompletoajustado)
#Deleta o arquivo compactado
remove (nomecompleto)
continue
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
