Downloading multiple files from an Amazon partners website
I run a Python script daily to download several files from an Amazon Partner server. These files contain information about sales "made under my own special link" (Amazon Affiliates).
The script usually runs perfectly, but it is not efficient: the Amazon website is an authenticated server, and I am authenticating on every single download (instead of authenticating once and then performing several downloads, as a person would do).
How can I adjust my script to make it smarter?
# Standard library
import gzip
from dataclasses import replace  # NOTE(review): unused in this script — confirm before removing
from datetime import date, datetime, timedelta
from os import makedirs, mkdir, path, remove

# Third party
from bs4 import BeautifulSoup
from requests import Session, get
from requests.auth import HTTPDigestAuth
#dados de conexão
url = 'https://assoc-datafeeds-na.amazon.com/datafeed/listReports'
domain = 'https://assoc-datafeeds-na.amazon.com/datafeed/'
user, pw = "XXXXXXX", "YYYYY"
auth = HTTPDigestAuth(user, pw)
ws = get(url, auth=auth)
soup = BeautifulSoup(ws.text, 'html.parser')
#Global variables
Basefolder = path.dirname(path.realpath(__file__))
DtAtualização = str(date.today()).replace('-','.')
Newfolder = 'Data Download ' + DtAtualização
fullfolder = path.join(Basefolder,Newfolder)
#Download folder check
if path.exists(fullfolder) == False :
mkdir (fullfolder)
#Website scrapping for links
for link in soup.find_all('a'):
url = link.get('href')
#Setup file name
nome_do_arquivo = url.replace('getReport?filename=XXXX','')
#Setup file data (bounty, orders e ernings)
tipo_de_arquivo = (nome_do_arquivo.split('-')[0]).lower()
#setup file format
comprimento_da_lista = len(nome_do_arquivo.split('-'))
formatotemp = nome_do_arquivo.split('-')[comprimento_da_lista-1]
formato = formatotemp.split('.')[1]
#Setup file date
datatemp = nome_do_arquivo.split('-')[comprimento_da_lista-1]
datatemp2 = datatemp.split('.')[0]
dia = int(datatemp2[-2:])
mês = int(datatemp2[4:6])
ano = int(datatemp2[:4])
data = date (ano,mês,dia)
datalimite = (date.today() - timedelta(days=61))
#Select save folder by data type (Bounty, Earnings ou Orders)
fullfolderAjustado = path.join(fullfolder, tipo_de_arquivo)
if path.exists(fullfolderAjustado) == False :
mkdir (fullfolderAjustado)
#Check if I will download such file
if ((tipo_de_arquivo in ['earnings','orders','bounty']) and (formato == 'xml' and (data >= datalimite)) ):
url = (domain+url)
Tamanho_nome = len(nome_do_arquivo)
nome_do_arquivo_ajustado = nome_do_arquivo[0:(Tamanho_nome-3)]
nomecompleto = path.join(fullfolderAjustado, nome_do_arquivo)
nomecompletoajustado = path.join(fullfolderAjustado, nome_do_arquivo_ajustado)
#Download and save
with open(nomecompleto, 'wb') as gzfile:
response = get(url, allow_redirects=True, auth=auth)
gzfile.write(response.content)
gzfile.close()
#Unzip file
with open(nomecompletoajustado, 'wb') as xmlfile:
unziped = gzip.open(nomecompleto,'rb').read()
xmlfile.write(unziped)
xmlfile.close()
print (nomecompletoajustado)
#Deleta o arquivo compactado
remove (nomecompleto)
continue
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
