How to download a higher resolution image from the wiki API with Python?
I modified my script and added the ability to download photos from Wikipedia. The script scrapes the photo URL from the article's main image. The problem is that the URL it gets points to a low-resolution thumbnail, and I would like the larger version (the size you get after clicking on the image). Example:
- Wikipedia page URL: https://pl.wikipedia.org/wiki/Zamek_Kr%C3%B3lewski_na_Wawelu
- URL downloaded by my script: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Royal_Castle%2C_Wawel_Hill%2C_4_Wawel%2C_Old_Town%2C_Krak%C3%B3w%2C_Poland.jpg/240px-Royal_Castle%2C_Wawel_Hill%2C_4_Wawel%2C_Old_Town%2C_Krak%C3%B3w%2C_Poland.jpg
- URL after clicking on the main photo: https://upload.wikimedia.org/wikipedia/commons/e/ea/Royal_Castle%2C_Wawel_Hill%2C_4_Wawel%2C_Old_Town%2C_Krak%C3%B3w%2C_Poland.jpg
As you can see, the image is larger after clicking, and that is the link I would like to download. Comparing the two URLs, the only difference appears to be the `/thumb/` path segment and the trailing `240px-` component (see the sketch after the snippet below). I've been struggling with this code for two days and can't fix it. Does anyone know what I'm doing wrong? Below are the call and my script.

Calling:

```bash
python fetch.py --directory output/england/Krakow --rating 4 --reviews 5000 --operator i --query "Zamek Królewski na Wawelu"
```
The code that is responsible for downloading the image URL:
```python
# Image URL for the Wikipedia page
from bs4 import BeautifulSoup

if url:
    urladress = url
    soup = BeautifulSoup(requests.get(urladress).text, 'html.parser')
    imglinks = soup.find_all('a', attrs={'class': 'image'})[0]
    for img in imglinks.find_all('img'):
        wiki_link = img['src'].replace('//', 'https://')
        if wiki_link is not None:
            img_link = wiki_link
        else:
            img_link = -1
```
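For reference, comparing the thumbnail `src` with the full-size URL above suggests the original file lives one directory up, without the `/thumb/` segment and without the size-prefixed final component. A minimal sketch of that mapping, assuming the standard Wikimedia thumbnail URL layout (`thumb_to_original` is just an illustrative name, not part of my script):

```python
import re

def thumb_to_original(thumb_url):
    """Map a Wikimedia thumbnail URL to the original-file URL.

    Assumes the standard layout:
        .../commons/thumb/<h1>/<h2>/<file>/<NNN>px-<file>
    which corresponds to the original at:
        .../commons/<h1>/<h2>/<file>
    """
    if '/thumb/' not in thumb_url:
        return thumb_url  # already an original-file URL
    # Drop the '/thumb' segment and the trailing '<NNN>px-<file>' component.
    return re.sub(r'/thumb/(.+)/[^/]+$', r'/\1', thumb_url)

# The 240px thumbnail from the example maps to the full-size URL:
print(thumb_to_original(
    'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/'
    'Royal_Castle%2C_Wawel_Hill%2C_4_Wawel%2C_Old_Town%2C_Krak%C3%B3w%2C_Poland.jpg/'
    '240px-Royal_Castle%2C_Wawel_Hill%2C_4_Wawel%2C_Old_Town%2C_Krak%C3%B3w%2C_Poland.jpg'
))
```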
Script:
```python
#!/usr/bin/env python
import csv
import json
import pathlib
import operator
import requests
import argparse
import configparser
import re
import wikipedia

# Read the API key from the configuration.
config = configparser.ConfigParser()
config.read('secrets.ini')

API_KEY = ''  # add your Google API key

PLACES_TYPES = ['park', 'point_of_interest', 'establishment', 'museum', 'library', 'church', 'art_gallery', 'political']

# Search query operators.
OPERATORS = {
    'i': operator.and_,
    'lub': operator.or_
}

def fetch_place_detail(place_id):
    place_raw = requests.get(f'https://maps.googleapis.com/maps/api/place/details/json?placeid={place_id}&key={API_KEY}')
    try:
        return json.loads(place_raw.text)['result']
    except KeyError:
        raise KeyError("Index 'result' does not exist")

# Add parameters for the search query.
parser = argparse.ArgumentParser()
parser.add_argument('--query', type=str, help='Search query for Google Maps API')
parser.add_argument('--directory', type=str, help='Output directory')
parser.add_argument('--rating', type=float, help='Minimum rating of the place(s)')
parser.add_argument('--reviews', type=int, help='Minimum review count of the place(s)')
parser.add_argument('--operator', default='and', choices=OPERATORS.keys(), type=str,
                    help='Operation to perform between ratings and reviews count.')
parser.add_argument('--exclude', '-e', choices=PLACES_TYPES, nargs='+', type=str,
                    help='Exclude the places from the query result')
parser.add_argument('--language', default='pl', choices=['pl', 'fr', 'de'], type=str,
                    help='Language of the Wikipedia link')
parser.add_argument('--summary-length', type=int,
                    help='Limit the number of sentences in place summary.')
args = parser.parse_args()

# Fetch the data.
places = requests.get(f'https://maps.googleapis.com/maps/api/place/textsearch/json?query={args.query}&language=pl&key={API_KEY}')

# Convert the response to a JSON object.
places = json.loads(places.text)['results']
if not places:
    raise Exception(f'No results found for query: {args.query}')

# Create the directory if it doesn't exist.
pathlib.Path(args.directory).mkdir(parents=True, exist_ok=True)

# Make the filename more readable, as this will appear as the layer title in Google My Maps.
query = args.query.split(' ')
filename = ' '.join([q.capitalize() for q in query])

# Set Wikipedia language.
wikipedia.set_lang(args.language)

columns = ['name', 'coordinates', 'types', 'rating', 'formatted_address', 'photo_reference', 'summary', 'url', 'reviews', 'img_link']
with open(args.directory + f'/{filename}.csv', 'w', encoding='utf-8') as out_file:
    writer = csv.writer(out_file, delimiter='|')
    writer.writerow(columns)
    for place in places:
        name = place['name']
        formatted_address = place['formatted_address']
        types = place['types']
        if 'photo_reference' in place:
            photo_reference = place['photo_reference']
        else:
            photo_reference = -1
        if 'user_ratings_total' in place:
            reviews = place['user_ratings_total']
        else:
            reviews = -1
        if 'rating' in place:
            rating = place['rating']
        else:
            rating = -1
        try:
            if args.summary_length:
                wiki_page = wikipedia.page(name, sentences=args.summary_length)
            else:
                wiki_page = wikipedia.page(name)
            url = wiki_page.url
            summary = wiki_page.summary.replace('\n', '')
        except KeyboardInterrupt:
            exit(-1)
        except:
            url, summary = '', ''

        # Image URL for the Wikipedia page
        from bs4 import BeautifulSoup
        if url:
            urladress = url
            soup = BeautifulSoup(requests.get(urladress).text, 'html.parser')
            imglinks = soup.find_all('a', attrs={'class': 'image'})[0]
            for img in imglinks.find_all('img'):
                wiki_link = img['src'].replace('//', 'https://')
                if wiki_link is not None:
                    img_link = wiki_link
                else:
                    img_link = -1
        #else:
        #    img_link = wiki_link

        # If the item type is on the exclude list, skip it.
        if args.exclude:
            if list(set(args.exclude) & set(types)):
                continue

        # If an item doesn't satisfy the rating and review count criteria, skip it.
        if args.rating and args.reviews:
            rating = place['rating']
            if not OPERATORS[args.operator](rating >= args.rating, reviews >= args.reviews):
                continue
        elif args.rating:
            if not rating >= args.rating:
                continue
        elif args.reviews:
            if not reviews >= args.reviews:
                continue

        lat, lng = place['geometry']['location']['lat'], place['geometry']['location']['lng']
        data = [name, (lat, lng), ', '.join(types), rating, formatted_address, photo_reference, summary, url, reviews, img_link]
        print(f'{filename} -> {data}')
        writer.writerow(data)
```
Solution 1:[1]
You can use `wiki_page.images[0]` to extract the image URL.
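A minimal sketch of that approach, assuming the same `wikipedia` PyPI package the script already imports (note that the order of `.images` is not guaranteed to put the page's lead image first):

```python
import wikipedia

wikipedia.set_lang('pl')
wiki_page = wikipedia.page('Zamek Królewski na Wawelu')

# WikipediaPage.images lists full-resolution file URLs for the page,
# so no HTML scraping or thumbnail rewriting is needed.
img_link = wiki_page.images[0] if wiki_page.images else -1
print(img_link)
```

In the script above, this would replace the whole BeautifulSoup block: inside the existing `try`, after fetching `wiki_page`, set `img_link = wiki_page.images[0]`, falling back to `-1` when the list is empty.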
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | ZorkNo |
