Webscraping with Selenium using Python on Linux EC2 pulls up a blank webpage
Running the Python code below on AWS Linux returns a blank webpage. The same code works flawlessly locally (on a Mac). Running it on EC2 in headless mode throws the error:
Traceback (most recent call last): File "zero_connect_EC2.py", line 93, in <module>
autologin() File "zero_connect_EC2.py", line 58, in autologin
username = driver.find_element(By.XPATH,'/html/body/div[1]/div/div[2]/div[1]/div/div/div[2]/form/div[1]/input') File "/home/ec2-user/.local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 1246, in find_element
'value': value})['value'] File "/home/ec2-user/.local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 424, in execute
self.error_handler.check_response(response) File "/home/ec2-user/.local/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 247, in check_response
raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div[1]/div/div[2]/div[1]/div/div/div[2]/form/div[1]/input"} (Session info: headless chrome=98.0.4758.102) Stacktrace:
#0 0x562cdd18bb33 <unknown>
#1 0x562cdcc546d8 <unknown>
#2 0x562cdcc8a6f1 <unknown>
#3 0x562cdcc8a8b1 <unknown>
#4 0x562cdccbd574 <unknown>
#5 0x562cdcca808d <unknown>
#6 0x562cdccbb2fb <unknown>
#7 0x562cdcca7f53 <unknown>
#8 0x562cdcc7da0a <unknown>
#9 0x562cdcc7ead5 <unknown>
#10 0x562cdd1bd2fd <unknown>
#11 0x562cdd1d64bb <unknown>
#12 0x562cdd1bf0d5 <unknown>
#13 0x562cdd1d7145 <unknown>
#14 0x562cdd1b2aaf <unknown>
#15 0x562cdd1f3ba8 <unknown>
#16 0x562cdd1f3d28 <unknown>
#17 0x562cdd20e48d <unknown>
#18 0x7f72b11bd44b <unknown>
How best to get it to load the webpage itself?
from kiteconnect import KiteConnect
from selenium import webdriver
import time
import os
from pyotp import TOTP
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import webbrowser
cwd = os.getcwd()
def autologin():
    """Log in to Kite through headless Chrome and save the request token.

    Reads whitespace-separated credentials from ``api_key.txt`` in ``cwd``
    (index 0 is used as the API key, 2 as the user id, 3 as the password
    and 4 as the TOTP secret), submits the login and TOTP forms, then
    writes the first 32 characters following ``request_token=`` in the
    redirect URL to ``request_token.txt`` in ``cwd``.

    The browser session and the chromedriver service are always shut down,
    even when a step fails.

    Raises:
        selenium.common.exceptions.NoSuchElementException: a form element
            was not found within the implicit wait.
        selenium.common.exceptions.TimeoutException: the browser did not
            reach a URL containing ``request_token=`` in time.
    """
    token_path = os.path.join(cwd, "api_key.txt")
    # Close the credentials file deterministically instead of leaking it.
    with open(token_path, 'r') as key_file:
        key_secret = key_file.read().split()
    kite = KiteConnect(api_key=key_secret[0])

    options = webdriver.ChromeOptions()
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--headless')
    options.add_argument('user-agent=Chrome/98.0.4758.102')
    options.add_argument("disable-blink-features=AutomationControlled")
    options.add_argument('--remote-debugging-port=9222')

    # All form controls live under the same <form>; only the tail differs.
    form_xpath = '/html/body/div[1]/div/div[2]/div[1]/div/div/div[2]/form'

    service = webdriver.chrome.service.Service('/usr/bin/chromedriver')
    service.start()
    try:
        driver = webdriver.Remote(service.service_url, options=options)
        try:
            # Applies to every find_element call below.
            driver.implicitly_wait(10)
            driver.get(kite.login_url())

            # Dump the page so a blank/blocked response is visible in logs.
            element_text = driver.page_source
            print(element_text)

            username = driver.find_element(By.XPATH, form_xpath + '/div[1]/input')
            password = driver.find_element(By.XPATH, form_xpath + '/div[2]/input')
            username.send_keys(key_secret[2])
            password.send_keys(key_secret[3])
            driver.find_element(By.XPATH, form_xpath + '/div[4]/button').click()

            totp = driver.find_element(By.XPATH, form_xpath + '/div[2]/div/input')
            totp.send_keys(TOTP(key_secret[4]).now())
            driver.find_element(By.XPATH, form_xpath + '/div[3]/button').click()

            # The implicit wait does not cover current_url, so wait for the
            # redirect explicitly before parsing the token out of it.
            WebDriverWait(driver, 10).until(EC.url_contains('request_token='))
            request_token = driver.current_url.split('request_token=')[1][:32]
            with open(os.path.join(cwd, 'request_token.txt'), 'w') as the_file:
                the_file.write(request_token)

            # Pause kept from the original script before closing the browser.
            time.sleep(20)
        finally:
            driver.quit()
    finally:
        # Stop the chromedriver process started above.
        service.stop()
autologin()
Trying something similar with google.com and just printing the page source returns a blank webpage as well.
from kiteconnect import KiteConnect
from selenium import webdriver
import time
import os
from pyotp import TOTP
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
cwd = os.getcwd()
def autologin():
    """Load google.com in headless Chrome and print its page source.

    Minimal reproduction: starts chromedriver from ``/usr/bin/chromedriver``,
    opens ``https://www.google.com``, waits, and prints the page source
    between the ``Hello World`` and ``Success`` markers.

    The browser session and the chromedriver service are always shut down,
    even when loading the page fails.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--headless')

    service = webdriver.chrome.service.Service('/usr/bin/chromedriver')
    service.start()
    try:
        driver = webdriver.Remote(service.service_url, options=options)
        try:
            driver.implicitly_wait(10)
            driver.get('https://www.google.com')
            # Fixed pause kept from the original repro before reading the DOM.
            time.sleep(10)
            print('Hello World')
            element_text = driver.page_source
            print(element_text)
            print('Success')
        finally:
            driver.quit()
    finally:
        # Stop the chromedriver process started above.
        service.stop()
autologin()
Solution 1:[1]
To close this question: the issue was solved by creating a completely new EC2 instance and running the same script there.
I put it down to the order in which chromedriver and Google Chrome were installed. I followed the instructions here the second time around.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |
