'Scrapping twitter followers using selenium scroll issue
I have written a script which can scrape followers' usernames. But the issue is I am getting all the usernames in the first attempt but when I try to scroll the page using javascript the page keeps on going down instead of going once and scraping the ids and then going down.Although I am getting date till 34th username but later its just messed up. I am sharing the code here you can use your own username and password to check what is the issue with the code. If you copy paste this code (entering your username and password in the empty string) it will run on your PC completely fine.
import warnings
warnings.filterwarnings('ignore')
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from getpass import getpass
from time import sleep
chrome_options = Options()
driver=webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options)
driver.maximize_window()
website='https://twitter.com/i/flow/login'
driver.get(website)
print('website getting')
sleep(5)
username = driver.find_element_by_xpath('//input[@name="text"]')
username.send_keys('')
print('username running')
username.send_keys(Keys.RETURN)
sleep(3)
password = driver.find_element_by_xpath('//input[@name="password"]')
print('password running')
sleep(2)
password.send_keys('')
password.send_keys(Keys.RETURN)
website='https://twitter.com/MehroozW/followers'
driver.get(website)
print('got it')
import warnings
warnings.filterwarnings('ignore')
import csv
data = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
count = 1
i = 1
while scrolling:
for i in range(1,190):
try:
follower_username = driver.find_element_by_xpath(f'//div[@data-testid="primaryColumn"]/div[1]/section/div[1]/div[1]/div[{count}]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/a/div[1]/div[1]/span').text
print('index', count, follower_username)
data.append(follower_username)
count +=1
sleep(1)
except Exception:
pass
scroll_attempt = 0
while True:
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
sleep(2)
curr_position = driver.execute_script("return window.pageYOffset;")
print('curr_position',curr_position, last_position, scroll_attempt, scrolling)
if last_position == curr_position:
scroll_attempt += 1
# end of scroll region
if scroll_attempt >= 3:
scrolling = False
break
else:
sleep(2) # attempt another scroll
else:
last_position = curr_position
break
data
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
