Selenium Selector is inconsistent: why does it return all options sometimes and not other times?
I apologize ahead of time for the length of this question but I want to give enough context.
I've been running in circles trying to figure out why this is happening. I'm indexing all the dropdown values of a bookstore and have done so successfully for 'departments' and 'course_nums' but when I try to do the same thing for 'sections' relative to the 'course_nums' it returns some of the lists of sections and fails to return others. I've seen alternative methods of getting options from Selector on Stack and Documentation but I've had no success with these methods.
When a dropdown is selected, the option elements in the HTML get an additional attribute called data-select2-id, for example:
<option value="001" data-select2-id="703">001</option>
So I've tried to just use Selector without first clicking on the dropdown (by commenting out the DriverWait before the Selector in fill_sections()) but this does not work although the element is present in the DOM.
When run, it will sometimes return the corresponding course sections and other times an empty list of sections, but each course number should have at least 1 section. As I reviewed the automated input it's as if it goes too fast on some course numbers which might cause it to miss fetching all options, but I'm not sure. I'm stumped because this works for every other fetch for the departments and course numbers relative to the department.
Fair warning: let it run until it prints the arrays. Otherwise, when you hit Control-C, it goes into an infinite loop for some reason, and I honestly don't know why.
Upon request from the comments, here is the entire script relevant to indexing the departments, course_nums, and sections...
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from pprint import pprint
import sys
import time
import os
# Module-level setup: target URL, Chrome options, the shared WebDriver instance
# and the containers the functions below work with.
# path = '/usr/local/bin/chromedriver'
# sys.path.append(path)
URL = "https://gmu.bncollege.com/course-material/course-finder"
# page = requests.get(URL)
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument("--no-sandbox")
# options.add_argument("--disable-extensions")
# NOTE(review): `options` is never passed to webdriver.Chrome() below, so the
# headless setting and the --no-sandbox argument are not applied to this driver.
# Presumably `options=options` was intended - confirm before changing, because
# it would switch the browser to headless mode.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
# Filled by fill_departments(); each entry later gains a 'courses' list.
departments = []
# NOTE(review): fill_course_nums() and fill_sections() assign their own local
# lists with these names, so these two module-level lists are not filled by them.
courses = []
sections = []
def fill_departments():
    """Fill the module-level ``departments`` list from the department dropdown.

    Clicks the department dropdown, waits for its search input to be present,
    then reads every ``<option>`` of the underlying ``<select>`` except the
    first one. Each entry is ``{"index": <0-based position>, "department": <text>}``.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        # Only waited for; the input's presence signals the dropdown has opened.
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[1]/input'))
        )
        selector = Select(driver.find_element(by=By.XPATH, value=dropdown_xpath + '/select'))
        # Skip the first option; positions are renumbered from 0 so that
        # entry["index"] matches the entry's position in ``departments``.
        found = [
            {"index": position, "department": option.text}
            for position, option in enumerate(selector.options[1:])
        ]
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    # Publish only a complete result: a failure half-way through reading the
    # options must not leave partial entries that a retry would then duplicate.
    departments.extend(found)
    return 0
def fill_course_nums(department, index):
    """Store the course numbers of the currently selected department.

    Clicks the course dropdown, waits for its search input to be present, reads
    the options of the underlying ``<select>`` and stores them as
    ``departments[index]['courses']``. Options whose text is ``"Select"`` are
    skipped; each entry is ``{"index": <option position - 1>, "course_num": <text>}``.

    Args:
        department: Not used by this function; kept for existing callers.
        index: Position in the module-level ``departments`` list to update.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        # Only waited for; the input's presence signals the dropdown has opened.
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[1]/input'))
        )
        selector = Select(driver.find_element(by=By.XPATH, value=dropdown_xpath + '/select'))
        found = []
        for position, option in enumerate(selector.options):
            label = option.text  # read once: each access is a browser round-trip
            if label == "Select":
                continue
            found.append({"index": position - 1, "course_num": label})
        departments[index]['courses'] = found
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def fill_sections(dep_index, index):
    """Store the section names of the currently selected course.

    Clicks the section dropdown, waits for its search input to be present, then
    waits (up to 5 seconds) until the underlying ``<select>`` contains at least
    one option other than ``"Select"``. The names are stored as
    ``departments[dep_index]['courses'][index]['sections']``.

    Args:
        dep_index: Position of the department in the module-level ``departments``.
        index: Position of the course in that department's ``'courses'`` list.

    Returns:
        0 on success (including the case where no section appeared in time and
        an empty list was stored), 1 if any step raised.
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[4]/div/div'
    select_xpath = dropdown_xpath + '/select'

    def _read_sections(current_driver):
        # Returns the list of section names; an empty list is falsy, which makes
        # WebDriverWait keep polling until at least one real option exists.
        selector = Select(current_driver.find_element(by=By.XPATH, value=select_xpath))
        labels = (option.text for option in selector.options)
        return [label for label in labels if label != "Select"]

    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        # Only waited for; the input's presence signals the dropdown has opened.
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[1]/input'))
        )
        # Reading the <select> a single time right away can see it before its
        # options have been filled in, which yields an empty list; poll instead.
        # NOTE(review): if the previous course's options are still in the
        # <select> at this point they would be returned - confirm the page
        # clears them when a new course is chosen.
        try:
            names = WebDriverWait(driver, 5).until(_read_sections)
        except TimeoutException:
            # Nothing showed up in time: record what is there (normally an
            # empty list) instead of making the caller retry forever.
            names = _read_sections(driver)
        departments[dep_index]['courses'][index]['sections'] = names
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def select_term(term):
    """Open the term dropdown and click its second list entry.

    Args:
        term: Not used; the entry to click is fixed by position (``li[2]``).
            Kept so existing callers continue to work.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[1]/div/div'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[2]/ul/li[2]'))
        ).click()
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def clear_form():
    """Click the link in the fifth cell of the form row (used to clear the form).

    Returns:
        0 on success, 1 if waiting for or clicking the link raised
        (the caller retries while 1 is returned).
    """
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((
                By.XPATH,
                '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[5]/div/a',
            ))
        ).click()
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def select_department(department):
    """Choose a department by typing it into the dropdown's search input.

    Clicks the department dropdown, waits for its search input, types
    ``department`` and presses ENTER.

    Args:
        department: Text to type into the search input.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[2]/div/div'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        search_input = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[1]/input'))
        )
        search_input.send_keys(department)
        search_input.send_keys(Keys.ENTER)
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def select_course(course):
    """Choose a course by typing it into the course dropdown's search input.

    Clicks the course dropdown, waits for its search input, types ``course``
    and presses ENTER.

    Args:
        course: Text to type into the search input.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    dropdown_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[2]/div[2]/div[3]/div/div'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, dropdown_xpath))
        ).click()
        search_input = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, dropdown_xpath + '/span[2]/span/span[1]/input'))
        )
        search_input.send_keys(course)
        search_input.send_keys(Keys.ENTER)
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
def select_campus_info():
    """Open the campus dropdown and click its third list entry.

    Returns:
        0 on success, 1 if any step raised (the caller retries while 1 is returned).
    """
    campus_cell_xpath = '/html/body/main/div[3]/div[2]/div/div/div/div[4]/div[2]/form/div/div[1]'
    try:
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH, campus_cell_xpath + '/span'))
        ).click()
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(
                (By.XPATH, campus_cell_xpath + '/span[2]/span/span[2]/ul/li[3]'))
        ).click()
    except Exception:
        # Exception rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) is not swallowed and turned into another retry.
        return 1
    return 0
How these methods are run in main()
def main():
    """Index departments, their course numbers and (for the first three) sections.

    Pass 1 selects each department in turn and records its course numbers,
    stopping after the department at position 3. Pass 2 revisits the first
    three departments and records the sections of each of their courses.
    Every step is retried until the helper reports success (returns 0).

    NOTE(review): the original indentation of this function was lost. The
    nesting used here (term re-selected once between the two passes, form
    cleared once per department in pass 2) is a reconstruction - confirm.
    """
    start = time.time()
    try:
        driver.get("https://gmu.bncollege.com/course-material/course-finder")
        while select_campus_info():
            print('selecting campus')
        while select_term("spring"):
            print('selecting term')
        while fill_departments():
            print("filling departments")

        # Pass 1: course numbers per department.
        for dep, entry in enumerate(departments):
            if dep != 0:
                # The form was cleared at the end of the previous iteration.
                while select_term("spring"):
                    print('selecting term')
            while select_department(entry['department']):
                print("trying (dep)")
            while fill_course_nums(entry['department'], entry['index']):
                print("filling courses")
            while clear_form():
                print("clearing")
            # break here after 3 to limit filling all courses for debugging purposes
            if dep >= 3:
                break

        while select_term("spring"):
            print('selecting term')

        # Pass 2: sections per course. Slicing instead of range(0, 3) avoids an
        # IndexError when fewer than three departments were found.
        for dep, entry in enumerate(departments[:3]):
            if dep != 0:
                while select_term("spring"):
                    print('selecting term')
            while select_department(entry['department']):
                print("trying (dep)")
            for course, course_entry in enumerate(entry['courses']):
                while select_course(course_entry['course_num']):
                    print("trying (cnum)")
                while fill_sections(dep, course):
                    print('filling (sections)')
            while clear_form():
                print('clearing form.')

        for entry in departments[:3]:
            pprint(entry)
        # fill_textbook_info('spring', 'CS', 310, '002')
        # curUrl = driver.current_url
        # print(curUrl)

        # Report the elapsed time before the pause below so that the figure
        # reflects the scraping work only.
        end = time.time()
        print(end - start)
        # Keep the browser open for a while so the page can be inspected.
        time.sleep(100)
    finally:
        # Always end the browser session, including on errors and Ctrl-C.
        driver.quit()


if __name__ == "__main__":
    main()
Solution 1:[1]
I ran your code, and the usual problem was that the code runs too fast - it needs time.sleep() in some places, especially after sending ENTER (the code uses 1 second there because 0.5 seconds was too short).
I am posting the full code because I organized it differently (I use nested for-loops) and I used different XPaths (I tried to make them shorter and similar across the different functions).
I also put all the functions at the beginning, in the order in which they are used in main().
I also don't use a global departments list; instead, the function returns a list and I assign it to a local variable. Later I do the same with courses and sections.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from pprint import pprint
import time
# --- functions ---
def select_campus(driver, word="Tech"):
    """Open the campus dropdown and click the list entry containing ``word``.

    Returns True on success; on any exception the error is printed and
    False is returned.
    """
    print('[select_campus] start')
    try:
        dropdown_locator = (By.XPATH, '//div[@class="bned-campus-select"]//span[@class="selection"]/span')
        dropdown = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(dropdown_locator)
        )
        dropdown.click()

        entry_locator = (By.XPATH, f'//li[contains(text(), "{word}")]')
        entry = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(entry_locator)
        )
        entry.click()

        # Give the page's JavaScript time to build the next `<select>`.
        time.sleep(0.5)
    except Exception as error:
        print('[select_campus] Exception:', error)
        return False
    else:
        return True
def select_term(driver, term):
    """Open the term dropdown and click the list entry containing ``term``.

    Pauses half a second after each click. Returns True on success; on any
    exception the error is printed and False is returned.
    """
    print('[select_term] start')
    succeeded = True
    try:
        dropdown_locator = (By.XPATH, '//div[contains(@class, "term")]//span[@class="selection"]/span')
        dropdown = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(dropdown_locator)
        )
        dropdown.click()
        time.sleep(0.5)  # let JavaScript render the list of terms

        entry_locator = (By.XPATH, f'//div[contains(@class, "term")]//li[contains(text(), "{term}")]')
        entry = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(entry_locator)
        )
        entry.click()
        time.sleep(0.5)  # let JavaScript build the next `<select>`
    except Exception as error:
        print('[select_term] Exception:', error)
        succeeded = False
    return succeeded
def get_all_departments(driver):
    """Return the department options as ``{"index", "department"}`` dicts.

    Reads the options of the second ``<select>`` in the second table row,
    skips the first option and numbers the remaining ones from 1. On any
    exception the error is printed and whatever was collected so far is
    returned.
    """
    print('[get_all_departments] start')
    collected = []
    try:
        # select [2]
        option_elements = driver.find_elements(
            By.XPATH,
            '((//div[@role="table"]//div[@role="row"])[2]//select)[2]//option',
        )
        collected.extend(
            {"index": number, "department": element.text}
            for number, element in enumerate(option_elements[1:], start=1)
        )
        # NOTE(review): the original indentation was lost; this pause is taken
        # to follow the loop rather than run once per option - confirm.
        time.sleep(0.5)
    except Exception as error:
        print('[get_all_departments] Exception:', error)
    return collected
def select_department(driver, department):
    """Choose ``department`` by typing it into the department search box.

    Clicks the department dropdown, pauses, types the name into the input and
    presses ENTER, then pauses one second. Returns True on success; on any
    exception the error is printed and False is returned.
    """
    print('[select_department] start')
    try:
        dropdown_locator = (By.XPATH, '(//div[@role="table"]//div[@role="row"])[2]//div[contains(@class, "department")]//span[@class="selection"]/span')
        dropdown = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(dropdown_locator)
        )
        dropdown.click()
        time.sleep(0.5)  # let JavaScript render the search input

        input_locator = (By.XPATH, '(//div[@role="table"]//div[@role="row"])[2]//div[contains(@class, "department")]//input')
        search_box = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(input_locator)
        )
        for keys in (department, Keys.ENTER):
            search_box.send_keys(keys)
        time.sleep(1)  # let JavaScript build the next `<select>`
    except Exception as error:
        print('[select_department] Exception:', error)
        return False
    else:
        return True
def get_all_courses(driver):
    """Return the course options as ``{"index", "course"}`` dicts.

    Reads the options of the third ``<select>`` in the second table row,
    skips the first option and numbers the remaining ones from 1. On any
    exception the error is printed and whatever was collected so far is
    returned.
    """
    print('[get_all_courses] start')
    courses = []
    try:
        # select [3]
        all_options = driver.find_elements(By.XPATH, '((//div[@role="table"]//div[@role="row"])[2]//select)[3]//option')
        for index, option in enumerate(all_options[1:], 1):
            # The key must be "course": main() reads course['course'] when it
            # selects each course, and the former "section" key made that
            # lookup raise KeyError.
            item = {"index": index, "course": option.text}
            courses.append(item)
    except Exception as ex:
        print('[get_all_courses] Exception:', ex)
    return courses
def select_course(driver, course):
    """Choose ``course`` by typing it into the course search box.

    Clicks the course dropdown, types the value into the input and presses
    ENTER, then pauses one second. Returns True on success; on any exception
    the error is printed and False is returned.
    """
    print('[select_course] start')
    succeeded = True
    try:
        dropdown_locator = (By.XPATH, '(//div[@role="table"]//div[@role="row"])[2]//div[contains(@class, "course")]//span[@class="selection"]/span')
        WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(dropdown_locator)
        ).click()

        input_locator = (By.XPATH, '(//div[@role="table"]//div[@role="row"])[2]//div[contains(@class, "course")]//input')
        search_box = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located(input_locator)
        )
        search_box.send_keys(course)
        search_box.send_keys(Keys.ENTER)
        time.sleep(1)  # let JavaScript build the next `<select>`
    except Exception as error:
        print('[select_course] Exception:', error)
        succeeded = False
    return succeeded
def get_all_sections(driver):
    """Return the texts of the section options, skipping the first option.

    Reads the options of the fourth ``<select>`` in the second table row. On
    any exception the error is printed and whatever was collected so far is
    returned.
    """
    print('[get_all_sections] start')
    section_names = []
    try:
        # select [4]
        option_elements = driver.find_elements(
            By.XPATH,
            '((//div[@role="table"]//div[@role="row"])[2]//select)[4]//option',
        )
        # extend() with a generator appends item by item, so names read before
        # a failure are kept, just as with an explicit append loop.
        section_names.extend(element.text for element in option_elements[1:])
    except Exception as error:
        print('[get_all_sections] Exception:', error)
    return section_names
def clear_form(driver):
    """Click the ``js-clear-row`` link and pause one second.

    Returns True on success; on any exception the error is printed and
    False is returned.
    """
    print('[clear_form] start')
    try:
        clear_link = WebDriverWait(driver, 1).until(
            EC.presence_of_element_located((By.XPATH,'//a[@class="js-clear-row"]'))
        )
        clear_link.click()
        time.sleep(1)  # let JavaScript reset the form elements
    except Exception as error:
        print('[clear_form] Exception:', error)
        return False
    else:
        return True
def main():
    """Scrape courses and sections for the first four departments and print them.

    Starts Chrome, selects campus and term, reads the department list, then for
    each of the first four departments reads its courses and, for every course,
    its sections. Progress is printed along the way.

    NOTE(review): the original indentation of this function was lost; the
    nesting of the progress prints is a reconstruction - confirm.
    """
    URL = "https://gmu.bncollege.com/course-material/course-finder"

    options = webdriver.ChromeOptions()
    #options.headless = True
    #options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    # options.add_argument("--disable-extensions")

    # Pass the options to the driver; previously they were built and then
    # discarded, so --no-sandbox never reached Chrome.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    try:
        driver.maximize_window()

        start = time.time()

        driver.get(URL)
        select_campus(driver)
        select_term(driver, "Spring")

        departments = get_all_departments(driver)
        print('departments:')
        pprint(departments[:4])

        # Only the first 4 departments, to limit the run while debugging.
        for dep in departments[:4]:
            print(dep)
            select_department(driver, dep['department'])
            print(dep)

            dep['courses'] = get_all_courses(driver)
            print('departments:')
            pprint(departments[:4])

            for course in dep['courses']:
                select_course(driver, course['course'])
                course['sections'] = get_all_sections(driver)
                print('departments:')
                pprint(departments[:4])

            #clear_form(driver)  # DON'T use it

        # --- display ---

        for dep in departments[:4]:
            pprint(dep)

        end = time.time()
        print('time:', end - start)

        input('Press ENTER to close')  # to keep open browser and check elements in DevTools
    finally:
        # Always end the browser session, including on errors and Ctrl-C.
        driver.quit()


if __name__ == "__main__":
    main()
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | furas |
