Iterate Opening a List of URLs with Selenium

I am having trouble iterating the action of opening a list containing URLs using Selenium. The issue is in the part labeled #Second Part in my code. linklinkfin is a list of length 9 at the moment, but this length can change as more URLs are collected over time. When the code runs, it appears to open the very first URL over and over, and it does not appear to run the append action in the nested while loop, since at the end when I print textreal_listing it is empty. As the code runs I see https://www.nj.gov/dobi/division_insurance/bfd/enforcement2014.htm opening/refreshing continually until the program ends. At the end of each while loop 1 should get added to browsercount and then the code should repeat with the new URL, but this doesn't appear to be occurring. Any ideas?

my code:

#FIRST PART
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
textreal_listing=[]
browser = webdriver.Chrome(r'\\homedirpva1a01\USERSNC$\603225\chromedriver\chromedriver.exe')
time.sleep(5)
browser.get("link")
time.sleep(5)

linkslist=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/a")
linkslist2=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/font/a")
linklinkfin=linkslist+linkslist2

#SECOND PART
textcount=1
textpage=6
browsercount=2014
for i in linklinkfin:
    browser.get("link.htm".format(browsercount))
    time.sleep(2)
    if "404 Error" in browser.page_source:
        browser.get("link.html".format(browsercount))
        time.sleep(2)
        while len(textreal_listing)<100:
            texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
            textreal_listing.append(texttreesing.text)
            textcount+=1
            if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                textpage+=3
                textcount=2
                if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                    break
                browsercount+=1
        else:
            while len(textreal_listing)<100:
                texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
                textreal_listing.append(texttreesing.text)
                textcount+=1
                if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                    textpage+=3
                    textcount=2
                    if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                        break
                browsercount+=1

print(textreal_listing)


Solution 1:[1]

This is the code that worked:

#FIRST PART
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
textreal_listing=[]
browser = webdriver.Chrome(r'\\homedirpva1a01\USERSNC$\603225\chromedriver\chromedriver.exe')
time.sleep(5)
browser.get("https://www.nj.gov/dobi/division_insurance/bfd/enforcement.htm")
time.sleep(5)

linkslist=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/a")
linkslist2=browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[6]/td[1]/table/tbody/tr[2]/td/ul/li/font/font/a")

linktext=[]
for my_href in linkslist:
    linktext.append(my_href.get_attribute("href"))

for my_hrefs in linkslist2:
    linktext.append(my_hrefs.get_attribute("href"))

#SECOND Part
textcount=1
textpage=6
browsercount=2014
for i in linktext:
    browser.get(i)
    time.sleep(5)
    while len(textreal_listing)<100:
        texttreesing=browser.find_element_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount))
        textreal_listing.append(texttreesing.text)
        textcount+=1
        if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
            textpage+=3
            textcount=2
            if len(browser.find_elements_by_xpath("/html/body/div/div/table[2]/tbody/tr/td/table/tbody/tr[2]/td[3]/table/tbody/tr[{}]/td/p[{}]".format(textpage,textcount)))==0:
                break
    
print(textreal_listing)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 ryanb603