# Goal: print the job description for each job in the results. Problem: the
# description element is never found (see the job-description XPath below).
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# --- Browser setup -------------------------------------------------------
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')            # needed in containers/CI
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(options=chrome_options)

try:
    # Google search whose results page includes the jobs widget.
    url = 'https://www.google.com/search?q=jobs+"director"+OR+"consultant"+OR+"analyst"+AND+"improvement"+OR+"change"+OR+"innovation"+OR+"power+platform"+OR+"implementation"+AND+Calgary'
    driver.get(url)

    # Open the "Jobs" panel.
    location_button = driver.find_element(By.ID, 'fMGJ3e')
    location_button.click()

    # Each <li> is one job card in the results list.
    list_items = driver.find_elements(By.CSS_SELECTOR, 'ul > li')
    hrefs = []
    # Job boards we prefer to link to, checked in this order.
    priority_sites = ("linkedin", "indeed", "ziprecruiter")

    for item in list_items:
        # NOTE(review): clicking re-renders parts of the page; later
        # iterations may hit stale elements — confirm against the live site.
        job_name = item.text
        print(job_name + ":\n")
        item.click()
        time.sleep(2)  # let the details pane load

        # Scroll until the carousel fragment no longer contains a <ul>.
        while True:
            raw_html = driver.find_element(
                By.XPATH,
                '/html/body/div[2]/div/div[2]/div[1]/div/div/div[3]/div[2]/div/div[1]/div/div/g-scrolling-carousel/div[1]/div',
            )
            soup = BeautifulSoup(raw_html.get_attribute("innerHTML"), "html.parser")
            # find(), not find_next(): search inside the parsed fragment.
            if soup.find("ul") is None:
                break
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        # Absolute links that look job-related.  startswith("http") already
        # matches "https", so one prefix test is enough.
        item_links = [
            a["href"]
            for a in soup.find_all("a", href=True)
            if a["href"].startswith("http") and "job" in a["href"]
        ]

        # Print links, stopping at the first one from a priority job board.
        for link in item_links:
            print(link)
            if any(site in link for site in priority_sites):
                break

        # BUG FIX: the original XPath ended in '/text()', which selects a
        # text NODE — Selenium's find_element can only return elements, so
        # the lookup always raised and the bare except reported "not found".
        # Select the <span> element itself and read its .text instead.
        try:
            job_description_element = driver.find_element(
                By.XPATH,
                '//*[@id="gws-plugins-horizon-jobs__job_details_page"]/div/div[4]/span',
            )
            print(job_description_element.text)
        except NoSuchElementException:
            print("Job description not found.")

        # Accumulate this card's links into the global list.
        hrefs.extend(item_links)
        print("\n")
finally:
    driver.quit()  # always release the browser, even on errors