You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
This is a great project but i am having issues integrating it in my existing code.
I was previously using the UndetectedChromeDriver and would like to replace it with Botasaurus.
The goals are to handle sign-in, get user profiles and complete some user flow (fill forms, upload documents and click buttons).
I have created classes to easily integrate each part in the program.
Here is the code for the helper class
import subprocess
import os
from pathlib import Path
import logging
# from os import path
# import random
from time import sleep
# import undetected_chromedriver as uc
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
# from Tools.Bot.chrome_launcher_adapter import ChromeLauncherAdapter
# from Tools.Bot.create_stealth_driver import create_stealth_driver
from Tools.Bot.chrome_launcher_adapter import ChromeLauncherAdapter
from Tools.Bot.create_stealth_driver import create_stealth_driver
from selenium.webdriver.chrome.options import Options
from chromedriver_autoinstaller import install
from botasaurus import *
# from botasaurus_proxy_authentication import add_proxy_options
logger = logging.getLogger()
# COPIED FROM chrome-launcher code (https://github.com/GoogleChrome/chrome-launcher/blob/main/src/flags.ts), Mostly same but the extensions, media devices etc are not disabled to avoid detection
DEFAULT_FLAGS = [
# safe browsing service, upgrade detector, translate, UMA
"--disable-background-networking",
# Don't update the browser 'components' listed at chrome://components/
"--disable-component-update",
# Disables client-side phishing detection.
"--disable-client-side-phishing-detection",
# Disable syncing to a Google account
"--disable-sync",
# Disable reporting to UMA, but allows for collection
"--metrics-recording-only",
# Disable installation of default apps on first run
"--disable-default-apps",
# Disable the default browser check, do not prompt to set it as such
"--no-default-browser-check",
# Skip first run wizards
"--no-first-run",
# Disable backgrounding renders for occluded windows
"--disable-backgrounding-occluded-windows",
# Disable renderer process backgrounding
"--disable-renderer-backgrounding",
# Disable task throttling of timer tasks from background pages.
"--disable-background-timer-throttling",
# Disable the default throttling of IPC between renderer & browser processes.
"--disable-ipc-flooding-protection",
# Avoid potential instability of using Gnome Keyring or KDE wallet. crbug.com/571003 crbug.com/991424
"--password-store=basic",
# Use mock keychain on Mac to prevent blocking permissions dialogs
"--use-mock-keychain",
# Disable background tracing (aka slow reports & deep reports) to avoid 'Tracing already started'
"--force-fieldtrials=*BackgroundTracing/default/",
# Suppresses hang monitor dialogs in renderer processes. This flag may allow slow unload handlers on a page to prevent the tab from closing.
"--disable-hang-monitor",
# Reloading a page that came from a POST normally prompts the user.
"--disable-prompt-on-repost",
# Disables Domain Reliability Monitoring, which tracks whether the browser has difficulty contacting Google-owned sites and uploads reports to Google.
"--disable-domain-reliability",
]
class BotasaurusChromeHandler:
def __init__(self):
print("💡 ChromeHandler init")
sleep(5)
self._driver = self.launch_chrome("https://ca.yahoo.com/?p=us", [])
create_stealth_driver()
print("✅ UndetectedChromeHandler launched ➡️ (🌈 Google.com)")
def driver(self):
return self._driver
# @browser(profile='Profile 1',)
def launch_chrome(self,start_url, additional_args):
# Set Chrome options
chrome_options = Options(
# headless=True,
# add_argument(r"--user-data-dir=/Users/lifen/Library/Application Support/Google/Chrome/Profile 1"),
)
chrome_options.add_argument("--remote-debugging-port=9222")
# chrome_options.add_argument("--no-sandbox")
# chrome_options.add_argument("--disable-gpu")
# chrome_options.add_argument("--disable-extensions")
# chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--user-data-dir=/Users/lifen/Library/Application Support/Google/Chrome/Profile 1")
# add_proxy_options(chrome_options)
unique_flags = list(dict.fromkeys(DEFAULT_FLAGS + additional_args))
kwargs = {
"ignoreDefaultFlags": True,
"chromeFlags": unique_flags,
"userDataDir": "/Users/MacUser/Library/Application Support/Google/Chrome/Profile 1",
"port": 9222,
"headless": False,
"autoClose": True,
}
if start_url:
kwargs["startingUrl"] = start_url
instance = ChromeLauncherAdapter.launch(**kwargs)
return instance
Where the code is used:
import re
import logging
import random
from time import sleep
from configs.configs_model import ConfigsModel
from helpers.jobs_sql import JobsSQL
from helpers.html_page_handler import HTMLPageHandler
from helpers.shared import notification
from models.job_listing import JobListingModel
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.remote.webelement import WebElement
from helpers.botasaurus_chrome_handler import BotasaurusChromeHandler
from botasaurus import *
logger = logging.getLogger()
class IndeedChromeApplier:
def __init__(self, jobs_sql: JobsSQL, jobs: list):
print(f"💡 IndeedChromeApplier init ")
self.jobs = jobs
self.chrome = BotasaurusChromeHandler()
# self.chrome.driver().maximize_window()
driver = bt.create_driver()
self.driver = driver
self.page = HTMLPageHandler(driver=driver)
self.jobs_sql = jobs_sql
def get_uid(self):
configs = ConfigsModel()
uid = configs.user_id
return uid
# @browser
def check_auth(self):
# driver = self.chrome.driver()
driver = self.driver
driver.get("https://profile.indeed.com/")
sleep(2)
url = driver.current_url
substring = "secure"
print(f"🟢 🔴 {url=}")
if substring in url:
print("❌ Not Logged in")
# Get input of the user to try again after he logs in
notification(
message="Please log in to Indeed.com and try again (y/n): ")
_input = input("Please log in to Indeed.com and try again (y/n): ")
_input: str = "" + _input
if _input.lower().__contains__("y"):
return self.check_auth()
elif _input.lower().__contains__("n"):
return False
else:
sleep(20000)
elif "profile.indeed.com" in url:
print("✅ Logged in")
return True
def answer_questions(self):
# Define a WebDriverWait with a timeout of 10 seconds
wait = WebDriverWait(self.chrome.driver(), 10)
# Wait for the radio button for commuting/relocation to be clickable and select it
try:
commute_option: WebElement = wait.until(
EC.element_to_be_clickable(
(
By.XPATH,
"//label[@for='input-q_38d8e685bb4b5228c2494ac85bc44d69-0']",
)
)
)
commute_option.click()
sleep(random.uniform(0.7, 2.2))
except TimeoutException:
print("Failed to find or click the commute option.")
def replace_resume(self, job_title):
print("⏯️ replace_resume")
is_upload_resume = (
"Upload or build a resume for this application"
in self.chrome.driver().title
)
paths = self.get_paths()
if is_upload_resume:
print("✅ is_upload")
# Find the "Replace" link using the full link text
replace_link = self.page.try_find_element(
driver=self.chrome.driver(),
name="Replace",
by=By.CSS_SELECTOR,
value='[data-testid="ResumeFileInfoCardReplaceButton-button"]',
)
sleep(1)
if replace_link:
print("✅ replace_link")
sleep(1)
# Find the file input element
file_input: WebElement = WebDriverWait(self.chrome.driver(), 10).until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, 'input[type="file"]')
)
)
# Send the file path to the file input element
file_input.send_keys(
f"{paths.output_resumes_pdf_dir}/RalphNduwimana-{job_title}.pdf"
)
sleep(random.uniform(0.9, 1.8))
# self.page.click_to_next_page(name="Continue",by=By.CLASS_NAME,value='ia-continueButton ia-Resume-continue css-vw73h2 e8ju0x51')
notification(message=f"Resume replaced by {job_title}")
self.page.click_to_go_to_page(
name="Continue",
by=By.XPATH,
value="//div[contains(text(), 'Continue')]",
)
def submit_application(self):
print("⏯️ review_application")
notification(message="Reviewing application")
sleep(1.7)
notification(message="No cover letter required!")
submit = self.page.click_to_go_to_page(
name="Submit your application",
by=By.XPATH,
value="//button[contains(@class, 'ia-continueButton')]",
)
if submit:
notification("Application Submitted")
else:
notification("Application Submitted", code=0)
# submit_application_button.click()
# Wait for 2 seconds for the submission to be completed
sleep(2)
# Check if the page contains "Application Submitted"
application_submitted = (
"Application Submitted" in self.chrome.driver().page_source
)
# Check if the submission was completed and return True if "Application Submitted" was found
if application_submitted:
notification("Application submitted successfully!")
return True
else:
print("Application submission failed.")
return False
def click_button(self):
# Logic to click on buttons
pass
def type_text(self):
# Logic to click on buttons
pass
def run(self):
print("⏯️ IndeedChromeApplier run")
driver = self.chrome.driver()
authenticated = self.check_auth()
jobs_row = self.jobs_sql.load_jobs_by_status(query_status="Generated")
jobs_data = [job_row for job_row in jobs_row]
print(f'✅ ✅ {str(jobs_data)[0:200]}')
if authenticated:
for data in jobs_data:
if not data:
print(f'🚫 No Data in jobs_data')
job_data = self.convert_tuple_to_dict(data)
job = JobListingModel(job_data)
url = job.jobUrl
print(f'✅ ✅ ✅ ✅ {job.jobUrl}')
page_loaded = self.page.go_to_page(url)
if not page_loaded:
print(f"🚫 {url} not loaded")
# continue
if page_loaded:
print('✅ page_loaded')
application_started = self.page.click_to_go_to_page(
name="Apply",
by=By.ID,
value="indeedApplyButton",
)
data = re.search(
"This job has expired on Indeed",
driver.page_source,
)
# Get True of False
expired = data is not None
print(f"📕 {expired=}")
# sleep(10000)
sleep(random.uniform(0.2, 0.5))
if not application_started:
print("🚫 Application not started")
sleep(1000)
if "indeed" not in driver.current_url:
print("Cannot apply on company websites (just indeed.com)")
sleep(10000)
pages = {
"questions": False,
"resume": False,
"review": False,
"work-experience": False,
"submitted": False,
}
try:
# there is a page that has not been completed
while (
False
in pages.values()
):
print('')
except NoSuchElementException:
print(
f"❌ Failed to get page ")
def log_in(self, username, password):
print(f"⏯️ Starting log_in {username} {password}")
page = self.page
try:
username_bar = page.try_find_element(
name="username_bar",
by=By.ID,
value="session_key",
driver=self.driver,
)
assert username_bar is not None
username_bar.send_keys(f"{username}")
password_bar = page.try_find_element(
name="password_bar", by=By.ID, value="session_password", driver=self.chrome.driver()
)
assert password_bar is not None
password_bar.send_keys(f"{password}")
password_bar.send_keys(Keys.ENTER)
print("✅ User logged-in")
except NoSuchElementException:
print("No such element found")
except Exception:
print("Other exception")
print(f"⏹️ Finished log_in {username} {password}")
def log_out(self):
url = self.chrome.driver().current_url
print(f"⏯️ Starting log_out from {url}")
xpath = (
"/html/body/div[5]/header/div/nav/ul/li[6]/div/button"
if "Home" in url
else "/html/body/header/div/div[2]/div/div/button"
)
page = self.page
icon_button = page.try_find_element(
driver=self.chrome.driver(),
name="Log-Out",
by=By.XPATH,
value=xpath,
element_type="button",
)
try:
print(f"{icon_button=}")
try:
sign_out_option: WebElement = WebDriverWait(
self.chrome.driver(), 10
).until(EC.presence_of_element_located((By.LINK_TEXT, "Sign Out")))
sign_out_option.click()
print("✅ User logged-out")
except:
print(f"Sign Out not found ")
except:
print("Avatar button not found")
print(f"⏹️ Finished log_out from {url}")
I would appreciate any guidance on how to integrate Botasaurus features in my code.
Thanks in advance!!!
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
This is a great project but i am having issues integrating it in my existing code.
I was previously using the UndetectedChromeDriver and would like to replace it with Botasaurus.
The goals are to handle sign-in, get user profiles and complete some user flow (fill forms, upload documents and click buttons).
I have created classes to easily integrate each part in the program.
Here is the code for the helper class
Where the code is used:
I would appreciate any guidance on how to integrate Botasaurus features in my code.
Thanks in advance!!!
Beta Was this translation helpful? Give feedback.
All reactions