diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index ee75438..4975099 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -33,6 +33,7 @@ jobs: - name: Set Chrome directory in path run: | CHROME_DIR=$(dirname "${{ steps.setup_chrome.outputs.chrome-path }}") + echo "CHROME_DIR: $CHROME_DIR" echo "$CHROME_DIR" >> $GITHUB_PATH - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 diff --git a/examples/get_sp500.mp.py b/examples/get_sp500.mp.py index 13e46a2..1739e72 100755 --- a/examples/get_sp500.mp.py +++ b/examples/get_sp500.mp.py @@ -42,6 +42,7 @@ def process_tickers(tickers, proxy): proxy=proxy, ) + logging.info(f"Processing tickers: {tickers}") results = [] @@ -61,17 +62,18 @@ def process_tickers(tickers, proxy): results.append((f"Ticker: {ticker}", valuations, financials)) + stock.driver.quit() return results # End of process_tickers def initializer(): """ensure the parent proc's database connections are not touched in the new connection pool""" engine.dispose(close=False) # Use ProcessPoolExecutor to process tickers in parallel max_workers = 4 # Adjust max_workers as needed -chunk_size = 50 +chunk_size = 8 ticker_chunks = [sp500_tickers[i:i + chunk_size] for i in range(0, len(sp500_tickers), chunk_size)] with ProcessPoolExecutor(max_workers=max_workers, initializer=initializer) as executor: diff --git a/tests/test_stocks.py b/tests/test_stocks.py index 043ed49..a70eb18 100755 --- a/tests/test_stocks.py +++ b/tests/test_stocks.py @@ -7,9 +7,27 @@ import logging import pandas as pd +from selenium import webdriver +import undetected_chromedriver as uc + +# For Chrome driver +from webdriver_manager.chrome import ChromeDriverManager + +# For Firefox driver +from webdriver_manager.firefox import GeckoDriverManager + # Configure logging 
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +# undetected_chromedriver does not seem to work properly when user_multi_procs is set to True, +# so create one driver here with user_multi_procs=False to initialize the driver environment +nouse_driver = uc.Chrome( + version_main=126, + use_subprocess=True, + user_multi_procs=False, + service=webdriver.ChromeService(ChromeDriverManager(driver_version='126').install()), +) + def test_stocks(): logging.info("Starting test_stocks")