Skip to content

Commit

Permalink
Enlarge retry times to wait download file
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmysitu committed Aug 20, 2024
1 parent 0ff6af1 commit 9179503
Showing 1 changed file with 43 additions and 45 deletions.
88 changes: 43 additions & 45 deletions msfinance/stocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(self, debug=False, browser='firefox', session='msfinance.db3', prox
print("No supported proxy protocal")
exit(1)

# # May work for Chrome
# # May work for Chrome
# self.options.proxy = Proxy({
# 'proxyType': ProxyType.MANUAL,
# 'socksProxy': '127.0.0.1:1088',
Expand All @@ -106,7 +106,7 @@ def __init__(self, debug=False, browser='firefox', session='msfinance.db3', prox
os.makedirs(dir, exist_ok=True)
self.db = sqlite3.connect(session)
else:
self.db = None
self.db = None

# Setup proxies of requests
self.proxies = {
Expand Down Expand Up @@ -151,7 +151,7 @@ def _update_database(self, unique_id, df):
unique_id: Name of the table
Returns:
True if update is done, else False
True if update is done, else False
'''
if self.db:
df['Last Updated'] = datetime.now()
Expand All @@ -166,19 +166,19 @@ def _get_valuation(self, ticker, exchange, statistics, update=False):
# Compose a unique ID for the database table and file name
unique_id = f"{ticker}_{exchange}_{statistics}".replace(' ', '_').lower()

# Unless an update is forced, check the database first
# Unless an update is forced, check the database first
if not update:
df = self._check_database(unique_id)
if df is not None:
return df

# Fetch data from website starts here
url = f"https://www.morningstar.com/stocks/{exchange}/{ticker}/valuation"
self.driver.get(url)

statistics_button = self.driver.find_element(By.XPATH, f"//button[contains(., '{statistics}')]")
statistics_button.click()

export_button = WebDriverWait(self.driver, 30).until(
EC.visibility_of_element_located((By.XPATH, "//button[contains(., 'Export Data')]"))
)
Expand All @@ -191,27 +191,27 @@ def _get_valuation(self, ticker, exchange, statistics, update=False):
(By.XPATH, f"//div[contains(., 'There is no {statistics} data available.')]")
)
)
return None
return None
except TimeoutException:
export_button.click()

# Wait until the download is done
tmp_string = statistics_filename[statistics]
tmp_file = self.download_dir + f"/{tmp_string}.xls"
retries = 5

retries = 10
while retries and (not os.path.exists(tmp_file) or os.path.getsize(tmp_file) == 0):
time.sleep(1)
retries = retries - 1
retries = retries - 1

if 0 == retries and (not os.path.exists(tmp_file)):
raise ValueError("Export data fail")

statistics_file = self.download_dir + f"/{unique_id}.xls"
os.rename(tmp_file, statistics_file)
time.sleep(1)

# Update database
# Update database
df = pd.read_excel(statistics_file)
if self.db:
self._update_database(unique_id, df)
Expand All @@ -220,11 +220,11 @@ def _get_valuation(self, ticker, exchange, statistics, update=False):

@retry(wait_fixed=2000, stop_max_attempt_number=3)
def _get_financials(self, ticker, exchange, statement, period='Annual', stage='Restated', update=False):

# Compose a unique ID for the database table and file name
unique_id = f"{ticker}_{exchange}_{statement}_{period}_{stage}".replace(' ', '_').lower()
# Unless an update is forced, check the database first

# Unless an update is forced, check the database first
if not update:
df = self._check_database(unique_id)
if df is not None:
Expand All @@ -250,7 +250,7 @@ def _get_financials(self, ticker, exchange, statement, period='Annual', stage='R
period_button = self.driver.find_element(By.XPATH, "//span[contains(., 'Annual') and @class='mds-list-group__item-text__sal']")
else:
period_button = self.driver.find_element(By.XPATH, "//span[contains(., 'Quarterly') and @class='mds-list-group__item-text__sal']")

try:
period_button.click()
time.sleep(1)
Expand All @@ -259,7 +259,7 @@ def _get_financials(self, ticker, exchange, statement, period='Annual', stage='R

# Select statement stage
stage_list_button = self.driver.find_element(By.XPATH, "//button[contains(., 'As Originally Reported') and @aria-haspopup='true']")
try:
try:
stage_list_button.click()
time.sleep(1)
except ElementClickInterceptedException:
Expand All @@ -272,7 +272,7 @@ def _get_financials(self, ticker, exchange, statement, period='Annual', stage='R
else:
stage_button = self.driver.find_element(By.XPATH, "//span[contains(., 'Restated') and @class='mds-list-group__item-text__sal']")

try:
try:
stage_button.click()
time.sleep(1)
except ElementClickInterceptedException:
Expand All @@ -296,15 +296,15 @@ def _get_financials(self, ticker, exchange, statement, period='Annual', stage='R
tmp_file = self.download_dir + f"/{statement}_{period}_{stage}.xls"
while retries and (not os.path.exists(tmp_file)):
time.sleep(1)
retries = retries - 1
retries = retries - 1

if 0 == retries and (not os.path.exists(tmp_file)):
raise ValueError("Export data fail")

statement_file = self.download_dir + f"/{unique_id}.xls"
os.rename(tmp_file, statement_file)
time.sleep(1)

# Update database
df = pd.read_excel(statement_file)
if self.db:
Expand All @@ -316,13 +316,13 @@ def _get_us_exchange_tickers(self, exchange, update=False):

unique_id = f"us_exchange_{exchange}_tickers"

# Unless an update is forced, check the database first
# Unless an update is forced, check the database first
if not update:
df = self._check_database(unique_id)
if df is not None:
symbols = df['symbol'].tolist()
return symbols

# api.nasdaq.com requires request headers, or it won't respond
headers = {
'accept': 'application/json, text/plain, */*',
Expand Down Expand Up @@ -350,7 +350,7 @@ class Stock(StockBase):
def get_growth(self, ticker, exchange, update=False):
'''
Get growth statistics of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -360,11 +360,11 @@ def get_growth(self, ticker, exchange, update=False):
'''
statistics = 'Growth'
return self._get_valuation(ticker, exchange, statistics, update)

def get_operating_and_efficiency(self, ticker, exchange, update=False):
'''
Get operating and efficiency statistics of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -373,11 +373,11 @@ def get_operating_and_efficiency(self, ticker, exchange, update=False):
'''
statistics = 'Operating and Efficiency'
return self._get_valuation(ticker, exchange, statistics, update)

def get_financial_health(self, ticker, exchange, update=False):
'''
Get financial health statistics of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -386,11 +386,11 @@ def get_financial_health(self, ticker, exchange, update=False):
'''
statistics = 'Financial Health'
return self._get_valuation(ticker, exchange, statistics, update)

def get_cash_flow(self, ticker, exchange, update=False):
'''
Get cash flow statistics of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -403,7 +403,7 @@ def get_cash_flow(self, ticker, exchange, update=False):
def get_valuations(self, ticker, exchange, update=False):
'''
Get all valuations of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -415,13 +415,13 @@ def get_valuations(self, ticker, exchange, update=False):
for statistics in ['Growth', 'Operating and Efficiency', 'Financial Health','Cash Flow']:
df = self._get_valuation(ticker, exchange, statistics, update)
self.valuations.append(df)

return self.valuations

def get_income_statement(self, ticker, exchange, period='Annual', stage='Restated', update=False):
'''
Get income statement of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -436,7 +436,7 @@ def get_income_statement(self, ticker, exchange, period='Annual', stage='Restate
def get_balance_sheet_statement(self, ticker, exchange, period='Annual', stage='Restated', update=False):
'''
Get balance sheet statement of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -451,7 +451,7 @@ def get_balance_sheet_statement(self, ticker, exchange, period='Annual', stage='
def get_cash_flow_statement(self, ticker, exchange, period='Annual', stage='Restated', update=False):
'''
Get cash flow statement of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -466,7 +466,7 @@ def get_cash_flow_statement(self, ticker, exchange, period='Annual', stage='Rest
def get_financials(self, ticker, exchange, period='Annual', stage='As Originally Reported', update=False):
'''
Get all financials statements of stock
Args:
ticker: Stock symbol
exchange: Exchange name
Expand All @@ -489,7 +489,7 @@ def get_hsi_tickers(self):
Returns:
List of tickers as 5-digit number strings
'''
'''
url = "https://en.wikipedia.org/wiki/Hang_Seng_Index"
response = requests.get(url, proxies=self.proxies)
tables = pd.read_html(response.text)
Expand All @@ -511,16 +511,16 @@ def get_sp500_tickers(self):
tables = pd.read_html(response.text)
symbols = tables[0]['Symbol'].tolist()
return symbols


def get_xnas_tickers(self):
'''
Get tickers of NASDAQ
Returns:
List of ticker names in NASDAQ
'''

exchange = 'nasdaq'
return self._get_us_exchange_tickers(exchange)

Expand All @@ -531,7 +531,7 @@ def get_xnys_tickers(self):
Returns:
List of ticker names in NYSE
'''

exchange = 'nyse'
return self._get_us_exchange_tickers(exchange)

Expand All @@ -542,10 +542,8 @@ def get_xase_tickers(self):
Returns:
List of ticker names in AMEX
'''

exchange = 'amex'
return self._get_us_exchange_tickers(exchange)



# End of class Stock
# End of class Stock

0 comments on commit 9179503

Please sign in to comment.