-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathforex_sentiment.py
93 lines (65 loc) · 3.75 KB
/
forex_sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from bs4 import BeautifulSoup
import requests
import re
# ------------------------------------------------------------------
# ------------------------------------------------------------------
# Extracted Forex Sentiment
# Website: https://www.myfxbook.com/community/outlook
# ------------------------------------------------------------------
# ------------------------------------------------------------------
url = "https://www.myfxbook.com/community/outlook"
result = requests.get(url).text
doc = BeautifulSoup(result, "html.parser")
tbody = doc.find(["tbody"], id="outlookSymbolsTableContent")
trow = doc.find_all(["tr"], class_="outlook-symbol-row")
# Entire Data Set
forex_sentiment = []
# -------------------------------------------
# Important Extracing Attributes of a Tag
# -------------------------------------------
# Extracting 79% from style in: <div class="progress-bar progress-bar-danger" style="width: 79%;"></div>
# Regular Expressions, r'width:\s*(\d+%);'
# 1) "width:" - a literal string that matches the characters "width:".
# 2) \s* - zero or more whitespace characters (spaces, tabs, or newlines).
# 3) (\d+%); - a capturing group that matches the following:
# i) \d+% - a string of one or more digits, followed by a percent symbol ("%").
# ii) ; - a literal semicolon.
# Regular Epression pattern is used to search for a string that has a width property defined in CSS syntax,
# followed by a percentage value (with the % symbol), and then terminated by a semicolon.
# For example, it would match strings like:
# i) width: 20%;
# ii) width: 50%; margin-left: 10px;
# iii) width:80%;height:100px;
# -------------------------------------------
# -------------------------------------------
# -------------------------------------------
# Dont use variables a1, a2, ...
for tr in trow:
symbol, a1, percentage, a2, popularity, a3, short_price_and_pips, a4, long_price_and_pips, a5, current_price = tr.contents[1:12]
symbol_value = symbol.a.string.lstrip() # Removing space in front
shorts_trend_div_tag = percentage.find(["div"], class_="progress-bar-danger") # Entire div
shorts_trend_style_content = shorts_trend_div_tag.get('style') # Value inside the style which is width: 77%
shorts_trend_value = re.search(r'width:\s*(\d+%);', shorts_trend_style_content).group(1) # Get exact width value using regular expression
longs_trend_div_tag = percentage.find(["div"], class_="progress-bar-success")
longs_trend_style_content = longs_trend_div_tag.get('style')
longs_trend_value = re.search(r'width:\s*(\d+%);', longs_trend_style_content).group(1)
popularity_div_tag = popularity.find(["div"], class_="progress-bar")
popularity_style_content = popularity_div_tag.get('style')
popularity_value = re.search(r'width:\s*(\d+%);', popularity_style_content).group(1)
avg_short_price = short_price_and_pips.find(["span"], class_="font12").string
short_pip_difference = short_price_and_pips.find(["span"], class_="font11").string
avg_long_price = long_price_and_pips.find(["span"], class_="font12").string
long_pip_difference = long_price_and_pips.find(["span"], class_="font11").string
current_price = current_price.span.string.strip()
record = {}
record["symbol"] = symbol_value
record["short_trend"] = shorts_trend_value
record["long_trend"] = longs_trend_value
record["popularity"] = popularity_value
record["avg_short_price"] = avg_short_price
record["short_pip_diff"] = short_pip_difference
record["avg_long_price"] = avg_long_price
record["long_pip_diff"] = long_pip_difference
record["current_price"] = current_price
forex_sentiment.append(record)
print(forex_sentiment)