# aitm_A3_textblob_sentiment.py
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 20 08:10:22 2023
@author: Adam
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob
from datetime import datetime
# Load the combined article table and parse the date column into datetimes.
combined_df = pd.read_csv('combined_df.csv')
combined_df['date_std'] = pd.to_datetime(combined_df['date_std'])
# Keep only rows dated on or after 1 January 2013. The explicit .copy() makes
# the filtered frame independent of the original, so the column assignments
# performed later in this script do not raise SettingWithCopyWarning.
combined_df = combined_df[combined_df['date_std'] >= datetime(2013, 1, 1)].copy()
def sentiment_score(text):
    """Return the TextBlob polarity score for a piece of text.

    Args:
        text: Value to score. Only ``str`` instances are analysed.

    Returns:
        ``TextBlob(text).sentiment.polarity`` when ``text`` is a string,
        otherwise ``0.0`` (for example for the NaN that pandas uses for an
        empty CSV cell).
    """
    if not isinstance(text, str):
        # Non-text cells get a neutral score. A float (rather than int 0)
        # keeps the return type uniform with the polarity branch.
        return 0.0
    return TextBlob(text).sentiment.polarity
def sentiment_label(score, threshold=0):
    """Convert a numeric sentiment score into a binary text label.

    Args:
        score: Numeric sentiment score to classify.
        threshold: Cut-off value; only scores strictly greater than it are
            labelled positive. Defaults to 0.

    Returns:
        ``'positive'`` when ``score > threshold``, otherwise ``'negative'``.
        A score exactly equal to the threshold is labelled negative.
    """
    # NOTE(review): this label was previously misspelled 'negitive'. Confirm
    # that nothing reading the output CSV still matches on the old spelling.
    return 'positive' if score > threshold else 'negative'
# Sentiment Scoring
# Score each text column on its own; the score columns are created in the
# order headline, snippet, body so the CSV column layout stays the same.
for text_column, score_column in (
    ('headline', 'headline_sentiment'),
    ('snippet_std', 'snippet_sentiment'),
    ('body', 'body_sentiment'),
):
    combined_df[score_column] = combined_df[text_column].apply(sentiment_score)

# Composite score: unweighted mean of the three per-field scores.
component_total = (
    combined_df['headline_sentiment']
    + combined_df['snippet_sentiment']
    + combined_df['body_sentiment']
)
combined_df['composite_sentiment'] = component_total / 3

# Sentiment labelling
# Turn every score column (including the composite) into a text label.
for label_column, score_column in (
    ('headline_label', 'headline_sentiment'),
    ('snippet_label', 'snippet_sentiment'),
    ('body_label', 'body_sentiment'),
    ('composite_label', 'composite_sentiment'),
):
    combined_df[label_column] = combined_df[score_column].apply(sentiment_label)

# Single space-separated text field built from headline, snippet and body.
# Each part is cast with astype(str) before concatenation.
combined_df['composite_text'] = (
    combined_df['headline'].astype(str)
    + ' '
    + combined_df['snippet_std'].astype(str)
    + ' '
    + combined_df['body'].astype(str)
)

# OUTPUT CSV
combined_df.to_csv('combined_df_textblob_sentiment.csv', index=False)
# PLOTS
# Sum the scores per date, then accumulate those daily totals over time.
daily_totals = combined_df.groupby('date_std')[
    ['headline_sentiment', 'snippet_sentiment', 'body_sentiment', 'composite_sentiment']
].sum()
cumulative_sentiments = daily_totals.cumsum()

# First figure: one line per cumulative sentiment series.
plt.figure(figsize=(12, 6))
for series_name, legend_text in (
    ('headline_sentiment', 'Headline Sentiment'),
    ('snippet_sentiment', 'Snippet Sentiment'),
    ('body_sentiment', 'Body Sentiment'),
    ('composite_sentiment', 'Composite Sentiment'),
):
    plt.plot(cumulative_sentiments.index, cumulative_sentiments[series_name], label=legend_text)
plt.title('Cumulative Sentiment Scores Over Time')
plt.xlabel('Date')
plt.ylabel('Cumulative Sentiment Score')
plt.legend()
plt.show()
# Second figure: the same cumulative curves, shaded where the snippet curve is
# above the headline curve, with three dated events annotated on the
# composite curve.
plt.figure(figsize=(12, 6))
plt.plot(cumulative_sentiments.index, cumulative_sentiments['headline_sentiment'], label='Headline Sentiment')
plt.plot(cumulative_sentiments.index, cumulative_sentiments['snippet_sentiment'], label='Snippet Sentiment')
plt.plot(cumulative_sentiments.index, cumulative_sentiments['body_sentiment'], label='Body Sentiment')
plt.plot(cumulative_sentiments.index, cumulative_sentiments['composite_sentiment'], label='Composite Sentiment')

# Fill between headline and snippet sentiment
plt.fill_between(cumulative_sentiments.index,
                 cumulative_sentiments['headline_sentiment'],
                 cumulative_sentiments['snippet_sentiment'],
                 where=(cumulative_sentiments['snippet_sentiment'] > cumulative_sentiments['headline_sentiment']),
                 color='lightblue', interpolate=True)

# Each entry: (annotation text, event date, months to shift the text left of
# the event, vertical offset of the text above the curve).
event_annotations = [
    ('Federal law change takes effect', datetime(2016, 11, 1), 36, 20),
    ('Federal law amendment passed', datetime(2016, 2, 24), 40, 15),
    ('ACT law amendment passed', datetime(2019, 9, 25), 55, 15),
]
composite_curve = cumulative_sentiments['composite_sentiment']
for event_text, event_date, months_left, y_offset in event_annotations:
    # asof() returns the value at the event date when that date is in the
    # index, and otherwise the most recent earlier value (the running total
    # up to that date). A plain .loc lookup would raise KeyError whenever no
    # row falls exactly on the event date.
    arrow_y = composite_curve.asof(event_date)
    if pd.isna(arrow_y):
        # The event precedes all data, so there is no curve point to mark.
        continue
    plt.annotate(event_text,
                 xy=(event_date, arrow_y),
                 xytext=(event_date - pd.DateOffset(months=months_left), arrow_y + y_offset),
                 arrowprops=dict(facecolor='black', arrowstyle='->'),
                 fontsize=12, color='black')

plt.title('Cumulative Sentiment Scores Over Time')
plt.xlabel('Date')
plt.ylabel('Cumulative Sentiment Score')
plt.legend()
plt.show()
# Count rows per (calendar year, composite label) and pivot the labels into
# columns, giving one row per year.
article_year = combined_df['date_std'].dt.year
label_counts = combined_df.groupby([article_year, 'composite_label']).size()
sentiment_counts_by_year = label_counts.unstack()

# Grouped bar chart: one bar per label column within each year, coloured red
# then green in column order.
ax = sentiment_counts_by_year.plot.bar(figsize=(15, 7), color=['red', 'green'], alpha=0.7)

# Axis labels and title
ax.set(
    xlabel='Year',
    ylabel='Count of Sentiment Labels',
    title='Counts of Positive and Negative Sentiments Per Year',
)

# Legend entries are given explicitly, replacing the raw column names.
ax.legend(['Negative', 'Positive'])

# Display the plot
plt.show()