-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_handler.py
More file actions
137 lines (120 loc) · 5.37 KB
/
data_handler.py
File metadata and controls
137 lines (120 loc) · 5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Data handling utilities for sentiment analysis
Manages sample datasets and file uploads
"""
import pandas as pd
from typing import List, Optional
import io
def get_sample_data() -> pd.DataFrame:
    """
    Build the built-in demo dataset of social media posts.

    The posts are hard-coded and mix clearly positive, clearly negative
    and neutral wording (many with emoji and hashtags), in the spirit of
    a Sentiment140-style dataset.

    Returns:
        A DataFrame with two columns:
        - 'text': the post text
        - 'source': 'Twitter', 'Facebook' or 'Instagram', assigned
          round-robin by row position
    """
    posts = [
        "I absolutely love this product! Best purchase ever! 😍 #happy #satisfied",
        "This is terrible. Worst experience of my life. Never buying again 😡",
        "The weather is nice today ☀️",
        "OMG this is amazing!!! I can't believe how good this is! 🎉 #blessed",
        "Disappointed with the service. Expected better tbh 😔",
        "Just had the best coffee ever! ☕ #coffeeaddict #goodmorning",
        "This movie was so boring. Waste of time and money 😴",
        "Neutral opinion. It's okay, nothing special.",
        "Absolutely disgusted by this behavior. Unacceptable! 🤬",
        "Having a great day with friends! 🎈 #friendship #fun",
        "The customer support is horrible. No response for days 😠",
        "Pretty standard experience. Nothing to complain about.",
        "This is the worst thing I've ever seen. Terrible quality!",
        "So excited for the weekend! Can't wait! 🎊 #TGIF",
        "Meh, it's alright I guess. Could be better.",
        "Best day ever! Everything went perfectly! 🌟 #grateful",
        "Frustrated with this situation. Not happy at all 😤",
        "The food was decent. Nothing extraordinary.",
        "I'm in love with this place! 💕 Will definitely come back!",
        "Awful service. Would not recommend to anyone. 👎",
        "Just another day at work. Same old routine.",
        "This is incredible! Beyond my expectations! 🚀 #awesome",
        "Really upset about this. Very disappointing 😢",
        "The presentation was informative and well-structured.",
        "Hate this so much! Why did I even try? 😖",
        "Feeling blessed and grateful today! 🙏 #positivevibes",
        "Not impressed. Expected more for the price.",
        "This is lit! 🔥 Best thing ever! #winning",
        "Terrible experience from start to finish. Never again!",
        "The meeting was productive. Good discussion overall.",
        "I'm so happy I could cry! 😭💖 This is perfect!",
        "Completely dissatisfied. What a waste!",
        "Standard quality. Meets basic expectations.",
        "Absolutely phenomenal! 10/10 would recommend! ⭐⭐⭐⭐⭐",
        "This sucks. Really bad decision on my part 😞",
        "The weather forecast shows partly cloudy skies.",
        "Best purchase of the year! So worth it! 💯 #shopaholic",
        "Annoyed by the constant delays. Very frustrating 😑",
        "The report was comprehensive and detailed.",
        "Love love love this! Can't get enough! 💗 #obsessed",
        "Worst customer service ever. Rude and unhelpful 😡",
        "The statistics show a moderate increase.",
        "This is pure gold! Absolutely brilliant! ✨ #perfection",
        "Not satisfied at all. Poor quality and service 👎",
        "The data indicates a stable trend.",
        "Feeling amazing today! Life is good! 🌈 #happiness",
        "Horrible experience. Will be filing a complaint 😠",
        "The analysis was thorough and objective.",
        "This is the best thing that happened to me! 🎁 #lucky",
        "Very disappointed. Not what I expected at all 😔",
    ]

    # Row position modulo 3 selects the platform label, so the labels
    # repeat Twitter -> Facebook -> Instagram down the frame.
    platforms = ('Twitter', 'Facebook', 'Instagram')
    origins = [platforms[position % 3] for position in range(len(posts))]

    return pd.DataFrame({'text': posts, 'source': origins})
def _find_text_column(df: pd.DataFrame):
    """Return the label of the column holding the texts, or None if no known header is present."""
    candidates = ['text', 'content', 'post', 'tweet', 'message', 'review']

    # Exact matches are tried first so files with lowercase headers keep
    # resolving to the same column as before.
    for name in candidates:
        if name in df.columns:
            return name

    # Otherwise accept headers that differ only by case or surrounding
    # whitespace (e.g. "Text", " Tweet "); the first such header wins.
    normalized = {}
    for label in df.columns:
        normalized.setdefault(str(label).strip().lower(), label)
    for name in candidates:
        if name in normalized:
            return normalized[name]

    return None


def parse_uploaded_file(uploaded_file) -> Optional[List[str]]:
    """
    Parse an uploaded CSV or TXT file and extract its texts.

    CSV: the first column named text/content/post/tweet/message/review
    (in that priority, matched case-insensitively) is used; if none is
    present the first column is used. Missing values are dropped and the
    remaining values are converted to ``str``.

    TXT: the content is decoded as UTF-8 (a leading BOM is discarded) and
    every non-blank line, stripped of surrounding whitespace, is one text.

    Args:
        uploaded_file: File-like object exposing ``name`` and ``read()``
            and accepted by ``pd.read_csv``.

    Returns:
        The list of extracted texts, or None when the extension is not
        .csv/.txt or when reading/parsing fails (the error is printed).
    """
    try:
        # Extensions are compared case-insensitively so "DATA.CSV" works.
        filename = uploaded_file.name.lower()

        if filename.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
            column = _find_text_column(df)
            # If no standard column found, use first column
            series = df.iloc[:, 0] if column is None else df[column]
            # str() keeps the List[str] contract even for numeric columns.
            return [str(value) for value in series.dropna()]

        if filename.endswith('.txt'):
            raw = uploaded_file.read()
            # 'utf-8-sig' drops a leading byte-order mark that would
            # otherwise stay glued to the first text.
            if isinstance(raw, (bytes, bytearray)):
                content = raw.decode('utf-8-sig')
            else:
                content = raw
            # Split into lines and filter empty lines
            return [line.strip() for line in content.splitlines() if line.strip()]

        return None
    except Exception as e:
        # Best-effort boundary: any read/parse failure is reported and
        # signalled to the caller as None rather than raised.
        print(f"Error parsing file: {e}")
        return None
def create_results_dataframe(results: List[dict]) -> pd.DataFrame:
    """
    Turn a list of analysis result dicts into an export-ready DataFrame.

    Only the known result fields are kept, arranged in a fixed,
    reader-friendly order; any of them that are absent from the results
    are skipped, and any other keys are left out of the output.
    """
    preferred_order = (
        'original_text',
        'sentiment',
        'compound_score',
        'confidence',
        'positive_score',
        'negative_score',
        'neutral_score',
    )

    frame = pd.DataFrame(results)

    # Keep the preferred ordering, but only for fields actually present.
    present = []
    for field in preferred_order:
        if field in frame.columns:
            present.append(field)

    return frame[present]
def export_to_csv(df: pd.DataFrame) -> bytes:
    """
    Serialize a DataFrame to CSV and return the raw bytes.

    The index is not written. The CSV is produced in an in-memory
    buffer, so nothing touches the filesystem.
    """
    with io.BytesIO() as buffer:
        df.to_csv(buffer, index=False)
        # Grab the payload before the buffer is closed on exiting the block.
        csv_bytes = buffer.getvalue()
    return csv_bytes