-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patherror_analysis.py
95 lines (61 loc) · 3.07 KB
/
error_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import json
from pandas import read_csv
# Error analysis for one misclassified (false-negative) document: print its
# entry from the preprocessed-text list and export the matching raw rows to
# CSV so they can be inspected by hand.
#
# `false_neg_id` is the single source of truth for the document under
# inspection. Previously the lookups used 7319 while the print label and the
# output file names still said 7149, so the exported artefacts were
# mislabelled. Every label, lookup and file name is now derived from this id.
false_neg_id = 7319

with open('read_cases_manualATM_text_list.json', 'r') as f:
    read_cases_manualATM_text_list = json.load(f)

print(f'preprocessed words in false_neg doc {false_neg_id}',
      read_cases_manualATM_text_list[false_neg_id])

# Positional lookup: `iloc` selects the row at position `false_neg_id`.
# NOTE(review): assumes row positions in this CSV line up with the indices of
# the JSON list above -- confirm against the preprocessing step.
original_data = read_csv("case_scraping_01_1998_to_07_2022_noNaN.csv")
original_relevant_row = original_data.iloc[false_neg_id]
original_relevant_row.to_csv(f"noNaN_row_{false_neg_id}.csv")

# Value lookup: keep only the columns of interest, then the rows whose 'uid'
# equals the id (may yield zero or several rows).
data = read_csv("case_scraping_01_1998_to_07_2022_noNaN_all.csv")
data_relevant_columns = data[['uid', 'dm_family', 'dm2_asylum', 'full_text_x', 'decision_date']]
data_relevant_rows = data_relevant_columns[data_relevant_columns['uid'] == false_neg_id]
data_relevant_rows.to_csv(f"noNaN_all_uid={false_neg_id}.csv")
# Disabled code: wrapped in a bare string literal, so it is never executed.
# It is the same analysis as the active code above, but for document id 7149
# (JSON list index, `iloc` row position and 'uid' filter all use 7149).
'''
with open('read_cases_manualATM_text_list.json', 'r') as f:
read_cases_manualATM_text_list = json.load(f)
false_neg_id = 7149
print('preprocessed words in false_neg doc 7149', read_cases_manualATM_text_list[false_neg_id])
original_data = read_csv("case_scraping_01_1998_to_07_2022_noNaN.csv")
original_relevant_row = original_data.iloc[7149]
original_relevant_row.to_csv("noNaN_row_7149.csv")
data = read_csv("case_scraping_01_1998_to_07_2022_noNaN_all.csv")
data_relevant_columns = data[['uid', 'dm_family', 'dm2_asylum', 'full_text_x', 'decision_date']]
data_relevant_rows = data_relevant_columns[data_relevant_columns['uid'] == 7149]
data_relevant_rows .to_csv("noNaN_all_uid=7149.csv")
'''
# Disabled code: wrapped in a bare string literal, so it is never executed.
# Earlier exploration around case 814. In order, it:
#   - exports the rows with decision_date '05. Oktober 1999' and the row at
#     position 814 of the noNaN CSV;
#   - collects the column names starting with "dm" and prints them with "id"
#     prepended;
#   - exports the rows with uid 814 (selected columns) to false_cases.csv;
#   - prints entries of read_cases_manualATM_text_list for several ids.
# NOTE(review): if this is ever re-enabled, `data.columns` is read before this
# block loads `data` (it would rely on `data` from earlier code), and the
# first print is labelled "813" although it indexes document 7149.
'''
original_data = read_csv("case_scraping_01_1998_to_07_2022_noNaN.csv")
original_relevant_row = original_data[original_data['decision_date'] == '05. Oktober 1999']
original_relevant_row.to_csv("original_relevant_row_Oct5_1999.csv")
original_data = read_csv("case_scraping_01_1998_to_07_2022_noNaN.csv")
original_relevant_row = original_data.iloc[814]
original_relevant_row.to_csv("original_relevant_row_814.csv")
list_of_cols = data.columns.tolist()
dm_list = []
for col in list_of_cols:
if col[0:2] == "dm":
dm_list += [col]
id_dm_list = ["id"] + dm_list
print("id_dm_list:", id_dm_list)
data = read_csv("case_scraping_01_1998_to_07_2022_noNaN_all.csv")
#format: 05. Januar 1998
#date of case 814: 05. Oktober 1999
#814, 6158
data_relevant_columns = data[['uid', 'dm_family', 'dm2_military', 'full_text_x', 'decision_date']]
data_relevant_rows = data_relevant_columns[data_relevant_columns['uid'] == 814]
print('case 814:', str(data_relevant_rows['full_text_x']))
data_relevant_rows.to_csv("false_cases.csv")
#print(data_relevant_rows.head)
#print('relevant_data:', relevant_data)
with open('read_cases_manualATM_text_list.json', 'r') as f:
read_cases_manualATM_text_list = json.load(f)
false_neg_id = 7149
print('preprocessed words in false_neg doc 813', read_cases_manualATM_text_list[false_neg_id])
false_neg_id = 814
print('preprocessed words in false_neg doc 814 ', read_cases_manualATM_text_list[false_neg_id])
false_neg_id = 815
print('preprocessed words in false_neg doc 815', read_cases_manualATM_text_list[false_neg_id])
#false_pos_id = 6158
#print('preprocessed words in false_pos doc ', read_cases_manualATM_text_list[false_pos_id])
'''