"""Extract dependency relations from sentences annotated as sexist language.

The input is expected to be a CoNLL-U-style text file: sentence-level comment
lines start with ``#``, each token is one tab-separated line, and sentences
are separated by a blank line.  Only sentences whose comment block contains
``language_type = sexist`` are extracted.
"""
import sys

# Substring of a comment line that marks the following sentence for extraction.
SEXIST_MARKER = 'language_type = sexist'


def extract_dependency_relations_sexist_lang(filename):
    """Collect dependency relations for every sentence marked as sexist.

    Args:
        filename: Path to the annotated file.  It is read as UTF-8
            (assumes CoNLL-U input, which is UTF-8 encoded).

    Returns:
        A list with one entry per extracted sentence, in file order.  Each
        entry is a list of ``(token_id, word, head, relation)`` string tuples
        taken from tab-separated columns 0, 1, 6 and 7 of the token lines.
        Token lines with fewer than eight columns are skipped, and sentences
        that yield no tuples are not included.
    """
    relations = []
    sentence_relations = []
    collecting = False

    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            if line.startswith('#'):
                if SEXIST_MARKER in line:
                    # Defensive flush: if the previous sentence was not
                    # terminated by a blank line, close it here so its tokens
                    # are not merged into the sentence that is starting.
                    if sentence_relations:
                        relations.append(sentence_relations)
                        sentence_relations = []
                    collecting = True
            elif not line.strip():
                # A blank line ends the current sentence: store what was
                # collected and stop collecting until the next marker.
                if sentence_relations:
                    relations.append(sentence_relations)
                    sentence_relations = []
                collecting = False
            elif collecting:
                # Strip the newline first so the last column never carries it.
                parts = line.rstrip('\n').split('\t')
                if len(parts) > 7:
                    # ID, form (word), dependency head, and relation.
                    sentence_relations.append(
                        (parts[0], parts[1], parts[6], parts[7])
                    )

    # The final sentence may not be followed by a blank line (or by another
    # marker); without this flush it would be silently dropped.
    if sentence_relations:
        relations.append(sentence_relations)

    return relations


def main():
    """Command-line entry point: print the relations found in ``sys.argv[1]``."""
    if len(sys.argv) != 2:
        print("Usage: extract_dependency_relations_sexist_lang FILENAME")
        sys.exit(1)

    deps = extract_dependency_relations_sexist_lang(sys.argv[1])
    print("Extracted Dependency Relations Sexist Language Analysis:")
    for relation in deps:
        print(relation)


if __name__ == '__main__':
    main()