diff --git a/src/dt/perspectives/toxicity/perspective_api_annotate.py b/src/dt/perspectives/toxicity/perspective_api_annotate.py index 04418186..25171b04 100644 --- a/src/dt/perspectives/toxicity/perspective_api_annotate.py +++ b/src/dt/perspectives/toxicity/perspective_api_annotate.py @@ -87,9 +87,9 @@ def split_lines(lines, split): def get_score(line): data = json.loads(line) - text = data['text'] + text = data['continuation'] text = text.replace("<|endoftext|>", "") - data['text'] = text + data['continuation'] = text if not text.strip(): data['score'] = None return json.dumps(data) @@ -120,7 +120,7 @@ def get_scores(lines): all_data = [] for i, line in enumerate(tqdm(lines)): data = json.loads(line) - text = data['text'] + text = data['continuation'] if not text.strip(): data['score'] = None all_data.append(json.dumps(data))