def export_jobs_to_dataframe():
    """Export the items of every prepared job to one pickled pandas DataFrame.

    The jobs are loaded with ``parse_job_pickle()``. Each item of each job
    becomes one row with the columns ``qid``, ``label`` and ``description``.
    All rows are collected into a single frame, which is written to
    ``dataframe.pkl.gz`` in the current working directory.

    If no jobs are available (``parse_job_pickle()`` returned ``None`` or an
    empty collection), a hint is printed to the console and nothing is
    written.

    Returns:
        None
    """
    logger = logging.getLogger(__name__)
    logger.info("Exporting jobs to DataFrame. All jobs are appended to one frame")
    jobs = parse_job_pickle()
    # Guard clause: covers both "no pickle" (None) and "empty job list".
    if not jobs:
        console.print("No jobs found. Create a job list first by using '--prepare-jobs'")
        return
    number_of_jobs = len(jobs)
    logger.info(f"Found {number_of_jobs} jobs")
    # Collect plain dict rows and build the frame once at the end.
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
    # calling it per item re-copied the whole frame on every iteration.
    rows = []
    # Start at 1 so the progress message reads "1/N" .. "N/N".
    for count, job in enumerate(jobs, start=1):
        logger.info(f"Working on job {count}/{number_of_jobs}")
        items = job.items.list
        rows.extend(
            dict(
                qid=item.id,
                label=item.label,
                description=item.description,
            )
            for item in items
        )
        logger.debug(f"Added {len(items)} items to the dataframe")
    # Explicit columns keep the schema stable even when no job has any items.
    # The resulting frame has a fresh 0..n-1 index instead of the repeated
    # index label 0 that the per-row append used to produce.
    df = pd.DataFrame(rows, columns=["qid", "label", "description"])
    logger.debug(f"Exporting {len(df)} rows to pickle")
    # pandas infers gzip compression from the ".gz" suffix by default.
    pickle_filename = "dataframe.pkl.gz"
    df.to_pickle(pickle_filename)
    console.print(f"Wrote to {pickle_filename} in the current directory")
One file for each job.") @@ -148,6 +177,8 @@ def main(): remove_job_pickle(hash=file_hash) if args.export_job_list_to_quickstatements: export_jobs_to_quickstatements() + elif args.export_jobs_to_dataframe: + export_jobs_to_dataframe() elif args.match_existing_main_subjects is True: match_existing_main_subjects(args=args, jobs=jobs) elif args.sparql: diff --git a/src/helpers/argparse_setup.py b/src/helpers/argparse_setup.py index 55c7251..3b9cfb0 100644 --- a/src/helpers/argparse_setup.py +++ b/src/helpers/argparse_setup.py @@ -104,4 +104,10 @@ def setup_argparse_and_return_args(): help='Export the prepared job list to QuickStatements.', default=False ) + parser.add_argument( + '--export-jobs-to-dataframe', + action='store_true', + help='Export the prepared job list to a Pandas DataFrame.', + default=False + ) return parser.parse_args()