Skip to content
This repository has been archived by the owner on Jan 23, 2024. It is now read-only.

Commit

Permalink
Merge pull request #39 from dpriskorn/export_dataframe
Browse files Browse the repository at this point in the history
Support exporting to dataframe
  • Loading branch information
dpriskorn authored Feb 9, 2022
2 parents 934636d + 5bda59a commit ec0209a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
31 changes: 31 additions & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,35 @@ def match_main_subjects_from_sparql(args: argparse.Namespace = None,
console.print("Got 0 results. Try another query or debug it using --debug")


def export_jobs_to_dataframe():
    """Export the items of all prepared jobs to one pickled pandas DataFrame.

    The jobs are loaded with ``parse_job_pickle()``. For every item in every
    job's ``job.items.list`` one row is produced with the columns ``qid``
    (``item.id``), ``label`` and ``description``. All rows end up in a single
    frame with a fresh 0..n-1 index, which is written to ``dataframe.pkl.gz``
    in the current working directory.

    If no jobs are available (``None`` or an empty collection) a hint is
    printed to the console and nothing is written.

    Returns:
        None
    """
    logger = logging.getLogger(__name__)
    logger.info("Exporting jobs to DataFrame. All jobs are appended to one frame")
    jobs = parse_job_pickle()
    # Guard clause: handles both "no pickle/None" and "empty job list".
    if jobs is None or len(jobs) == 0:
        console.print("No jobs found. Create a job list first by using '--prepare-jobs'")
        return
    number_of_jobs = len(jobs)
    logger.info(f"Found {number_of_jobs} jobs")
    # Collect plain dict rows and build the frame once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call, which made the export quadratic in the number of items.
    rows = []
    # start=1 so the progress message reads 1/N .. N/N.
    for count, job in enumerate(jobs, start=1):
        logger.info(f"Working on job {count}/{number_of_jobs}")
        items = job.items.list
        rows.extend(
            dict(
                qid=item.id,
                label=item.label,
                description=item.description,
            )
            for item in items
        )
        logger.debug(f"Added {len(items)} items to the dataframe")
    df = pd.DataFrame(rows)
    logger.debug(f"Exporting {len(df)} rows to pickle")
    # pandas infers gzip compression from the ".gz" extension.
    pickle_filename = "dataframe.pkl.gz"
    df.to_pickle(pickle_filename)
    console.print(f"Wrote to {pickle_filename} in the current directory")

def export_jobs_to_quickstatements():
logger = logging.getLogger(__name__)
logger.info("Exporting jobs to QuickStatements V1 commands. One file for each job.")
Expand Down Expand Up @@ -148,6 +177,8 @@ def main():
remove_job_pickle(hash=file_hash)
if args.export_job_list_to_quickstatements:
export_jobs_to_quickstatements()
elif args.export_jobs_to_dataframe:
export_jobs_to_dataframe()
elif args.match_existing_main_subjects is True:
match_existing_main_subjects(args=args, jobs=jobs)
elif args.sparql:
Expand Down
6 changes: 6 additions & 0 deletions src/helpers/argparse_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,10 @@ def setup_argparse_and_return_args():
help='Export the prepared job list to QuickStatements.',
default=False
)
parser.add_argument(
'--export-jobs-to-dataframe',
action='store_true',
help='Export the prepared job list to a Pandas DataFrame.',
default=False
)
return parser.parse_args()

0 comments on commit ec0209a

Please sign in to comment.