From 2d62c026535ea6f60fb33f681713c3b962be4d72 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Wed, 27 Apr 2022 14:03:29 -0400 Subject: [PATCH 1/7] allow pipe errors, fix some migrations warehouse --- bin/migrate/migrate_ati_2019_11.py | 2 +- bin/migrate/migrate_ati_2020_12.py | 8 +- bin/migrate/migrate_contracts_2019_11.py | 115 ++++++++++++----------- bin/warehouse/migrate_all.py | 15 +-- 4 files changed, 74 insertions(+), 66 deletions(-) diff --git a/bin/migrate/migrate_ati_2019_11.py b/bin/migrate/migrate_ati_2019_11.py index 31b8bc848..3a86d682c 100755 --- a/bin/migrate/migrate_ati_2019_11.py +++ b/bin/migrate/migrate_ati_2019_11.py @@ -21,7 +21,7 @@ out_csv.writerow(line) -except KeyError: +except (KeyError, ValueError): if 'warehouse' in sys.argv: sys.exit(85) else: diff --git a/bin/migrate/migrate_ati_2020_12.py b/bin/migrate/migrate_ati_2020_12.py index 48a838262..d6564ea87 100755 --- a/bin/migrate/migrate_ati_2020_12.py +++ b/bin/migrate/migrate_ati_2020_12.py @@ -77,4 +77,10 @@ line['comments_en'] = '' line['comments_fr'] = '' - out_csv.writerow(line) + try: + out_csv.writerow(line) + except (KeyError, ValueError): + if 'warehouse' in sys.argv: + sys.exit(85) + else: + raise diff --git a/bin/migrate/migrate_contracts_2019_11.py b/bin/migrate/migrate_contracts_2019_11.py index c348db692..219bb44f4 100755 --- a/bin/migrate/migrate_contracts_2019_11.py +++ b/bin/migrate/migrate_contracts_2019_11.py @@ -19,59 +19,66 @@ out_csv.writeheader() -for line in in_csv: - line['vendor_postal_code'] = '' - line['buyer_name'] = '' - line['contract_value'] = line['contract_value'].replace('$','').replace(',','') - line['original_value'] = line['original_value'].replace('$','').replace(',','') - line['amendment_value'] = line['amendment_value'].replace('$','').replace(',','') - line['trade_agreement'] = '' - line['land_claims'] = '' - line['commodity_type'] = line.pop('commodity_type_code') - line['solicitation_procedure'] = line.pop('solicitation_procedure_code') - line['limited_tendering_reason'] = line.pop('limited_tendering_reason_code') - line['trade_agreement_exceptions'] = line.pop('exemption_code') - line['aboriginal_business_incidental'] = line.pop('aboriginal_business') - line['aboriginal_business'] = '' - line['intellectual_property'] = line.pop('intellectual_property_code') - line['contracting_entity'] = line.pop('standing_offer') - line['instrument_type'] = line.pop('document_type_code') - line['country_of_vendor'] = line.pop('country_of_origin') - line['number_of_bids'] = '' - line['article_6_exceptions'] = '' - line['award_criteria'] = '' - line['socioeconomic_indicator'] = '' - line['user_modified'] = '*' # special "we don't know" value +try: + for line in in_csv: + line['vendor_postal_code'] = '' + line['buyer_name'] = '' + line['contract_value'] = line['contract_value'].replace('$','').replace(',','') + line['original_value'] = line['original_value'].replace('$','').replace(',','') + line['amendment_value'] = line['amendment_value'].replace('$','').replace(',','') + line['trade_agreement'] = '' + line['land_claims'] = '' + line['commodity_type'] = line.pop('commodity_type_code') + line['solicitation_procedure'] = line.pop('solicitation_procedure_code') + line['limited_tendering_reason'] = line.pop('limited_tendering_reason_code') + line['trade_agreement_exceptions'] = line.pop('exemption_code') + line['aboriginal_business_incidental'] = line.pop('aboriginal_business') + line['aboriginal_business'] = '' + line['intellectual_property'] = line.pop('intellectual_property_code') + line['contracting_entity'] = line.pop('standing_offer') + line['instrument_type'] = line.pop('document_type_code') + line['country_of_vendor'] = line.pop('country_of_origin') + line['number_of_bids'] = '' + line['article_6_exceptions'] = '' + line['award_criteria'] = '' + line['socioeconomic_indicator'] = '' + line['user_modified'] = '*' # special "we don't know" value - # clean up some common mistakes - if line['contracting_entity'] == 'PSPCSOSA': # code changed in 2016! - line['contracting_entity'] = 'PWSOSA' - if line['contracting_entity'] in ('N/A', 'N', 'NUL'): - line['contracting_entity'] = '' - line['country_of_vendor'] = line['country_of_vendor'].upper().strip() - if line['country_of_vendor'].startswith('CAN'): - line['country_of_vendor'] = 'CA' - if line['country_of_vendor'].startswith('USA'): - line['country_of_vendor'] = 'US' - line['instrument_type'] = line['instrument_type'].upper().strip() - line['intellectual_property'] = line['intellectual_property'].upper().strip() - if ':' in line['intellectual_property']: - line['intellectual_property'] = line['intellectual_property'].split(':')[0] - if line['intellectual_property'] == 'N/A': - line['intellectual_property'] = 'NA' - line['commodity_type'] = line['commodity_type'].upper().strip() - if line['commodity_type'].startswith('GOOD'): - line['commodity_type'] = 'G' - if line['commodity_type'].startswith('SERVICE'): - line['commodity_type'] = 'S' - if ':' in line['commodity_type']: - line['commodity_type'] = line['commodity_type'].split(':')[0] - line['solicitation_procedure'] = line['solicitation_procedure'].upper().strip() - if ':' in line['solicitation_procedure']: - line['solicitation_procedure'] = line['solicitation_procedure'].split(':')[0] - if line['solicitation_procedure'].startswith('NON-COMPET'): - line['solicitation_procedure'] = 'TN' - if ':' in line['limited_tendering_reason']: - line['limited_tendering_reason'] = line['limited_tendering_reason'].split(':')[0] + # clean up some common mistakes + if line['contracting_entity'] == 'PSPCSOSA': # code changed in 2016! + line['contracting_entity'] = 'PWSOSA' + if line['contracting_entity'] in ('N/A', 'N', 'NUL'): + line['contracting_entity'] = '' + line['country_of_vendor'] = line['country_of_vendor'].upper().strip() + if line['country_of_vendor'].startswith('CAN'): + line['country_of_vendor'] = 'CA' + if line['country_of_vendor'].startswith('USA'): + line['country_of_vendor'] = 'US' + line['instrument_type'] = line['instrument_type'].upper().strip() + line['intellectual_property'] = line['intellectual_property'].upper().strip() + if ':' in line['intellectual_property']: + line['intellectual_property'] = line['intellectual_property'].split(':')[0] + if line['intellectual_property'] == 'N/A': + line['intellectual_property'] = 'NA' + line['commodity_type'] = line['commodity_type'].upper().strip() + if line['commodity_type'].startswith('GOOD'): + line['commodity_type'] = 'G' + if line['commodity_type'].startswith('SERVICE'): + line['commodity_type'] = 'S' + if ':' in line['commodity_type']: + line['commodity_type'] = line['commodity_type'].split(':')[0] + line['solicitation_procedure'] = line['solicitation_procedure'].upper().strip() + if ':' in line['solicitation_procedure']: + line['solicitation_procedure'] = line['solicitation_procedure'].split(':')[0] + if line['solicitation_procedure'].startswith('NON-COMPET'): + line['solicitation_procedure'] = 'TN' + if ':' in line['limited_tendering_reason']: + line['limited_tendering_reason'] = line['limited_tendering_reason'].split(':')[0] - out_csv.writerow(line) + out_csv.writerow(line) + +except KeyError: + if 'warehouse' in sys.argv: + sys.exit(85) + else: + raise diff --git a/bin/warehouse/migrate_all.py b/bin/warehouse/migrate_all.py index bea1e091b..2040f24af 100644 --- a/bin/warehouse/migrate_all.py +++ b/bin/warehouse/migrate_all.py @@ -22,8 +22,8 @@ def run_scripts(infile, outfile, matching_files): proc_array.append(subprocess.Popen(["python", matching_files[0], 'warehouse'], stdin=subprocess.PIPE, stdout=outfile)) else: - for matching_file in matching_files: - print("Starting process: {0} with {1}".format(matching_files.index(matching_file), matching_file)) + for i, matching_file in enumerate(matching_files): + print("Starting process: {0} with {1}".format(i, matching_file)) if len(proc_array) == 0: proc_array.append(subprocess.Popen(['python', matching_file, 'warehouse'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)) elif matching_file == matching_files[-1]: @@ -37,15 +37,10 @@ def run_scripts(infile, outfile, matching_files): infile.seek(0) - try: # writing, flushing, whatever goes here - for chunk in iter(lambda: infile.read(1000), ''): - proc_array[0].stdin.write(chunk) - proc_array[0].stdin.close() - except IOError as e: - # skip if it's just a SIGPIPE signal exception - if e.errno != errno.EPIPE: - raise + for chunk in iter(lambda: infile.read(1000), ''): + proc_array[0].stdin.write(chunk) + proc_array[0].stdin.close() while proc_array[0].poll() is None: pass From b3fa257db6ae2176be75720eea196620801fba5b Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Wed, 27 Apr 2022 15:42:54 -0400 Subject: [PATCH 2/7] migrate-all: consultations fix --- ..._consultations_2019_02.py => migrate_consultations_2019_02.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bin/migrate/{migrate_legacy_consultations_2019_02.py => migrate_consultations_2019_02.py} (100%) diff --git a/bin/migrate/migrate_legacy_consultations_2019_02.py b/bin/migrate/migrate_consultations_2019_02.py similarity index 100% rename from bin/migrate/migrate_legacy_consultations_2019_02.py rename to bin/migrate/migrate_consultations_2019_02.py From d5aec8ae33d585c36eb4fcc1f4680cde91a7187b Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 3 May 2022 12:48:16 -0400 Subject: [PATCH 3/7] standardize writing BOM in migrate scripts --- bin/migrate/migrate_ati_2020_12.py | 1 + bin/migrate/migrate_ati_nil_2019_11.py | 1 + bin/migrate/migrate_consultations_2018_07.py | 2 +- bin/migrate/migrate_consultations_2019_02.py | 4 +++- bin/migrate/migrate_consultations_2019_04.py | 5 +++-- bin/migrate/migrate_contracts_2016_06.py | 2 +- bin/migrate/migrate_contracts_2016_08.py | 2 +- bin/migrate/migrate_contractsa_2019_06.py | 1 + bin/migrate/migrate_grants_2018_04.py | 2 +- bin/migrate/migrate_grants_2019_11.py | 1 + bin/migrate/migrate_grants_nil_2018_04.py | 1 + bin/migrate/migrate_hospitalityq_2016_09.py | 3 +-- bin/migrate/migrate_hospitalityq_2019_06.py | 2 +- bin/migrate/migrate_hospitalityq_nil_2019_06.py | 1 + bin/migrate/migrate_inventory_2017_04.py | 1 + bin/migrate/migrate_reclassification_2020_01.py | 1 + bin/migrate/migrate_reclassification_nil_2020_01.py | 1 + bin/migrate/migrate_service_2018_12.py | 1 + bin/migrate/migrate_travela_2018_10.py | 1 + bin/migrate/migrate_travelq_2019_06.py | 6 +++--- bin/migrate/migrate_travelq_nil_2019_06.py | 1 + bin/migrate/migrate_wrongdoing_2020_01.py | 1 + bin/warehouse/migrate_all.py | 11 +++++++---- 23 files changed, 35 insertions(+), 17 deletions(-) diff --git a/bin/migrate/migrate_ati_2020_12.py b/bin/migrate/migrate_ati_2020_12.py index d6564ea87..edb1a30dc 100755 --- a/bin/migrate/migrate_ati_2020_12.py +++ b/bin/migrate/migrate_ati_2020_12.py @@ -6,6 +6,7 @@ import json assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') if 'report' in sys.argv: diff --git a/bin/migrate/migrate_ati_nil_2019_11.py b/bin/migrate/migrate_ati_nil_2019_11.py index 316e5babf..e1b6d1b31 100755 --- a/bin/migrate/migrate_ati_nil_2019_11.py +++ b/bin/migrate/migrate_ati_nil_2019_11.py @@ -8,6 +8,7 @@ FIELDNAMES = 'year,month,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_consultations_2018_07.py b/bin/migrate/migrate_consultations_2018_07.py index e8f48a5ea..f3b49b411 100755 --- a/bin/migrate/migrate_consultations_2018_07.py +++ b/bin/migrate/migrate_consultations_2018_07.py @@ -8,10 +8,10 @@ FIELDNAMES = 'registration_number,publishable,partner_departments,subjects,title_en,title_fr,description_en,description_fr,target_participants_and_audience,start_date,end_date,status,profile_page_en,profile_page_fr,report_available_online,report_link_en,report_link_fr,contact_email,policy_program_lead_email,remarks_en,remarks_fr,high_profile,rationale,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') -sys.stdout.write(codecs.BOM_UTF8) out_csv.writeheader() RATIONALE = { diff --git a/bin/migrate/migrate_consultations_2019_02.py b/bin/migrate/migrate_consultations_2019_02.py index 8e4d98892..17f92a7df 100755 --- a/bin/migrate/migrate_consultations_2019_02.py +++ b/bin/migrate/migrate_consultations_2019_02.py @@ -119,6 +119,9 @@ def dt(legacy_date): return d.strftime('%Y-%m-%d') def main(): + assert sys.stdin.read(3) == codecs.BOM_UTF8 + sys.stdout.write(codecs.BOM_UTF8) + try: in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') @@ -153,7 +156,6 @@ def main(): except KeyError as err: sys.stderr.write(line['ID'] + ': ' + str(err) + '\n') - sys.stdout.write(codecs.BOM_UTF8) out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() for o in sorted(orgs): diff --git a/bin/migrate/migrate_consultations_2019_04.py b/bin/migrate/migrate_consultations_2019_04.py index 404dcefac..339408ffd 100644 --- a/bin/migrate/migrate_consultations_2019_04.py +++ b/bin/migrate/migrate_consultations_2019_04.py @@ -2,7 +2,7 @@ """ This is an "update" script not a migrate script because it only outputs records to be updated in-place, not a complete migrated -copy of the data +copy of the data, unless "warehouse" parameter is given """ import unicodecsv @@ -10,6 +10,7 @@ import codecs assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=in_csv.fieldnames, encoding='utf-8') @@ -18,7 +19,7 @@ try: for line in in_csv: val = line['subjects'].split(',') - if 'AP' not in val: + if 'AP' not in val and 'warehouse' not in sys.argv: continue line['subjects'] = u','.join( 'IP' if v == 'AP' else v for v in val) diff --git a/bin/migrate/migrate_contracts_2016_06.py b/bin/migrate/migrate_contracts_2016_06.py index 3583b19ea..89bbc5eb3 100755 --- a/bin/migrate/migrate_contracts_2016_06.py +++ b/bin/migrate/migrate_contracts_2016_06.py @@ -7,9 +7,9 @@ FIELDNAMES = 'unique_identifier,ref_number,vendor_name,contract_date,economic_object_code,description_en,description_fr,contract_period_start,delivery_date,contract_value,original_value,amendment_value,comments_en,comments_fr,additional_comments_en,additional_comments_fr,agreement_type_code,commodity_type_code,commodity_code,country_of_origin,solicitation_procedure_code,limited_tendering_reason_code,derogation_code,aboriginal_business,intellectual_property_code,potential_commercial_exploitation,former_public_servant,standing_offer,standing_offer_number,document_type_code,reporting_period,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') -sys.stdout.write(codecs.BOM_UTF8) out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() diff --git a/bin/migrate/migrate_contracts_2016_08.py b/bin/migrate/migrate_contracts_2016_08.py index 691d02f50..28e953e01 100755 --- a/bin/migrate/migrate_contracts_2016_08.py +++ b/bin/migrate/migrate_contracts_2016_08.py @@ -7,9 +7,9 @@ FIELDNAMES = 'reference_number,procurement_id,vendor_name,contract_date,economic_object_code,description_en,description_fr,contract_period_start,delivery_date,contract_value,original_value,amendment_value,comments_en,comments_fr,additional_comments_en,additional_comments_fr,agreement_type_code,commodity_type_code,commodity_code,country_of_origin,solicitation_procedure_code,limited_tendering_reason_code,derogation_code,aboriginal_business,intellectual_property_code,potential_commercial_exploitation,former_public_servant,standing_offer,standing_offer_number,document_type_code,reporting_period,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') -sys.stdout.write(codecs.BOM_UTF8) out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() diff --git a/bin/migrate/migrate_contractsa_2019_06.py b/bin/migrate/migrate_contractsa_2019_06.py index 9194373f0..61bf55290 100755 --- a/bin/migrate/migrate_contractsa_2019_06.py +++ b/bin/migrate/migrate_contractsa_2019_06.py @@ -8,6 +8,7 @@ FIELDNAMES = 'year,contract_goods_number_of,contracts_goods_original_value,contracts_goods_amendment_value,contract_service_number_of,contracts_service_original_value,contracts_service_amendment_value,contract_construction_number_of,contracts_construction_original_value,contracts_construction_amendment_value,acquisition_card_transactions_number_of,acquisition_card_transactions_total_value,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_grants_2018_04.py b/bin/migrate/migrate_grants_2018_04.py index ae1b8c377..1947a8f9e 100755 --- a/bin/migrate/migrate_grants_2018_04.py +++ b/bin/migrate/migrate_grants_2018_04.py @@ -11,10 +11,10 @@ FIELDNAMES = 'ref_number,amendment_number,amendment_date,agreement_type,recipient_type,recipient_business_number,recipient_legal_name,recipient_operating_name,research_organization_name,recipient_country,recipient_province,recipient_city,recipient_postal_code,federal_riding_name_en,federal_riding_name_fr,federal_riding_number,prog_name_en,prog_name_fr,prog_purpose_en,prog_purpose_fr,agreement_title_en,agreement_title_fr,agreement_number,agreement_value,foreign_currency_type,foreign_currency_value,agreement_start_date,agreement_end_date,coverage,description_en,description_fr,naics_identifier,expected_results_en,expected_results_fr,additional_information_en,additional_information_fr,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') -sys.stdout.write(codecs.BOM_UTF8) out_csv.writeheader() def norm_date(d): diff --git a/bin/migrate/migrate_grants_2019_11.py b/bin/migrate/migrate_grants_2019_11.py index 9cab1d201..8b0c4ebe2 100755 --- a/bin/migrate/migrate_grants_2019_11.py +++ b/bin/migrate/migrate_grants_2019_11.py @@ -5,6 +5,7 @@ import codecs assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) reader = csv.DictReader(sys.stdin) writer = csv.DictWriter(sys.stdout, fieldnames=reader.fieldnames) diff --git a/bin/migrate/migrate_grants_nil_2018_04.py b/bin/migrate/migrate_grants_nil_2018_04.py index 21be9299b..cead04ee1 100755 --- a/bin/migrate/migrate_grants_nil_2018_04.py +++ b/bin/migrate/migrate_grants_nil_2018_04.py @@ -11,6 +11,7 @@ FIELDNAMES = 'fiscal_year,quarter,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_hospitalityq_2016_09.py b/bin/migrate/migrate_hospitalityq_2016_09.py index 29e58ceea..60f467a31 100755 --- a/bin/migrate/migrate_hospitalityq_2016_09.py +++ b/bin/migrate/migrate_hospitalityq_2016_09.py @@ -7,10 +7,9 @@ FIELDNAMES = 'ref_number,name,title_en,title_fr,description_en,description_fr,start_date,end_date,employee_attendees,guest_attendees,location_en,location_fr,total,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') - -sys.stdout.write(codecs.BOM_UTF8) out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() diff --git a/bin/migrate/migrate_hospitalityq_2019_06.py b/bin/migrate/migrate_hospitalityq_2019_06.py index dbd8d80a0..256d6face 100755 --- a/bin/migrate/migrate_hospitalityq_2019_06.py +++ b/bin/migrate/migrate_hospitalityq_2019_06.py @@ -19,7 +19,7 @@ } assert sys.stdin.read(3) == codecs.BOM_UTF8 - +sys.stdout.write(codecs.BOM_UTF8) def norm_date(d, prefer_format): "handle some creative thinking about what constitutes a date" diff --git a/bin/migrate/migrate_hospitalityq_nil_2019_06.py b/bin/migrate/migrate_hospitalityq_nil_2019_06.py index 75ce7f199..f0ebc19ac 100755 --- a/bin/migrate/migrate_hospitalityq_nil_2019_06.py +++ b/bin/migrate/migrate_hospitalityq_nil_2019_06.py @@ -7,6 +7,7 @@ FIELDNAMES = 'year,month,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_inventory_2017_04.py b/bin/migrate/migrate_inventory_2017_04.py index 45a80d583..fa76af079 100755 --- a/bin/migrate/migrate_inventory_2017_04.py +++ b/bin/migrate/migrate_inventory_2017_04.py @@ -7,6 +7,7 @@ FIELDNAMES = ['ref_number', 'title_en', 'title_fr', 'description_en', 'description_fr', 'publisher_en', 'publisher_fr', 'date_published', 'language', 'size', 'eligible_for_release', 'program_alignment_architecture_en', 'program_alignment_architecture_fr', 'date_released', 'portal_url_en', 'portal_url_fr', 'user_votes', 'owner_org', 'owner_org_title'] assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_reclassification_2020_01.py b/bin/migrate/migrate_reclassification_2020_01.py index 84055af8e..4d1c790a3 100644 --- a/bin/migrate/migrate_reclassification_2020_01.py +++ b/bin/migrate/migrate_reclassification_2020_01.py @@ -8,6 +8,7 @@ FIELDNAMES = 'ref_number,job_number,pos_number,date,pos_title_en,pos_title_fr,old_class_group_code,old_class_level,new_class_group_code,new_class_level,old_differential,new_differential,reason_en,reason_fr,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_reclassification_nil_2020_01.py b/bin/migrate/migrate_reclassification_nil_2020_01.py index fcff48c44..7fabbdad3 100644 --- a/bin/migrate/migrate_reclassification_nil_2020_01.py +++ b/bin/migrate/migrate_reclassification_nil_2020_01.py @@ -8,6 +8,7 @@ FIELDNAMES = 'year,quarter,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_service_2018_12.py b/bin/migrate/migrate_service_2018_12.py index b1cb8e6ce..f3dab796e 100755 --- a/bin/migrate/migrate_service_2018_12.py +++ b/bin/migrate/migrate_service_2018_12.py @@ -8,6 +8,7 @@ FIELDNAMES = 'fiscal_yr,service_id,service_name_en,service_name_fr,external_internal,service_type,special_designations,service_description_en,service_description_fr,authority_en,authority_fr,service_url_en,service_url_fr,program_name_en,program_name_fr,program_id_code,client_target_groups,service_fee,cra_business_number,use_of_sin,online_applications,web_visits_info_service,calls_received,in_person_applications,email_applications,fax_applications,postal_mail_applications,e_registration,e_authentication,e_application,e_decision,e_issuance,e_feedback,client_feedback,special_remarks_en,special_remarks_fr,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_travela_2018_10.py b/bin/migrate/migrate_travela_2018_10.py index af5d7b492..51359c623 100755 --- a/bin/migrate/migrate_travela_2018_10.py +++ b/bin/migrate/migrate_travela_2018_10.py @@ -9,6 +9,7 @@ FIELDNAMES = ['year', 'mandate_description_en', 'mandate_description_fr', 'operational_activities_kdollars', 'key_stakeholders_kdollars', 'training_kdollars', 'other_kdollars', 'internal_governance_kdollars', 'non_public_servants_kdollars', 'public_servants_kdollars', 'hospitality_kdollars', 'conference_fees_kdollars', 'minister_kdollars', 'travel_compared_fiscal_year_en', 'travel_compared_fiscal_year_fr', 'hospitality_compared_fiscal_year_en', 'hospitality_compared_fiscal_year_fr', 'conference_fees_compared_fiscal_year_en', 'conference_fees_compared_fiscal_year_fr', 'minister_compared_fiscal_year_en', 'minister_compared_fiscal_year_fr', 'record_created', 'record_modified', 'user_modified', 'owner_org', 'owner_org_title'] assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_travelq_2019_06.py b/bin/migrate/migrate_travelq_2019_06.py index 05ce1b48d..a02e66e2c 100755 --- a/bin/migrate/migrate_travelq_2019_06.py +++ b/bin/migrate/migrate_travelq_2019_06.py @@ -18,9 +18,6 @@ 'dnd-mdn': '%d/%m/%Y', } -assert sys.stdin.read(3) == codecs.BOM_UTF8 - - def norm_date(d, prefer_format): "handle some creative thinking about what constitutes a date" d = d.replace('.', '-').strip() @@ -48,6 +45,9 @@ def norm_date(d, prefer_format): return from_excel(int(d)) +assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) + in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() diff --git a/bin/migrate/migrate_travelq_nil_2019_06.py b/bin/migrate/migrate_travelq_nil_2019_06.py index 4ac89bf26..d42d10071 100755 --- a/bin/migrate/migrate_travelq_nil_2019_06.py +++ b/bin/migrate/migrate_travelq_nil_2019_06.py @@ -7,6 +7,7 @@ FIELDNAMES = 'year,month,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/migrate/migrate_wrongdoing_2020_01.py b/bin/migrate/migrate_wrongdoing_2020_01.py index 787d39708..498d0b351 100644 --- a/bin/migrate/migrate_wrongdoing_2020_01.py +++ b/bin/migrate/migrate_wrongdoing_2020_01.py @@ -8,6 +8,7 @@ FIELDNAMES = 'ref_number,file_id_number,file_id_date,case_description_en,case_description_fr,findings_conclusions,recommendations_corrective_measures_en,recommendations_corrective_measures_fr,record_created,record_modified,user_modified,owner_org,owner_org_title'.split(',') assert sys.stdin.read(3) == codecs.BOM_UTF8 +sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') diff --git a/bin/warehouse/migrate_all.py b/bin/warehouse/migrate_all.py index 2040f24af..028b2fd7f 100644 --- a/bin/warehouse/migrate_all.py +++ b/bin/warehouse/migrate_all.py @@ -15,6 +15,8 @@ def run_scripts(infile, outfile, matching_files): # Remove any dead procedures from previous calls to this method if proc_array: + for p in proc_array: + p.wait() proc_array[:] = [] # Covers the case where there is only one migration script for the given type @@ -42,8 +44,8 @@ def run_scripts(infile, outfile, matching_files): proc_array[0].stdin.write(chunk) proc_array[0].stdin.close() - while proc_array[0].poll() is None: - pass + for p in proc_array: + p.wait() inpath = sys.argv[1] @@ -56,12 +58,12 @@ def run_scripts(infile, outfile, matching_files): # Check if the input csv file is a *-nil data type, and retrieve only the nil migration scripts if "nil" not in pd_type: - search_pd = '*_{0}_*'.format(pd_type) + search_pd = 'migrate_{0}_*'.format(pd_type) matching_files = sorted([mf for mf in glob.glob('../migrate/'+search_pd) if "nil" not in mf]) else: pd_type = pd_type.replace("-", "_") - search_pd = '*_{0}_*'.format(pd_type) + search_pd = 'migrate_{0}_*'.format(pd_type) matching_files = sorted(glob.glob('../migrate/' + search_pd)) while matching_files: @@ -74,6 +76,7 @@ def run_scripts(infile, outfile, matching_files): raise infile.seek(0) outfile.seek(0) + print('skipping {0}'.format(matching_files[0])) matching_files = matching_files[1:] # if there are no migration scripts to run, write the csv file to output file else: From 0ae4c18b6bdefbb988ebe5e52e58f34721ec4f4d Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Wed, 4 May 2022 12:02:25 -0400 Subject: [PATCH 4/7] hospitality migration: warehouse fix --- bin/migrate/migrate_hospitalityq_2019_06.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/migrate/migrate_hospitalityq_2019_06.py b/bin/migrate/migrate_hospitalityq_2019_06.py index 256d6face..6cd9dd333 100755 --- a/bin/migrate/migrate_hospitalityq_2019_06.py +++ b/bin/migrate/migrate_hospitalityq_2019_06.py @@ -76,7 +76,7 @@ def error(msg, value=''): line['start_date'] = norm_date( line['start_date'], ORG_PREFER_FORMAT.get(line['owner_org'])) - if line['start_date'] >= datetime(2019, 6, 21): + if 'warehouse' not in sys.argv and line['start_date'] >= datetime(2019, 6, 21): error('start_date in the future', line['start_date']) continue except ValueError: From a973a24e2a447ef3a3de30db14f12ed033db53b0 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Mon, 9 May 2022 16:40:31 -0400 Subject: [PATCH 5/7] ati pd migrate fix --- bin/migrate/migrate_ati_2020_12.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/migrate/migrate_ati_2020_12.py b/bin/migrate/migrate_ati_2020_12.py index edb1a30dc..f1f0e224e 100755 --- a/bin/migrate/migrate_ati_2020_12.py +++ b/bin/migrate/migrate_ati_2020_12.py @@ -9,6 +9,10 @@ sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') + +if 'warehouse' in sys.argv and 'comments_en' in in_csv.fieldnames: + sys.exit(85) + if 'report' in sys.argv: out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=['old_disposition', 'new_disposition']) seen = set() From 88f4ea3d40673d8c22ac53040e29a51d3076d36d Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Mon, 9 May 2022 17:05:07 -0400 Subject: [PATCH 6/7] pd warehouse contracts nil fix --- bin/migrate/migrate_contracts_nil_2019_11.py | 7 ++++++- bin/warehouse/migrate_all.py | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bin/migrate/migrate_contracts_nil_2019_11.py b/bin/migrate/migrate_contracts_nil_2019_11.py index fbcfb32be..babc641a7 100755 --- a/bin/migrate/migrate_contracts_nil_2019_11.py +++ b/bin/migrate/migrate_contracts_nil_2019_11.py @@ -10,10 +10,15 @@ sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') + +if 'warehouse' in sys.argv and 'user_modified' in in_csv.fieldnames: + sys.exit(85) + out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=FIELDNAMES, encoding='utf-8') out_csv.writeheader() for line in in_csv: - line['user_modified'] = '*' # special "we don't know" value + if 'warehouse' not in sys.argv: + line['user_modified'] = '*' # special "we don't know" value out_csv.writerow(line) diff --git a/bin/warehouse/migrate_all.py b/bin/warehouse/migrate_all.py index 028b2fd7f..a3e3eef6d 100644 --- a/bin/warehouse/migrate_all.py +++ b/bin/warehouse/migrate_all.py @@ -44,7 +44,11 @@ def run_scripts(infile, outfile, matching_files): proc_array[0].stdin.write(chunk) proc_array[0].stdin.close() - for p in proc_array: + if proc_array[0].wait() == 85: + # sometimes IOError needs a little help + raise IOError + + for p in proc_array[1:]: p.wait() From 665b0db3b112bad3237e8ecaeb447e0678c73bd2 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 10 May 2022 12:34:21 -0400 Subject: [PATCH 7/7] consultations warehouse fix: one migration not a migration --- ...consultations_2019_02.py => import_consultations_2019_02.py} | 0 bin/migrate/migrate_consultations_2019_04.py | 2 ++ 2 files changed, 2 insertions(+) rename bin/migrate/{migrate_consultations_2019_02.py => import_consultations_2019_02.py} (100%) diff --git a/bin/migrate/migrate_consultations_2019_02.py b/bin/migrate/import_consultations_2019_02.py similarity index 100% rename from bin/migrate/migrate_consultations_2019_02.py rename to bin/migrate/import_consultations_2019_02.py diff --git a/bin/migrate/migrate_consultations_2019_04.py b/bin/migrate/migrate_consultations_2019_04.py index 339408ffd..7571fca9f 100644 --- a/bin/migrate/migrate_consultations_2019_04.py +++ b/bin/migrate/migrate_consultations_2019_04.py @@ -13,6 +13,8 @@ sys.stdout.write(codecs.BOM_UTF8) in_csv = unicodecsv.DictReader(sys.stdin, encoding='utf-8') +if not in_csv.fieldnames and 'warehouse' in sys.argv: + sys.exit(85) out_csv = unicodecsv.DictWriter(sys.stdout, fieldnames=in_csv.fieldnames, encoding='utf-8') out_csv.writeheader()