From 703582da508ce978df366c6ef06f0584b182f1eb Mon Sep 17 00:00:00 2001 From: Aaron Brethorst Date: Tue, 19 Mar 2024 22:10:07 -0700 Subject: [PATCH 1/4] Delete `tmp` test files before and after tests run --- test_e2e.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test_e2e.py b/test_e2e.py index 1a7b97f..4907d5a 100644 --- a/test_e2e.py +++ b/test_e2e.py @@ -5,6 +5,21 @@ import os class TestEndToEnd(unittest.TestCase): + @staticmethod + def remove_test_files(): + _ = subprocess.run(["rm", "-f", "./tests/tmp/regions.xml"]) + _ = subprocess.run(["rm", "-f", "./tests/tmp/regions-v3.xml"]) + _ = subprocess.run(["rm", "-f", "./tests/tmp/regions.json"]) + _ = subprocess.run(["rm", "-f", "./tests/tmp/regions-v3.json"]) + + def setUp(self) -> None: + self.remove_test_files() + return super().setUp() + + def tearDown(self) -> None: + self.remove_test_files() + return super().tearDown() + @staticmethod def read_file_as_string(file_path): # Determine the directory of the current script (test_e2e.py) @@ -36,6 +51,7 @@ def test_update_regions_output(self): regions_json_v3_output = self.read_file_as_string('tests/tmp/regions-v3.json') self.assertEqual(regions_json_v3_fixture, regions_json_v3_output, "regions-v3.json should be identical") + # This allows the test to be run from the command line if __name__ == '__main__': unittest.main() From 8da8d3c0c8a563d90f67eb6bdc00997a2704b2db Mon Sep 17 00:00:00 2001 From: Aaron Brethorst Date: Tue, 19 Mar 2024 22:14:11 -0700 Subject: [PATCH 2/4] Move serializers into their own file --- src/serializers.py | 290 ++++++++++++++++++++++++++++++++++++++++++++ update_regions.py | 293 +-------------------------------------------- 2 files changed, 291 insertions(+), 292 deletions(-) create mode 100644 src/serializers.py diff --git a/src/serializers.py b/src/serializers.py new file mode 100644 index 0000000..b8b7aae --- /dev/null +++ b/src/serializers.py @@ -0,0 +1,290 @@ +from xml.dom.minidom import getDOMImplementation +import json + +class BaseSerializer(object): + _open311BaseUrls = '' + _open311JurisdictionIds = '' + + def __init__(self, **kwargs): + self.pretty = kwargs.get('pretty') + + def _normalize_float(self, val): + return round(float(val), 12) + + def _bounds(self, bounds_str): + def _map_bound(bound): + lat, lon, latSpan, lonSpan = bound.split(':') + return { + 'lat': self._normalize_float(lat), + 'lon': self._normalize_float(lon), + 'latSpan': self._normalize_float(latSpan), + 'lonSpan': self._normalize_float(lonSpan) + } + + if not bounds_str: + return [] + + bounds = bounds_str.split('|') + return [_map_bound(b) for b in bounds] + + def _open311ApiKeys(self, apikeys_str): + def _map_api_keys(apiKey, endpoint, jurisdiction): + return { + 'juridisctionId': jurisdiction or None, + 'apiKey': apiKey, + 'baseUrl': endpoint + } + + if not apikeys_str: + return [] + endpoints = self._open311BaseUrls.split('|') + apikeys = apikeys_str.split('|') + + if not self._open311JurisdictionIds: + return [_map_api_keys(a, e, None) for (a,e) in zip(apikeys, endpoints)] + else: + jurisdictionIds = self._open311JurisdictionIds.split('|') + return [_map_api_keys(a, e, j) for (a,e,j) in zip(apikeys, endpoints, jurisdictionIds)] + + def region_id(self, bundle, value): + bundle['id'] = int(value) + + def active(self, bundle, value): + bundle['active'] = self._bool(value) + + def bounds(self, bundle, value): + bundle['bounds'] = self._bounds(value) + + def open311BaseUrls(self, bundle, value): + self._open311BaseUrls = value + + def open311JurisdictionId(self, bundle, value): + self._open311JurisdictionIds =value + + def open311ApiKeys(self, bundle, value): + bundle['open311Servers'] = self._open311ApiKeys(value) + + def supportsSiriRealtimeApis(self, bundle, value): + bundle['supportsSiriRealtimeApis'] = self._bool(value) + + def supportsObaDiscoveryApis(self, bundle, value): + bundle['supportsObaDiscoveryApis'] = self._bool(value) + + def supportsObaRealtimeApis(self, bundle, value): + bundle['supportsObaRealtimeApis'] = self._bool(value) + + def experimental(self, bundle, value): + bundle['experimental'] = self._bool(value) + + def alter_bundle(self, bundle): + return bundle + + def supportsEmbeddedSocial(self, bundle, value): + bundle['supportsEmbeddedSocial'] = self._bool(value) + + def supportsOtpBikeshare(self, bundle, value): + bundle['supportsOtpBikeshare'] = self._bool(value) + + def travelBehaviorDataCollectionEnabled(self, bundle, value): + bundle['travelBehaviorDataCollectionEnabled'] = self._bool(value) + + def enrollParticipantsInStudy(self, bundle, value): + bundle['enrollParticipantsInStudy'] = self._bool(value) + +class JSONSerializer(BaseSerializer): + def __init__(self, **kwargs): + super(JSONSerializer, self).__init__(**kwargs) + + def _bool(self, value): + if value == 'TRUE': + return True + elif value == 'FALSE': + return False + else: + raise ValueError("Invalid value for active") + + # The base URLs want to be serialized as null in JSON, + # not the empty string. + + def obaBaseUrl(self, bundle, value): + bundle['obaBaseUrl'] = value or None + + def siriBaseUrl(self, bundle, value): + bundle['siriBaseUrl'] = value or None + + def stopInfoUrl(self, bundle, value): + bundle['stopInfoUrl'] = value or None + + def otpBaseUrl(self, bundle, value): + bundle['otpBaseUrl'] = value or None + + def otpContactEmail(self, bundle, value): + bundle['otpContactEmail'] = value or None + + def paymentAndroidAppId(self, bundle, value): + bundle['paymentAndroidAppId'] = value or None + + def paymentWarningTitle(self, bundle, value): + bundle['paymentWarningTitle'] = value or None + + def paymentWarningBody(self, bundle, value): + bundle['paymentWarningBody'] = value or None + + def paymentiOSAppStoreIdentifier(self, bundle, value): + bundle['paymentiOSAppStoreIdentifier'] = value or None + + def paymentiOSAppUrlScheme(self, bundle, value): + bundle['paymentiOSAppUrlScheme'] = value or None + + def alter_list_bundle(self, list_bundle, version): + return { + 'version': version, + 'code': 200, + 'text': 'OK', + 'data': {'list': list_bundle} + } + + def serialize(self, list_bundle): + if self.pretty: + return json.dumps(list_bundle, indent=2) + else: + return json.dumps(list_bundle) + + +class XMLSerializer(BaseSerializer): + def __init__(self, **kwargs): + super(XMLSerializer, self).__init__(**kwargs) + self.dom = getDOMImplementation() + self.doc = self.dom.createDocument(None, "response", None) + + def _bool(self, value): + if value in ('TRUE', 'FALSE'): + return value.lower() + else: + raise ValueError("Invalid value for active") + + def _node(self, tag, text): + elem = self.doc.createElement(tag) + text_elem = self.doc.createTextNode(str(text)) + elem.appendChild(text_elem) + return elem + + def bounds(self, bundle, value): + bounds = self._bounds(value) + # We need to convert this to a element here + l = self.doc.createElement('bounds') + + for b in bounds: + elem = self.doc.createElement('bound') + for key, value in b.items(): + child = self._node(key, value) + elem.appendChild(child) + l.appendChild(elem) + + bundle['bounds'] = l + + def open311ApiKeys(self, bundle, value): + open311ApiKeys = self._open311ApiKeys(value) + # We need to convert this to a element here + l = self.doc.createElement('open311Servers') + + for o in open311ApiKeys: + elem = self.doc.createElement('open311Server') + for key, value in o.items(): + if not value: + value = '' + + child = self._node(key, value) + elem.appendChild(child) + + l.appendChild(elem) + + bundle['open311ApiKeys'] = l + + def alter_bundle(self, bundle): + # Each item in the bundle should be converted to a text + # node, if it isn't already a node (which it would be for bounds) + elem = self.doc.createElement('region') + for key, value in bundle.items(): + if key == 'bounds' or key == 'open311ApiKeys': + elem.appendChild(value) + else: + child = self._node(key, value) + elem.appendChild(child) + + return elem + + def alter_list_bundle(self, list_bundle, version): + top = self.doc.documentElement + top.appendChild(self._node('version', version)) + top.appendChild(self._node('code', 200)) + top.appendChild(self._node('text', 'OK')) + + # Create the data and list nodes + data = self.doc.createElement('data') + l = self.doc.createElement('list') + for elem in list_bundle: + l.appendChild(elem) + + data.appendChild(l) + top.appendChild(data) + return list_bundle + + def serialize(self, list_bundle): + if self.pretty: + return self.doc.toprettyxml(indent=' ') + else: + return self.doc.toxml() + + +def serialize(regions, serializer, version): + """ + This does the following: + 1. Map each spreadsheet name into a suitable python function. + 2. Use the serializer class to bundle up the spreadhsheet values + into a serializable form (with proper typing, etc) + 3. Allow the serializer to add any other header information, etc. + 4. Convert to the serialized format. + """ + def _key(name): + # Remove the '?' and replace _ with a space, convert to title + name = name.replace('?', '').replace('_', ' ').title() + # Convert to lower camel + name = name[0].lower() + name[1:] + # Keep "iOS" + name = name.replace('Ios', 'iOS') + # Remove spaces + return name.replace(' ', '') + + def _to_bundle(index, region): + bundle = {} + serializer.region_id(bundle, index) + for k, v in region.items(): + key = _key(k) + f = getattr(serializer, key, None) + if f: + f(bundle, v) + else: + # Convenience for strings, and things that need no conversion + bundle[key] = v + bundle = serializer.alter_bundle(bundle) + return bundle + + list_bundle = [] + for i, region in enumerate(regions): + try: + # For v2, don't include experimental servers + if version == 2: + if region["Experimental?"] == 'TRUE': + print("Skipping %s as experimental for v2" % (region["Region_Name"])) + else: + list_bundle.append(_to_bundle(i, region)) + else: + list_bundle.append(_to_bundle(i, region)) + except ValueError: + print("*** ERROR: Invalid region specification: " + str(region), file=sys.stderr) + raise + + list_bundle = serializer.alter_list_bundle(list_bundle, version) + serialized = serializer.serialize(list_bundle) + return serialized diff --git a/update_regions.py b/update_regions.py index d774f9d..1065735 100755 --- a/update_regions.py +++ b/update_regions.py @@ -18,13 +18,11 @@ import argparse import csv -import json import os import io import sys import urllib.request, urllib.error, urllib.parse -from xml.dom.minidom import getDOMImplementation - +from src.serializers import JSONSerializer, XMLSerializer, serialize DESCRIPTION = """Generate and update OneBusAway regions list. @@ -79,295 +77,6 @@ def get_csv_from_url(url): return list(reader) -class BaseSerializer(object): - _open311BaseUrls = '' - _open311JurisdictionIds = '' - - def __init__(self, **kwargs): - self.pretty = kwargs.get('pretty') - - def _normalize_float(self, val): - return round(float(val), 12) - - def _bounds(self, bounds_str): - def _map_bound(bound): - lat, lon, latSpan, lonSpan = bound.split(':') - return { - 'lat': self._normalize_float(lat), - 'lon': self._normalize_float(lon), - 'latSpan': self._normalize_float(latSpan), - 'lonSpan': self._normalize_float(lonSpan) - } - - if not bounds_str: - return [] - - bounds = bounds_str.split('|') - return [_map_bound(b) for b in bounds] - - def _open311ApiKeys(self, apikeys_str): - def _map_api_keys(apiKey, endpoint, jurisdiction): - return { - 'juridisctionId': jurisdiction or None, - 'apiKey': apiKey, - 'baseUrl': endpoint - } - - if not apikeys_str: - return [] - endpoints = self._open311BaseUrls.split('|') - apikeys = apikeys_str.split('|') - - if not self._open311JurisdictionIds: - return [_map_api_keys(a, e, None) for (a,e) in zip(apikeys, endpoints)] - else: - jurisdictionIds = self._open311JurisdictionIds.split('|') - return [_map_api_keys(a, e, j) for (a,e,j) in zip(apikeys, endpoints, jurisdictionIds)] - - def region_id(self, bundle, value): - bundle['id'] = int(value) - - def active(self, bundle, value): - bundle['active'] = self._bool(value) - - def bounds(self, bundle, value): - bundle['bounds'] = self._bounds(value) - - def open311BaseUrls(self, bundle, value): - self._open311BaseUrls = value - - def open311JurisdictionId(self, bundle, value): - self._open311JurisdictionIds =value - - def open311ApiKeys(self, bundle, value): - bundle['open311Servers'] = self._open311ApiKeys(value) - - def supportsSiriRealtimeApis(self, bundle, value): - bundle['supportsSiriRealtimeApis'] = self._bool(value) - - def supportsObaDiscoveryApis(self, bundle, value): - bundle['supportsObaDiscoveryApis'] = self._bool(value) - - def supportsObaRealtimeApis(self, bundle, value): - bundle['supportsObaRealtimeApis'] = self._bool(value) - - def experimental(self, bundle, value): - bundle['experimental'] = self._bool(value) - - def alter_bundle(self, bundle): - return bundle - - def supportsEmbeddedSocial(self, bundle, value): - bundle['supportsEmbeddedSocial'] = self._bool(value) - - def supportsOtpBikeshare(self, bundle, value): - bundle['supportsOtpBikeshare'] = self._bool(value) - - def travelBehaviorDataCollectionEnabled(self, bundle, value): - bundle['travelBehaviorDataCollectionEnabled'] = self._bool(value) - - def enrollParticipantsInStudy(self, bundle, value): - bundle['enrollParticipantsInStudy'] = self._bool(value) - -class JSONSerializer(BaseSerializer): - def __init__(self, **kwargs): - super(JSONSerializer, self).__init__(**kwargs) - - def _bool(self, value): - if value == 'TRUE': - return True - elif value == 'FALSE': - return False - else: - raise ValueError("Invalid value for active") - - # The base URLs want to be serialized as null in JSON, - # not the empty string. - - def obaBaseUrl(self, bundle, value): - bundle['obaBaseUrl'] = value or None - - def siriBaseUrl(self, bundle, value): - bundle['siriBaseUrl'] = value or None - - def stopInfoUrl(self, bundle, value): - bundle['stopInfoUrl'] = value or None - - def otpBaseUrl(self, bundle, value): - bundle['otpBaseUrl'] = value or None - - def otpContactEmail(self, bundle, value): - bundle['otpContactEmail'] = value or None - - def paymentAndroidAppId(self, bundle, value): - bundle['paymentAndroidAppId'] = value or None - - def paymentWarningTitle(self, bundle, value): - bundle['paymentWarningTitle'] = value or None - - def paymentWarningBody(self, bundle, value): - bundle['paymentWarningBody'] = value or None - - def paymentiOSAppStoreIdentifier(self, bundle, value): - bundle['paymentiOSAppStoreIdentifier'] = value or None - - def paymentiOSAppUrlScheme(self, bundle, value): - bundle['paymentiOSAppUrlScheme'] = value or None - - def alter_list_bundle(self, list_bundle, version): - return { - 'version': version, - 'code': 200, - 'text': 'OK', - 'data': {'list': list_bundle} - } - - def serialize(self, list_bundle): - if self.pretty: - return json.dumps(list_bundle, indent=2) - else: - return json.dumps(list_bundle) - - -class XMLSerializer(BaseSerializer): - def __init__(self, **kwargs): - super(XMLSerializer, self).__init__(**kwargs) - self.dom = getDOMImplementation() - self.doc = self.dom.createDocument(None, "response", None) - - def _bool(self, value): - if value in ('TRUE', 'FALSE'): - return value.lower() - else: - raise ValueError("Invalid value for active") - - def _node(self, tag, text): - elem = self.doc.createElement(tag) - text_elem = self.doc.createTextNode(str(text)) - elem.appendChild(text_elem) - return elem - - def bounds(self, bundle, value): - bounds = self._bounds(value) - # We need to convert this to a element here - l = self.doc.createElement('bounds') - - for b in bounds: - elem = self.doc.createElement('bound') - for key, value in b.items(): - child = self._node(key, value) - elem.appendChild(child) - l.appendChild(elem) - - bundle['bounds'] = l - - def open311ApiKeys(self, bundle, value): - open311ApiKeys = self._open311ApiKeys(value) - # We need to convert this to a element here - l = self.doc.createElement('open311Servers') - - for o in open311ApiKeys: - elem = self.doc.createElement('open311Server') - for key, value in o.items(): - if not value: - value = '' - - child = self._node(key, value) - elem.appendChild(child) - - l.appendChild(elem) - - bundle['open311ApiKeys'] = l - - def alter_bundle(self, bundle): - # Each item in the bundle should be converted to a text - # node, if it isn't already a node (which it would be for bounds) - elem = self.doc.createElement('region') - for key, value in bundle.items(): - if key == 'bounds' or key == 'open311ApiKeys': - elem.appendChild(value) - else: - child = self._node(key, value) - elem.appendChild(child) - - return elem - - def alter_list_bundle(self, list_bundle, version): - top = self.doc.documentElement - top.appendChild(self._node('version', version)) - top.appendChild(self._node('code', 200)) - top.appendChild(self._node('text', 'OK')) - - # Create the data and list nodes - data = self.doc.createElement('data') - l = self.doc.createElement('list') - for elem in list_bundle: - l.appendChild(elem) - - data.appendChild(l) - top.appendChild(data) - return list_bundle - - def serialize(self, list_bundle): - if self.pretty: - return self.doc.toprettyxml(indent=' ') - else: - return self.doc.toxml() - - -def serialize(regions, serializer, version): - """ - This does the following: - 1. Map each spreadsheet name into a suitable python function. - 2. Use the serializer class to bundle up the spreadhsheet values - into a serializable form (with proper typing, etc) - 3. Allow the serializer to add any other header information, etc. - 4. Convert to the serialized format. - """ - def _key(name): - # Remove the '?' and replace _ with a space, convert to title - name = name.replace('?', '').replace('_', ' ').title() - # Convert to lower camel - name = name[0].lower() + name[1:] - # Keep "iOS" - name = name.replace('Ios', 'iOS') - # Remove spaces - return name.replace(' ', '') - - def _to_bundle(index, region): - bundle = {} - serializer.region_id(bundle, index) - for k, v in region.items(): - key = _key(k) - f = getattr(serializer, key, None) - if f: - f(bundle, v) - else: - # Convenience for strings, and things that need no conversion - bundle[key] = v - bundle = serializer.alter_bundle(bundle) - return bundle - - list_bundle = [] - for i, region in enumerate(regions): - try: - # For v2, don't include experimental servers - if version == 2: - if region["Experimental?"] == 'TRUE': - print("Skipping %s as experimental for v2" % (region["Region_Name"])) - else: - list_bundle.append(_to_bundle(i, region)) - else: - list_bundle.append(_to_bundle(i, region)) - except ValueError: - print("*** ERROR: Invalid region specification: " + str(region), file=sys.stderr) - raise - - list_bundle = serializer.alter_list_bundle(list_bundle, version) - serialized = serializer.serialize(list_bundle) - return serialized - - def output_stdout(_fmt, output, _opts): print(output) From c411edcc295795e89aab2aba2ce3dd2a7c3ed520 Mon Sep 17 00:00:00 2001 From: Aaron Brethorst Date: Tue, 19 Mar 2024 22:40:56 -0700 Subject: [PATCH 3/4] Move `output_*` methods into writers.py --- .gitignore | 11 +++++----- src/writers.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ test_e2e.py | 9 +++++++- update_regions.py | 56 +---------------------------------------------- 4 files changed, 68 insertions(+), 62 deletions(-) create mode 100644 src/writers.py diff --git a/.gitignore b/.gitignore index 27b4350..149be94 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ __pycache__ -./regions.xml -./regions.json -./regions-v3.xml -./regions-v3.json -tests/tmp/*.json -tests/tmp/*.xml \ No newline at end of file +regions.xml +regions.json +regions-v3.xml +regions-v3.json +!tests/fixtures/regions-v3.json \ No newline at end of file diff --git a/src/writers.py b/src/writers.py new file mode 100644 index 0000000..94ac28d --- /dev/null +++ b/src/writers.py @@ -0,0 +1,54 @@ +import os + +def output_stdout(_fmt, output, _opts): + print(output) + + +def output_file(fmt, output, opts, version): + if version == 2: + file_name = 'regions.' + else: + file_name = 'regions-v' + str(version) + '.' + path = os.path.join(opts.output_dir, file_name + fmt) + print('Writing %s' % path) + with open(path, 'w+') as f: + f.write(output) + + +def output_s3(fmt, output, opts, version): + try: + from boto.s3.connection import S3Connection + from boto.s3.key import Key + except ImportError: + print("Unable to publish to S3: Boto not installed.", file=sys.stderr) + return + + # Verify the S3 configuration + bucket_name = opts.output_s3 + access_key = opts.aws_access_key or os.environ.get('AWS_ACCESS_KEY_ID') + secret_key = opts.aws_secret_key or os.environ.get('AWS_SECRET_ACCESS_KEY') + + if not access_key or not secret_key: + print("We need an AWS access key and AWS secret key", file=sys.stderr) + return + + conn = S3Connection(access_key, secret_key) + bucket = conn.get_bucket(bucket_name) + k = Key(bucket) + if version == 2: + file_name = 'regions.' + else: + file_name = 'regions-v' + version + '.' + + k.key = file_name + fmt + + # Set a content type + content_types = { + 'json': 'application/json', + 'xml': 'text/xml' + } + if fmt in content_types: + k.set_metadata('Content-Type', content_types[fmt]) + + print('Writing %s/%s' % (bucket_name, k.key)) + k.set_contents_from_string(output) \ No newline at end of file diff --git a/test_e2e.py b/test_e2e.py index 4907d5a..922488e 100644 --- a/test_e2e.py +++ b/test_e2e.py @@ -4,6 +4,8 @@ import unittest import os +DEBUG_COMMAND_OUTPUT = False + class TestEndToEnd(unittest.TestCase): @staticmethod def remove_test_files(): @@ -33,7 +35,12 @@ def read_file_as_string(file_path): def test_update_regions_output(self): command = ["python", "update_regions.py", "--input-file", "./tests/fixtures/server_directory.csv", "--output-dir", "./tests/tmp", "--pretty"] - _ = subprocess.run(command, capture_output=True, text=True) + out = subprocess.run(command, capture_output=True, text=True) + + if DEBUG_COMMAND_OUTPUT: + print(f"stdout:\n{out.stdout}") + print(f"stderr:\n{out.stdout}") + print(out.stderr) regions_xml_fixture = self.read_file_as_string('tests/fixtures/regions.xml') regions_xml_output = self.read_file_as_string('tests/tmp/regions.xml') diff --git a/update_regions.py b/update_regions.py index 1065735..f5d2532 100755 --- a/update_regions.py +++ b/update_regions.py @@ -18,11 +18,11 @@ import argparse import csv -import os import io import sys import urllib.request, urllib.error, urllib.parse from src.serializers import JSONSerializer, XMLSerializer, serialize +from src.writers import output_file, output_s3, output_stdout DESCRIPTION = """Generate and update OneBusAway regions list. @@ -77,60 +77,6 @@ def get_csv_from_url(url): return list(reader) -def output_stdout(_fmt, output, _opts): - print(output) - - -def output_file(fmt, output, opts, version): - if version == 2: - file_name = 'regions.' - else: - file_name = 'regions-v' + str(version) + '.' - path = os.path.join(opts.output_dir, file_name + fmt) - print('Writing %s' % path) - with open(path, 'w+') as f: - f.write(output) - - -def output_s3(fmt, output, opts, version): - try: - from boto.s3.connection import S3Connection - from boto.s3.key import Key - except ImportError: - print("Unable to publish to S3: Boto not installed.", file=sys.stderr) - return - - # Verify the S3 configuration - bucket_name = opts.output_s3 - access_key = opts.aws_access_key or os.environ.get('AWS_ACCESS_KEY_ID') - secret_key = opts.aws_secret_key or os.environ.get('AWS_SECRET_ACCESS_KEY') - - if not access_key or not secret_key: - print("We need an AWS access key and AWS secret key", file=sys.stderr) - return - - conn = S3Connection(access_key, secret_key) - bucket = conn.get_bucket(bucket_name) - k = Key(bucket) - if version == 2: - file_name = 'regions.' - else: - file_name = 'regions-v' + version + '.' - - k.key = file_name + fmt - - # Set a content type - content_types = { - 'json': 'application/json', - 'xml': 'text/xml' - } - if fmt in content_types: - k.set_metadata('Content-Type', content_types[fmt]) - - print('Writing %s/%s' % (bucket_name, k.key)) - k.set_contents_from_string(output) - - def main(): class Options(object): pass From 034f87ba04767455e6d55ee16a299fdbd156ac21 Mon Sep 17 00:00:00 2001 From: Aaron Brethorst Date: Tue, 19 Mar 2024 22:46:34 -0700 Subject: [PATCH 4/4] Move csv reader functionality into its own file --- src/csv_readers.py | 23 +++++++++++++++++++++++ update_regions.py | 25 +------------------------ 2 files changed, 24 insertions(+), 24 deletions(-) create mode 100644 src/csv_readers.py diff --git a/src/csv_readers.py b/src/csv_readers.py new file mode 100644 index 0000000..209fe85 --- /dev/null +++ b/src/csv_readers.py @@ -0,0 +1,23 @@ +import urllib.request, urllib.error, urllib.parse +import csv +import io + +# In both reading cases, we need to read all data into memory +# because there's no way of seeking back in a CSV reader. + +def get_csv_from_file(path): + with open(path) as f: + reader = csv.DictReader(open(path)) + return list(reader) + + +def get_csv_from_url(url): + "Returns a list of regions from the specified spreadsheet URL." + opener = urllib.request.build_opener( + urllib.request.HTTPCookieProcessor(), + urllib.request.HTTPRedirectHandler() + ) + urllib.request.install_opener(opener) + response = urllib.request.urlopen(url) + reader = csv.DictReader(io.TextIOWrapper(response, encoding='utf-8')) + return list(reader) \ No newline at end of file diff --git a/update_regions.py b/update_regions.py index f5d2532..e542eb4 100755 --- a/update_regions.py +++ b/update_regions.py @@ -17,10 +17,8 @@ """ import argparse -import csv -import io import sys -import urllib.request, urllib.error, urllib.parse +from src.csv_readers import get_csv_from_file, get_csv_from_url from src.serializers import JSONSerializer, XMLSerializer, serialize from src.writers import output_file, output_s3, output_stdout @@ -56,27 +54,6 @@ help='Make the output files pretty and readable with indentation.') -# In both reading cases, we need to read all data into memory -# because there's no way of seeking back in a CSV reader. - -def get_csv_from_file(path): - with open(path) as f: - reader = csv.DictReader(open(path)) - return list(reader) - - -def get_csv_from_url(url): - "Returns a list of regions from the specified spreadsheet URL." - opener = urllib.request.build_opener( - urllib.request.HTTPCookieProcessor(), - urllib.request.HTTPRedirectHandler() - ) - urllib.request.install_opener(opener) - response = urllib.request.urlopen(url) - reader = csv.DictReader(io.TextIOWrapper(response, encoding='utf-8')) - return list(reader) - - def main(): class Options(object): pass