From 125fd42fdf2f3f3bcdc323a1d1228171ca4bb866 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Mon, 15 Jul 2024 10:11:03 -0400 Subject: [PATCH] refactor and handling different json docs --- explorer/ee/db_connections/create_sqlite.py | 46 ++++++++ explorer/ee/db_connections/mime.py | 54 +++++++++ explorer/ee/db_connections/type_infer.py | 115 +++++++++++++++++++ explorer/ee/db_connections/utils.py | 118 +------------------- explorer/ee/db_connections/views.py | 52 ++------- explorer/tests/test_db_connection_utils.py | 69 +----------- explorer/tests/test_mime.py | 71 ++++++++++++ explorer/tests/test_type_infer.py | 58 ++++++++++ 8 files changed, 354 insertions(+), 229 deletions(-) create mode 100644 explorer/ee/db_connections/create_sqlite.py create mode 100644 explorer/ee/db_connections/mime.py create mode 100644 explorer/ee/db_connections/type_infer.py create mode 100644 explorer/tests/test_mime.py create mode 100644 explorer/tests/test_type_infer.py diff --git a/explorer/ee/db_connections/create_sqlite.py b/explorer/ee/db_connections/create_sqlite.py new file mode 100644 index 00000000..67d16069 --- /dev/null +++ b/explorer/ee/db_connections/create_sqlite.py @@ -0,0 +1,46 @@ +import logging +import os + +from .mime import is_csv, is_json, is_json_list, is_sqlite +from explorer.ee.db_connections.type_infer import json_to_typed_df, json_list_to_typed_df, csv_to_typed_df +from explorer.ee.db_connections.utils import pandas_to_sqlite + + +logger = logging.getLogger(__name__) + + +def get_bytes_and_name_for_upload(file): + if is_csv(file): + df_fun = csv_to_typed_df + elif is_json_list(file): # must go before is_json, as it is a subset + df_fun = json_list_to_typed_df + elif is_json(file): + df_fun = json_to_typed_df + elif is_sqlite(file): + df_fun = None + else: + logger.error(f'File {file.name} is not a csv, json, or sqlite file.') + raise TypeError(f'File {file.name} is not a csv, json, or sqlite file.') + + try: + return parse_to_sqlite(file, df_fun) + except 
ValueError as e: + logger.error(f'Error parsing {file.name}: {e}') + raise e + + +def parse_to_sqlite(file, df_parser): + f_name = file.name + f_bytes = file.read() + if df_parser: + df = df_parser(f_bytes) + try: + f_bytes = pandas_to_sqlite(df) + except Exception as e: # noqa + logger.exception(f"Exception while parsing file {f_name}: {e}") + raise ValueError("Error while parsing the file.") + # replace the previous extension with .db, as it is now a sqlite file + name, _ = os.path.splitext(f_name) + f_name = f"{name}.db" + return f_bytes, f_name + diff --git a/explorer/ee/db_connections/mime.py b/explorer/ee/db_connections/mime.py new file mode 100644 index 00000000..5ac7bb2f --- /dev/null +++ b/explorer/ee/db_connections/mime.py @@ -0,0 +1,54 @@ +import csv +import json + +# These are 'shallow' checks. They are just to understand if the upload appears valid at surface-level. +# A deeper check will happen when pandas tries to parse the file. +# This is designed to be quick, and simply assigns the right (full) parsing function to the uploaded file. 
+ + +def is_csv(file): + if file.content_type != "text/csv": + return False + try: + # Check if the file content can be read as a CSV + file.seek(0) + sample = file.read(1024).decode('utf-8') + csv.Sniffer().sniff(sample) + file.seek(0) + return True + except csv.Error: + return False + + +def is_json(file): + if file.content_type != "application/json": + return False + if not file.name.lower().endswith('.json'): + return False + return True + + +def is_json_list(file): + if not file.name.lower().endswith('.json'): + return False + file.seek(0) + first_line = file.readline() + file.seek(0) + try: + json.loads(first_line.decode('utf-8')) + return True + except ValueError: + return False + + +def is_sqlite(file): + if file.content_type != "application/x-sqlite3": + return False + try: + # Check if the file starts with the SQLite file header + file.seek(0) + header = file.read(16) + file.seek(0) + return header == b'SQLite format 3\x00' + except Exception as e: # noqa + return False diff --git a/explorer/ee/db_connections/type_infer.py b/explorer/ee/db_connections/type_infer.py new file mode 100644 index 00000000..537a6bdc --- /dev/null +++ b/explorer/ee/db_connections/type_infer.py @@ -0,0 +1,115 @@ +import io +import json + + +MAX_TYPING_SAMPLE_SIZE = 10000 +SHORTEST_PLAUSIBLE_DATE_STRING = 5 + + +def csv_to_typed_df(csv_bytes, delimiter=",", has_headers=True): + import pandas as pd + csv_file = io.BytesIO(csv_bytes) + df = pd.read_csv(csv_file, sep=delimiter, header=0 if has_headers else None) + return df_to_typed_df(df) + + +def json_list_to_typed_df(json_bytes): + import pandas as pd + data = [] + for line in io.BytesIO(json_bytes).readlines(): + data.append(json.loads(line.decode('utf-8'))) + + df = pd.json_normalize(data) + return df_to_typed_df(df) + + +def json_to_typed_df(json_bytes): + import pandas as pd + json_file = io.BytesIO(json_bytes) + json_content = json.load(json_file) + df = pd.json_normalize(json_content) + return df_to_typed_df(df) + + +def 
atof_custom(value): + # Remove any thousands separators and convert the decimal point + if "," in value and "." in value: + if value.index(",") < value.index("."): + # 0,000.00 format + value = value.replace(",", "") + else: + # 0.000,00 format + value = value.replace(".", "").replace(",", ".") + elif "," in value: + # No decimal point, only thousands separator + value = value.replace(",", "") + return float(value) + + + +def df_to_typed_df(df): # noqa + import pandas as pd + from dateutil import parser + try: + + for column in df.columns: + values = df[column].dropna().unique() + if len(values) > MAX_TYPING_SAMPLE_SIZE: + values = pd.Series(values).sample(MAX_TYPING_SAMPLE_SIZE, random_state=42).to_numpy() + + is_date = False + is_integer = True + is_float = True + + for value in values: + try: + float_val = atof_custom(str(value)) + if float_val == int(float_val): + continue # This is effectively an integer + else: + is_integer = False + except ValueError: + is_integer = False + is_float = False + break + + if is_integer: + is_float = False + + if not is_integer and not is_float: + is_date = True + + # The dateutil parser is very aggressive and will interpret many short strings as dates. + # For example "12a" will be interpreted as 12:00 AM on the current date. + # That is not the behavior anyone wants. The shortest plausible date string is e.g. 
1-1-23 + try_parse = [v for v in values if len(str(v)) > SHORTEST_PLAUSIBLE_DATE_STRING] + if len(try_parse) > 0: + for value in try_parse: + try: + parser.parse(str(value)) + except (ValueError, TypeError, OverflowError): + is_date = False + break + else: + is_date = False + + if is_date: + df[column] = pd.to_datetime(df[column], errors="coerce", utc=True) + elif is_integer: + df[column] = df[column].apply(lambda x: int(atof_custom(str(x))) if pd.notna(x) else x) + # If there are NaN / blank values, the column will be converted to float + # Convert it back to integer + df[column] = df[column].astype("Int64") + elif is_float: + df[column] = df[column].apply(lambda x: atof_custom(str(x)) if pd.notna(x) else x) + else: + inferred_type = pd.api.types.infer_dtype(values) + if inferred_type == "integer": + df[column] = pd.to_numeric(df[column], errors="coerce", downcast="integer") + elif inferred_type == "floating": + df[column] = pd.to_numeric(df[column], errors="coerce") + + return df + + except pd.errors.ParserError as e: + return str(e) diff --git a/explorer/ee/db_connections/utils.py b/explorer/ee/db_connections/utils.py index afa2b0e0..35bcefef 100644 --- a/explorer/ee/db_connections/utils.py +++ b/explorer/ee/db_connections/utils.py @@ -1,6 +1,6 @@ from django.db import DatabaseError from django.db.utils import load_backend -import os, json +import os import sqlite3 import io @@ -102,119 +102,3 @@ def pandas_to_sqlite(df, local_path="local_database.db"): # Delete the local SQLite database file # Finally block to ensure we don't litter files around os.remove(local_path) - - -def json_list_to_typed_df(json_bytes): - import pandas as pd - data = [] - for line in io.BytesIO(json_bytes).readlines(): - data.append(json.loads(line.decode('utf-8'))) - - df = pd.json_normalize(data) - return df_to_typed_df(df) - - -MAX_TYPING_SAMPLE_SIZE = 10000 -SHORTEST_PLAUSIBLE_DATE_STRING = 5 - - -def atof_custom(value): - # Remove any thousands separators and convert the decimal 
point - if "," in value and "." in value: - if value.index(",") < value.index("."): - # 0,000.00 format - value = value.replace(",", "") - else: - # 0.000,00 format - value = value.replace(".", "").replace(",", ".") - elif "," in value: - # No decimal point, only thousands separator - value = value.replace(",", "") - return float(value) - - -def csv_to_typed_df(csv_bytes, delimiter=",", has_headers=True): - import pandas as pd - csv_file = io.BytesIO(csv_bytes) - df = pd.read_csv(csv_file, sep=delimiter, header=0 if has_headers else None) - return df_to_typed_df(df) - - -def df_to_typed_df(df): # noqa - import pandas as pd - from dateutil import parser - try: - - for column in df.columns: - values = df[column].dropna().unique() - if len(values) > MAX_TYPING_SAMPLE_SIZE: - values = pd.Series(values).sample(MAX_TYPING_SAMPLE_SIZE, random_state=42).to_numpy() - - is_date = False - is_integer = True - is_float = True - - for value in values: - try: - float_val = atof_custom(str(value)) - if float_val == int(float_val): - continue # This is effectively an integer - else: - is_integer = False - except ValueError: - is_integer = False - is_float = False - break - - if is_integer: - is_float = False - - if not is_integer and not is_float: - is_date = True - - # The dateutil parser is very aggressive and will interpret many short strings as dates. - # For example "12a" will be interpreted as 12:00 AM on the current date. - # That is not the behavior anyone wants. The shortest plausible date string is e.g. 
1-1-23 - try_parse = [v for v in values if len(str(v)) > SHORTEST_PLAUSIBLE_DATE_STRING] - if len(try_parse) > 0: - for value in try_parse: - try: - parser.parse(str(value)) - except (ValueError, TypeError, OverflowError): - is_date = False - break - else: - is_date = False - - if is_date: - df[column] = pd.to_datetime(df[column], errors="coerce", utc=True) - elif is_integer: - df[column] = df[column].apply(lambda x: int(atof_custom(str(x))) if pd.notna(x) else x) - # If there are NaN / blank values, the column will be converted to float - # Convert it back to integer - df[column] = df[column].astype("Int64") - elif is_float: - df[column] = df[column].apply(lambda x: atof_custom(str(x)) if pd.notna(x) else x) - else: - inferred_type = pd.api.types.infer_dtype(values) - if inferred_type == "integer": - df[column] = pd.to_numeric(df[column], errors="coerce", downcast="integer") - elif inferred_type == "floating": - df[column] = pd.to_numeric(df[column], errors="coerce") - - return df - - except pd.errors.ParserError as e: - return str(e) - - -def is_csv(file): - return file.content_type == "text/csv" - - -def is_json(file): - return file.content_type == "application/json" - - -def is_sqlite(file): - return file.content_type == "application/x-sqlite3" diff --git a/explorer/ee/db_connections/views.py b/explorer/ee/db_connections/views.py index 9b7abf79..0811650e 100644 --- a/explorer/ee/db_connections/views.py +++ b/explorer/ee/db_connections/views.py @@ -7,14 +7,9 @@ from explorer.models import DatabaseConnection from explorer.ee.db_connections.utils import ( upload_sqlite, - create_connection_for_uploaded_sqlite, - is_csv, - is_json, - is_sqlite, - csv_to_typed_df, - json_list_to_typed_df, - pandas_to_sqlite + create_connection_for_uploaded_sqlite ) +from explorer.ee.db_connections.create_sqlite import get_bytes_and_name_for_upload from explorer import app_settings from explorer.app_settings import EXPLORER_MAX_UPLOAD_SIZE from explorer.ee.db_connections.forms import 
DatabaseConnectionForm @@ -27,37 +22,6 @@ logger = logging.getLogger(__name__) -def handle_json(file): - f_name = file.name - f_bytes = file.read() - df = json_list_to_typed_df(f_bytes) - try: - f_bytes = pandas_to_sqlite(df) - except Exception as e: # noqa - logger.exception(f"Exception while parsing file {f_name}: {e}") - return JsonResponse({"error": "Error while parsing the file."}, status=400) - - f_name = f_name.replace("json", "db") - return f_bytes, f_name - - -def handle_csv(file): - f_name = file.name - f_bytes = file.read() - df = csv_to_typed_df(f_bytes) - try: - f_bytes = pandas_to_sqlite(df) - except Exception as e: # noqa - logger.exception(f"Exception while parsing file {f_name}: {e}") - return JsonResponse({"error": "Error while parsing the file."}, status=400) - - f_name = f_name.replace("csv", "db") - - -def handle_sqlite(file): - return file.read(), file.name - - class UploadDbView(PermissionRequiredMixin, View): permission_required = "connections_permission" @@ -69,14 +33,12 @@ def post(self, request): friendly = EXPLORER_MAX_UPLOAD_SIZE / (1024 * 1024) return JsonResponse({"error": f"File size exceeds the limit of {friendly} MB"}, status=400) - if is_json(file): - f_bytes, f_name = handle_json(file) - elif is_csv(file): - f_bytes, f_name = handle_csv(file) - elif is_sqlite(file): - f_bytes, f_name = handle_sqlite(file) - else: + try: + f_bytes, f_name = get_bytes_and_name_for_upload(file) + except TypeError as e: return JsonResponse({"error": "File was not csv, json, or sqlite."}, status=400) + except ValueError as e: + return JsonResponse({"error": "Error parsing file."}, status=400) try: s3_path = f"user_dbs/user_{request.user.id}/{f_name}" diff --git a/explorer/tests/test_db_connection_utils.py b/explorer/tests/test_db_connection_utils.py index d4576675..fa6d7cf4 100644 --- a/explorer/tests/test_db_connection_utils.py +++ b/explorer/tests/test_db_connection_utils.py @@ -1,6 +1,5 @@ from django.test import TestCase from unittest import 
skipIf -from django.core.files.uploadedfile import SimpleUploadedFile from explorer.app_settings import EXPLORER_USER_UPLOADS_ENABLED if EXPLORER_USER_UPLOADS_ENABLED: import pandas as pd @@ -11,58 +10,10 @@ from explorer.ee.db_connections.utils import ( get_sqlite_for_connection, create_django_style_connection, - pandas_to_sqlite, - is_csv, - csv_to_typed_df + pandas_to_sqlite ) -def _get_csv(csv_name): - current_script_dir = os.path.dirname(os.path.abspath(__file__)) - file_path = os.path.join(current_script_dir, "csvs", csv_name) - - # Open the file in binary mode and read its contents - with open(file_path, "rb") as file: - csv_bytes = file.read() - - return csv_bytes - - -@skipIf(not EXPLORER_USER_UPLOADS_ENABLED, "User uploads not enabled") -class TestCsvToTypedDf(TestCase): - - def test_mixed_types(self): - df = csv_to_typed_df(_get_csv("mixed.csv")) - self.assertTrue(pd.api.types.is_object_dtype(df["Value1"])) - self.assertTrue(pd.api.types.is_object_dtype(df["Value2"])) - self.assertTrue(pd.api.types.is_object_dtype(df["Value3"])) - - def test_all_types(self): - df = csv_to_typed_df(_get_csv("all_types.csv")) - self.assertTrue(pd.api.types.is_datetime64_ns_dtype(df["Dates"])) - print(df["Integers"].dtype) - self.assertTrue(pd.api.types.is_integer_dtype(df["Integers"])) - self.assertTrue(pd.api.types.is_float_dtype(df["Floats"])) - self.assertTrue(pd.api.types.is_object_dtype(df["Strings"])) - - def test_integer_parsing(self): - df = csv_to_typed_df(_get_csv("integers.csv")) - self.assertTrue(pd.api.types.is_integer_dtype(df["Integers"])) - self.assertTrue(pd.api.types.is_integer_dtype(df["More_integers"])) - - def test_float_parsing(self): - df = csv_to_typed_df(_get_csv("floats.csv")) - self.assertTrue(pd.api.types.is_float_dtype(df["Floats"])) - - def test_date_parsing(self): - - # Will not handle these formats: - # Unix Timestamp: 1706232300 (Seconds since Unix Epoch - 1970-01-01 00:00:00 UTC) - # ISO 8601 Week Number: 2024-W04-3 
(Year-WWeekNumber-Weekday) - # Day of Year: 2024-024 (Year-DayOfYear) - - df = csv_to_typed_df(_get_csv("dates.csv")) - self.assertTrue(pd.api.types.is_datetime64_ns_dtype(df["Dates"])) @skipIf(not EXPLORER_USER_UPLOADS_ENABLED, "User uploads not enabled") @@ -167,7 +118,7 @@ def test_pandas_to_sqlite(self): con = sqlite3.connect(temp_db_path) try: cursor = con.cursor() - cursor.execute("SELECT * FROM data") + cursor.execute("SELECT * FROM data") # noqa rows = cursor.fetchall() # Verify the content of the SQLite database @@ -180,19 +131,3 @@ def test_pandas_to_sqlite(self): os.remove(temp_db_path) -class TestIsCsvFunction(TestCase): - - def test_is_csv_with_csv_file(self): - # Create a SimpleUploadedFile with content_type set to "text/csv" - csv_file = SimpleUploadedFile("test.csv", b"column1,column2\n1,A\n2,B", content_type="text/csv") - self.assertTrue(is_csv(csv_file)) - - def test_is_csv_with_non_csv_file(self): - # Create a SimpleUploadedFile with content_type set to "text/plain" - txt_file = SimpleUploadedFile("test.txt", b"Just some text", content_type="text/plain") - self.assertFalse(is_csv(txt_file)) - - def test_is_csv_with_empty_content_type(self): - # Create a SimpleUploadedFile with an empty content_type - empty_file = SimpleUploadedFile("test.csv", b"column1,column2\n1,A\n2,B", content_type="") - self.assertFalse(is_csv(empty_file)) diff --git a/explorer/tests/test_mime.py b/explorer/tests/test_mime.py new file mode 100644 index 00000000..f4c395cc --- /dev/null +++ b/explorer/tests/test_mime.py @@ -0,0 +1,71 @@ +from django.test import TestCase +from django.core.files.uploadedfile import SimpleUploadedFile +from explorer.ee.db_connections.mime import is_sqlite, is_json, is_json_list, is_csv + + +class TestIsCsvFunction(TestCase): + + def test_is_csv_with_csv_file(self): + # Create a SimpleUploadedFile with content_type set to "text/csv" + csv_file = SimpleUploadedFile("test.csv", b"column1,column2\n1,A\n2,B", content_type="text/csv") + 
self.assertTrue(is_csv(csv_file)) + + def test_is_csv_with_non_csv_file(self): + # Create a SimpleUploadedFile with content_type set to "text/plain" + txt_file = SimpleUploadedFile("test.txt", b"Just some text", content_type="text/plain") + self.assertFalse(is_csv(txt_file)) + + def test_is_csv_with_empty_content_type(self): + # Create a SimpleUploadedFile with an empty content_type + empty_file = SimpleUploadedFile("test.csv", b"column1,column2\n1,A\n2,B", content_type="") + self.assertFalse(is_csv(empty_file)) + + +class TestIsJsonFunction(TestCase): + + def test_is_json_with_valid_json(self): + long_json = '{"key1": "value1", "key2": {"subkey1": "subvalue1", "subkey2": "subvalue2"}, "key3": [1, 2, 3, 4]}' + json_file = SimpleUploadedFile("test.json", long_json.encode('utf-8'), content_type="application/json") + self.assertTrue(is_json(json_file)) + + def test_is_json_with_non_json_file(self): + txt_file = SimpleUploadedFile("test.txt", b'Just some text', content_type="text/plain") + self.assertFalse(is_json(txt_file)) + + def test_is_json_with_wrong_extension(self): + long_json = '{"key1": "value1", "key2": {"subkey1": "subvalue1", "subkey2": "subvalue2"}, "key3": [1, 2, 3, 4]}' + json_file = SimpleUploadedFile("test.txt", long_json.encode('utf-8'), content_type="application/json") + self.assertFalse(is_json(json_file)) + + def test_is_json_with_empty_content_type(self): + long_json = '{"key1": "value1", "key2": {"subkey1": "subvalue1", "subkey2": "subvalue2"}, "key3": [1, 2, 3, 4]}' + json_file = SimpleUploadedFile("test.json", long_json.encode('utf-8'), content_type="") + self.assertFalse(is_json(json_file)) + + +class TestIsJsonListFunction(TestCase): + + def test_is_json_list_with_valid_json_lines(self): + json_lines = b'{"key1": "value1"}\n{"key2": "value2"}\n{"key3": {"subkey1": "subvalue1"}}\n' + json_file = SimpleUploadedFile("test.json", json_lines, content_type="application/json") + self.assertTrue(is_json_list(json_file)) + + def 
test_is_json_list_with_non_json_file(self): + txt_file = SimpleUploadedFile("test.txt", b'Just some text', content_type="text/plain") + self.assertFalse(is_json_list(txt_file)) + + def test_is_json_list_with_invalid_json_lines(self): + # This is actually going to *pass* the check, because it's a shallow file-type check, not a comprehensive + # one. That's ok! This type of error will get caught later, when pandas tries to parse it + invalid_json_lines = b'{"key1": "value1"}\nNot a JSON content\n{"key3": {"subkey1": "subvalue1"}}\n' + json_file = SimpleUploadedFile("test.json", invalid_json_lines, content_type="application/json") + self.assertTrue(is_json_list(json_file)) + + def test_is_json_list_with_wrong_extension(self): + json_lines = b'{"key1": "value1"}\n{"key2": "value2"}\n{"key3": {"subkey1": "subvalue1"}}\n' + json_file = SimpleUploadedFile("test.txt", json_lines, content_type="application/json") + self.assertFalse(is_json_list(json_file)) + + def test_is_json_list_with_empty_file(self): + json_file = SimpleUploadedFile("test.json", b'', content_type="application/json") + self.assertFalse(is_json_list(json_file)) diff --git a/explorer/tests/test_type_infer.py b/explorer/tests/test_type_infer.py new file mode 100644 index 00000000..50c789da --- /dev/null +++ b/explorer/tests/test_type_infer.py @@ -0,0 +1,58 @@ +from django.test import TestCase +from unittest import skipIf +from explorer.app_settings import EXPLORER_USER_UPLOADS_ENABLED +if EXPLORER_USER_UPLOADS_ENABLED: + import pandas as pd +import os +from explorer.ee.db_connections.type_infer import csv_to_typed_df + + + +def _get_csv(csv_name): + current_script_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_script_dir, "csvs", csv_name) + + # Open the file in binary mode and read its contents + with open(file_path, "rb") as file: + csv_bytes = file.read() + + return csv_bytes + + +@skipIf(not EXPLORER_USER_UPLOADS_ENABLED, "User uploads not enabled") +class 
TestCsvToTypedDf(TestCase): + + def test_mixed_types(self): + df = csv_to_typed_df(_get_csv("mixed.csv")) + self.assertTrue(pd.api.types.is_object_dtype(df["Value1"])) + self.assertTrue(pd.api.types.is_object_dtype(df["Value2"])) + self.assertTrue(pd.api.types.is_object_dtype(df["Value3"])) + + def test_all_types(self): + df = csv_to_typed_df(_get_csv("all_types.csv")) + self.assertTrue(pd.api.types.is_datetime64_ns_dtype(df["Dates"])) + print(df["Integers"].dtype) + self.assertTrue(pd.api.types.is_integer_dtype(df["Integers"])) + self.assertTrue(pd.api.types.is_float_dtype(df["Floats"])) + self.assertTrue(pd.api.types.is_object_dtype(df["Strings"])) + + def test_integer_parsing(self): + df = csv_to_typed_df(_get_csv("integers.csv")) + self.assertTrue(pd.api.types.is_integer_dtype(df["Integers"])) + self.assertTrue(pd.api.types.is_integer_dtype(df["More_integers"])) + + def test_float_parsing(self): + df = csv_to_typed_df(_get_csv("floats.csv")) + self.assertTrue(pd.api.types.is_float_dtype(df["Floats"])) + + def test_date_parsing(self): + + # Will not handle these formats: + # Unix Timestamp: 1706232300 (Seconds since Unix Epoch - 1970-01-01 00:00:00 UTC) + # ISO 8601 Week Number: 2024-W04-3 (Year-WWeekNumber-Weekday) + # Day of Year: 2024-024 (Year-DayOfYear) + + df = csv_to_typed_df(_get_csv("dates.csv")) + self.assertTrue(pd.api.types.is_datetime64_ns_dtype(df["Dates"])) + +