Skip to content

Commit b0810c1

Browse files
Enable and support text labels (#168)
1 parent f26406f commit b0810c1

File tree

1 file changed

+2
-16
lines changed

1 file changed

+2
-16
lines changed

aixplain/processes/data_onboarding/process_media_files.py

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,7 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
4646
Returns:
4747
Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows
4848
"""
49-
if metadata.dtype == DataType.LABEL:
50-
assert (
51-
metadata.storage_type != StorageType.TEXT
52-
), f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" can not be stored in text. Label data should be stored in a JSON file.'
53-
else:
49+
if metadata.dtype != DataType.LABEL:
5450
assert (
5551
metadata.storage_type != StorageType.TEXT
5652
), f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" can not be stored in text.'
@@ -108,11 +104,7 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
108104
elif metadata.dtype == DataType.LABEL:
109105
assert (
110106
os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
111-
), f'Data Asset Onboarding Error: JSON file with labels "{media_path}" exceeds the size limit of 25 MB.'
112-
_, extension = os.path.splitext(media_path)
113-
assert (
114-
extension == ".json"
115-
), f'Data Asset Onboarding Error: Label data should be stored in a JSON file and "{media_path}" is not one.'
107+
), f'Data Asset Onboarding Error: Local label file "{media_path}" exceeds the size limit of 25 MB.'
116108
else:
117109
assert (
118110
os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
@@ -123,12 +115,6 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
123115
shutil.copy2(media_path, new_path)
124116
batch.append(fname)
125117
else:
126-
if metadata.dtype == DataType.LABEL:
127-
path = urlparse(media_path).path
128-
_, extension = os.path.splitext(path)
129-
assert (
130-
extension == ".json"
131-
), f'Data Asset Onboarding Error: Label data should be stored in a JSON file and "{media_path}" is not one.'
132118
batch.append(media_path)
133119

134120
# crop intervals can not be used with interval data types

0 commit comments

Comments
 (0)