Skip to content

Commit dcad35c

Browse files
Vitalii Bulyzhyn
authored and committed
Add script to import dataset from Azure
1 parent b7ee751 commit dcad35c

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import os
2+
import ast
3+
import pandas as pd
4+
from dotenv import load_dotenv
5+
from diffgram import Project
6+
import time
7+
from azure.storage.blob import BlobServiceClient, __version__
8+
9+
# Pull configuration from a local .env file into the process environment.
load_dotenv()

connect_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING')
blob_url = os.environ.get('BLOCK_CONTAINER_URL')
bucket_name = os.environ.get('BUCKET_NAME')

# Open the container that holds the source images.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client('bandmac')

azure_image_list = container_client.list_blobs()

# Names of every blob in the container; these are the import candidates.
image_list = [blob.name for blob in azure_image_list]
23+
24+
25+
# Diffgram project the images are imported into; credentials come from the
# environment loaded above.
project = Project(
    project_string_id=os.getenv('PROJECT_STRING_ID'),
    client_id=os.getenv('CLIENT_ID'),
    client_secret=os.getenv('CLIENT_SECRET'),
    host=os.getenv('HOST'),
)

# Files already present in the project's "Default" directory.
existing_files = project.directory.get(name="Default").list_files()

# Map each stored filename back to the blob naming scheme, e.g.
# 'img_1.jpg' -> 'img (1).jpg', so it can be compared with the blob names.
already_imported = {
    existing_file.original_filename.replace('_', ' (').replace('.', ').')
    for existing_file in existing_files
}

# Keep only blobs that are not in the project yet. A set lookup avoids the
# repeated list scans of a membership test followed by list.remove().
image_list = [name for name in image_list if name not in already_imported]
39+
40+
# Ask how many of the pending images to import; a blank answer means all.
number_of_images = None
while True:
    number_of_images_to_import = input("How many images do you want to import? (blank to import all) ")
    if number_of_images_to_import == '':
        number_of_images = len(image_list)
        break
    try:
        number_of_images = int(number_of_images_to_import)
    except ValueError:
        # Only a malformed number is retried; Ctrl-C / EOF still abort the
        # script instead of being trapped in the prompt loop.
        print("Invalid input: please input positive number")
        continue
    if number_of_images < 0:
        # A negative value would turn the slice below into "drop the last N".
        print("Invalid input: please input positive number")
        continue
    break

# Limit the work list to the requested number of images.
image_list = image_list[:number_of_images]
55+
56+
# Pick the label schema to import into: reuse a schema with the given name if
# the project has one, otherwise create it. Labels of a reused schema are
# recorded so they are not created again during the import.
new_schema_name = None
imported_label_traker = []
lables_objects = {}
while True:
    new_schema_name = input("Shema name (if shema with this name already exists - it will be used, otherwise new shema will be created): ")
    try:
        shema_list = project.get_label_schema_list()
        matching_schemas = [
            existing_schema
            for existing_schema in shema_list
            if existing_schema.get('name') == new_schema_name
        ]
        if matching_schemas:
            schema = matching_schemas[0]
            # Index the schema's existing labels by name.
            existing_labels = {
                label['label']['name']: label
                for label in project.get_label_list(schema.get('id'))
            }
        else:
            schema = project.new_schema(name=new_schema_name)
            print("Schema successfully created")
            existing_labels = {}
    except Exception as error:
        # Report the real failure and ask again. Ctrl-C / EOF are not caught,
        # so the prompt can always be aborted.
        print(f"Could not load or create schema '{new_schema_name}': {error}")
        continue

    # Publish the results only after the whole lookup succeeded, so a failed
    # attempt never leaves partial entries behind.
    lables_objects.update(existing_labels)
    imported_label_traker.extend(existing_labels)
    break
77+
78+
# Load the annotation CSV; a blank answer selects the default file name.
df = None
while True:
    annotation_file_name = input("What is the name of the file with annotations? (leave blank to use default Overhead-Distribution-Labels.csv)")
    try:
        df = pd.read_csv(annotation_file_name or 'Overhead-Distribution-Labels.csv')
    except (OSError, ValueError) as error:
        # OSError covers a missing/unreadable file; pandas' parser and
        # empty-file errors are ValueError subclasses.
        print(f"Seems like annotation file is not here ({error})")
        continue

    # The import loop reads these two columns; reject a wrong file up front
    # instead of failing in the middle of the import.
    missing_columns = [
        column for column in ('External ID', 'Label') if column not in df.columns
    ]
    if missing_columns:
        print(f"Annotation file is missing required column(s): {missing_columns}")
        continue
    break
89+
90+
# Upload every selected image from ./images together with its polygon
# annotations, creating missing labels in the chosen schema on the fly.
succeslully_imported = []
import_errors = []

for image in image_list:
    # Rows of the annotation file whose 'External ID' equals this blob name.
    image_relate_df = df[df['External ID'] == image]

    instance_list = []

    for raw_label in image_relate_df['Label']:
        # Each 'Label' cell is parsed as a Python literal with an 'objects' list.
        label_dict = ast.literal_eval(raw_label)

        for annotation in label_dict['objects']:
            label_name = annotation['value']

            if label_name in lables_objects:
                label_file = lables_objects[label_name]
            else:
                label_file = project.label_new({'name': label_name}, schema.get('id'))
                # Key by the requested name so the next annotation with the
                # same value finds this label instead of creating a new one.
                lables_objects[label_name] = label_file
                imported_label_traker.append(label_name)

            # Only polygon annotations are imported; other shapes (e.g. lines)
            # are skipped.
            polygon = annotation.get('polygon')
            if polygon:
                instance_list.append({
                    "type": 'polygon',
                    "points": polygon,
                    "label_file_id": label_file['id'],
                })

    try:
        result = project.file.from_local(
            path=f'./images/{image}',
            instance_list=instance_list,
            convert_names_to_label_files=False,
        )
    except Exception as error:
        # Record the failure and carry on with the remaining images; Ctrl-C
        # is not caught, so the run can still be interrupted.
        import_errors.append(image)
        print(f'Error ocurred while importing {image}: {error}')
    else:
        succeslully_imported.append(image)
        print(f'{image} has been imported with {len(instance_list)} annotation(s)')

print(f"Successfully imported {len(succeslully_imported)} file(s): ", succeslully_imported)
# Report the number of failures, not the number of successes.
print(f"Errors while importing {len(import_errors)} file(s): ", import_errors)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
1+
azure-core==1.24.2
2+
azure-storage-blob==12.13.0
13
certifi==2022.6.15
4+
cffi==1.15.1
25
charset-normalizer==2.1.0
6+
cryptography==37.0.4
37
diffgram==0.8.5
48
idna==3.3
59
imageio==2.19.5
10+
isodate==0.6.1
11+
msrest==0.7.1
612
numpy==1.23.1
13+
oauthlib==3.2.0
714
pandas==1.4.3
815
Pillow==9.2.0
16+
pycparser==2.21
917
python-dateutil==2.8.2
1018
python-dotenv==0.20.0
1119
pytz==2022.1
1220
requests==2.28.1
21+
requests-oauthlib==1.3.1
1322
scipy==1.8.1
1423
six==1.16.0
24+
typing_extensions==4.3.0
1525
urllib3==1.26.10

0 commit comments

Comments
 (0)