Skip to content

Latest commit

 

History

History
182 lines (147 loc) · 7.17 KB

temp.md

File metadata and controls

182 lines (147 loc) · 7.17 KB
!ls
%cd /content/gdrive/"My Drive"/data/

# 혹시 이전에 스크립트릴 실행했을 수 있으니, 다시 실행시키려면 그 전에 미리 잘 지워줍시다.
!rm -r helmetclassification 

# 경로들을 만들어 줍시다.
!mkdir helmetclassification
!mkdir helmetclassification/helmet
!mkdir helmetclassification/non_helmet

# 압축 해제!
!unzip helmet_crop.zip -d /content/gdrive/"My Drive"/data/helmetclassification/helmet
!unzip nonhelmet_crop.zip -d /content/gdrive/"My Drive"/data/helmetclassification/non_helmet
!unzip nonhelmet2.zip -d /content/gdrive/"My Drive"/data/helmetclassification/non_helmet

# 현재 디렉터리 이동
%cd /content/gdrive/"My Drive"/data/helmetclassification/

# /content/gdrive/"My Drive"/data/helmetclassification/ 에 train 과 test 폴더를 만들어 줍니다.
!mkdir train
!mkdir train/helmet
!mkdir train/non_helmet
!mkdir test
!mkdir test/helmet
!mkdir test/non_helmet

# /content/gdrive/"My Drive"/data/helmetclassification/helmet 경로부터 작업해 줍시다.
%cd /content/gdrive/"My Drive"/data/helmetclassification/helmet

helmet_data_directory = os.getcwd()
file_list = os.listdir(helmet_data_directory)
print('Count of file :', len(file_list))

import random
sampled_helmet_file_list = random.sample(file_list, int(len(file_list) * 0.4))
print('Count of sampled file :', len(sampled_helmet_file_list))

# 이제 sample 된 데이터를 /content/gdrive/"My Drive"/data/helmetclassification/test/helmet 으로 옮겨 줄 차례입니다.
import shutil
target_dir = os.path.join(os.path.join(data_dir, 'test'), 'helmet')
for f in sampled_helmet_file_list :
  current_file = os.path.join(helmet_data_directory, f)
  shutil.move(current_file, os.path.join(target_dir, f))
  # print(current_file,'\t->\t',os.path.join(target_dir, f))

# 이제 test 데이터를 모두 옮겼으니, train 데이터도 모두 옮겨 줍시다.
%cd /content/gdrive/"My Drive"/data/helmetclassification/
!mv helmet/ train/
!ls

# /content/gdrive/"My Drive"/data/helmetclassification/non_helmet 경로를 작업할 차례입니다.
%cd /content/gdrive/"My Drive"/data/helmetclassification/non_helmet

# 해당 경로는, 다양한 경로로부터 사람의 이미지를 취득했기에, 이를 모두 모아주는 작업이 필요합니다.
file_list = os.listdir(os.getcwd())
print(file_list)

# 내부에 MACOS 용 파일과 자잘한 더미 파일이 존재하니 이를 제거해 줍니다.
# 또한, pedestrian dataset 은 noise 가 있을 수 있기 때문에, 이를 모두 제거해야 합니다.
!rm -r __MACOSX
!rm -r pedestrian
!ls

%cd /content/gdrive/"My Drive"/data/helmetclassification/non_helmet

file_list = []
for (root, dirs, files) in os.walk(os.getcwd()):
  print("root : " + root)
  if len(files) > 0:
    for file_name in files:
      if file_name[-3:] in ['jpg', 'png'] :
        file_full_path = os.path.join(root, file_name)
        # print('append to file_list : ', file_full_path)
        file_list.append(file_full_path)


import random
sampled_nonhelmet_file_list = random.sample(file_list, int(len(file_list) * 0.4))
print('Count of sampled file :', len(sampled_nonhelmet_file_list), 'out of', len(file_list))

# 이제 sample 된 데이터를 /content/gdrive/"My Drive"/data/helmetclassification/test/helmet 으로 옮겨 줄 차례입니다.
import shutil
target_dir = os.path.join(os.path.join(data_dir, 'test'), 'non_helmet')
for cnt, f in enumerate(sampled_nonhelmet_file_list) :
  # 새로운 임시 이름 (cnt) + 확장자 (.xxx) 로 새로운 이름을 부여
  postfix = str(cnt) + f[-4:] 
  shutil.move(f, os.path.join(target_dir, postfix))
  # print('[test data]', f,'\t->\t',os.path.join(target_dir, postfix))

# test 데이터를 모두 옮겼으니, train 데이터도 모두 옮겨 줍시다.
cnt = 0
target_dir = os.path.join(os.path.join(data_dir, 'train'), 'non_helmet')
for f in file_list:
  if os.path.isfile(f):
    postfix = str(cnt) + f[-4:]
    shutil.move(f, os.path.join(target_dir, postfix))
    # print('[train data]', f,'\t->\t',os.path.join(target_dir, postfix))
    cnt += 1

# 원래 존재하던 폴더를 지워 버립시다.
%cd /content/gdrive/"My Drive"/data/helmetclassification/
!rm -r non_helmet
!ls


# 이미지의 size 를 재기 위해, PIL 을 import 합니다
from PIL import Image


%cd /content/gdrive/"My Drive"/data/helmetclassification/train/helmet/
file_list = os.listdir(os.getcwd())
remove_candidate_helmet_training_file_list = []
cnt = 0
for i, img_name in enumerate(file_list):
  print(i, '/', len(file_list))
  img = Image.open(img_name)
  if img.size[0] * img.size[1] < IMG_WIDTH * 0.4  * IMG_WIDTH * 0.4 : # 이미지의 해상도가 input 해상도의 일정 수준이라면. 
                                                                      # 0.4 로 잡은 이유는, nonhelmet data 의 개수와 비슷하게 맞추기 위함입니다.
    cnt+=1
    remove_candidate_helmet_training_file_list.append(img_name)
print("low resolution ratio : ", cnt, '/', len(file_list))

%cd /content/gdrive/"My Drive"/data/helmetclassification/test/helmet/
file_list = os.listdir(os.getcwd())
remove_candidate_helmet_testing_file_list = []
cnt = 0
for i, img_name in enumerate(file_list):
  print(i, '/', len(file_list))
  img = Image.open(img_name)
  if img.size[0] * img.size[1] < IMG_WIDTH * 0.4  * IMG_WIDTH * 0.4 :
    cnt+=1
    remove_candidate_helmet_testing_file_list.append(img_name)    
print("low resolution ratio : ", cnt, '/', len(file_list))


%cd /content/gdrive/"My Drive"/data/helmetclassification/train/non_helmet/
file_list = os.listdir(os.getcwd())
remove_candidate_nonhelmet_training_file_list = []
cnt = 0
for i, img_name in enumerate(file_list):
  print(i, '/', len(file_list))
  img = Image.open(img_name)
  if img.size[0] * img.size[1] < IMG_WIDTH * 0.4  * IMG_WIDTH * 0.4 : # 이미지의 해상도가 input 해상도의 일정 수준이라면. 
                                                                      # 0.4 로 잡은 이유는, nonhelmet data 의 개수와 비슷하게 맞추기 위함입니다.
    cnt+=1
    remove_candidate_nonhelmet_training_file_list.append(img_name)
print("low resolution ratio : ", cnt, '/', len(file_list))

%cd /content/gdrive/"My Drive"/data/helmetclassification/test/non_helmet/
file_list = os.listdir(os.getcwd())
remove_candidate_nonhelmet_testing_file_list = []
cnt = 0
for i, img_name in enumerate(file_list):
  print(i, '/', len(file_list))
  img = Image.open(img_name)
  if img.size[0] * img.size[1] < IMG_WIDTH * 0.4  * IMG_WIDTH * 0.4 :
    cnt+=1
    remove_candidate_nonhelmet_testing_file_list.append(img_name)    
print("low resolution ratio : ", cnt, '/', len(file_list))


import os
%cd /content/gdrive/"My Drive"/data/helmetclassification/train/helmet/
for img_name in remove_candidate_helmet_training_file_list :
  os.remove(img_name)

%cd /content/gdrive/"My Drive"/data/helmetclassification/test/helmet/
for img_name in remove_candidate_helmet_testing_file_list :
  os.remove(img_name)


%cd /content/gdrive/"My Drive"/data/helmetclassification/train/non_helmet/
for img_name in remove_candidate_nonhelmet_training_file_list :
  os.remove(img_name)

%cd /content/gdrive/"My Drive"/data/helmetclassification/test/non_helmet/
for img_name in remove_candidate_nonhelmet_testing_file_list :
  os.remove(img_name)