# organize.py
# Command-line helper that sorts the files of a directory into folders,
# either by file extension or by a shared filename prefix.
import os,sys,shutil,string,argparse
# import urllib.request
def n():
    """Print two newline characters to stdout as a visual spacer."""
    print(end="\n\n")
def dash():
    """Print a horizontal rule made of 60 '=' characters to stdout."""
    print("==" * 30)
# Choose the default location to organize, depending on the platform.
# sys.platform reports "win32" on Windows (never "windows"), so the check
# matches on the prefix; comparing against "windows" made this branch
# unreachable and sent Windows users to the "unsupported" exit.
# NOTE(review): `loc` is not read anywhere else in the visible source, and
# the Linux value is a machine-specific path -- confirm whether it is needed.
if (plat := sys.platform.lower()).startswith("win"):
    loc = str(input("Enter the path to arange >> "))
elif plat.startswith("linux"):
    loc = "/home/haxsys/downloadscopy2/Downloads"
else:
    print("Does not support your stupid platform")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and may be missing (e.g. under `python -S`).
    sys.exit(1)
# # ===================================== Handling Arguments and other involved function============================================
parser = argparse.ArgumentParser()

# Command-line options, declared as (option strings, add_argument keywords)
# and registered in this order so that --help lists them the same way.
_OPTION_TABLE = (
    (('-m', '--file-match'), dict(
        type=str,
        help='accepts a pathlike string print outs a dictionary of which the key is the matching word and the values is the list of matched files')),
    (('-p', '--path'), dict(
        type=str,
        help='accepts a pathlike string (will be used for all as input for other argument')),
    (('-c', '--create_ext_dir'), dict(
        action='store_true',
        help='create folders in relation to the existing file extension in the path and moves the related files to the newly created folder \ne.g example.mp4 will cause an "mp4files" folder to be created ')),
    (('-obr', '--organize_by_relation'), dict(
        action='store_true',
        help='create a folder according to matching files and organize them i.e the ouput from the "-match/-m" ')),
    (('-obe', '--organize_by_extension'), dict(
        action='store_true',
        help='uses the ced function to make extension dirs and uses the obr fuction to arange the files by name')),
    (('-d', '--delete_ext_dir'), dict(
        action='store_true',
        help='Empty the extension dirs created and deletes it')),
    (('-de', '--delete_ext_dirtype'), dict(
        type=str,
        help='Used to delete specific ext_dir e.g mp4->mp4files')),
    (('-ce', '--create_ext_dirtype'), dict(
        type=str,
        help='Used to create specific ext_dir e.g html->htmlfiles')),
)
for _option_strings, _option_kwargs in _OPTION_TABLE:
    parser.add_argument(*_option_strings, **_option_kwargs)

# Options that are currently disabled:
# parser.add_argument('-a', '--about',
# help='Outputs information on the anime in html format')
# parser.add_argument('-ad', '--autodriver', type=str,
# help='Automatically download chromedriver if not installed(works on all platforms)')

# Parse the process command line and echo the parsed values.
args = parser.parse_args()
print(vars(args))

# Short module-level aliases for the parsed option values.
marg, parg = args.file_match, args.path
darg, dearg = args.delete_ext_dir, args.delete_ext_dirtype
carg, cearg = args.create_ext_dir, args.create_ext_dirtype
obrarg, obearg = args.organize_by_relation, args.organize_by_extension
# ======================================================== End of argument handling ===========================================
# def download_words():
# url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
# filename = "allwords.txt"
# if not os.path.exists(filename):
# urllib.request.urlretrieve(url, filename)
# def is_valid(word: str):
# download_words() if "allwords.txt" not in os.listdir(os.getcwd()) else None
# with open("allwords.txt", "r") as f:
# english_words = set(f.read().splitlines())
# return word.lower() in english_words
# Function for creating the extension-based folder
# Longest folder name that is still reported as an extension folder.
_CED_MAX_DIR_NAME_LEN = 20
# File name of this script; it is never moved into an extension folder.
_CED_SCRIPT_NAME = "organize.py"


def _existing_ext_dirs(root: str) -> list:
    """Return the sorted names of the '<ext>files' folders directly in root."""
    return sorted(
        name for name in os.listdir(root)
        if name.endswith("files")
        and len(name) <= _CED_MAX_DIR_NAME_LEN
        and os.path.isdir(os.path.join(root, name))
    )


def _move_into_ext_dir(root: str, file_name: str, ext: str) -> None:
    """Move root/file_name into root/<ext>files, creating the folder first if needed."""
    ext_dir = os.path.join(root, f"{ext}files")
    if not os.path.isdir(ext_dir):
        os.mkdir(ext_dir)
        print(f"Created {ext_dir} in {root}\n\n")
    try:
        shutil.move(os.path.join(root, file_name), ext_dir)
    except shutil.Error as err:
        # shutil.move refuses to overwrite an entry of the same name inside
        # the target folder; report it and leave the file where it is.
        print(f"Skipped {file_name}: {err}", end="\n\n")
        return
    print(f"Moved {file_name} to {ext_dir}", end="\n\n")


def create_ext_dir(path: str, ext_type: str = "", show_existing_dirs: bool = False):
    """
    Create folders for file extensions in the given directory and move files into them.

    A file "example.mp4" is moved into a folder named "mp4files". Hidden files
    (leading '.'), files without an extension and this script itself
    ("organize.py") are left untouched. A file whose name already exists in
    its extension folder is reported and skipped instead of raising.

    Args:
    - path (str): The directory in which the extension folders will be created.
      It is converted to an absolute path, and the process working directory
      is changed to it.
    - ext_type (str, optional): If provided, only files with this extension are
      moved. A leading '.' is accepted ("mp4" and ".mp4" are equivalent).
      Defaults to an empty string.
    - show_existing_dirs (bool, optional): If True, nothing is moved and the
      existing extension folders are listed instead. Defaults to False.

    Returns:
    - If show_existing_dirs is True: a sorted list with the names of the
      existing extension folders.
    - If ext_type is provided: a message string naming the extension folder.
    - Otherwise: a sorted list with the names of all extension folders present
      in the directory after the files were moved.

    Raises:
    - OSError: If path does not exist or a folder cannot be created.
    """
    path = os.path.abspath(path)
    # Kept for backward compatibility: this function has always switched the
    # working directory. Nothing below depends on it.
    os.chdir(path)

    if show_existing_dirs:
        return _existing_ext_dirs(path)

    wanted_ext = ext_type.removeprefix('.') if ext_type else ""

    # Iterate over a snapshot of the directory; the list is never modified
    # while looping, so no file is skipped.
    for file in sorted(os.listdir(path)):
        if file.startswith('.') or file == _CED_SCRIPT_NAME:
            continue
        if not os.path.isfile(os.path.join(path, file)):
            continue
        rawext = os.path.splitext(file)[1].removeprefix('.')
        if not rawext:
            continue
        if wanted_ext and rawext != wanted_ext:
            continue
        _move_into_ext_dir(path, file, rawext)

    if wanted_ext:
        return f"Created '{wanted_ext}files' directory in {path} and moved all {wanted_ext} files into it"
    return _existing_ext_dirs(path)
# Deleting existing extension folders
def _dissolve_ext_dir(root: str, dir_name: str) -> bool:
    """Move every entry of root/dir_name up into root, then remove the empty folder.

    Returns True when the folder was emptied and removed, and False when at
    least one entry stayed behind because root already has one of that name.
    """
    ext_dir = os.path.join(root, dir_name)
    left_behind = False
    for entry in sorted(os.listdir(ext_dir)):
        source = os.path.join(ext_dir, entry)
        if os.path.lexists(os.path.join(root, entry)):
            # Never overwrite, and never delete what could not be moved.
            print(f"kept {source}: {entry} already exists in {root}\n")
            left_behind = True
            continue
        shutil.move(source, root)
        print(f"moved {source} to {root}\n")
    if left_behind:
        print(f"Did not delete {dir_name}: it still contains files\n")
        return False
    # The folder is empty here, so rmdir is enough and cannot destroy data.
    os.rmdir(ext_dir)
    print(f'Deleted {dir_name} from the directory\n')
    return True


def delete_ext_dir(path: str,ext_type: str = "",all: bool = False):
    """
    Delete extension-based folders ("<ext>files") in the specified path.

    The contents of a folder are moved back into path before the folder is
    removed. An entry whose name already exists in path is left where it is,
    and its folder is then kept rather than deleted.

    Args:
    - path: The directory holding the extension folders. It is converted to an
      absolute path, and the process working directory is changed to it.
    - ext_type: The extension keyword of the single folder to delete, e.g.
      "mp4" for "mp4files". A leading '.' is accepted. Default is an empty string.
    - all: Whether to delete every extension-based folder. Only used when
      ext_type is empty. Default is False.

    Returns:
    - None. Progress and results are reported on stdout.

    Only folders directly inside path whose name ends with "files", is longer
    than the bare word "files" and is at most 25 characters long are treated
    as extension folders. When neither ext_type nor all is given, the
    available extension keywords are printed instead of deleting anything.

    Raises:
    - OSError: If path does not exist or a folder cannot be removed.
    """
    path = os.path.abspath(path)
    # Kept for backward compatibility: this function has always switched the
    # working directory. Nothing below depends on it.
    os.chdir(path)

    # Exact mapping keyword -> folder name. Only the trailing "files" is
    # stripped, so "mp" can never resolve to "mp4files".
    ext_dir_dict = {
        name.removesuffix("files"): name
        for name in sorted(os.listdir(path))
        if name.endswith("files")
        and len("files") < len(name) <= 25
        and os.path.isdir(os.path.join(path, name))
    }

    if ext_type:
        ext_type = ext_type.removeprefix('.')
        dir_name = ext_dir_dict.get(ext_type)
        if dir_name is None:
            print(f'There is no "{ext_type}" based dir in {path}, it must have been deleted.')
        else:
            _dissolve_ext_dir(path, dir_name)
    elif all:
        if not ext_dir_dict:
            print("\nNo ext based folder in path\n")
            return
        removed_everything = True
        for dir_name in ext_dir_dict.values():
            if not _dissolve_ext_dir(path, dir_name):
                removed_everything = False
        if removed_everything:
            print(f"\nDeleted all ext based folders\n")
    else:
        if ext_dir_dict:
            print(ext_dir_dict,end="\n\n")
            print("\nChoose an extension keyword to delete the folder by specifying the 'ext_type' argument in the function.\n")
        else:
            print("\nNo ext based folder in path\n")
# file Grouping algorithm
def _shared_name_prefix(first: str, second: str, allowed: str) -> str:
    """Return the lower-cased, stripped leading run of characters both names share.

    Characters are compared case-insensitively. The run ends at the first
    mismatch, at the first character of `first` that is not in `allowed`, or
    once it already holds two spaces.
    """
    shared = []
    for left, right in zip(first, second):
        if left.lower() != right.lower() or left not in allowed:
            break
        if shared.count(" ") > 1:
            break
        shared.append(left.lower())
    # Strip so that a key such as "naruto " does not become a folder name
    # with a trailing space.
    return "".join(shared).strip()


def similar_file_group(path: str,substr_len: int = 3):
    """
    Groups similar filenames in a directory based on a common leading substring.

    Every pair of files is compared. Two files belong to the same group when
    their names start with the same run of ASCII letters and spaces (ignoring
    case) and that run, stripped of surrounding spaces, has at least
    substr_len characters. The run is cut after its second space.

    Only regular files directly inside path are considered; names without a
    '.' and names starting with '.' are ignored. A file may appear in more
    than one group when it shares prefixes of different lengths with
    different files.

    Args:
    - path (str): The path to the directory containing the files.
    - substr_len (int, optional): The minimum length of the common substring
      required to group files together. Values below 1 are treated as 1.
      Defaults to 3.

    Returns:
    dict: Maps each lower-cased common substring to the sorted list of
    filenames that share it.
    """
    files_with_ext = sorted(
        name for name in os.listdir(path)
        if '.' in name
        and not name.startswith('.')
        and os.path.isfile(os.path.join(path, name))
    )
    letters_and_space = string.ascii_letters + " "
    # An empty prefix must never form a group.
    min_len = max(1, substr_len)

    groups = {}
    for index, current in enumerate(files_with_ext):
        # Compare with every later file, including the last one.
        for other in files_with_ext[index + 1:]:
            common_sub = _shared_name_prefix(current, other, letters_and_space)
            if len(common_sub) >= min_len:
                groups.setdefault(common_sub, set()).update((current, other))
    return {keyword: sorted(members) for keyword, members in groups.items()}
def organize_by_relation(dstpath: str = str(os.getcwd()),group_len: int=3):
    """
    Group and organize files in the specified destination path based on their filename.

    The groups come from similar_file_group(dstpath, substr_len=group_len).
    For every group a folder named after the group keyword is created inside
    dstpath (unless it already exists) and the files of the group are moved
    into it.

    Args:
    - dstpath: The path to be organized. The default is the working directory
      at the time this module was loaded.
    - group_len: The minimum length of the shared filename prefix that puts
      files into the same group. The default value is 3.

    Returns:
    - 0 when no group was found, otherwise None. Progress is reported on stdout.

    A file that belongs to several overlapping groups is moved once, into the
    first of its groups; later groups skip it. A group is skipped when its
    keyword is already taken by something that is not a folder, and a file is
    skipped when the group folder already holds an entry of the same name.
    """
    grouped_files_dict = similar_file_group(dstpath,substr_len=group_len)
    if not grouped_files_dict:
        print(f"Files must have been grouped check {dstpath} for the groups\n")
        return 0

    for keyword, file_names in grouped_files_dict.items():
        keywordspath = os.path.join(dstpath, keyword)

        # Files already moved for an earlier, overlapping keyword are gone
        # from dstpath; only handle what is still there.
        pending = [name for name in file_names
                   if os.path.exists(os.path.join(dstpath, name))]
        if not pending:
            continue

        if os.path.isdir(keywordspath):
            print(f"{keywordspath} exists\n")
        elif os.path.exists(keywordspath):
            # Moving onto an existing regular file would overwrite it.
            print(f"skipped group {keyword}: {keywordspath} is not a folder\n")
            continue
        else:
            os.mkdir(keywordspath)
            print(f"created {keywordspath}\n")

        for name in pending:
            try:
                shutil.move(os.path.join(dstpath, name), keywordspath)
            except shutil.Error as err:
                # Raised when the folder already contains an entry of that name.
                print(f"skipped file {name}: {err}\n")
                continue
            print(f"moved file {name} -> {keywordspath}\n")
    print("Files moving complete")
def organize_by_extension(path: str = os.getcwd(),sublen: int = 3,group: bool = False):
    """
    Organizes files in the specified directory by their file extension.

    The files are first sorted into extension folders by create_ext_dir. With
    group=True, organize_by_relation is then run inside every extension
    folder that create_ext_dir returned.

    Args:
        path (str): The directory path to organize. Defaults to the working
            directory at the time this module was loaded. It is converted to
            an absolute path, and the process working directory is changed to it.
        sublen (int): The minimum shared-prefix length handed to
            organize_by_relation as group_len when group is True. Defaults to 3.
        group (bool): Whether to group related files inside each extension
            folder. Defaults to False.

    Returns:
        None

    Raises:
        OSError: If the specified path does not exist.
        ValueError: If the specified sublen is less than 1.
    """
    if sublen < 1:
        raise ValueError(f"sublen must be at least 1, got {sublen}")

    # Resolve before changing directory so a relative path keeps pointing at
    # the same folder for every later step.
    path = os.path.abspath(path)
    os.chdir(path)
    ext_dirs = create_ext_dir(path)
    if group:
        for ext_dir in ext_dirs:
            organize_by_relation(dstpath=os.path.join(path, ext_dir), group_len=sublen)
    else:
        for ext_dir in ext_dirs:
            print(f"Created {ext_dir} in {path}\n")
#=========================== ARGUMENT HANDLING =============================
# Run the action for every option that was given on the command line.
# Every option except -m/--file-match works on the directory passed with
# -p/--path, so reject the call up front instead of failing later inside
# os.chdir(None) with a TypeError.
if (carg or cearg or obrarg or obearg or darg or dearg) and not parg:
    parser.error("-p/--path is required with -c, -ce, -obr, -obe, -d and -de")

if marg:
    result = similar_file_group(marg)
    print(result)

if carg:
    ced = create_ext_dir(parg)
    print(ced)

if cearg:
    ced = create_ext_dir(parg,ext_type=cearg)
    print(ced)

if obrarg:
    obr = organize_by_relation(parg)
    # The function reports on stdout itself; avoid printing a bare "None".
    if obr is not None:
        print(obr)

if obearg:
    obe = organize_by_extension(parg,group=True)
    if obe is not None:
        print(obe)

if darg:
    delete_ext_dir(parg,all=True)

if dearg:
    delete_ext_dir(parg,ext_type=dearg)