-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapplyRegexToFolder.py
145 lines (130 loc) · 3.95 KB
/
applyRegexToFolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Import all dependencies
import re, os, difflib, sys, getopt
import locale
# ---------------------------------------------------------------------------
# Module-level state shared by the functions below.
# ---------------------------------------------------------------------------

# Root of the tree walked by run() and prefix under which results are written.
inputFolder = ""
outputFolder = ""

# Pattern and substitution text handed to re.sub().
regex = ""
replacement = ""

# Pattern source: when isRegexFromFile is set (-r option), the pattern is
# read from regexFile instead of being prompted for.
isRegexFromFile = False
regexFile = ""

# When True the substitution is performed with re.DOTALL.
isDotMatchingAll = False

# Set by the -d option: write a .diff file next to each created file.
isDiffEnabled = False

# When True, run() does not print its per-file 'Processed'/'Ignored' lines.
silentMode = False

# Bookkeeping: path of the file most recently read by getFileContents() and
# of the file most recently written by createParsedOutput().
parsedFile = ""
createdFile = ""
def usage():
    """Print the command-line help text, one line per entry, to standard output."""
    helpLines = (
        'Usage : python applyRegexToFolder.py [-d] [-r filename]',
        ' Copies all files inside the input folder into the output folder,',
        ' and modified their contents by applying the given regex.',
        ' -d : compute diff between the previous version and the parsed one',
        ' -r filename : take the regex from the contents of a file, specified after the -r',
    )
    print(*helpLines, sep='\n')
def parseOptions():
    """Read the switches from sys.argv and record them in module globals.

    -d          sets isDiffEnabled to True.
    -r <file>   sets isRegexFromFile to True and stores <file> in regexFile.

    If getopt rejects the command line, the error and the usage text are
    printed and the process exits with status 2.
    """
    global isDiffEnabled, isRegexFromFile, regexFile

    try:
        switches, _positional = getopt.getopt(sys.argv[1:], 'dr:')
    except getopt.GetoptError as problem:
        print(str(problem))
        usage()
        sys.exit(2)

    for switch, argument in switches:
        if switch == '-d':
            isDiffEnabled = True
        if switch == '-r':
            isRegexFromFile, regexFile = True, argument
def readInput():
    """Prompt on standard input for the run parameters and store them in globals.

    Asks for the input folder, the output folder and the replacement text.
    The pattern is either prompted for or, when isRegexFromFile is set, taken
    verbatim from the whole contents of regexFile (including any trailing
    newline the file may contain).

    Raises:
        OSError: if regexFile cannot be opened or read.
    """
    global inputFolder
    global outputFolder
    global regex
    global replacement

    inputFolder = input('>> Input folder : ')
    outputFolder = input('>> Output folder : ')
    if isRegexFromFile:
        # The with-block guarantees the handle is closed even if read() fails.
        with open(regexFile, 'r') as patternFile:
            regex = patternFile.read()
    else:
        regex = input('>> Regex : ')
    replacement = input('>> Replacement : ')
def getFileContents(dirname, filename):
    """Read one file and return its contents as text.

    The joined path is stored in the module-level parsedFile so that later
    steps can refer to the file that is currently being handled.

    The bytes are decoded as UTF-8 first; if that fails, the locale's
    preferred encoding is tried. If neither works, an error line is printed
    and an empty string is returned.

    Args:
        dirname: directory containing the file.
        filename: name of the file inside dirname.

    Returns:
        The decoded text, or '' when the file is empty or cannot be decoded.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    global parsedFile
    parsedFile = os.path.join(dirname, filename)

    # Read raw bytes so that the encoding can be chosen afterwards.
    with open(parsedFile, 'rb') as source:
        rawBytes = source.read()

    try:
        return rawBytes.decode('utf-8')
    except UnicodeDecodeError:
        pass

    try:
        return rawBytes.decode(locale.getpreferredencoding(False))
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding name that Python has no codec
        # for. Anything else (e.g. KeyboardInterrupt) is left to propagate.
        print('Error while parsing file ' + parsedFile)
        return ''
def createParsedOutput(oldContents, filename):
    """Apply the configured substitution and write the result if it changed.

    Uses the module-level regex, replacement and isDotMatchingAll settings.
    When the substitution changes the text, the result is written as UTF-8
    under outputFolder at the same relative location that parsedFile has
    under inputFolder, and that destination path is stored in the
    module-level createdFile. Unchanged text writes nothing and leaves
    createdFile untouched.

    Args:
        oldContents: text of the file currently referenced by parsedFile.
        filename: name of that file; kept for interface compatibility (the
            destination is derived from parsedFile).

    Returns:
        The text after substitution (equal to oldContents if nothing matched).

    Raises:
        re.error: if the configured pattern is invalid.
        OSError: if the destination cannot be created or written.
    """
    global createdFile

    flags = re.DOTALL if isDotMatchingAll else 0
    newContents = re.sub(regex, replacement, oldContents, flags=flags)

    if newContents != oldContents:
        # Derive the destination from the relative path instead of slicing by
        # string length, which breaks when inputFolder ends with a separator.
        relativePath = os.path.relpath(parsedFile, inputFolder)
        createdFile = os.path.join(outputFolder, relativePath)

        # Several directory levels may be missing (e.g. the only changed file
        # lives in a nested subfolder), so create the whole chain.
        os.makedirs(os.path.dirname(createdFile) or os.curdir, exist_ok=True)

        with open(createdFile, 'wb') as target:
            target.write(newContents.encode('utf-8'))

    return newContents
def _withFinalNewline(text):
    """Split text into lines (terminators kept), ensuring the last one ends with a newline."""
    lines = text.splitlines(True)
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    return lines


def computeDiff(oldContents, newContents):
    """Write a difflib.Differ comparison of the two texts next to the created file.

    The report is written to createdFile + '.diff'. Its first line names the
    source and destination files ('<parsedFile> -> <createdFile>'); the
    Differ output follows, one entry per line.

    Args:
        oldContents: text before the substitution.
        newContents: text after the substitution; may be empty.

    Raises:
        OSError: if the .diff file cannot be written.
    """
    oldLines = _withFinalNewline(oldContents)
    newLines = _withFinalNewline(newContents)
    report = difflib.Differ().compare(oldLines, newLines)

    # UTF-8 matches the encoding used for the created file itself, so text
    # that the locale encoding cannot represent does not abort the report.
    with open(createdFile + '.diff', 'w', encoding='utf-8') as diffFile:
        # Terminate the header so it is not fused with the first diff line.
        diffFile.write(parsedFile + ' -> ' + createdFile + '\n')
        diffFile.writelines(report)
def setParameters(_inputFolder, _outputFolder, _regex, _replacement, _isDotMatchingAll, _silentMode=False):
    """Configure a run programmatically by overwriting the module globals.

    Args:
        _inputFolder: stored in inputFolder.
        _outputFolder: stored in outputFolder.
        _regex: stored in regex.
        _replacement: stored in replacement.
        _isDotMatchingAll: stored in isDotMatchingAll.
        _silentMode: stored in silentMode; defaults to False.
    """
    global inputFolder, outputFolder
    global regex, replacement
    global isDotMatchingAll, silentMode

    inputFolder, outputFolder = _inputFolder, _outputFolder
    regex, replacement = _regex, _replacement
    isDotMatchingAll, silentMode = _isDotMatchingAll, _silentMode
# Extensions (without the leading dot) that run() should process.
# A falsy value such as None means every file is processed.
extFilter = None


def setExtensionFilter(_extFilter):
    """Replace the module-level extension filter with _extFilter."""
    globals()['extFilter'] = _extFilter
def run():
    """Walk inputFolder and apply the configured substitution to each eligible file.

    A file is eligible when no extension filter is set, or when its extension
    (without the dot) is contained in extFilter. Each eligible file is read
    with getFileContents(); non-empty contents are passed to
    createParsedOutput(). Unless silentMode is set, a 'Processed' or
    'Ignored' line is printed per file. With isDiffEnabled, a diff is written
    for every file whose contents were actually changed.
    """
    # Browse all files and subfolders
    for dirname, _dirnames, filenames in os.walk(inputFolder):
        # Browse all files in current subfolder
        for filename in filenames:
            extension = os.path.splitext(filename)[1][1:]
            if extFilter and extension not in extFilter:
                continue

            sourcePath = os.path.join(dirname, filename)
            oldContents = getFileContents(dirname, filename)
            if oldContents == '':
                # NOTE(review): the 'Ignored' branch is taken to belong to the
                # empty-contents check (files that are empty or could not be
                # decoded), not to the extension filter - confirm against the
                # original layout.
                if not silentMode:
                    print('Ignored ' + sourcePath)
                continue

            newContents = createParsedOutput(oldContents, filename)
            if not silentMode:
                print('Processed ' + sourcePath)

            # createParsedOutput() only writes a file (and updates
            # createdFile) when the text changed. Diffing an unchanged file
            # would write its report next to a stale or empty createdFile.
            if isDiffEnabled and newContents != oldContents:
                computeDiff(oldContents, newContents)
# Command-line switches are parsed whenever this module is loaded: this call
# comes before the __main__ guard, so it also runs on import.
parseOptions()
if __name__ == "__main__":
    # Script entry point: prompt for the parameters, then process the tree.
    # NOTE(review): both calls are assumed to sit inside the guard - confirm.
    readInput()
    run()