-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmp2docx.py
157 lines (133 loc) · 5.45 KB
/
mp2docx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
""""
Convert an MP (boundary plan) XML file to a docx document.
"""
class MpXml2Docx:
    """
    Convert a boundary-plan (MP) XML file into a printable Word document.

    Nodes whose tag is a key of ``BINDER_FILE`` are rendered one by one into
    intermediate ``.docx`` fragments inside a temporary folder; the fragments
    are then merged into a single document.  Call :meth:`close` (or use the
    instance as a context manager) to remove the temporary folder afterwards.
    """

    CNST_FORMAT = 'docx'
    CNST_PATH_TPL = 'template/common/'

    def __init__(self):
        self.name_number = 0
        # Scratch folder that collects the per-node docx fragments.
        self.tempfolder = tempfile.mkdtemp()

    def __enter__(self):
        """Return the converter itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Remove the temporary folder; exceptions from the block propagate."""
        self.close()
        return False

    @staticmethod
    def _log_failure(message, *args):
        """Log the exception currently being handled, including its traceback."""
        # Imported locally so this class does not rely on a module-level import.
        import logging
        logging.getLogger(__name__).exception(message, *args)

    def close(self):
        """
        Delete the temporary folder (no-op when it no longer exists).
        """
        if os.path.exists(self.tempfolder):
            shutil.rmtree(self.tempfolder)

    def fast_iter_element(self, elem, func, args=None, kwargs=None) -> None:
        """
        Process a node with ``func`` and then free the memory it occupied.

        :param elem: node to process; it is cleared after ``func`` returns
        :param func: callback invoked as ``func(elem, *args, **kwargs)``
        :param args: extra positional arguments for ``func`` (default: none)
        :param kwargs: extra keyword arguments for ``func`` (default: none)
        :return: None
        """
        # ``None`` defaults avoid sharing one mutable list/dict between calls.
        func(elem, *(args or ()), **(kwargs or {}))
        elem.clear()
        # Drop leading children of the parent while the node still has a
        # previous sibling that is a plain element.
        while True:
            previous = elem.getprevious()
            if previous is None:
                break
            # Exact type check on purpose: an isinstance() test would be
            # broader and change which siblings count as plain elements.
            if type(previous) is not etree._Element:
                break
            parent = elem.getparent()
            if parent is None:
                # Nothing can be deleted without a parent; stop instead of
                # spinning forever.
                break
            del parent[0]

    def render_tpl(self, node, XMLClass, path_tpl, name_result):
        """
        Render one node into a docx fragment inside the temporary folder.

        Nodes without children and without text are skipped.  Rendering is
        best-effort: a failure is logged and the fragment is simply omitted.

        :param node: XML node to render
        :param XMLClass: class built as ``XMLClass(node)`` whose ``to_dict()``
            result is used as the template context
        :param path_tpl: path to the docx template
        :param name_result: file name (without extension) of the fragment
        :return: None; the fragment is saved as ``<name_result>.docx``
        """
        try:
            if len(node) == 0 and not node.text:
                return
            tpl = DocxTemplate(path_tpl)
            tpl.render(XMLClass(node).to_dict())
            file_res = '.'.join([name_result, self.CNST_FORMAT])
            tpl.save(os.path.join(self.tempfolder, file_res))
        except Exception:
            # Keep going with the remaining nodes, but leave a trace of what failed.
            self._log_failure('Failed to render template %s into %s', path_tpl, name_result)

    def run_render_tpl_node(self, elem, xml_class_name, is_clean, pos_node):
        """
        Render a single XML block using the template bound to its tag.

        :param elem: node to render; ``elem.tag`` must be a key of ``BINDER_FILE``
        :param xml_class_name: class that turns the node into a dict
        :param is_clean: when true, the node is cleared after rendering;
            otherwise it is left intact
        :param pos_node: ordinal number of the node, appended to the
            ``pos_doc`` prefix to form the fragment file name
        :return: None
        """
        binding = BINDER_FILE[elem.tag]
        base_dir = os.path.dirname(__file__)
        path_tpl = os.path.normpath(
            os.path.join(base_dir, self.CNST_PATH_TPL + binding['tpl']))
        name_result = binding['pos_doc'] + str(pos_node)
        if is_clean:
            self.fast_iter_element(elem, self.render_tpl,
                                   args=(xml_class_name, path_tpl, name_result))
        else:
            self.render_tpl(elem, xml_class_name, path_tpl, name_result)

    def __context_parser(self, context):
        """
        Walk the parse events and render every node bound in ``BINDER_FILE``.

        :param context: iterable of ``(event, element)`` pairs
        """
        try:
            # The position counts every event (start and end alike) and ends
            # up in the fragment file name.
            for position, (event, elem) in enumerate(context, start=1):
                # Nodes are handled on 'end' only, because on 'start'
                # iterparse may not have delivered the whole node yet.
                if event != 'end' or elem.tag not in BINDER_FILE:
                    continue
                if elem.tag == 'SubParcels':
                    parent = elem.getparent()
                    # Only SubParcels located directly under Package are rendered.
                    if parent is None or parent.tag != 'Package':
                        continue
                binding = BINDER_FILE[elem.tag]
                self.run_render_tpl_node(elem, binding['class'], binding['clear'], position)
        except Exception:
            # Best-effort: fragments rendered so far are kept, the failure is logged.
            self._log_failure('XML parsing stopped before the end of the document')

    def __xml_block_to_docx(self, path):
        """
        Produce the docx fragments for the blocks of an XML file.

        :param path: path to the XML file
        """
        context = iterparse(path, events=("start", "end"))
        self.__context_parser(context)

    def __element_body_docx(self, path):
        """
        Yield the body elements of a docx fragment followed by a page break.

        :param path: path to the docx fragment
        :return: generator of body elements
        """
        doc = Document(path)
        # The page break is appended to every fragment handled here,
        # including the last one.
        doc.add_page_break()
        yield from doc.element.body

    def combine_word_documents(self, result_path_file):
        """
        Merge all docx fragments of the temporary folder into one document.

        Fragments are taken in lexicographic file-name order.  The first one
        becomes the base document; the body elements of the others are
        appended to it.  When there are no fragments an empty document is saved.

        :param result_path_file: path of the resulting docx file
        """
        suffix = '.' + self.CNST_FORMAT
        paths = [os.path.join(self.tempfolder, name)
                 for name in sorted(os.listdir(self.tempfolder))
                 if name.endswith(suffix)]
        if not paths:
            merged_document = Document()
        else:
            merged_document = Document(paths[0])
            for path in paths[1:]:
                for element in self.__element_body_docx(path):
                    merged_document.element.body.append(element)
        merged_document.save(result_path_file)

    def run(self, path_file, result_file):
        """
        Convert an XML file to a Word document.

        :param path_file: path to the source XML file
        :param result_file: path of the resulting docx file
        """
        self.__xml_block_to_docx(path_file)
        self.combine_word_documents(result_file)