-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathc2d.py
661 lines (606 loc) · 19.7 KB
/
c2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
#!/usr/bin/env python
#-*- coding: utf8 -*-
import os
from optparse import OptionParser
# Module-wide debug flag; the script entry point sets it to 1 when --verbose is given.
VERBOSE = 0
import sys
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so the
# process-wide default string encoding can be switched to UTF-8.
# NOTE(review): both calls fail on Python 3 - confirm the intended interpreter.
reload(sys)
sys.setdefaultencoding('utf-8')
# -------------------------------------------------------------------------
# build tree
# -------------------------------------------------------------------------
def next_paren(tokens, i):
    '''
    Search forward from tokens[i] for the next '(' or ')' token.

    Returns the absolute index of that token, or -1 when no parenthesis
    is found at or after position i.
    '''
    for offset, candidate in enumerate(tokens[i:]):
        if candidate in ('(', ')'):
            return i + offset
    return -1
def node_string(node, enable_eoj=True):
    '''
    Render a tree node as a short parenthesised string (debug output).

    Non-leaf nodes show only the label.  Leaf nodes show label and
    morphemes, and additionally 'eoj/eoj_idx' when enable_eoj is true.
    '''
    if not node['leaf']:
        return ''.join(['(', node['label'], ')'])
    if not enable_eoj:
        return ''.join(['(', node['label'], ' ', node['morphs'], ')'])
    return ''.join([
        '(', node['label'], ' ',
        node['eoj'], '/', str(node['eoj_idx']), ' ',
        node['morphs'], ')',
    ])
def create_node(tokens, i, j):
    '''
    Create a tree node from the tokens between two parentheses.

    tokens[i:j] is the label / morphemes area:
      i + 1 == j : label only (internal node)
          ex) '( NP ('
                 i  j
      i + 1 <  j : label followed by morphemes (leaf node)
          ex) '( NP_MOD 프랑스/NNP+의/JKG )'
                 i                       j
    Returns the node dict, or None when the span is empty (i >= j).
    '''
    width = j - i
    if width < 1:
        return None
    node = dict(lchild={}, rchild={}, parent={}, sibling={})
    node['label'] = tokens[i]
    if width == 1:
        node['leaf'] = False
    else:
        # only the first token after the label is kept as the morpheme string
        node['morphs'] = tokens[i + 1]
        node['leaf'] = True
        node['nleaf'] = {}
        node['pleaf'] = {}
    return node
def make_edge(top, node):
    '''
    Attach node as a child of top (left slot first, then right slot).

    When the right slot is filled, the left child's 'sibling' link is set
    to the new right child.  Returns False when both slots are already
    occupied (the tree is strictly binary), True otherwise.
    '''
    if top['lchild'] and top['rchild']:
        return False
    if not top['lchild']:
        # link to left child
        top['lchild'] = node
        arrow = '-[left]->'
    else:
        # link to right child
        top['rchild'] = node
        top['lchild']['sibling'] = node
        arrow = '-[right]->'
    node['parent'] = top
    if VERBOSE:
        print(node_string(top) + arrow + node_string(node))
    return True
def make_leaf_edge(node, history, depth=0):
    '''
    Chain the leaves of the tree together in left-to-right order.

    Sets node['nleaf'] / node['pleaf'] on every leaf; history accumulates
    the leaves visited so far (pre-order, left child before right child).
    '''
    if node['leaf']:
        if history:
            before = history[-1]
            before['nleaf'] = node
            node['pleaf'] = before
        history.append(node)
    for side in ('lchild', 'rchild'):
        child = node[side]
        if child:
            make_leaf_edge(child, history, depth + 1)
def build_tree(sent, tokens):
    '''
    Build a binary constituent tree from a tokenized bracketed parse.

    sent   : whitespace-split sentence line; sent[0] is the leading ';'
             marker, so eojeol indices start at 1.
    tokens : parenthesis / label / morpheme tokens.

    ex)
    sent   = ; 프랑스의 세계적인 의상 디자이너 엠마누엘 웅가로가 실내 장식용 직물 디자이너로 나섰다.
    tokens = ( S ( NP_SBJ ( NP ( NP_MOD 프랑스/NNP+의/JKG ) \
        ( NP ( VNP_MOD 세계/NNG+적/XSN+이/VCP+ᆫ/ETM ) ( NP ( NP 의상/NNG ) ( NP 디자이너/NNG ) ) ) ) \
        ( NP_SBJ ( NP 엠마누엘/NNP ) ( NP_SBJ 웅가로/NNP+가/JKS ) ) ) \
        ( VP ( NP_AJT ( NP ( NP ( NP 실내/NNG ) ( NP 장식/NNG+용/XSN ) ) ( NP 직물/NNG ) ) \
        ( NP_AJT 디자이너/NNG+로/JKB ) ) ( VP 나서/VV+었/EP+다/EF+./SF ) ) )

    Returns the synthetic ROOT node (the real tree hangs off
    root['lchild']), or None after writing a diagnostic to stderr when the
    input is ill-formed, not aligned with the sentence, or empty.
    '''
    err = ' '.join(tokens)
    root = {'lchild': {}, 'rchild': {}, 'parent': {}, 'sibling': {},
            'leaf': False, 'label': 'ROOT'}
    stack = [root]
    eoj_idx = 1
    eoj_max = len(sent)
    for i, token in enumerate(tokens):
        if token == '(':  # create node and push
            j = next_paren(tokens, i + 1)
            if j == -1 or i + 1 == j:
                sys.stderr.write("ill-formed parentheses[1] : %s\n" % (err))
                return None
            node = create_node(tokens, i + 1, j)
            if not node:
                return None
            # assign eoj/eoj_idx to leaf node
            if node['leaf']:
                if eoj_idx >= eoj_max:
                    sys.stderr.write("not aligned sentence %s : %s\n" % (' '.join(sent), err))
                    return None
                node['eoj'] = sent[eoj_idx]
                node['eoj_idx'] = eoj_idx
                eoj_idx += 1
            if VERBOSE:
                print(node_string(node))
            # push to stack
            stack.append(node)
        if token == ')':
            # pop and make edge
            if len(stack) == 0:
                sys.stderr.write("ill-formed parentheses[2] : %s\n" % (err))
                return None
            node = stack.pop()
            if len(stack) == 0:
                sys.stderr.write("ill-formed parentheses[3] : %s\n" % (err))
                return None
            top = stack[-1]
            if not make_edge(top, node):
                sys.stderr.write("can't make edge : %s\n" % (err))
                return None
    if len(stack) == 1 and stack[-1]['label'] == 'ROOT':
        if not root['lchild']:
            # No constituent was attached (e.g. a sentence line with no parse
            # lines): fail cleanly instead of crashing in make_leaf_edge.
            sys.stderr.write("build failure (empty parse) : %s\n" % (' '.join(sent)))
            return None
        history = []
        make_leaf_edge(root['lchild'], history, depth=0)
        return root
    sys.stderr.write("build failure : %s\n" % (err))
    return None
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------
# preprocessing
# -------------------------------------------------------------------------
def modify_illformed_1(tokens):
    '''
    Repair leaves whose label is glued to the morphemes with '+'.

    ex) '( NP ( NP ( NP ( NP+포로/NNG )'
    The token right after '(' should be a bare label.  When it contains
    '+', only the FIRST '+' is treated as the label/morpheme boundary; the
    remaining '+' separators between morphemes are preserved.

    Returns a new token list; the input list is not modified.  A trailing
    '(' with nothing after it is copied through unchanged.
    '''
    fixed = []
    total = len(tokens)
    i = 0
    while i < total:
        token = tokens[i]
        fixed.append(token)
        if token == '(' and i + 1 < total and '+' in tokens[i + 1]:
            label, morphs = tokens[i + 1].split('+', 1)
            fixed.append(label)
            fixed.append(morphs)
            i += 1  # the glued token has been consumed
        i += 1
    return fixed
def tokenize(bucket):
    '''
    Convert one input record into whitespace-separated tokens that are
    easy to process.

    bucket[0] is the sentence line (must start with a ';' token) and the
    remaining lines are the bracketed parse.
    ex) bucket
    ; 프랑스의 세계적인 의상 디자이너 엠마누엘 웅가로가 실내 장식용 직물 디자이너로 나섰다.
    (S (NP_SBJ (NP (NP_MOD 프랑스/NNP + 의/JKG)
            (NP (VNP_MOD 세계/NNG + 적/XSN + 이/VCP + ᆫ/ETM)
                (NP (NP 의상/NNG)
                    (NP 디자이너/NNG))))
        (NP_SBJ (NP 엠마누엘/NNP)
            (NP_SBJ 웅가로/NNP + 가/JKS)))
        (VP (NP_AJT (NP (NP (NP 실내/NNG)
                        (NP 장식/NNG + 용/XSN))
                    (NP 직물/NNG))
                (NP_AJT 디자이너/NNG + 로/JKB))
            (VP 나서/VV + 었/EP + 다/EF + ./SF)))

    Returns (sent, tokens), or (None, None) when the first line does not
    start with ';'.
    '''
    sent = bucket[0].split()
    if sent[0] != ';':
        return None, None

    pieces = []
    for raw in bucket[1:]:
        pieces.append(raw.strip('\t').replace('\t', ' '))
    text = ' '.join(pieces)

    # Applied strictly in this order.  Parentheses that are themselves
    # morphemes ('(/..', ')/..') are masked before the structural ones are
    # padded with spaces, then unmasked again.
    rewrites = (
        (' + ', '+'),
        ('(/', '^[/'),
        (')/', '^]/'),
        ('(', ' ( '),
        (')', ' ) '),
        ('^[/', '(/'),
        ('^]/', ')/'),
        ('+ ', '+'),
    )
    for old, new in rewrites:
        text = text.replace(old, new)

    tokens = modify_illformed_1(text.split())
    if VERBOSE:
        print(' '.join(tokens))
    return sent, tokens
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------
# tree traversal
# -------------------------------------------------------------------------
def tree2tokens(node, tokens, depth=0):
    '''
    Regenerate the token sequence from a tree.

    The input was tokenized before the tree was built; this walks the tree
    and appends the equivalent tokens to `tokens`, so the two can be
    compared as a sanity check.
    '''
    if node['leaf']:
        tokens.extend(['(', node['label'], node['morphs'], ')'])
    else:
        tokens.extend(['(', node['label']])
    left = node['lchild']
    right = node['rchild']
    if left:
        tree2tokens(left, tokens, depth=depth + 1)
    if right:
        tree2tokens(right, tokens, depth=depth + 1)
    if left or right:
        tokens.append(')')  # close this constituent after its last child
def modify_morphs(morphs):
    '''
    Put ' + ' between morphemes for output: 'a/X+b/Y' -> 'a/X + b/Y'.

    A literal plus-sign morpheme that follows a separator ('++/') is
    protected with a temporary tab so that only the separator is spaced
    out.  Values that are not strings (no usable .replace) are returned
    unchanged.
    '''
    try:
        protected = morphs.replace('++/', '+\t/')  # + -> tab
    except (AttributeError, TypeError):
        # best effort: leave non-string values untouched
        return morphs
    return protected.replace('+', ' + ').replace('\t', '+')  # tab -> +
def tree2con(node, tokens, history, depth=0):
    '''
    Print the tree back in the input (constituent / phrase-structure)
    layout.

    Output fragments are appended to `tokens`; `history` records the nodes
    visited so far so that a line break plus `depth` tabs can be inserted
    right after a leaf has been written.
    '''
    prev_node = history[-1] if depth != 0 else None
    if prev_node and prev_node['leaf']:
        # the previous fragment was a leaf: start a new indented line
        tokens.append('\n')
        tokens.extend(['\t'] * depth)
    if node['leaf']:
        tokens.append(''.join(['(', node['label'], ' ', modify_morphs(node['morphs']), ')']))
    else:
        tokens.append(''.join(['(', node['label'], '\t']))
    history.append(node)
    left = node['lchild']
    right = node['rchild']
    if left:
        tree2con(left, tokens, history, depth + 1)
    if right:
        tree2con(right, tokens, history, depth + 1)
    if left or right:
        tokens.append(')')  # close this constituent after its last child
def is_vx(gov_node):
    '''
    Tell whether the governor leaf is an auxiliary predicate (VX) or a
    predicate that behaves like one.

    - first morpheme tagged '/VX'                       -> True
    - first morpheme '되/' and the previous leaf ends
      with '게/EC', '면/EC' or '아도/EC'                -> True
      (not VX but acts like VX, ex) '지니게 되다')
    - first morpheme '않/' and the previous leaf ends
      with '지/EC'                                      -> True

    The governor's first morpheme and the previous leaf's last morpheme
    are kept in separate variables, so the '않/' test always inspects the
    governor itself and never the previous leaf.
    '''
    head = gov_node['morphs'].split('+')[0]
    if '/VX' in head:
        return True

    acts_like_doe = '되/' in head
    acts_like_anh = '않/' in head
    if not (acts_like_doe or acts_like_anh):
        return False

    pleaf = gov_node['pleaf']
    if not pleaf:
        return False
    prev_tail = pleaf['morphs'].split('+')[-1]

    if acts_like_doe:
        for ending in ('게/EC', '면/EC', '아도/EC'):
            if ending in prev_tail:
                return True
    if acts_like_anh and '지/EC' in prev_tail:
        return True
    return False
def is_vnp(morphs):
    '''
    True when the eojeol has at least three morphemes and starts with
    'NNB + VCP' (ex) '것/NNB+이/VCP+다/EF').
    '''
    parts = morphs.split('+')
    return len(parts) > 2 and '/NNB' in parts[0] and '/VCP' in parts[1]
def is_va(morphs):
    '''
    True when the first morpheme is 있/없 (tagged VA or VV) or 같/VA.

    The '/VV' variants cover cases that were mis-tagged as verbs.
    '''
    head = morphs.split('+')[0]
    for stem in ('있/VA', '있/VV', '없/VA', '없/VV', '같/VA'):
        if stem in head:
            return True
    return False
def is_nnb(morphs):
    '''
    True when the first morpheme of the eojeol is tagged NNB.
    '''
    head = morphs.split('+')[0]
    return '/NNB' in head
def is_etm(morphs):
    '''
    True when the last morpheme of the eojeol is one of the listed
    adnominal endings (ᆫ, 는, ᆯ, 을, 를 tagged ETM).
    '''
    tail = morphs.split('+')[-1]
    for ending in ('ᆫ/ETM', '는/ETM', 'ᆯ/ETM', '을/ETM', '를/ETM'):
        if ending in tail:
            return True
    return False
def check_vx_rule(gov_node):
    '''
    True when the VX rule applies: the governor has a parent, that parent
    has a left child, and the governor is (or acts like) an auxiliary
    predicate.
    '''
    parent = gov_node['parent']
    if parent and parent['lchild'] and is_vx(gov_node):
        return True
    return False
def check_vnp_rule(gov_node):
    '''
    True when the VNP rule applies: the governor has a parent with a left
    child and has the 'VNP 것/NNB + 이/VCP + 다/EF' shape.
    '''
    parent = gov_node['parent']
    if parent and parent['lchild'] and is_vnp(gov_node['morphs']):
        return True
    return False
def check_va_rule(gov_node):
    '''
    True when the VA rule applies, i.e. the pattern
    'ETM-ending eojeol, NNB eojeol, 있다/없다/같다'.

    The NNB eojeol only has to START with NNB, so '~ㄹ 수가 없다' is
    accepted as well.  The governor must also have a parent with a left
    child.
    '''
    parent = gov_node['parent']
    if not parent or not parent['lchild']:
        return False
    if not is_va(gov_node['morphs']):
        return False
    prev_leaf = gov_node['pleaf']
    if not prev_leaf or not is_nnb(prev_leaf['morphs']):
        return False
    prev_prev_leaf = prev_leaf['pleaf']
    if prev_prev_leaf and is_etm(prev_prev_leaf['morphs']):
        return True
    return False
def find_for_vx_rule(node, gov_node):
    '''
    Search for a replacement governor for the VX rule.

    Starting at the governor's parent, descend (left child preferred when
    its label contains 'S', 'VP' or 'VNP'; otherwise the right child when
    its label contains 'VP' or 'VNP'; otherwise the left child) until a
    VP/VNP leaf located after `node` is met.  Returns that leaf or None.
    '''
    cursor = gov_node['parent']
    while cursor:
        # the new governor must come strictly after the dependent
        if (cursor['leaf']
                and any(tag in cursor['label'] for tag in ('VP', 'VNP'))
                and cursor['eoj_idx'] > node['eoj_idx']):
            return cursor
        left = cursor['lchild']
        # NOTE(review): substring match, so 'S' also matches labels such as 'NP_SBJ'
        if left and any(tag in left['label'] for tag in ('S', 'VP', 'VNP')):
            cursor = left
            continue
        right = cursor['rchild']
        if right and any(tag in right['label'] for tag in ('VP', 'VNP')):
            cursor = right
            continue
        cursor = left
    return None
def find_for_vnp_rule(node, gov_node):
    '''
    Search for a replacement governor for the VNP rule.

    Same descent as the VX search, but a VP/VNP leaf located after `node`
    is accepted only when it is at most 3 eojeols away from the current
    governor; otherwise the descent continues.  Returns the leaf or None.
    '''
    cursor = gov_node['parent']
    while cursor:
        # the new governor must come strictly after the dependent ...
        if (cursor['leaf']
                and any(tag in cursor['label'] for tag in ('VP', 'VNP'))
                and cursor['eoj_idx'] > node['eoj_idx']):
            # ... and must not be too far from the old governor
            if abs(gov_node['eoj_idx'] - cursor['eoj_idx']) <= 3:
                return cursor
        left = cursor['lchild']
        # NOTE(review): substring match, so 'S' also matches labels such as 'NP_SBJ'
        if left and any(tag in left['label'] for tag in ('S', 'VP', 'VNP')):
            cursor = left
            continue
        right = cursor['rchild']
        if right and any(tag in right['label'] for tag in ('VP', 'VNP')):
            cursor = right
            continue
        cursor = left
    return None
def find_for_va_rule(node, gov_node, search_mode=1):
    '''
    Search for a replacement governor for the VA rule.

    search_mode == 2 starts from the governor's grandparent when there is
    one; any other mode starts from the parent.  From there only left
    children are followed.  A VP/VNP leaf located after `node` and at most
    3 eojeols away from the current governor is returned, else None.
    '''
    cursor = gov_node['parent']
    if search_mode == 2 and cursor and cursor['parent']:
        # the search has to begin at parent -> parent
        cursor = cursor['parent']
    while cursor:
        # the new governor must come strictly after the dependent ...
        if (cursor['leaf']
                and any(tag in cursor['label'] for tag in ('VP', 'VNP'))
                and cursor['eoj_idx'] > node['eoj_idx']):
            # ... and must not be too far from the old governor
            if abs(gov_node['eoj_idx'] - cursor['eoj_idx']) <= 3:
                return cursor
        cursor = cursor['lchild']
    return None
def find_gov(node):
    '''
    Return the eojeol index of the governor of a leaf node (0 if none).

    * node = leaf node
    1. head final rule
      - follow parent links from the current node; at the first node that
        has a right sibling, take the shared parent and follow its right
        children down to a leaf
    2. VX rule
      - if the governor is an auxiliary predicate, replace it with the
        main predicate
      - predicates that are not auxiliaries but behave like them are
        handled the same way. ex) '지니게 되다'
    3. VNP rule
      - if the governor has the 'VNP 것/NNB + 이/VCP + 다/EF' shape,
        replace it with the preceding predicate
    4. VA rule
      - if the governor is '있/VA, 없/VA, 같/VA' and is preceded by an
        'ㄹ NNB' pattern, replace it with the preceding predicate
        (uses the node['pleaf'] links)
    '''
    # climb until a node with a right sibling is found (sibling link)
    anchor = None
    cursor = node
    while cursor:
        if cursor['sibling']:
            anchor = cursor['sibling']['parent']
            break
        cursor = cursor['parent']
    if not anchor:
        return 0

    # follow right children down to a leaf
    cursor = anchor
    while cursor and not cursor['leaf']:
        cursor = cursor['rchild']
    if not cursor:
        return 0
    gov_node = cursor

    # VX rule: search below the governor's parent for the main predicate
    if check_vx_rule(gov_node):
        gov_node = find_for_vx_rule(node, gov_node) or gov_node

    # VNP rule: search below the governor's parent for the preceding predicate
    if check_vnp_rule(gov_node):
        moved = find_for_vnp_rule(node, gov_node)
        if moved:
            gov_node = moved
            # the new governor may itself be '있다/없다/같다':
            # give the VA rule one try, starting from the grandparent
            if check_va_rule(gov_node):
                gov_node = find_for_va_rule(node, gov_node, search_mode=2) or gov_node

    # VA rule: search below the governor's parent
    if check_va_rule(gov_node):
        gov_node = find_for_va_rule(node, gov_node, search_mode=1) or gov_node

    return gov_node['eoj_idx']
def tree2dep(node, depth=0):
    '''
    Extract the dependency structure from the tree.

    For every leaf, one tab-separated line is printed:
    eoj_idx, eoj, morphemes, label, governor index.
    '''
    if node['leaf']:
        fields = (
            node['eoj_idx'],
            node['eoj'],
            modify_morphs(node['morphs']),
            node['label'],
            find_gov(node),
        )
        print('\t'.join(str(field) for field in fields))
    for side in ('lchild', 'rchild'):
        child = node[side]
        if child:
            tree2dep(child, depth + 1)
def find_ep(node):
    '''
    Find the span of the embedded phrase a node belongs to.

    Follow parent links to the first node whose label is NOT one of
    VP_MOD, VNP_MOD, S_MOD.
      left-most leaf of that node  = ep begin
      right-most leaf of that node = ep end
    Returns (begin, end) eojeol indices, or (0, 0) when not found.
    '''
    anchor = None
    cursor = node
    while cursor:
        if cursor['label'] not in ('VP_MOD', 'VNP_MOD', 'S_MOD'):
            anchor = cursor
            break
        cursor = cursor['parent']

    # descend along left children, then along right children, to a leaf
    ends = []
    for side in ('lchild', 'rchild'):
        walker = anchor
        while walker and not walker['leaf']:
            walker = walker[side]
        ends.append(walker)
    left_ep, right_ep = ends

    if left_ep and right_ep:
        return left_ep['eoj_idx'], right_ep['eoj_idx']
    return 0, 0
def is_ec(morphs):
    '''
    True when the eojeol ends with a connective ending (EC), optionally
    followed by one trailing SP morpheme.
    '''
    parts = morphs.split('+')
    tail = parts[-1]
    if '/EC' in tail:
        return True
    return '/SP' in tail and len(parts) >= 2 and '/EC' in parts[-2]
def find_sp(node):
    '''
    Find where the sub-clause ending at this node begins.

    Follow parent links while the label is one of VP, S, VNP_CMP.
      - on the first node with a different label, step back to the node
        visited just before it
      - on the first node that has a sibling link, stop at that node
    The left-most leaf of the stopping node = sp begin.
    Returns its eojeol index, or 0 when there is none.
    '''
    anchor = None
    below = None
    cursor = node
    while cursor:
        if cursor['label'] not in ('VP', 'S', 'VNP_CMP'):
            anchor = below
            break
        if cursor['sibling']:
            anchor = cursor
            break
        below = cursor
        cursor = cursor['parent']

    # follow left children down to a leaf
    walker = anchor
    while walker and not walker['leaf']:
        walker = walker['lchild']
    if walker:
        return walker['eoj_idx']
    return 0
def tree2embedded(node, depth=0):
    '''
    Extract the embedded phrase / clause structure from the tree.

    For every leaf, one tab-separated line is printed:
    eoj_idx, eoj, morphemes, label, governor index,
    ep_begin, ep_end, sp_begin, sp_end  (0 means "none").
    '''
    if node['leaf']:
        eoj_idx = node['eoj_idx']
        label = node['label']
        row = [eoj_idx, node['eoj'], modify_morphs(node['morphs']), label, find_gov(node)]

        ep_begin, ep_end = 0, 0
        if label in ('VP_MOD', 'VNP_MOD'):
            ep_begin, ep_end = find_ep(node)

        sp_begin, sp_end = 0, 0
        if label in ('VP', 'VNP', 'VNP_CMP') and is_ec(node['morphs']):
            sp_begin = find_sp(node)
            if sp_begin != 0 and sp_begin != eoj_idx:
                sp_end = eoj_idx
            else:
                # a span that starts and ends on the same eojeol is meaningless
                sp_begin = 0

        row.extend([ep_begin, ep_end, sp_begin, sp_end])
        print('\t'.join(str(field) for field in row))
    for side in ('lchild', 'rchild'):
        child = node[side]
        if child:
            tree2embedded(child, depth + 1)
# -------------------------------------------------------------------------
def spill(bucket, mode):
    '''
    Process one record (sentence line + parse lines) and print the result.

    mode 0 : constituent tree, 1 : dependency rows,
    mode 2 : embedded phrase/clause rows.
    Returns False when the record is filtered out as ill-formed, True
    after the output (followed by one blank line) has been written.
    '''
    # --------------------------------------------------------------
    # ill-formed filtering and build tree
    sent, tokens = tokenize(bucket)
    if not sent:
        return False
    tree = build_tree(sent, tokens)
    if not tree:
        return False
    # start from tree['lchild'] (ROOT itself is skipped)
    top = tree['lchild']
    rebuilt = []
    tree2tokens(top, rebuilt, depth=0)
    if tokens != rebuilt:
        sys.stderr.write("input parentheses != tree2tokens\n")
        sys.stderr.write("input       = %s\n" % (' '.join(tokens)))
        sys.stderr.write("tree2tokens = %s\n" % (' '.join(rebuilt)))
        return False
    # --------------------------------------------------------------
    if mode == 0:  # print constituent tree
        print(' '.join(sent))
        fragments = []
        visited = []
        tree2con(top, fragments, visited, depth=0)
        print(''.join(fragments).strip())
    if mode == 1:  # print dependency tree
        tree2dep(top, depth=0)
    if mode == 2:  # print embedded phrase/clause tagged tree
        tree2embedded(top, depth=0)
    # blank line between records
    sys.stdout.write('\n')
    return True
if __name__ == '__main__':
    # Read blank-line-separated records from stdin and convert each one.
    parser = OptionParser()
    parser.add_option("--verbose", action="store_const", const=1, dest="verbose", help="verbose mode")
    parser.add_option("-m", "--mode", dest="mode", help="mode : 0(constituent), 1(dependency), 2(embedded phrase/clause)", metavar="mode")
    options, args = parser.parse_args()

    if options.verbose:
        VERBOSE = 1

    # the mode defaults to 0 (constituent output) when -m is not given
    mode = 0 if options.mode is None else int(options.mode)

    bucket = []
    while True:
        try:
            line = sys.stdin.readline()
        except KeyboardInterrupt:
            break
        if not line:
            # end of input
            break
        line = line.strip()
        if line:
            bucket.append(line)
        elif bucket:
            # a blank line terminates the current record
            ret = spill(bucket, mode)
            bucket = []

    # flush the last record when the input does not end with a blank line
    if bucket:
        ret = spill(bucket, mode)