-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathtagviz.rb
executable file
·121 lines (100 loc) · 2.39 KB
/
tagviz.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env ruby
# Converts the pseudo-XML mention-tagged format (read from STDIN) into a
# minimal text format similar to the one UIUC LBJ outputs by default,
# or into HTML when the -html flag is given.
#
# This depends on certain conventions in the naming scheme for entity ids,
# so it is not especially stable.
require 'rubygems'
require 'hpricot'
# Parse the entire mention-tagged document from standard input.
hp = Hpricot(STDIN.read)

# Output mode: HTML when -html is on the command line, plain text otherwise.
$html = ARGV.include?("-html")

# Colors handed out in rotation by advance_color; $color_i is the index of
# the color handed out most recently (-1 means none yet).
$colors = %w[ maroon navy green orange purple magenta teal ]
$color_i = -1

# Lookup tables filled in lazily while printing:
#   $short2long: short entity id => long entity id (see short_eid)
#   $long2color: long entity id  => color name     (see entity_color)
$short2long = {}
$long2color = {}

# With -style, emit the stylesheet used by the HTML output before anything else.
if ARGV.include?("-style")
  puts %|
<style>
.singleton > .text { color:black }
.singleton > .bracket { color: grey }
.entityid { vertical-align: sub; font-size: 70%; }
</style>
|
end
# Returns the short form of +long_eid+: the id with the first underscore and
# everything after it on the line removed. The short => long pairing is
# remembered in $short2long.
#
# Raises RuntimeError if the same short id was already recorded for a
# different long id, because that violates the id naming convention this
# script relies on.
def short_eid(long_eid)
  prefix = long_eid.gsub(/_.*/, "")
  previous = $short2long[prefix]
  raise "bug with entity id format conventions" if previous && previous != long_eid

  $short2long[prefix] = long_eid
  prefix
end
# Moves $color_i to the next position in $colors, wrapping around at the end,
# and returns the new index.
def advance_color
  $color_i = $color_i.succ.modulo($colors.length)
end
# Returns the color assigned to the entity that +node+ belongs to, keyed by
# the node's 'entityid' attribute. The first time an entity id is seen it is
# given the next color from the rotation and the choice is cached in
# $long2color so later mentions of the same entity share it.
def entity_color(node)
  eid = node['entityid']
  cached = $long2color[eid]
  return cached if cached

  $long2color[eid] = $colors[advance_color]
end
# True when the node's 'entityid' attribute contains no underscore (this
# includes a missing attribute); such ids are treated as singleton mentions.
def is_singleton(node)
  (node['entityid'] =~ /_/).nil?
end
# Escapes +text+ for inclusion in the HTML output: '&', '<' and '>' become
# character entities and each newline becomes a <br> tag.
# '&' must be replaced first so the entities produced for '<' and '>' are not
# escaped a second time.
def html_escape_text(text)
  text.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;").gsub("\n", "<br>")
end

# The three output helpers (start_mention, end_mention, print_text) are
# defined once, at load time, according to the output mode held in $html.
if $html
  # HTML mode: opens the wrapping <span> for a mention and prints its opening
  # bracket. Non-singleton mentions are colored per entity.
  def start_mention(node)
    if is_singleton(node)
      print "<span class=singleton>"
      print "<span class=bracket>[</span>"
    else
      print %|<span class=non_singleton style="color: #{entity_color node}">|
      print %|<span class=bracket>[</span>|
    end
  end

  # HTML mode: prints the closing bracket, then (for non-singletons only) the
  # short entity id, and finally closes the wrapping <span>.
  def end_mention(node)
    print %|<span class=bracket>]</span>|
    print %|<span class=entityid>#{short_eid node['entityid']}</span>| unless is_singleton(node)
    print %|</span>|
  end

  # HTML mode: prints a text node inside <span class=text>, HTML-escaped so
  # that literal '&', '<' and '>' in the text cannot be read as markup.
  def print_text(node)
    print "<span class=text>"
    print html_escape_text(node.to_s)
    print "</span>"
  end
else
  # Text mode: a mention opens with a single asterisk.
  def start_mention(node)
    print "*"
  end

  # Text mode: a mention closes with an asterisk; non-singleton mentions are
  # additionally tagged with "_<short entity id>".
  def end_mention(node)
    if is_singleton(node)
      print "*"
    else
      print "*_#{short_eid node['entityid']}"
    end
  end

  # Text mode: text nodes are printed as they are.
  def print_text(node)
    print node
  end
end
#### Tree walk ####
# Recursively walks +node+ and prints it through the mode-specific helpers.
#
# Text nodes are handed to print_text. Element nodes must be <mention>
# elements: start_mention is called, then every child is processed in order,
# then end_mention is called. Nodes of any other kind are ignored.
#
# Raises RuntimeError ("unknown node ...") for an element whose name is not
# 'mention'.
def process(node)
  case node
  when Hpricot::Text
    print_text(node)
  when Hpricot::Elem
    # Interpolate the offending node itself; no other variable describing it
    # is in scope inside this method.
    raise "unknown node #{node.inspect}" unless node.name == 'mention'

    start_mention(node)
    # NOTE(review): children is guarded against nil on the assumption that
    # Hpricot can return nil for an element without content - confirm.
    (node.children || []).each { |child| process(child) }
    end_mention(node)
  end
end
# Print every top-level node of the parsed document, then finish the output
# with a newline.
hp.children.each { |top_node| process(top_node) }
puts