-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathparlamint2meta.xsl
103 lines (98 loc) · 4.4 KB
/
parlamint2meta.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
<?xml version="1.0"?>
<!-- Transform one ParlaMint file to a TSV file with its metadata. -->
<!-- Includes header row, cf. template for tei:TEI -->
<!-- Needs the file with corpus teiHeader giving the speaker, party etc. info as the "meta" parameter -->
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns="http://www.tei-c.org/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:fn="http://www.w3.org/2005/xpath-functions"
xmlns:et="http://nl.ijs.si/et"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xi="http://www.w3.org/2001/XInclude"
exclude-result-prefixes="fn et tei xs xi"
version="2.0">
<xsl:import href="parlamint-lib.xsl"/>
<xsl:output method="text" encoding="utf-8"/>
<!-- Entry point for one TEI document: logs a progress message, writes the TSV
     header row, then processes every tei:u descendant to write the data rows.
     The column order here must stay in step with the value order written by
     the tei:u template. -->
<xsl:template match="tei:TEI">
  <xsl:message select="concat('INFO: Converting ', @xml:id, ' to metadata TSV')"/>
  <!-- Separators are written as character references (&#9; = tab) so that they
       survive editors and formatters that rewrite literal whitespace. -->
  <!-- Text metadata columns -->
  <xsl:text>Text_ID&#9;</xsl:text>
  <xsl:text>ID&#9;</xsl:text>
  <xsl:text>Title&#9;</xsl:text>
  <xsl:text>Date&#9;</xsl:text>
  <xsl:text>Body&#9;</xsl:text>
  <xsl:text>Term&#9;</xsl:text>
  <xsl:text>Session&#9;</xsl:text>
  <xsl:text>Meeting&#9;</xsl:text>
  <xsl:text>Sitting&#9;</xsl:text>
  <xsl:text>Agenda&#9;</xsl:text>
  <xsl:text>Subcorpus&#9;</xsl:text>
  <xsl:text>Lang&#9;</xsl:text>
  <!-- Speaker metadata columns -->
  <xsl:text>Speaker_role&#9;</xsl:text>
  <xsl:text>Speaker_MP&#9;</xsl:text>
  <xsl:text>Speaker_minister&#9;</xsl:text>
  <xsl:text>Speaker_party&#9;</xsl:text>
  <xsl:text>Speaker_party_name&#9;</xsl:text>
  <xsl:text>Party_status&#9;</xsl:text>
  <xsl:text>Party_orientation&#9;</xsl:text>
  <xsl:text>Speaker_ID&#9;</xsl:text>
  <xsl:text>Speaker_name&#9;</xsl:text>
  <xsl:text>Speaker_gender&#9;</xsl:text>
  <xsl:text>Speaker_birth</xsl:text>
  <!-- End the header row with a line feed (&#10;); a plain space here would
       glue the header and all data rows into one line. -->
  <xsl:text>&#10;</xsl:text>
  <xsl:apply-templates select=".//tei:u"/>
</xsl:template>
<!-- Writes one TSV data row for an utterance: twelve text-level columns, the
     speaker role, and ten speaker columns, in the same order as the header row
     written by the tei:TEI template.
     The variables $text_id, $title, $at-date, $body, $term, $session, $meeting,
     $sitting, $agenda, $subcorpus and $rootHeader, the key 'idr', the named
     template u-langs and the et:* functions are not defined in this file;
     presumably they come from the imported parlamint-lib.xsl (to be confirmed). -->
<xsl:template match="tei:u">
  <xsl:variable name="lang">
    <xsl:call-template name="u-langs"/>
  </xsl:variable>
  <!-- Text metadata -->
  <!-- The tab separator is written as &#9;: a literal tab inside an attribute
       value is normalised to a space by the XML parser, which would break
       the TSV columns. -->
  <xsl:value-of select="concat($text_id, '&#9;')"/>
  <xsl:value-of select="concat(@xml:id, '&#9;')"/>
  <xsl:value-of select="concat($title, '&#9;')"/>
  <xsl:value-of select="concat($at-date, '&#9;')"/>
  <xsl:value-of select="concat($body, '&#9;')"/>
  <xsl:value-of select="concat($term, '&#9;')"/>
  <xsl:value-of select="concat($session, '&#9;')"/>
  <xsl:value-of select="concat($meeting, '&#9;')"/>
  <xsl:value-of select="concat($sitting, '&#9;')"/>
  <xsl:value-of select="concat($agenda, '&#9;')"/>
  <xsl:value-of select="concat($subcorpus, '&#9;')"/>
  <xsl:value-of select="concat($lang, '&#9;')"/>
  <!-- Speaker metadata -->
  <xsl:value-of select="concat(et:u-role(@ana), '&#9;')"/>
  <!-- Look up the element referenced by @who via the 'idr' key in $rootHeader -->
  <xsl:variable name="speaker" select="key('idr', @who, $rootHeader)"/>
  <xsl:choose>
    <!-- No usable speaker: either @who is missing, or it does not resolve to an
         element with content. Both cases get placeholder values; only the
         second one is an error worth reporting. -->
    <xsl:when test="not(@who) or not(normalize-space($speaker))">
      <xsl:if test="@who and not(normalize-space($speaker))">
        <xsl:message select="concat('ERROR: Cant find speaker for ', @who, ' in ', @xml:id)"/>
      </xsl:if>
      <!-- One '-' per speaker column, Speaker_MP through Speaker_birth (10 in all) -->
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-&#9;</xsl:text>
      <xsl:text>-</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="concat(et:speaker-mp($speaker), '&#9;')"/>
      <xsl:value-of select="concat(et:speaker-minister($speaker), '&#9;')"/>
      <!-- Same function fills both party columns; the second argument
           ('abb' vs 'yes') selects which form is returned. -->
      <xsl:value-of select="concat(et:speaker-party($speaker, 'abb'), '&#9;')"/>
      <xsl:value-of select="concat(et:speaker-party($speaker, 'yes'), '&#9;')"/>
      <xsl:value-of select="concat(et:party-status($speaker), '&#9;')"/>
      <xsl:value-of select="concat(et:party-orientation($speaker), '&#9;')"/>
      <!-- Speaker_ID is the @who reference without its leading '#' -->
      <xsl:value-of select="concat(substring-after(@who, '#'), '&#9;')"/>
      <xsl:value-of select="concat(et:format-name-chrono($speaker//tei:persName, $at-date), '&#9;')"/>
      <xsl:value-of select="concat(et:tsv-value($speaker/tei:sex/@value), '&#9;')"/>
      <!-- Drop everything from the first hyphen of birth/@when onwards,
           e.g. 1970-05-12 becomes 1970 -->
      <xsl:value-of select="et:tsv-value(replace($speaker/tei:birth/@when, '-.+', ''))"/>
    </xsl:otherwise>
  </xsl:choose>
  <!-- Speech sizes? -->
  <!--xsl:value-of select="count(.//tei:w) + count(.//tei:pc)"/-->
  <!-- End the row with a line feed (&#10;) so each utterance is on its own line -->
  <xsl:text>&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>