Skip to content
This repository was archived by the owner on Apr 9, 2025. It is now read-only.

Commit c246541

Browse files
committed
Added Rules for Quoted Attributions.
1 parent b15af59 commit c246541

File tree

4 files changed

+215
-1
lines changed

4 files changed

+215
-1
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
<groupId>org.lambda3.text.simplification</groupId>
2828
<artifactId>discourse-simplification</artifactId>
29-
<version>7.0.0</version>
29+
<version>7.1.0-SNAPSHOT</version>
3030
<packaging>jar</packaging>
3131

3232
<name>Discourse Simplification</name>
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : QuotedISAPostExtractor
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.tree.extraction.rules;
24+
25+
import edu.stanford.nlp.ling.Word;
26+
import edu.stanford.nlp.trees.Tree;
27+
import edu.stanford.nlp.trees.tregex.TregexMatcher;
28+
import edu.stanford.nlp.trees.tregex.TregexPattern;
29+
import org.lambda3.text.simplification.discourse.tree.Relation;
30+
import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
31+
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
32+
import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
33+
import org.lambda3.text.simplification.discourse.tree.model.Leaf;
34+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
35+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
38+
39+
import java.util.ArrayList;
40+
import java.util.List;
41+
import java.util.Optional;
42+
43+
/**
44+
*
45+
*/
46+
public class QuotedISAPostExtractor extends ExtractionRule {
47+
private final static Logger LOG = LoggerFactory.getLogger(ExtractionRule.class);
48+
49+
@Override
50+
public Optional<Extraction> extract(Tree parseTree) {
51+
TregexPattern p = TregexPattern.compile("ROOT <<: (S < (S=s < (NP $.. VP) ?,, (/``/=startOut) ?<<, (/``/=startIn) ?<<- (/''/=endIn) ?.. (/''/=endOut) $.. (NP $.. VP)))");
52+
TregexMatcher matcher = p.matcher(parseTree);
53+
54+
while (matcher.findAt(parseTree)) {
55+
Tree quoteStart;
56+
if (matcher.getNode("startOut") != null) {
57+
quoteStart = matcher.getNode("startOut");
58+
} else if (matcher.getNode("startIn") != null) {
59+
quoteStart = matcher.getNode("startIn");
60+
} else {
61+
continue;
62+
}
63+
Tree quoteEnd;
64+
if (matcher.getNode("endOut") != null) {
65+
quoteEnd = matcher.getNode("endOut");
66+
} else if (matcher.getNode("endIn") != null) {
67+
quoteEnd = matcher.getNode("endIn");
68+
} else {
69+
continue;
70+
}
71+
72+
// the left, superordinate constituent
73+
List<Word> leftConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
74+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
75+
76+
// the right, subordinate constituent
77+
List<Word> rightConstituentWords = new ArrayList<>();
78+
rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
79+
rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
80+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
81+
82+
// rephrase
83+
rightConstituent.setProperSentence(false);
84+
List<Word> rephrasedWords = rephraseIntraSententialAttribution(rightConstituentWords);
85+
rightConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
86+
rightConstituent.dontAllowSplit();
87+
88+
// relation
89+
Relation relation = Relation.INTRA_SENT_ATTR;
90+
91+
Extraction res = new SubordinationExtraction(
92+
getClass().getSimpleName(),
93+
relation,
94+
null,
95+
leftConstituent, // the superordinate constituent
96+
rightConstituent, // the subordinate constituent
97+
true
98+
);
99+
100+
return Optional.of(res);
101+
}
102+
103+
return Optional.empty();
104+
}
105+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : QuotedISAPreExtractor
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.tree.extraction.rules;
24+
25+
import edu.stanford.nlp.ling.Word;
26+
import edu.stanford.nlp.trees.Tree;
27+
import edu.stanford.nlp.trees.tregex.TregexMatcher;
28+
import edu.stanford.nlp.trees.tregex.TregexPattern;
29+
import org.lambda3.text.simplification.discourse.tree.Relation;
30+
import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
31+
import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
32+
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
33+
import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
34+
import org.lambda3.text.simplification.discourse.tree.model.Leaf;
35+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
36+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
37+
import org.slf4j.Logger;
38+
import org.slf4j.LoggerFactory;
39+
40+
import java.util.ArrayList;
41+
import java.util.List;
42+
import java.util.Optional;
43+
44+
/**
45+
*
46+
*/
47+
public class QuotedISAPreExtractor extends ExtractionRule {
48+
private final static Logger LOG = LoggerFactory.getLogger(ExtractionRule.class);
49+
50+
@Override
51+
public Optional<Extraction> extract(Tree parseTree) {
52+
TregexPattern p = TregexPattern.compile("ROOT <<: (S < (NP $.. (VP <+(VP) (S=s < (NP $.. VP) ?,, (/``/=startOut) ?<<, (/``/=startIn) ?<<- (/''/=endIn) ?.. (/''/=endOut)))))");
53+
TregexMatcher matcher = p.matcher(parseTree);
54+
55+
while (matcher.findAt(parseTree)) {
56+
Tree quoteStart;
57+
if (matcher.getNode("startOut") != null) {
58+
quoteStart = matcher.getNode("startOut");
59+
} else if (matcher.getNode("startIn") != null) {
60+
quoteStart = matcher.getNode("startIn");
61+
} else {
62+
continue;
63+
}
64+
Tree quoteEnd;
65+
if (matcher.getNode("endOut") != null) {
66+
quoteEnd = matcher.getNode("endOut");
67+
} else if (matcher.getNode("endIn") != null) {
68+
quoteEnd = matcher.getNode("endIn");
69+
} else {
70+
continue;
71+
}
72+
73+
// the left, subordinate constituent
74+
List<Word> leftConstituentWords = new ArrayList<>();
75+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
76+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
77+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
78+
79+
// rephrase
80+
leftConstituent.setProperSentence(false);
81+
List<Word> rephrasedWords = rephraseIntraSententialAttribution(leftConstituentWords);
82+
leftConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
83+
leftConstituent.dontAllowSplit();
84+
85+
// the right, superordinate constituent
86+
List<Word> rightConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
87+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
88+
89+
// relation
90+
Relation relation = Relation.INTRA_SENT_ATTR;
91+
92+
Extraction res = new SubordinationExtraction(
93+
getClass().getSimpleName(),
94+
relation,
95+
null,
96+
leftConstituent, // the superordinate constituent
97+
rightConstituent, // the subordinate constituent
98+
true
99+
);
100+
101+
return Optional.of(res);
102+
}
103+
104+
return Optional.empty();
105+
}
106+
}

src/main/resources/reference.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ discourse-simplification {
44
org.lambda3.text.simplification.discourse.tree.extraction.rules.ReferenceExtractor2,
55
org.lambda3.text.simplification.discourse.tree.extraction.rules.CoordinationExtractor,
66

7+
org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPreExtractor,
8+
org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPostExtractor,
9+
710
org.lambda3.text.simplification.discourse.tree.extraction.rules.EnablementPreExtractor,
811
org.lambda3.text.simplification.discourse.tree.extraction.rules.SubordinationPreEnablementExtractor,
912
org.lambda3.text.simplification.discourse.tree.extraction.rules.SharedNPPreParticipalExtractor,

0 commit comments

Comments
 (0)