Skip to content
This repository was archived by the owner on Apr 9, 2025. It is now read-only.

Commit 05ef516

Browse files
Merge pull request #6 from Mischn/7.1.0
7.1.0
2 parents b15af59 + b17d866 commit 05ef516

File tree

4 files changed

+207
-1
lines changed

4 files changed

+207
-1
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
<groupId>org.lambda3.text.simplification</groupId>
2828
<artifactId>discourse-simplification</artifactId>
29-
<version>7.0.0</version>
29+
<version>7.1.0</version>
3030
<packaging>jar</packaging>
3131

3232
<name>Discourse Simplification</name>
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : QuotedISAPostExtractor
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.tree.extraction.rules;
24+
25+
import edu.stanford.nlp.ling.Word;
26+
import edu.stanford.nlp.trees.Tree;
27+
import edu.stanford.nlp.trees.tregex.TregexMatcher;
28+
import edu.stanford.nlp.trees.tregex.TregexPattern;
29+
import org.lambda3.text.simplification.discourse.tree.Relation;
30+
import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
31+
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
32+
import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
33+
import org.lambda3.text.simplification.discourse.tree.model.Leaf;
34+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
35+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
38+
39+
import java.util.ArrayList;
40+
import java.util.List;
41+
import java.util.Optional;
42+
43+
/**
44+
*
45+
*/
46+
public class QuotedISAPostExtractor extends ExtractionRule {
47+
private final static Logger LOG = LoggerFactory.getLogger(ExtractionRule.class);
48+
49+
@Override
50+
public Optional<Extraction> extract(Tree parseTree) {
51+
TregexPattern p = TregexPattern.compile("ROOT <<: (S << (/``/=start .. (NP .. (/VB./ .. (/''/=end .. (NP .. VP))))))");
52+
TregexMatcher matcher = p.matcher(parseTree);
53+
54+
while (matcher.findAt(parseTree)) {
55+
Tree quoteStart;
56+
if (matcher.getNode("start") != null) {
57+
quoteStart = (matcher.getNode("start"));
58+
} else {
59+
continue;
60+
}
61+
Tree quoteEnd;
62+
if (matcher.getNode("end") != null) {
63+
quoteEnd = matcher.getNode("end");
64+
} else {
65+
continue;
66+
}
67+
68+
// the left, superordinate constituent
69+
List<Word> leftConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
70+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
71+
72+
// the right, subordinate constituent
73+
List<Word> rightConstituentWords = new ArrayList<>();
74+
rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
75+
rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
76+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
77+
78+
// rephrase
79+
rightConstituent.setProperSentence(false);
80+
List<Word> rephrasedWords = rephraseIntraSententialAttribution(rightConstituentWords);
81+
rightConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
82+
rightConstituent.dontAllowSplit();
83+
84+
// relation
85+
Relation relation = Relation.INTRA_SENT_ATTR;
86+
87+
Extraction res = new SubordinationExtraction(
88+
getClass().getSimpleName(),
89+
relation,
90+
null,
91+
leftConstituent, // the superordinate constituent
92+
rightConstituent, // the subordinate constituent
93+
true
94+
);
95+
96+
return Optional.of(res);
97+
}
98+
99+
return Optional.empty();
100+
}
101+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : QuotedISAPreExtractor
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.tree.extraction.rules;
24+
25+
import edu.stanford.nlp.ling.Word;
26+
import edu.stanford.nlp.trees.Tree;
27+
import edu.stanford.nlp.trees.tregex.TregexMatcher;
28+
import edu.stanford.nlp.trees.tregex.TregexPattern;
29+
import org.lambda3.text.simplification.discourse.tree.Relation;
30+
import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
31+
import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
32+
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
33+
import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
34+
import org.lambda3.text.simplification.discourse.tree.model.Leaf;
35+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
36+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
37+
import org.slf4j.Logger;
38+
import org.slf4j.LoggerFactory;
39+
40+
import java.util.ArrayList;
41+
import java.util.List;
42+
import java.util.Optional;
43+
44+
/**
45+
*
46+
*/
47+
public class QuotedISAPreExtractor extends ExtractionRule {
48+
private final static Logger LOG = LoggerFactory.getLogger(ExtractionRule.class);
49+
50+
@Override
51+
public Optional<Extraction> extract(Tree parseTree) {
52+
TregexPattern p = TregexPattern.compile("ROOT <<: (S << (NP .. (VP .. (/``/=start .. (NP .. (/VB./ .. (/''/=end)))))))");
53+
TregexMatcher matcher = p.matcher(parseTree);
54+
55+
while (matcher.findAt(parseTree)) {
56+
Tree quoteStart;
57+
if (matcher.getNode("start") != null) {
58+
quoteStart = (matcher.getNode("start"));
59+
} else {
60+
continue;
61+
}
62+
Tree quoteEnd;
63+
if (matcher.getNode("end") != null) {
64+
quoteEnd = matcher.getNode("end");
65+
} else {
66+
continue;
67+
}
68+
69+
// the left, subordinate constituent
70+
List<Word> leftConstituentWords = new ArrayList<>();
71+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
72+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
73+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
74+
75+
// rephrase
76+
leftConstituent.setProperSentence(false);
77+
List<Word> rephrasedWords = rephraseIntraSententialAttribution(leftConstituentWords);
78+
leftConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
79+
leftConstituent.dontAllowSplit();
80+
81+
// the right, superordinate constituent
82+
List<Word> rightConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
83+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
84+
85+
// relation
86+
Relation relation = Relation.INTRA_SENT_ATTR;
87+
88+
Extraction res = new SubordinationExtraction(
89+
getClass().getSimpleName(),
90+
relation,
91+
null,
92+
leftConstituent, // the superordinate constituent
93+
rightConstituent, // the subordinate constituent
94+
true
95+
);
96+
97+
return Optional.of(res);
98+
}
99+
100+
return Optional.empty();
101+
}
102+
}

src/main/resources/reference.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ discourse-simplification {
44
org.lambda3.text.simplification.discourse.tree.extraction.rules.ReferenceExtractor2,
55
org.lambda3.text.simplification.discourse.tree.extraction.rules.CoordinationExtractor,
66

7+
org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPreExtractor,
8+
org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPostExtractor,
9+
710
org.lambda3.text.simplification.discourse.tree.extraction.rules.EnablementPreExtractor,
811
org.lambda3.text.simplification.discourse.tree.extraction.rules.SubordinationPreEnablementExtractor,
912
org.lambda3.text.simplification.discourse.tree.extraction.rules.SharedNPPreParticipalExtractor,

0 commit comments

Comments
 (0)