Merge pull request #6 from Mischn/7.1.0

bermeitinger-b · web-flow · commit 05ef516bdfd7 · 2017-07-25T15:43:33.000+02:00
7.1.0
diff --git a/pom.xml b/pom.xml
@@ -26,7 +26,7 @@
 
     <groupId>org.lambda3.text.simplification</groupId>
     <artifactId>discourse-simplification</artifactId>
-    <version>7.0.0</version>
+    <version>7.1.0</version>
     <packaging>jar</packaging>
 
     <name>Discourse Simplification</name>
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/QuotedISAPostExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/QuotedISAPostExtractor.java
@@ -0,0 +1,101 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : QuotedISAPostExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import edu.stanford.nlp.trees.tregex.TregexPattern;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction rule for a quotation followed by its attribution: the words between the matched quotation marks become the left constituent, the words outside them the right constituent of an {@code INTRA_SENT_ATTR} subordination.
+ */
+public class QuotedISAPostExtractor extends ExtractionRule {
+    private static final Logger LOG = LoggerFactory.getLogger(QuotedISAPostExtractor.class);
+
+    @Override
+    public Optional<Extraction> extract(Tree parseTree) {
+        TregexPattern p = TregexPattern.compile("ROOT <<: (S << (/``/=start .. (NP .. (/VB./ .. (/''/=end .. (NP .. VP))))))");
+        TregexMatcher matcher = p.matcher(parseTree);
+
+        while (matcher.findAt(parseTree)) { // the body returns on the first match that binds both quote nodes
+            Tree quoteStart;
+            if (matcher.getNode("start") != null) {
+                quoteStart = (matcher.getNode("start"));
+            } else {
+                continue; // opening quote not bound: ask the matcher for another match
+            }
+            Tree quoteEnd;
+            if (matcher.getNode("end") != null) {
+                quoteEnd = matcher.getNode("end");
+            } else {
+                continue; // closing quote not bound: ask the matcher for another match
+            }
+
+            // the left, superordinate constituent: the words between the quotation marks (the two false flags presumably exclude the marks themselves; confirm in ParseTreeExtractionUtils)
+            List<Word> leftConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
+            Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
+
+            // the right, subordinate constituent: everything before the opening and after the closing quotation mark
+            List<Word> rightConstituentWords = new ArrayList<>();
+            rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
+            rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
+            Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
+
+            // rephrase: the attribution is flagged as not a proper sentence, given a rephrased text and excluded from further splitting
+            rightConstituent.setProperSentence(false);
+            List<Word> rephrasedWords = rephraseIntraSententialAttribution(rightConstituentWords);
+            rightConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
+            rightConstituent.dontAllowSplit();
+
+            // relation: this rule always yields an intra-sentential attribution
+            Relation relation = Relation.INTRA_SENT_ATTR;
+
+            Extraction res = new SubordinationExtraction(
+                    getClass().getSimpleName(),
+                    relation,
+                    null,
+                    leftConstituent, // the superordinate constituent
+                    rightConstituent, // the subordinate constituent
+                    true
+            );
+
+            return Optional.of(res);
+        }
+
+        return Optional.empty(); // no quotation/attribution structure matched
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/QuotedISAPreExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/QuotedISAPreExtractor.java
@@ -0,0 +1,102 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : QuotedISAPreExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import edu.stanford.nlp.trees.tregex.TregexPattern;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction rule for an attribution that precedes its quotation: the words outside the matched quotation marks become the left constituent, the words between them the right constituent of an {@code INTRA_SENT_ATTR} subordination.
+ */
+public class QuotedISAPreExtractor extends ExtractionRule {
+    private static final Logger LOG = LoggerFactory.getLogger(QuotedISAPreExtractor.class);
+
+    @Override
+    public Optional<Extraction> extract(Tree parseTree) {
+        TregexPattern p = TregexPattern.compile("ROOT <<: (S << (NP .. (VP .. (/``/=start .. (NP .. (/VB./ .. (/''/=end)))))))");
+        TregexMatcher matcher = p.matcher(parseTree);
+
+        while (matcher.findAt(parseTree)) { // the body returns on the first match that binds both quote nodes
+            Tree quoteStart;
+            if (matcher.getNode("start") != null) {
+                quoteStart = (matcher.getNode("start"));
+            } else {
+                continue; // opening quote not bound: ask the matcher for another match
+            }
+            Tree quoteEnd;
+            if (matcher.getNode("end") != null) {
+                quoteEnd = matcher.getNode("end");
+            } else {
+                continue; // closing quote not bound: ask the matcher for another match
+            }
+
+            // the left, subordinate constituent: everything before the opening and after the closing quotation mark
+            List<Word> leftConstituentWords = new ArrayList<>();
+            leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, quoteStart, false));
+            leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, quoteEnd, false));
+            Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
+
+            // rephrase: the attribution is flagged as not a proper sentence, given a rephrased text and excluded from further splitting
+            leftConstituent.setProperSentence(false);
+            List<Word> rephrasedWords = rephraseIntraSententialAttribution(leftConstituentWords);
+            leftConstituent.setRephrasedText(WordsUtils.wordsToProperSentenceString(rephrasedWords));
+            leftConstituent.dontAllowSplit();
+
+            // the right, superordinate constituent: the words between the quotation marks (the two false flags presumably exclude the marks themselves; confirm in ParseTreeExtractionUtils)
+            List<Word> rightConstituentWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, quoteStart, quoteEnd, false, false);
+            Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
+
+            // relation: this rule always yields an intra-sentential attribution
+            Relation relation = Relation.INTRA_SENT_ATTR;
+
+            Extraction res = new SubordinationExtraction(
+                    getClass().getSimpleName(),
+                    relation,
+                    null,
+                    leftConstituent, // the subordinate constituent
+                    rightConstituent, // the superordinate constituent
+                    false // superordinate is on the right here; NOTE(review): flag presumably means "superordinate is left", confirm against SubordinationExtraction
+            );
+
+            return Optional.of(res);
+        }
+
+        return Optional.empty(); // no attribution/quotation structure matched
+    }
+}
diff --git a/src/main/resources/reference.conf b/src/main/resources/reference.conf
@@ -4,6 +4,9 @@ discourse-simplification {
         org.lambda3.text.simplification.discourse.tree.extraction.rules.ReferenceExtractor2,
         org.lambda3.text.simplification.discourse.tree.extraction.rules.CoordinationExtractor,
 
+        org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPreExtractor,
+        org.lambda3.text.simplification.discourse.tree.extraction.rules.QuotedISAPostExtractor,
+
         org.lambda3.text.simplification.discourse.tree.extraction.rules.EnablementPreExtractor,
         org.lambda3.text.simplification.discourse.tree.extraction.rules.SubordinationPreEnablementExtractor,
         org.lambda3.text.simplification.discourse.tree.extraction.rules.SharedNPPreParticipalExtractor,