Skip to content
This repository was archived by the owner on Apr 9, 2025. It is now read-only.

Commit b141631

Browse files
Merge pull request #3 from Mischn/7.0.0-SNAPSHOT
7.0.0 snapshot
2 parents 666412b + 88ecf70 commit b141631

File tree

12 files changed

+185
-62
lines changed

12 files changed

+185
-62
lines changed

pom.xml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
<groupId>org.lambda3.text.simplification</groupId>
2828
<artifactId>discourse-simplification</artifactId>
29-
<version>6.0.0</version>
29+
<version>7.0.0-SNAPSHOT</version>
3030
<packaging>jar</packaging>
3131

3232
<name>Discourse Simplification</name>
@@ -40,6 +40,8 @@
4040
</scm>
4141

4242
<properties>
43+
<config.version>1.3.1</config.version>
44+
4345
<corenlp.version>3.7.0</corenlp.version>
4446
<logback.version>1.1.8</logback.version>
4547

@@ -53,6 +55,13 @@
5355

5456
<dependencies>
5557

58+
<!-- Config -->
59+
<dependency>
60+
<groupId>com.typesafe</groupId>
61+
<artifactId>config</artifactId>
62+
<version>${config.version}</version>
63+
</dependency>
64+
5665
<!-- Stanford NLP -->
5766
<dependency>
5867
<groupId>edu.stanford.nlp</groupId>

src/main/java/org/lambda3/text/simplification/discourse/processing/Processor.java

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222

2323
package org.lambda3.text.simplification.discourse.processing;
2424

25+
import com.typesafe.config.Config;
26+
import com.typesafe.config.ConfigFactory;
2527
import org.lambda3.text.simplification.discourse.relation_extraction.DiscourseExtractor;
2628
import org.lambda3.text.simplification.discourse.relation_extraction.Element;
2729
import org.lambda3.text.simplification.discourse.tree.DiscourseTreeCreator;
30+
import org.lambda3.text.simplification.discourse.utils.ConfigUtils;
2831
import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
2932
import org.slf4j.Logger;
3033
import org.slf4j.LoggerFactory;
@@ -39,10 +42,26 @@
3942
*
4043
*/
4144
public class Processor {
42-
private final static DiscourseTreeCreator DISCOURSE_TREE_CREATOR = new DiscourseTreeCreator();
43-
private final static DiscourseExtractor DISCOURSE_EXTRACTOR = new DiscourseExtractor();
45+
private final DiscourseTreeCreator discourseTreeCreator;
46+
private final DiscourseExtractor discourseExtractor;
4447
private final Logger logger = LoggerFactory.getLogger(getClass());
4548

49+
private final Config config;
50+
51+
public Processor(Config config) {
52+
this.config = config.getConfig("discourse-simplification");
53+
54+
this.discourseTreeCreator = new DiscourseTreeCreator(this.config);
55+
this.discourseExtractor = new DiscourseExtractor(this.config);
56+
57+
logger.info("Processor initialized");
58+
logger.info("\n{}", ConfigUtils.prettyPrint(this.config));
59+
}
60+
61+
/**
 * Creates a processor using the configuration returned by {@link ConfigFactory#load()}.
 */
public Processor() {
    this(ConfigFactory.load());
}
64+
4665
public List<OutSentence> process(File file, ProcessingType type) throws FileNotFoundException {
4766
List<String> sentences = SentencesUtils.splitIntoSentencesFromFile(file);
4867
return process(sentences, type);
@@ -89,22 +108,22 @@ private List<OutSentence> processWhole(List<String> sentences) {
89108
// Step 1) create document discourse tree
90109
logger.info("### STEP 1) CREATE DOCUMENT DISCOURSE TREE ###");
91110

92-
DISCOURSE_TREE_CREATOR.reset();
111+
discourseTreeCreator.reset();
93112

94113
int idx = 0;
95114
for (String sentence : sentences) {
96115
logger.info("# Processing sentence {}/{} #", (idx + 1), sentences.size());
97116
logger.info("'" + sentence + "'");
98117

99118
// extend discourse tree
100-
DISCOURSE_TREE_CREATOR.addSentence(sentence, idx);
101-
DISCOURSE_TREE_CREATOR.update();
119+
discourseTreeCreator.addSentence(sentence, idx);
120+
discourseTreeCreator.update();
102121
if (logger.isDebugEnabled()) {
103122

104-
Optional.ofNullable(DISCOURSE_TREE_CREATOR.getLastSentenceTree())
123+
Optional.ofNullable(discourseTreeCreator.getLastSentenceTree())
105124
.ifPresent(t -> logger.debug(t.toString()));
106125

107-
// logger.debug(DISCOURSE_TREE_CREATOR.getDiscourseTree().toString()); // to show the current document discourse tree
126+
// logger.debug(discourseTreeCreator.getDiscourseTree().toString()); // to show the current document discourse tree
108127
}
109128

110129
++idx;
@@ -113,7 +132,7 @@ private List<OutSentence> processWhole(List<String> sentences) {
113132
// Step 2) extract elements
114133
logger.info("### STEP 2) EXTRACT ELEMENTS ###");
115134

116-
List<Element> elements = DISCOURSE_EXTRACTOR.extract(DISCOURSE_TREE_CREATOR.getDiscourseTree());
135+
List<Element> elements = discourseExtractor.extract(discourseTreeCreator.getDiscourseTree());
117136
if (logger.isDebugEnabled()) {
118137
elements.stream().filter(e -> e.getContextLayer() == 0).forEach(x -> logger.debug(x.toString()));
119138
}
@@ -143,17 +162,17 @@ private List<OutSentence> processSeparate(List<String> sentences) {
143162
// Step 1) create sentence discourse tree
144163
logger.debug("### Step 1) CREATE SENTENCE DISCOURSE TREE ###");
145164

146-
DISCOURSE_TREE_CREATOR.reset();
147-
DISCOURSE_TREE_CREATOR.addSentence(sentence, idx);
148-
DISCOURSE_TREE_CREATOR.update();
165+
discourseTreeCreator.reset();
166+
discourseTreeCreator.addSentence(sentence, idx);
167+
discourseTreeCreator.update();
149168
if (logger.isDebugEnabled()) {
150-
logger.debug(DISCOURSE_TREE_CREATOR.getDiscourseTree().toString());
169+
logger.debug(discourseTreeCreator.getDiscourseTree().toString());
151170
}
152171

153172
// Step 2) extract elements
154173
logger.debug("### STEP 2) EXTRACT ELEMENTS ###");
155174

156-
List<Element> es = DISCOURSE_EXTRACTOR.extract(DISCOURSE_TREE_CREATOR.getDiscourseTree());
175+
List<Element> es = discourseExtractor.extract(discourseTreeCreator.getDiscourseTree());
157176
if (logger.isDebugEnabled()) {
158177
es.stream().filter(e -> e.getContextLayer() == 0).forEach(x -> logger.debug(x.toString()));
159178
}

src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/DiscourseExtractor.java

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,20 @@
2222

2323
package org.lambda3.text.simplification.discourse.relation_extraction;
2424

25+
import com.typesafe.config.Config;
26+
import com.typesafe.config.ConfigException;
2527
import org.lambda3.text.simplification.discourse.tree.Relation;
28+
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
2629
import org.lambda3.text.simplification.discourse.tree.model.Coordination;
2730
import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
2831
import org.lambda3.text.simplification.discourse.tree.model.Leaf;
2932
import org.lambda3.text.simplification.discourse.tree.model.Subordination;
3033
import org.slf4j.Logger;
3134
import org.slf4j.LoggerFactory;
3235

33-
import java.util.Arrays;
36+
import java.lang.reflect.Constructor;
37+
import java.lang.reflect.InvocationTargetException;
38+
import java.util.ArrayList;
3439
import java.util.LinkedHashMap;
3540
import java.util.List;
3641
import java.util.stream.Collectors;
@@ -39,19 +44,27 @@
3944
*
4045
*/
4146
public class DiscourseExtractor {
42-
private static final List<Relation> IGNORED_RELATIONS = Arrays.asList(
43-
Relation.UNKNOWN_COORDINATION,
44-
Relation.JOINT_LIST,
45-
Relation.JOINT_DISJUNCTION,
46-
Relation.JOINT_NP_LIST,
47-
Relation.JOINT_NP_DISJUNCTION
48-
49-
);
47+
private final List<Relation> ignoredRelations;
5048
private final Logger logger = LoggerFactory.getLogger(getClass());
5149

50+
private final Config config;
5251
private LinkedHashMap<Leaf, Element> processedLeaves;
5352

54-
public DiscourseExtractor() {
53+
/**
 * Creates an extractor whose ignored relations are read from the given configuration.
 *
 * <p>Every entry of the {@code ignored-relations} string list must be the exact name of a
 * {@link Relation} constant.
 *
 * @param config configuration providing the {@code ignored-relations} string list
 * @throws ConfigException.BadValue if an entry does not name a {@link Relation} constant
 */
public DiscourseExtractor(Config config) {
    this.config = config;

    // create ignored relations from config
    this.ignoredRelations = new ArrayList<>();
    for (String valueName : this.config.getStringList("ignored-relations")) {
        try {
            ignoredRelations.add(Relation.valueOf(valueName));
        } catch (IllegalArgumentException e) {
            // Pass the original exception along so its stack trace is not lost.
            logger.error("Failed to create enum value of {}", valueName, e);
            throw new ConfigException.BadValue("ignored-relations." + valueName, "Failed to create enum value.", e);
        }
    }

    this.processedLeaves = new LinkedHashMap<>();
}
5770

@@ -89,7 +102,7 @@ private void extractRec(DiscourseTree node, int contextLayer) {
89102
}
90103

91104
// set relations
92-
if (!IGNORED_RELATIONS.contains(coordination.getRelation())) {
105+
if (!ignoredRelations.contains(coordination.getRelation())) {
93106
for (DiscourseTree child : coordination.getCoordinations()) {
94107
List<Element> childNElements = child.getNucleusPathLeaves().stream().map(n -> processedLeaves.get(n)).collect(Collectors.toList());
95108

@@ -128,7 +141,7 @@ private void extractRec(DiscourseTree node, int contextLayer) {
128141
extractRec(subordination.getSubordination(), contextLayer + 1);
129142

130143
// add relations
131-
if (!IGNORED_RELATIONS.contains(subordination.getRelation())) {
144+
if (!ignoredRelations.contains(subordination.getRelation())) {
132145
List<Element> superordinationNElements = subordination.getSuperordination().getNucleusPathLeaves().stream().map(n -> processedLeaves.get(n)).collect(Collectors.toList());
133146
List<Element> subordinationNElements = subordination.getSubordination().getNucleusPathLeaves().stream().map(n -> processedLeaves.get(n)).collect(Collectors.toList());
134147

src/main/java/org/lambda3/text/simplification/discourse/tree/DiscourseTreeCreator.java

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
package org.lambda3.text.simplification.discourse.tree;
2424

25+
import com.typesafe.config.Config;
26+
import com.typesafe.config.ConfigException;
2527
import edu.stanford.nlp.trees.Tree;
2628
import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
2729
import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
@@ -39,6 +41,8 @@
3941
import org.slf4j.Logger;
4042
import org.slf4j.LoggerFactory;
4143

44+
import java.lang.reflect.Constructor;
45+
import java.lang.reflect.InvocationTargetException;
4246
import java.util.ArrayList;
4347
import java.util.List;
4448
import java.util.Optional;
@@ -47,37 +51,29 @@
4751
*
4852
*/
4953
public class DiscourseTreeCreator {
50-
private static final List<ExtractionRule> rules;
51-
52-
static {
53-
rules = new ArrayList<>();
54-
55-
rules.add(new ReferenceExtractor1());
56-
rules.add(new ReferenceExtractor2());
57-
rules.add(new CoordinationExtractor());
58-
59-
rules.add(new EnablementPreExtractor());
60-
rules.add(new SubordinationPreEnablementExtractor());
61-
rules.add(new SharedNPPreParticipalExtractor());
62-
rules.add(new SubordinationPreExtractor());
63-
64-
rules.add(new EnablementPostExtractor());
65-
rules.add(new SubordinationPostEnablementExtractor());
66-
rules.add(new SharedNPPostCoordinationExtractor());
67-
rules.add(new SharedNPPostParticipalExtractor());
68-
rules.add(new SubordinationPostISAExtractor());
69-
rules.add(new SubordinationPostISAExtractor2());
70-
rules.add(new SubordinationPostExtractor());
71-
72-
// should be applied last (because they dont allow further splitting)
73-
rules.add(new PreListNPExtractor());
74-
rules.add(new PostListNPExtractor());
75-
}
54+
private final List<ExtractionRule> rules;
7655

7756
private final Logger logger = LoggerFactory.getLogger(getClass());
57+
private final Config config;
7858
private Coordination discourseTree;
7959

80-
public DiscourseTreeCreator() {
60+
public DiscourseTreeCreator(Config config) {
61+
this.config = config;
62+
63+
// create rules from config
64+
this.rules = new ArrayList<>();
65+
for (String className : this.config.getStringList("rules")) {
66+
try {
67+
Class<?> clazz = Class.forName(className);
68+
Constructor<?> constructor = clazz.getConstructor();
69+
ExtractionRule rule = (ExtractionRule) constructor.newInstance();
70+
rules.add(rule);
71+
} catch (InstantiationException | InvocationTargetException | NoSuchMethodException | IllegalAccessException | ClassNotFoundException e) {
72+
logger.error("Failed to create instance of {}", className);
73+
throw new ConfigException.BadValue("rules." + className, "Failed to create instance.");
74+
}
75+
}
76+
8177
reset();
8278
}
8379

src/main/java/org/lambda3/text/simplification/discourse/tree/Relation.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ public enum Relation {
3838
ELABORATION,
3939
ENABLEMENT,
4040
EXPLANATION,
41-
JOINT_LIST,
41+
JOINT_CONJUNCTION,
4242
JOINT_DISJUNCTION,
4343
TEMPORAL_BEFORE,
4444
TEMPORAL_AFTER,
4545
TEMPORAL_SEQUENCE,
4646

4747
// special relations
4848
INTRA_SENT_ATTR,
49-
JOINT_NP_LIST,
49+
JOINT_NP_CONJUNCTION,
5050
JOINT_NP_DISJUNCTION;
5151

5252
static {

src/main/java/org/lambda3/text/simplification/discourse/tree/classification/SignalPhraseClassifier.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ public class SignalPhraseClassifier {
9595
new Mapping(Relation.EXPLANATION, "...indeed..."),
9696
new Mapping(Relation.EXPLANATION, "...so...that..."),
9797

98-
// JOINT_LIST
99-
new Mapping(Relation.JOINT_LIST, "...and..."),
98+
// JOINT_CONJUNCTION
99+
new Mapping(Relation.JOINT_CONJUNCTION, "...and..."),
100100

101101
// JOINT_DISJUNCTION
102102
new Mapping(Relation.JOINT_DISJUNCTION, "...or..."),

src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SharedNPPreParticipalExtractor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ public Optional<Extraction> extract(Tree parseTree) {
5959
leftConstituentWords.addAll(ParseTreeExtractionUtils.getContainingWords(matcher.getNode("np")));
6060
leftConstituentWords.addAll(getRephrasedParticipalS(matcher.getNode("np"), matcher.getNode("vp"), matcher.getNode("s"), matcher.getNode("vbgn")));
6161
leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("vp"), false));
62-
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
62+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
6363

6464
// the right constituent
6565
List<Word> rightConstituentWords = new ArrayList<>();
6666
rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, matcher.getNode("s"), false));
6767
rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("s"), false));
68-
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
68+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
6969

7070
// relation
7171
Relation relation = Relation.UNKNOWN_COORDINATION;

src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/ListNPSplitter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ public static Optional<Result> splitList(Tree anchorTree, Tree np) {
204204
if (containsSemicolon) {
205205

206206
// check for conjunction with elements separated by ;
207-
Optional<Result> r = check(anchorTree, np, new ConjunctionLeafChecker("and"), new ValueLeafChecker(";"), Relation.JOINT_NP_LIST);
207+
Optional<Result> r = check(anchorTree, np, new ConjunctionLeafChecker("and"), new ValueLeafChecker(";"), Relation.JOINT_NP_CONJUNCTION);
208208
if (r.isPresent()) {
209209
return r;
210210
}
@@ -217,7 +217,7 @@ public static Optional<Result> splitList(Tree anchorTree, Tree np) {
217217
} else {
218218

219219
// check for conjunction with elements separated by ,
220-
Optional<Result> r = check(anchorTree, np, new ConjunctionLeafChecker("and"), new ValueLeafChecker(","), Relation.JOINT_NP_LIST);
220+
Optional<Result> r = check(anchorTree, np, new ConjunctionLeafChecker("and"), new ValueLeafChecker(","), Relation.JOINT_NP_CONJUNCTION);
221221
if (r.isPresent()) {
222222
return r;
223223
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : ConfigUtils
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.utils;
24+
25+
import com.typesafe.config.Config;
26+
import com.typesafe.config.ConfigRenderOptions;
27+
28+
/**
29+
*
30+
*/
31+
public class ConfigUtils {
32+
public static String prettyPrint(Config config) {
33+
return config == null
34+
? null
35+
: config.root().render(ConfigRenderOptions.concise().setFormatted(true));
36+
}
37+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
discourse-simplification {
2+
3+
}

0 commit comments

Comments
 (0)