diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
index dc331a63..ff1e5a44 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
@@ -124,12 +124,13 @@ public void setParameters(Parameters p) {
public void setLossFlag() {
lossFlag = true;
}
+
public void unsetLossFlag() {
- lossFlag=false;
+ lossFlag = false;
}
public void setCandidates(int a) {
- candidates= a;
+ candidates = a;
}
/** Retrieves the parameters that are set in this learner. */
@@ -817,23 +818,24 @@ public void forget() {
**/
public ScoreSet scores(Object example) {
Object[] exampleArray = getExampleArray(example, false);
- ScoreSet resultS = scores((int[])exampleArray[0], (double[])exampleArray[1]);
+ ScoreSet resultS = scores((int[]) exampleArray[0], (double[]) exampleArray[1]);
if (!lossFlag)
return resultS;
else
- return scoresAugmented(example,resultS);
- }
-
- /**
- * Update the score of each binary variable (label) based on the gold value of each example for that variable.
- * When using a {@code SparseNetworkLearner} to keep the model there is an LTU for each label.
- * If the gold is same as a specific label then its binary value for that label is 1 and the score for that label
- * will be {@code oldScore - lossOffset}; otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
- *
- * @param example The object to make decisions about.
- * @param resultS The original scores (see {@link #scores(Object)}).
- * @return The augmented set of scores.
- */
+ return scoresAugmented(example, resultS);
+ }
+
+ /**
+ * Update the score of each binary variable (label) based on the gold value of each example for
+ * that variable. When using a {@code SparseNetworkLearner} to keep the model there is an LTU
+ * for each label. If the gold is same as a specific label then its binary value for that label
+ * is 1 and the score for that label will be {@code oldScore - lossOffset}; otherwise it will be
+ * 0 and the score will be {@code oldScore + lossOffset}.
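+ * <p>
+ * For instance (hypothetical numbers): with {@code candidates} = 4 the loss offset is 0.25, so a
+ * gold label originally scoring 0.9 is reported as 0.65, while a non-gold label originally
+ * scoring 0.3 is reported as 0.55.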
+ *
+ * @param example The object to make decisions about.
+ * @param resultS The original scores (see {@link #scores(Object)}).
+ * @return The augmented set of scores.
+ */
public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
ScoreSet augmentedScores = new ScoreSet();
Lexicon lLexicon = getLabelLexicon();
@@ -843,7 +845,7 @@ public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
double originalScore = resultS.getScore(candidate).score;
double lossOffset = 1 / (double) (candidates);
if (candidate.equals(gold))
- augmentedScores.put(candidate, originalScore - lossOffset);
+ augmentedScores.put(candidate, originalScore - lossOffset);
else
augmentedScores.put(candidate, originalScore + lossOffset);
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
index fc29e2ba..8f5b6507 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
@@ -8,7 +8,7 @@
package edu.illinois.cs.cogcomp.lbjava.learn;
import java.io.PrintStream;
+import java.util.Objects;
 
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
@@ -40,6 +40,8 @@ public class StochasticGradientDescent extends Learner {
public static final double defaultLearningRate = 0.1;
/** Default for {@link #weightVector}. */
public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Default value for {@link #lossFunction}. */
+ public static final String defaultLossFunction = "lms";
/** The hypothesis vector; default {@link #defaultWeightVector}. */
@@ -52,7 +54,14 @@ public class StochasticGradientDescent extends Learner {
* The rate at which weights are updated; default {@link #defaultLearningRate}.
**/
protected double learningRate;
-
+ /**
+ * The name of the loss function; default {@link #defaultLossFunction}.
+ **/
+ protected String lossFunction;
+ /**
+ * Caches whether {@link #lossFunction} is "lms" ({@code true}) or "hinge" ({@code false}).
+ **/
+ private boolean isLMS;
/**
* The learning rate takes the default value, while the name of the classifier gets the empty
@@ -125,6 +134,15 @@ public StochasticGradientDescent(String n, Parameters p) {
public void setParameters(Parameters p) {
weightVector = p.weightVector;
learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
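+ // Cache the loss-function choice as a boolean so learn() avoids repeated string comparisons.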
+ if (Objects.equals(p.lossFunction, "lms")) {
+ isLMS = true;
+ } else if (Objects.equals(p.lossFunction, "hinge")) {
+ isLMS = false;
+ } else {
+ System.err.println("Undefined loss function \"" + p.lossFunction + "\"; expected \"lms\" or \"hinge\".");
+ System.exit(-1);
+ }
}
@@ -161,6 +179,10 @@ public void setLearningRate(double t) {
learningRate = t;
}
+ /**
+ * Returns the name of the loss function in use.
+ *
+ * @return The value of {@link #lossFunction}.
+ **/
+ public String getLossFunction() {
+ return lossFunction;
+ }
+
/** Resets the weight vector to all zeros. */
public void forget() {
@@ -193,11 +215,19 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
assert exampleLabels.length == 1 : "Example must have a single label.";
double labelValue = labelValues[0];
- double multiplier =
- learningRate
- * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - bias);
- weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
- bias += multiplier;
+ double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+ if (isLMS) {
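+ // LMS: squared-error loss (labelValue - wtx)^2 / 2; stepping against its
+ // gradient adds learningRate * (labelValue - wtx) * x to the weights.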
+ double multiplier = learningRate * (labelValue - wtx);
+ weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+ bias += multiplier;
+ } else {
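+ // Hinge: loss max(0, 1 - labelValue * wtx); a subgradient step applies
+ // only when the example lies within the margin (labelValue * wtx <= 1).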
+ if (labelValue * wtx <= 1) {
+ double multiplier = learningRate * labelValue;
+ weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+ bias += multiplier;
+ }
+ }
}
@@ -326,12 +356,17 @@ public static class Parameters extends Learner.Parameters {
* The rate at which weights are updated; default {@link #defaultLearningRate}.
**/
public double learningRate;
+ /**
+ * The name of the loss function; default {@link #defaultLossFunction}.
+ */
+ public String lossFunction;
/** Sets all the default values. */
public Parameters() {
weightVector = (SparseWeightVector) defaultWeightVector.clone();
learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
}
@@ -343,6 +378,7 @@ public Parameters(Learner.Parameters p) {
super(p);
weightVector = (SparseWeightVector) defaultWeightVector.clone();
learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
}
@@ -351,6 +387,7 @@ public Parameters(Parameters p) {
super(p);
weightVector = p.weightVector;
learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
new file mode 100644
index 00000000..298103d1
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
@@ -0,0 +1,421 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream;
+import java.io.PrintStream;
+import java.util.Objects;
+
+/**
+ * Stochastic Gradient Descent learning algorithm for classification.
+ *
+ * There are two user-configurable loss functions: hinge and least mean squares. The default is
+ * least mean squares, "lms".
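+ *
+ * <p>
+ * A minimal configuration sketch (the classifier name below is illustrative):
+ *
+ * <pre>
+ * StochasticGradientDescentCL.Parameters p = new StochasticGradientDescentCL.Parameters();
+ * p.lossFunction = "hinge"; // or "lms", the default
+ * p.learningRate = 0.05;
+ * StochasticGradientDescentCL learner = new StochasticGradientDescentCL("exampleClassifier", p);
+ * </pre>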
+ *
+ * @author Yiming Jiang
+ */
+public class StochasticGradientDescentCL extends LinearThresholdUnit {
+ /** Default value for {@link #learningRate}. */
+ public static final double defaultLearningRate = 0.1;
+ /** Default for {@link #weightVector}. */
+ public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Default value for {@link #lossFunction}. */
+ public static final String defaultLossFunction = "lms";
+
+
+ /** The hypothesis vector; default {@link #defaultWeightVector}. */
+ protected SparseWeightVector weightVector;
+ /**
+ * The bias is stored here rather than as an element of the weight vector.
+ **/
+ protected double bias;
+ /**
+ * The rate at which weights are updated; default {@link #defaultLearningRate}.
+ **/
+ protected double learningRate;
+ /**
+ * The name of the loss function; default {@link #defaultLossFunction}.
+ **/
+ protected String lossFunction;
+ /**
+ * Caches whether {@link #lossFunction} is "lms" ({@code true}) or "hinge" ({@code false}).
+ **/
+ protected boolean isLMS;
+
+ /**
+ * The learning rate takes the default value, while the name of the classifier gets the empty
+ * string.
+ **/
+ public StochasticGradientDescentCL() {
+ this("");
+ }
+
+ /**
+ * Sets the learning rate to the specified value, while the name of the classifier gets the
+ * empty string.
+ *
+ * @param r The desired learning rate value.
+ **/
+ public StochasticGradientDescentCL(double r) {
+ this("", r);
+ }
+
+ /**
+ * Initializing constructor. Sets all member variables to their associated settings in the
+ * {@link StochasticGradientDescentCL.Parameters} object.
+ *
+ * @param p The settings of all parameters.
+ **/
+ public StochasticGradientDescentCL(Parameters p) {
+ this("", p);
+ }
+
+ /**
+ * The learning rate takes the default value.
+ *
+ * @param n The name of the classifier.
+ **/
+ public StochasticGradientDescentCL(String n) {
+ this(n, defaultLearningRate);
+ }
+
+ /**
+ * Sets the name of the classifier and the learning rate to the specified values.
+ *
+ * @param n The name of the classifier.
+ * @param r The desired learning rate value.
+ **/
+ public StochasticGradientDescentCL(String n, double r) {
+ super(n);
+ Parameters p = new Parameters();
+ p.learningRate = r;
+ setParameters(p);
+ }
+
+ /**
+ * Initializing constructor. Sets all member variables to their associated settings in the
+ * {@link StochasticGradientDescentCL.Parameters} object.
+ *
+ * @param n The name of the classifier.
+ * @param p The settings of all parameters.
+ **/
+ public StochasticGradientDescentCL(String n, Parameters p) {
+ super(n);
+ setParameters(p);
+ }
+
+
+ /**
+ * Sets the values of parameters that control the behavior of this learning algorithm.
+ *
+ * @param p The parameters.
+ **/
+ public void setParameters(Parameters p) {
+ weightVector = p.weightVector;
+ learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
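+ // Cache the loss-function choice as a boolean so learnUpdate() avoids repeated string comparisons.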
+ if (Objects.equals(p.lossFunction, "lms")) {
+ isLMS = true;
+ } else if (Objects.equals(p.lossFunction, "hinge")) {
+ isLMS = false;
+ } else {
+ System.err.println("Undefined loss function \"" + p.lossFunction + "\"; expected \"lms\" or \"hinge\".");
+ System.exit(-1);
+ }
+ }
+
+
+ /**
+ * Retrieves the parameters that are set in this learner.
+ *
+ * @return An object containing all the values of the parameters that control the behavior of
+ * this learning algorithm.
+ **/
+ public Learner.Parameters getParameters() {
+ Parameters p = new Parameters(super.getParameters());
+ p.weightVector = weightVector;
+ p.learningRate = learningRate;
+ p.lossFunction = lossFunction;
+ return p;
+ }
+
+
+ /**
+ * Returns the current value of the {@link #learningRate} variable.
+ *
+ * @return The value of the {@link #learningRate} variable.
+ **/
+ public double getLearningRate() {
+ return learningRate;
+ }
+
+
+ /**
+ * Sets the {@link #learningRate} member variable to the specified value.
+ *
+ * @param t The new value for {@link #learningRate}.
+ **/
+ public void setLearningRate(double t) {
+ learningRate = t;
+ }
+
+ /**
+ * Returns the name of the loss function in use.
+ *
+ * @return The value of {@link #lossFunction}.
+ **/
+ public String getLossFunction() {
+ return lossFunction;
+ }
+
+
+ /** Resets the weight vector to all zeros. */
+ public void forget() {
+ super.forget();
+ weightVector = weightVector.emptyClone();
+ bias = 0;
+ }
+
+ /** Inherited from {@link LinearThresholdUnit}; not used by this algorithm. */
+ @Override
+ public void promote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+ }
+
+ /** Inherited from {@link LinearThresholdUnit}; not used by this algorithm. */
+ @Override
+ public void demote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+ }
+
+
+ /**
+ * Trains the learning algorithm given an object as an example.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @param exampleLabels The example's label(s).
+ * @param labelValues The labels' values.
+ **/
+ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+ double[] labelValues) {
+ assert exampleLabels.length == 1 : "Example must have a single label.";
+
+ // Map the binary label index (0 or 1) to the signed label (-1 or +1) used by the updates.
+ double labelValue = exampleLabels[0] == 0 ? -1 : 1;
+
+ double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+ learnUpdate(exampleFeatures, exampleValues, labelValue, wtx);
+ }
+
+ void learnUpdate(int[] exampleFeatures, double[] exampleValues, double labelValue, double wtx) {
+ if (isLMS) {
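+ // LMS update: w += learningRate * (labelValue - wtx) * x, the negative gradient of the squared error.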
+ double multiplier = learningRate * (labelValue - wtx);
+ weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+ bias += multiplier;
+ } else {
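+ // Hinge update: step by learningRate * labelValue * x only on margin violations.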
+ if (labelValue * wtx <= 1) {
+ double multiplier = learningRate * labelValue;
+ weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+ bias += multiplier;
+ }
+ }
+ }
+
+
+ /**
+ * This learner does not compute scores, so this method simply returns {@code null}.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return null
+ **/
+ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
+ return null;
+ }
+
+
+ /**
+ * Returns the classification of the given example as a single feature instead of a
+ * {@link FeatureVector}.
+ *
+ * @param f The features array.
+ * @param v The values array.
+ * @return The classification of the example as a feature.
+ **/
+ public Feature featureValue(int[] f, double[] v) {
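+ // Threshold the raw score at zero and return the corresponding prediction feature.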
+ int index = score(f, v) >= 0 ? 1 : 0;
+ return predictions.get(index);
+ }
+
+
+ /**
+ * Simply computes the dot product of the weight vector and the example.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The computed real value.
+ **/
+ public double score(int[] exampleFeatures, double[] exampleValues) {
+ return weightVector.dot(exampleFeatures, exampleValues) + bias;
+ }
+
+
+ /**
+ * Returns the classification of the given example as a {@link FeatureVector} containing the
+ * single feature produced by {@link #featureValue(int[],double[])}.
+ *
+ * @param exampleFeatures The example's array of feature indices.
+ * @param exampleValues The example's array of feature values.
+ * @return The classification of the example as a feature vector.
+ **/
+ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
+ return new FeatureVector(featureValue(exampleFeatures, exampleValues));
+ }
+
+
+ /**
+ * Writes the algorithm's internal representation as text. In the first line of output, the name
+ * of the classifier is printed, followed by {@link #learningRate} and {@link #bias}.
+ *
+ * @param out The output stream.
+ **/
+ public void write(PrintStream out) {
+ out.println(name + ": " + learningRate + ", " + bias);
+ if (lexicon.size() == 0)
+ weightVector.write(out);
+ else
+ weightVector.write(out, lexicon);
+ }
+
+
+ /**
+ * Writes the learned function's internal representation in binary form.
+ *
+ * @param out The output stream.
+ **/
+ public void write(ExceptionlessOutputStream out) {
+ super.write(out);
+ out.writeDouble(learningRate);
+ out.writeDouble(bias);
+ weightVector.write(out);
+ }
+
+
+ /**
+ * Reads the binary representation of a learner with this object's run-time type, overwriting
+ * any and all learned or manually specified parameters as well as the label lexicon but without
+ * modifying the feature lexicon.
+ *
+ * @param in The input stream.
+ **/
+ public void read(ExceptionlessInputStream in) {
+ super.read(in);
+ learningRate = in.readDouble();
+ bias = in.readDouble();
+ weightVector = SparseWeightVector.readWeightVector(in);
+ }
+
+
+ /** Returns a deep clone of this learning algorithm. */
+ public Object clone() {
+ StochasticGradientDescentCL clone = null;
+
+ try {
+ clone = (StochasticGradientDescentCL) super.clone();
+ } catch (Exception e) {
+ System.err.println("Error cloning StochasticGradientDescentCL: " + e);
+ System.exit(1);
+ }
+
+ clone.weightVector = (SparseWeightVector) weightVector.clone();
+ return clone;
+ }
+
+
+ /**
+ * Simply a container for all of {@link StochasticGradientDescentCL}'s configurable parameters.
+ * Using instances of this class should make code more readable and constructors less
+ * complicated.
+ *
+ * @author Nick Rizzolo
+ **/
+ public static class Parameters extends Learner.Parameters {
+ /**
+ * The hypothesis vector; default {@link StochasticGradientDescentCL#defaultWeightVector}.
+ **/
+ public SparseWeightVector weightVector;
+ /**
+ * The rate at which weights are updated; default {@link #defaultLearningRate}.
+ **/
+ public double learningRate;
+ /**
+ * The name of the loss function; default {@link #defaultLossFunction}.
+ */
+ public String lossFunction;
+
+
+ /** Sets all the default values. */
+ public Parameters() {
+ weightVector = (SparseWeightVector) defaultWeightVector.clone();
+ learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
+ }
+
+
+ /**
+ * Sets the parameters from the parent's parameters object, giving defaults to all
+ * parameters declared in this object.
+ **/
+ public Parameters(Learner.Parameters p) {
+ super(p);
+ weightVector = (SparseWeightVector) defaultWeightVector.clone();
+ learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
+ }
+
+
+ /** Copy constructor. */
+ public Parameters(Parameters p) {
+ super(p);
+ weightVector = p.weightVector;
+ learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
+ }
+
+
+ /**
+ * Calls the appropriate <code>Learner.setParameters(Parameters)</code> method for this
+ * <code>Parameters</code> object.
+ *
+ * @param l The learner whose parameters will be set.
+ **/
+ public void setParameters(Learner l) {
+ ((StochasticGradientDescentCL) l).setParameters(this);
+ }
+
+
+ /**
+ * Creates a string representation of these parameters in which only those parameters that
+ * differ from their default values are mentioned.
+ **/
+ public String nonDefaultString() {
+ String result = super.nonDefaultString();
+
+ if (learningRate != StochasticGradientDescentCL.defaultLearningRate)
+ result += ", learningRate = " + learningRate;
+
+ if (!lossFunction.equals(StochasticGradientDescentCL.defaultLossFunction))
+ result += ", lossFunction = \"" + lossFunction + "\"";
+
+ if (result.startsWith(", "))
+ result = result.substring(2);
+ return result;
+ }
+ }
+}