diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
index dc331a63..ff1e5a44 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
@@ -124,12 +124,13 @@ public void setParameters(Parameters p) {
     public void setLossFlag() {
         lossFlag = true;
     }
+
    public void unsetLossFlag() {
-        lossFlag=false;
+        lossFlag = false;
    }
 
    public void setCandidates(int a) {
-        candidates= a;
+        candidates = a;
    }
 
    /** Retrieves the parameters that are set in this learner. */
@@ -817,23 +818,24 @@ public void forget() {
      **/
    public ScoreSet scores(Object example) {
        Object[] exampleArray = getExampleArray(example, false);
-        ScoreSet resultS = scores((int[])exampleArray[0], (double[])exampleArray[1]);
+        ScoreSet resultS = scores((int[]) exampleArray[0], (double[]) exampleArray[1]);
        if (!lossFlag)
            return resultS;
        else
-            return scoresAugmented(example,resultS);
-    }
-
-    /**
-     * Update the score of each binary variable (label) based on the gold value of each example for that variable.
-     * When using a {@code SparseNetworkLearner} to keep the model there is an LTU for each label.
-     * If the gold is same as a specific label then its binary value for that label is 1 and the score for that label
-     * will be {@code oldScore - lossOffset}; otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
-     *
-     * @param example The object to make decisions about.
-     * @param resultS The original scores (see {@link #scores(Object)}).
-     * @return The augmented set of scores.
-     */
+            return scoresAugmented(example, resultS);
+    }
+
+    /**
+     * Updates the score of each binary variable (label) based on the gold value of each example
+     * for that variable. When using a {@code SparseNetworkLearner} to keep the model there is an
+     * LTU for each label. If the gold is the same as a specific label, then its binary value for
+     * that label is 1 and the score for that label will be {@code oldScore - lossOffset};
+     * otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
+     *
+     * @param example The object to make decisions about.
+     * @param resultS The original scores (see {@link #scores(Object)}).
+     * @return The augmented set of scores.
+     */
    public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
        ScoreSet augmentedScores = new ScoreSet();
        Lexicon lLexicon = getLabelLexicon();
@@ -843,7 +845,7 @@ public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
            double originalScore = resultS.getScore(candidate).score;
            double lossOffset = 1 / (double) (candidates);
            if (candidate.equals(gold))
-                 augmentedScores.put(candidate, originalScore - lossOffset);
+                augmentedScores.put(candidate, originalScore - lossOffset);
            else
                augmentedScores.put(candidate, originalScore + lossOffset);
        }
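To make the augmentation above concrete, here is a minimal standalone sketch of the arithmetic `scoresAugmented` performs. The label names, scores, and the `LossAugmentDemo` class are invented for illustration and deliberately avoid the LBJava `ScoreSet`/`Lexicon` API:

```java
// Standalone illustration of loss-augmented scoring (hypothetical data, not LBJava API).
// With candidates = 3, lossOffset = 1/3: the gold label's score is lowered and every
// other label's score is raised, which encourages a margin during training.
import java.util.LinkedHashMap;
import java.util.Map;

public class LossAugmentDemo {
    public static void main(String[] args) {
        Map<String, Double> original = new LinkedHashMap<>();
        original.put("NOUN", 2.0);   // gold label
        original.put("VERB", 1.8);
        original.put("ADJ", 0.5);

        String gold = "NOUN";
        int candidates = 3;
        double lossOffset = 1 / (double) candidates;

        Map<String, Double> augmented = new LinkedHashMap<>();
        for (Map.Entry<String, Double> e : original.entrySet()) {
            double score = e.getKey().equals(gold)
                    ? e.getValue() - lossOffset   // gold: oldScore - lossOffset
                    : e.getValue() + lossOffset;  // others: oldScore + lossOffset
            augmented.put(e.getKey(), score);
        }
        // NOUN -> 1.667, VERB -> 2.133, ADJ -> 0.833: VERB now outscores the gold
        // label, so the learner receives an update it would otherwise have skipped.
        System.out.println(augmented);
    }
}
```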
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
index fc29e2ba..8f5b6507 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
@@ -8,7 +8,7 @@ package edu.illinois.cs.cogcomp.lbjava.learn;
 
 import java.io.PrintStream;
-
+import java.util.Objects;
 import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
 import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
 import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
@@ -40,6 +40,8 @@ public class StochasticGradientDescent extends Learner {
    public static final double defaultLearningRate = 0.1;
    /** Default for {@link #weightVector}. */
    public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+    /** Default loss function */
+    public static final String defaultLossFunction = "lms";
 
 
    /** The hypothesis vector; default {@link #defaultWeightVector}. */
@@ -52,7 +54,14 @@ public class StochasticGradientDescent extends Learner {
     * The rate at which weights are updated; default {@link #defaultLearningRate}.
     **/
    protected double learningRate;
-
+    /**
+     * The name of the loss function
+     */
+    protected String lossFunction;
+    /**
+     * Boolean flag for loss function
+     */
+    private boolean isLMS;
 
    /**
     * The learning rate takes the default value, while the name of the classifier gets the empty
@@ -125,6 +134,15 @@ public StochasticGradientDescent(String n, Parameters p) {
    public void setParameters(Parameters p) {
        weightVector = p.weightVector;
        learningRate = p.learningRate;
+        lossFunction = p.lossFunction;
+        if (Objects.equals(p.lossFunction, "lms")) {
+            isLMS = true;
+        } else if (Objects.equals(p.lossFunction, "hinge")) {
+            isLMS = false;
+        } else {
+            System.err.println("Undefined loss function; use \"lms\" or \"hinge\".");
+            System.exit(-1);
+        }
    }
 
 
@@ -161,6 +179,10 @@ public void setLearningRate(double t) {
        learningRate = t;
    }
 
+    public String getLossFunction() {
+        return lossFunction;
+    }
+
 
    /** Resets the weight vector to all zeros. */
    public void forget() {
@@ -193,11 +215,19 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
        assert exampleLabels.length == 1 : "Example must have a single label.";
        double labelValue = labelValues[0];
 
-        double multiplier =
-                learningRate
-                        * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - bias);
-        weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
-        bias += multiplier;
+        double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+        if (isLMS) {
+            double multiplier = learningRate * (labelValue - wtx);
+            weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+            bias += multiplier;
+        } else {
+            if (labelValue * wtx <= 1) {
+                double multiplier = learningRate * labelValue;
+                weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+                bias += multiplier;
+            }
+        }
    }
@@ -326,12 +356,17 @@ public static class Parameters extends Learner.Parameters {
         * The rate at which weights are updated; default {@link #defaultLearningRate}.
         **/
        public double learningRate;
+        /**
+         * The name of the loss function
+         */
+        public String lossFunction;
 
 
        /** Sets all the default values. */
        public Parameters() {
            weightVector = (SparseWeightVector) defaultWeightVector.clone();
            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
        }
 
 
@@ -343,6 +378,7 @@ public Parameters(Learner.Parameters p) {
            super(p);
            weightVector = (SparseWeightVector) defaultWeightVector.clone();
            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
        }
 
 
@@ -351,6 +387,7 @@ public Parameters(Parameters p) {
            super(p);
            weightVector = p.weightVector;
            learningRate = p.learningRate;
+            lossFunction = p.lossFunction;
        }
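The two branches of the new `learn` method implement two classic updates: least mean squares, w ← w + η(y − wᵀx)x, and the perceptron-style hinge update, which fires only when y·wᵀx ≤ 1. Below is a self-contained sketch of both rules on plain `double[]` vectors; the `SgdUpdateDemo` helper class is hypothetical and does not use the `SparseWeightVector` API:

```java
// Minimal sketch of the LMS and hinge updates used above (hypothetical helper class).
public class SgdUpdateDemo {
    static double learningRate = 0.1;

    // LMS (squared error): w += eta * (y - (w.x + b)) * x
    static void lmsUpdate(double[] w, double[] bias, double[] x, double y) {
        double wtx = dot(w, x) + bias[0];
        double multiplier = learningRate * (y - wtx);
        for (int i = 0; i < w.length; i++) w[i] += multiplier * x[i];
        bias[0] += multiplier;
    }

    // Hinge: update only when the example is inside the margin, y * (w.x + b) <= 1
    static void hingeUpdate(double[] w, double[] bias, double[] x, double y) {
        double wtx = dot(w, x) + bias[0];
        if (y * wtx <= 1) {
            double multiplier = learningRate * y;
            for (int i = 0; i < w.length; i++) w[i] += multiplier * x[i];
            bias[0] += multiplier;
        }
    }

    static double dot(double[] a, double[] b) {
        double s = 0;
        for (int i = 0; i < a.length; i++) s += a[i] * b[i];
        return s;
    }

    public static void main(String[] args) {
        double[] w = {0, 0};
        double[] bias = {0};  // one-element array so the bias stays mutable
        hingeUpdate(w, bias, new double[] {1, 2}, 1);
        // y * wtx = 0 <= 1, so w becomes {0.1, 0.2} and bias becomes 0.1
        System.out.println(w[0] + ", " + w[1] + ", " + bias[0]);
    }
}
```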
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
new file mode 100644
index 00000000..298103d1
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
@@ -0,0 +1,421 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream;
+import java.io.PrintStream;
+import java.util.Objects;
+
+/**
+ * Stochastic Gradient Descent learning algorithm for classification.
+ *
+ * There are two user-configurable loss functions: hinge and least mean square. The default is
+ * least mean square, "lms".
+ *
+ * @author Yiming Jiang
+ */
+public class StochasticGradientDescentCL extends LinearThresholdUnit {
+    /** Default value for {@link #learningRate}. */
+    public static final double defaultLearningRate = 0.1;
+    /** Default for {@link #weightVector}. */
+    public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+    /** Default loss function */
+    public static final String defaultLossFunction = "lms";
+
+
+    /** The hypothesis vector; default {@link #defaultWeightVector}. */
+    protected SparseWeightVector weightVector;
+    /**
+     * The bias is stored here rather than as an element of the weight vector.
+     **/
+    protected double bias;
+    /**
+     * The rate at which weights are updated; default {@link #defaultLearningRate}.
+     **/
+    protected double learningRate;
+    /**
+     * The name of the loss function
+     */
+    protected String lossFunction;
+    /**
+     * Boolean flag for loss function
+     */
+    protected boolean isLMS;
+
+    /**
+     * The learning rate takes the default value, while the name of the classifier gets the empty
+     * string.
+     **/
+    public StochasticGradientDescentCL() {
+        this("");
+    }
+
+    /**
+     * Sets the learning rate to the specified value, while the name of the classifier gets the
+     * empty string.
+     *
+     * @param r The desired learning rate value.
+     **/
+    public StochasticGradientDescentCL(double r) {
+        this("", r);
+    }
+
+    /**
+     * Initializing constructor. Sets all member variables to their associated settings in the
+     * {@link StochasticGradientDescentCL.Parameters} object.
+     *
+     * @param p The settings of all parameters.
+     **/
+    public StochasticGradientDescentCL(Parameters p) {
+        this("", p);
+    }
+
+    /**
+     * The learning rate takes the default value.
+     *
+     * @param n The name of the classifier.
+     **/
+    public StochasticGradientDescentCL(String n) {
+        this(n, defaultLearningRate);
+    }
+
+    /**
+     * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}.
+     *
+     * @param n The name of the classifier.
+     * @param r The desired learning rate value.
+     **/
+    public StochasticGradientDescentCL(String n, double r) {
+        super(n);
+        Parameters p = new Parameters();
+        p.learningRate = r;
+        setParameters(p);
+    }
+
+    /**
+     * Initializing constructor. Sets all member variables to their associated settings in the
+     * {@link StochasticGradientDescentCL.Parameters} object.
+     *
+     * @param n The name of the classifier.
+     * @param p The settings of all parameters.
+     **/
+    public StochasticGradientDescentCL(String n, Parameters p) {
+        super(n);
+        setParameters(p);
+    }
+
+
+    /**
+     * Sets the values of parameters that control the behavior of this learning algorithm.
+     *
+     * @param p The parameters.
+     **/
+    public void setParameters(Parameters p) {
+        weightVector = p.weightVector;
+        learningRate = p.learningRate;
+        lossFunction = p.lossFunction;
+        if (Objects.equals(p.lossFunction, "lms")) {
+            isLMS = true;
+        } else if (Objects.equals(p.lossFunction, "hinge")) {
+            isLMS = false;
+        } else {
+            System.err.println("Undefined loss function; use \"lms\" or \"hinge\".");
+            System.exit(-1);
+        }
+    }
+
+
+    /**
+     * Retrieves the parameters that are set in this learner.
+     *
+     * @return An object containing all the values of the parameters that control the behavior of
+     *         this learning algorithm.
+     **/
+    public Learner.Parameters getParameters() {
+        Parameters p = new Parameters(super.getParameters());
+        p.weightVector = weightVector;
+        p.learningRate = learningRate;
+        p.lossFunction = lossFunction;
+        return p;
+    }
+
+
+    /**
+     * Returns the current value of the {@link #learningRate} variable.
+     *
+     * @return The value of the {@link #learningRate} variable.
+     **/
+    public double getLearningRate() {
+        return learningRate;
+    }
+
+
+    /**
+     * Sets the {@link #learningRate} member variable to the specified value.
+     *
+     * @param t The new value for {@link #learningRate}.
+     **/
+    public void setLearningRate(double t) {
+        learningRate = t;
+    }
+
+    public String getLossFunction() {
+        return lossFunction;
+    }
+
+
+    /** Resets the weight vector to all zeros. */
+    public void forget() {
+        super.forget();
+        weightVector = weightVector.emptyClone();
+        bias = 0;
+    }
+
+    /** Inherited unused method from LTU class */
+    @Override
+    public void promote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+    }
+
+    /** Inherited unused method from LTU class */
+    @Override
+    public void demote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+    }
+
+
+    /**
+     * Trains the learning algorithm given an object as an example.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @param exampleLabels The example's label(s).
+     * @param labelValues The labels' values.
+     **/
+    public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+            double[] labelValues) {
+        assert exampleLabels.length == 1 : "Example must have a single label.";
+
+        double labelValue = 1;
+        if (exampleLabels[0] == 1) {
+            labelValue = 1;
+        } else if (exampleLabels[0] == 0) {
+            labelValue = -1;
+        }
+
+        double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+        learnUpdate(exampleFeatures, exampleValues, labelValue, wtx);
+    }
+
+    void learnUpdate(int[] exampleFeatures, double[] exampleValues, double labelValue, double wtx) {
+        if (isLMS) {
+            double multiplier = learningRate * (labelValue - wtx);
+            weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+            bias += multiplier;
+        } else {
+            if (labelValue * wtx <= 1) {
+                double multiplier = learningRate * labelValue;
+                weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+                bias += multiplier;
+            }
+        }
+    }
+
+
+    /**
+     * Since this algorithm returns a discrete feature, it does not return scores.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return null
+     **/
+    public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
+        return null;
+    }
+
+
+    /**
+     * Returns the classification of the given example as a single feature instead of a
+     * {@link FeatureVector}.
+     *
+     * @param f The features array.
+     * @param v The values array.
+     * @return The classification of the example as a feature.
+     **/
+    public Feature featureValue(int[] f, double[] v) {
+        int index = score(f, v) >= 0 ? 1 : 0;
+        return predictions.get(index);
+    }
+
+
+    /**
+     * Simply computes the dot product of the weight vector and the example.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return The computed real value.
+     **/
+    public double score(int[] exampleFeatures, double[] exampleValues) {
+        return weightVector.dot(exampleFeatures, exampleValues) + bias;
+    }
+
+
+    /**
+     * Returns the classification of the given example as a {@link FeatureVector} containing the
+     * single feature returned by {@link #featureValue(int[], double[])}.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return The computed feature (in a vector).
+     **/
+    public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
+        return new FeatureVector(featureValue(exampleFeatures, exampleValues));
+    }
+
+
+    /**
+     * Writes the algorithm's internal representation as text. In the first line of output, the
+     * name of the classifier is printed, followed by {@link #learningRate} and {@link #bias}.
+     *
+     * @param out The output stream.
+     **/
+    public void write(PrintStream out) {
+        out.println(name + ": " + learningRate + ", " + bias);
+        if (lexicon.size() == 0)
+            weightVector.write(out);
+        else
+            weightVector.write(out, lexicon);
+    }
+
+
+    /**
+     * Writes the learned function's internal representation in binary form.
+     *
+     * @param out The output stream.
+     **/
+    public void write(ExceptionlessOutputStream out) {
+        super.write(out);
+        out.writeDouble(learningRate);
+        out.writeDouble(bias);
+        weightVector.write(out);
+    }
+
+
+    /**
+     * Reads the binary representation of a learner with this object's run-time type, overwriting
+     * any and all learned or manually specified parameters as well as the label lexicon but
+     * without modifying the feature lexicon.
+     *
+     * @param in The input stream.
+     **/
+    public void read(ExceptionlessInputStream in) {
+        super.read(in);
+        learningRate = in.readDouble();
+        bias = in.readDouble();
+        weightVector = SparseWeightVector.readWeightVector(in);
+    }
+
+
+    /** Returns a deep clone of this learning algorithm. */
+    public Object clone() {
+        StochasticGradientDescentCL clone = null;
+
+        try {
+            clone = (StochasticGradientDescentCL) super.clone();
+        } catch (Exception e) {
+            System.err.println("Error cloning StochasticGradientDescentCL: " + e);
+            System.exit(1);
+        }
+
+        clone.weightVector = (SparseWeightVector) weightVector.clone();
+        return clone;
+    }
+
+
+    /**
+     * Simply a container for all of {@link StochasticGradientDescentCL}'s configurable
+     * parameters. Using instances of this class should make code more readable and constructors
+     * less complicated.
+     *
+     * @author Nick Rizzolo
+     **/
+    public static class Parameters extends Learner.Parameters {
+        /**
+         * The hypothesis vector; default {@link StochasticGradientDescentCL#defaultWeightVector}.
+         **/
+        public SparseWeightVector weightVector;
+        /**
+         * The rate at which weights are updated; default {@link #defaultLearningRate}.
+         **/
+        public double learningRate;
+        /**
+         * The name of the loss function
+         */
+        public String lossFunction;
+
+
+        /** Sets all the default values. */
+        public Parameters() {
+            weightVector = (SparseWeightVector) defaultWeightVector.clone();
+            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
+        }
+
+
+        /**
+         * Sets the parameters from the parent's parameters object, giving defaults to all
+         * parameters declared in this object.
+         **/
+        public Parameters(Learner.Parameters p) {
+            super(p);
+            weightVector = (SparseWeightVector) defaultWeightVector.clone();
+            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
+        }
+
+
+        /** Copy constructor. */
+        public Parameters(Parameters p) {
+            super(p);
+            weightVector = p.weightVector;
+            learningRate = p.learningRate;
+            lossFunction = p.lossFunction;
+        }
+
+
+        /**
+         * Calls the appropriate {@code Learner.setParameters(Parameters)} method for this
+         * {@code Parameters} object.
+         *
+         * @param l The learner whose parameters will be set.
+         **/
+        public void setParameters(Learner l) {
+            ((StochasticGradientDescentCL) l).setParameters(this);
+        }
+
+
+        /**
+         * Creates a string representation of these parameters in which only those parameters that
+         * differ from their default values are mentioned.
+         **/
+        public String nonDefaultString() {
+            String result = super.nonDefaultString();
+
+            if (learningRate != StochasticGradientDescentCL.defaultLearningRate)
+                result += ", learningRate = " + learningRate;
+            if (!Objects.equals(lossFunction, StochasticGradientDescentCL.defaultLossFunction))
+                result += ", lossFunction = " + lossFunction;
+
+            if (result.startsWith(", "))
+                result = result.substring(2);
+            return result;
+        }
+    }
+}
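Finally, a hedged usage sketch for the new classifier, configuring the hinge loss through the `Parameters` object introduced above. The feature arrays and learning rate are invented for illustration; in a real LBJava pipeline the learner would normally be generated from an `.lbj` specification and trained by the compiler's training machinery rather than called directly like this:

```java
// Hypothetical configuration sketch; not the standard LBJava training path.
import edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescentCL;

public class SgdClUsageSketch {
    public static void main(String[] args) {
        // Configure the learner: hinge loss instead of the "lms" default.
        StochasticGradientDescentCL.Parameters p = new StochasticGradientDescentCL.Parameters();
        p.learningRate = 0.05;     // overrides defaultLearningRate (0.1)
        p.lossFunction = "hinge";  // overrides defaultLossFunction ("lms")
        StochasticGradientDescentCL learner = new StochasticGradientDescentCL("demo", p);

        // learn() maps label index 1 -> +1 and 0 -> -1, then applies the hinge
        // update because y * (w.x + b) = 0 <= 1 on the first example.
        int[] features = {0, 1};
        double[] values = {1.0, 2.0};
        learner.learn(features, values, new int[] {1}, new double[] {1.0});

        // w = {0.05, 0.10}, bias = 0.05, so score = 0.05 + 0.20 + 0.05 = 0.30.
        System.out.println(learner.score(features, values));
    }
}
```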