
Updated SGD Regression & Classification with user-configurable loss functions #76

Open · wants to merge 4 commits into master
@@ -124,12 +124,13 @@ public void setParameters(Parameters p) {
public void setLossFlag() {
lossFlag = true;
}

public void unsetLossFlag() {
- lossFlag=false;
+ lossFlag = false;
}

public void setCandidates(int a) {
- candidates= a;
+ candidates = a;
}

/** Retrieves the parameters that are set in this learner. */
@@ -817,23 +818,24 @@ public void forget() {
**/
public ScoreSet scores(Object example) {
Object[] exampleArray = getExampleArray(example, false);
- ScoreSet resultS = scores((int[])exampleArray[0], (double[])exampleArray[1]);
+ ScoreSet resultS = scores((int[]) exampleArray[0], (double[]) exampleArray[1]);
if (!lossFlag)
return resultS;
else
- return scoresAugmented(example,resultS);
+ return scoresAugmented(example, resultS);
}

/**
- * Update the score of each binary variable (label) based on the gold value of each example for that variable.
- * When using a {@code SparseNetworkLearner} to keep the model there is an LTU for each label.
- * If the gold is same as a specific label then its binary value for that label is 1 and the score for that label
- * will be {@code oldScore - lossOffset}; otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
+ * Update the score of each binary variable (label) based on the gold value of each example for
+ * that variable. When using a {@code SparseNetworkLearner} to keep the model there is an LTU
+ * for each label. If the gold is same as a specific label then its binary value for that label
+ * is 1 and the score for that label will be {@code oldScore - lossOffset}; otherwise it will be
+ * 0 and the score will be {@code oldScore + lossOffset}.
 *
 * @param example The object to make decisions about.
 * @param resultS The original scores (see {@link #scores(Object)}).
 * @return The augmented set of scores.
 */
public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
ScoreSet augmentedScores = new ScoreSet();
Lexicon lLexicon = getLabelLexicon();
@@ -843,7 +845,7 @@ public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
double originalScore = resultS.getScore(candidate).score;
double lossOffset = 1 / (double) (candidates);
if (candidate.equals(gold))
augmentedScores.put(candidate, originalScore - lossOffset);
else
augmentedScores.put(candidate, originalScore + lossOffset);
}
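For intuition, the augmentation arithmetic above can be sketched standalone. Everything in this snippet is hypothetical: the label names, the scores, and the plain Map standing in for the ScoreSet/Lexicon machinery.

import java.util.LinkedHashMap;
import java.util.Map;

public class LossAugmentedScoresDemo {
    public static void main(String[] args) {
        // Hypothetical raw scores for four candidate labels.
        Map<String, Double> scores = new LinkedHashMap<>();
        scores.put("NOUN", 2.0);   // gold label
        scores.put("VERB", 1.9);   // near miss
        scores.put("ADJ", 0.5);
        scores.put("ADV", -0.3);

        String gold = "NOUN";
        double lossOffset = 1.0 / scores.size();  // candidates = 4, so 0.25

        // Gold gets oldScore - lossOffset; every other label gets oldScore + lossOffset.
        for (Map.Entry<String, Double> e : scores.entrySet()) {
            double augmented = e.getKey().equals(gold)
                    ? e.getValue() - lossOffset
                    : e.getValue() + lossOffset;
            System.out.printf("%s: %.2f -> %.2f%n", e.getKey(), e.getValue(), augmented);
        }
        // NOUN drops to 1.75 while VERB rises to 2.15, so the near-miss label wins
        // loss-augmented inference and drives a margin-enforcing update.
    }
}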
@@ -8,7 +8,7 @@
package edu.illinois.cs.cogcomp.lbjava.learn;

import java.io.PrintStream;
-
+ import java.util.Objects;
import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
@@ -40,6 +40,8 @@ public class StochasticGradientDescent extends Learner {
public static final double defaultLearningRate = 0.1;
/** Default for {@link #weightVector}. */
public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+ /** Default loss function */
+ public static final String defaultLossFunction = "lms";


/** The hypothesis vector; default {@link #defaultWeightVector}. */
@@ -52,7 +54,14 @@ public class StochasticGradientDescent extends Learner {
* The rate at which weights are updated; default {@link #defaultLearningRate}.
**/
protected double learningRate;

+ /**
+  * The name of the loss function.
+  */
+ protected String lossFunction;
+ /**
+  * Boolean flag for the loss function: true for LMS, false for hinge.
+  */
+ private boolean isLMS;

/**
* The learning rate takes the default value, while the name of the classifier gets the empty
@@ -125,6 +134,15 @@ public StochasticGradientDescent(String n, Parameters p) {
public void setParameters(Parameters p) {
weightVector = p.weightVector;
learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
+ if (Objects.equals(p.lossFunction, "lms")) {
+     isLMS = true;
+ } else if (Objects.equals(p.lossFunction, "hinge")) {
+     isLMS = false;
+ } else {
+     System.out.println("Undefined loss function! lms or hinge");
+     System.exit(-1);
+ }
}
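Taken together with the getter added below, the new parameter can be exercised programmatically. A sketch, assuming the (String, Parameters) constructor shown in the hunk header above applies these parameters via setParameters; the learner name "myLearner" is arbitrary:

// Sketch: selecting the hinge loss via the Parameters object added in this PR.
StochasticGradientDescent.Parameters p = new StochasticGradientDescent.Parameters();
p.learningRate = 0.05;       // overrides defaultLearningRate (0.1)
p.lossFunction = "hinge";    // "lms" (the default) or "hinge"; anything else exits
StochasticGradientDescent learner = new StochasticGradientDescent("myLearner", p);
System.out.println(learner.getLossFunction());  // "hinge"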


@@ -161,6 +179,10 @@ public void setLearningRate(double t) {
learningRate = t;
}

+ public String getLossFunction() {
+     return lossFunction;
+ }


/** Resets the weight vector to all zeros. */
public void forget() {
@@ -193,11 +215,19 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
assert exampleLabels.length == 1 : "Example must have a single label.";

double labelValue = labelValues[0];
- double multiplier =
- learningRate
- * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - bias);
- weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
- bias += multiplier;
+ double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+ if (isLMS) {
+     double multiplier = learningRate * (labelValue - wtx);
+     weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+     bias += multiplier;
+ } else {
[Member] hmm ... not sure about this. Hinge loss usually makes more sense in classification problems.

[Member Author] I can remove the configuration, using hinge loss in SGD classification and lms in SGD regression. Is it better?

[Member] That's better.

Even better than that is: having a single SGD which works for both classification and regression, and the user has the option of setting either LMS or Hinge, depending on what they need (classification/regression/etc).

[Member Author] I can get the first part working soon. The latter may need further deliberation. Thanks!

+     if (labelValue * wtx <= 1) {
+         double multiplier = learningRate * labelValue;
[Member] For now it's ok, but as I suggested earlier the implementation of loss functions can be more general. You can define a trait/abstract class for loss functions, with functions to get the value and gradient, given an input and output label; and implement it for each instance of the loss function, e.g. LMS, hinge, etc.

+         weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+         bias += multiplier;
+     }
+ }
}
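The reviewer's suggested generalization is not part of this PR; a rough sketch of what it could look like follows, with all names (LossFunction, updateScale, etc.) hypothetical:

// Hypothetical: one implementation per loss, each exposing the loss value and
// the scalar factor that drives the SGD update.
interface LossFunction {
    double value(double label, double score);
    // Scalar g such that the update is w += learningRate * g * x (bias likewise).
    double updateScale(double label, double score);
}

class LMSLoss implements LossFunction {
    public double value(double y, double wtx) {
        double d = y - wtx;
        return 0.5 * d * d;            // squared error
    }
    public double updateScale(double y, double wtx) {
        return y - wtx;                // negative gradient w.r.t. the score
    }
}

class HingeLoss implements LossFunction {
    public double value(double y, double wtx) {
        return Math.max(0, 1 - y * wtx);
    }
    public double updateScale(double y, double wtx) {
        return (y * wtx <= 1) ? y : 0; // subgradient step only inside the margin
    }
}

With such a hierarchy, both branches of learn() above collapse into a single multiplier = learningRate * loss.updateScale(labelValue, wtx), followed by the shared scaledAdd and bias update.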


@@ -326,12 +356,17 @@ public static class Parameters extends Learner.Parameters {
* The rate at which weights are updated; default {@link #defaultLearningRate}.
**/
public double learningRate;
+ /**
+  * The name of the loss function.
+  */
+ public String lossFunction;


/** Sets all the default values. */
public Parameters() {
weightVector = (SparseWeightVector) defaultWeightVector.clone();
learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
}


@@ -343,6 +378,7 @@ public Parameters(Learner.Parameters p) {
super(p);
weightVector = (SparseWeightVector) defaultWeightVector.clone();
learningRate = defaultLearningRate;
+ lossFunction = defaultLossFunction;
}


@@ -351,6 +387,7 @@ public Parameters(Parameters p) {
super(p);
weightVector = p.weightVector;
learningRate = p.learningRate;
+ lossFunction = p.lossFunction;
}

