diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
index dc331a63..ff1e5a44 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java
@@ -124,12 +124,13 @@ public void setParameters(Parameters p) {
     public void setLossFlag() {
         lossFlag = true;
     }
+
    public void unsetLossFlag() {
-        lossFlag=false;
+        lossFlag = false;
    }
 
    public void setCandidates(int a) {
-        candidates= a;
+        candidates = a;
    }
 
    /** Retrieves the parameters that are set in this learner. */
@@ -817,23 +818,24 @@ public void forget() {
      **/
    public ScoreSet scores(Object example) {
        Object[] exampleArray = getExampleArray(example, false);
-        ScoreSet resultS = scores((int[])exampleArray[0], (double[])exampleArray[1]);
+        ScoreSet resultS = scores((int[]) exampleArray[0], (double[]) exampleArray[1]);
        if (!lossFlag)
            return resultS;
        else
-            return scoresAugmented(example,resultS);
-    }
-
-    /**
-     * Update the score of each binary variable (label) based on the gold value of each example for that variable.
-     * When using a {@code SparseNetworkLearner} to keep the model there is an LTU for each label.
-     * If the gold is same as a specific label then its binary value for that label is 1 and the score for that label
-     * will be {@code oldScore - lossOffset}; otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
-     *
-     * @param example The object to make decisions about.
-     * @param resultS The original scores (see {@link #scores(Object)}).
-     * @return The augmented set of scores.
-     */
+            return scoresAugmented(example, resultS);
+    }
+
+    /**
+     * Updates the score of each binary variable (label) based on the gold value of each example
+     * for that variable. When using a {@code SparseNetworkLearner} to keep the model there is an
+     * LTU for each label. If the gold is the same as a specific label, then its binary value for
+     * that label is 1 and the score for that label will be {@code oldScore - lossOffset};
+     * otherwise it will be 0 and the score will be {@code oldScore + lossOffset}.
+     *
+     * @param example The object to make decisions about.
+     * @param resultS The original scores (see {@link #scores(Object)}).
+     * @return The augmented set of scores.
+     */
    public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
        ScoreSet augmentedScores = new ScoreSet();
        Lexicon lLexicon = getLabelLexicon();
@@ -843,7 +845,7 @@ public ScoreSet scoresAugmented(Object example, ScoreSet resultS) {
            double originalScore = resultS.getScore(candidate).score;
            double lossOffset = 1 / (double) (candidates);
            if (candidate.equals(gold))
-                 augmentedScores.put(candidate, originalScore - lossOffset);
+                augmentedScores.put(candidate, originalScore - lossOffset);
            else
                augmentedScores.put(candidate, originalScore + lossOffset);
        }
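To make the augmentation above concrete, here is a minimal standalone sketch of the arithmetic `scoresAugmented` performs. The label names, scores, and the `LossAugmentDemo` class are invented for illustration and deliberately avoid the LBJava `ScoreSet`/`Lexicon` API:

```java
// Standalone illustration of loss-augmented scoring (hypothetical data, not LBJava API).
// With candidates = 3, lossOffset = 1/3: the gold label's score is lowered and every
// other label's score is raised, which encourages a margin during training.
import java.util.LinkedHashMap;
import java.util.Map;

public class LossAugmentDemo {
    public static void main(String[] args) {
        Map<String, Double> original = new LinkedHashMap<>();
        original.put("NOUN", 2.0);   // gold label
        original.put("VERB", 1.8);
        original.put("ADJ", 0.5);

        String gold = "NOUN";
        int candidates = 3;
        double lossOffset = 1 / (double) candidates;

        Map<String, Double> augmented = new LinkedHashMap<>();
        for (Map.Entry<String, Double> e : original.entrySet()) {
            double score = e.getKey().equals(gold)
                    ? e.getValue() - lossOffset   // gold: oldScore - lossOffset
                    : e.getValue() + lossOffset;  // others: oldScore + lossOffset
            augmented.put(e.getKey(), score);
        }
        // NOUN -> 1.667, VERB -> 2.133, ADJ -> 0.833: VERB now outscores the gold
        // label, so the learner receives an update it would otherwise have skipped.
        System.out.println(augmented);
    }
}
```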
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
index fc29e2ba..8f5b6507 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java
@@ -8,7 +8,7 @@ package edu.illinois.cs.cogcomp.lbjava.learn;
 
 import java.io.PrintStream;
-
+import java.util.Objects;
 import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
 import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
 import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature;
@@ -40,6 +40,8 @@ public class StochasticGradientDescent extends Learner {
    public static final double defaultLearningRate = 0.1;
    /** Default for {@link #weightVector}. */
    public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+    /** Default loss function */
+    public static final String defaultLossFunction = "lms";
 
 
    /** The hypothesis vector; default {@link #defaultWeightVector}. */
@@ -52,7 +54,14 @@ public class StochasticGradientDescent extends Learner {
     * The rate at which weights are updated; default {@link #defaultLearningRate}.
     **/
    protected double learningRate;
-
+    /**
+     * The name of the loss function
+     */
+    protected String lossFunction;
+    /**
+     * Boolean flag for loss function
+     */
+    private boolean isLMS;
 
    /**
     * The learning rate takes the default value, while the name of the classifier gets the empty
@@ -125,6 +134,15 @@ public StochasticGradientDescent(String n, Parameters p) {
    public void setParameters(Parameters p) {
        weightVector = p.weightVector;
        learningRate = p.learningRate;
+        lossFunction = p.lossFunction;
+        if (Objects.equals(p.lossFunction, "lms")) {
+            isLMS = true;
+        } else if (Objects.equals(p.lossFunction, "hinge")) {
+            isLMS = false;
+        } else {
+            System.err.println("Undefined loss function; use \"lms\" or \"hinge\".");
+            System.exit(-1);
+        }
    }
 
 
@@ -161,6 +179,10 @@ public void setLearningRate(double t) {
        learningRate = t;
    }
 
+    public String getLossFunction() {
+        return lossFunction;
+    }
+
 
    /** Resets the weight vector to all zeros. */
    public void forget() {
@@ -193,11 +215,19 @@ public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLa
        assert exampleLabels.length == 1 : "Example must have a single label.";
        double labelValue = labelValues[0];
 
-        double multiplier =
-                learningRate
-                        * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - bias);
-        weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
-        bias += multiplier;
+        double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+        if (isLMS) {
+            double multiplier = learningRate * (labelValue - wtx);
+            weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+            bias += multiplier;
+        } else {
+            if (labelValue * wtx <= 1) {
+                double multiplier = learningRate * labelValue;
+                weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+                bias += multiplier;
+            }
+        }
    }
@@ -326,12 +356,17 @@ public static class Parameters extends Learner.Parameters {
         * The rate at which weights are updated; default {@link #defaultLearningRate}.
         **/
        public double learningRate;
+        /**
+         * The name of the loss function
+         */
+        public String lossFunction;
 
 
        /** Sets all the default values. */
        public Parameters() {
            weightVector = (SparseWeightVector) defaultWeightVector.clone();
            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
        }
 
 
@@ -343,6 +378,7 @@ public Parameters(Learner.Parameters p) {
            super(p);
            weightVector = (SparseWeightVector) defaultWeightVector.clone();
            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
        }
 
 
@@ -351,6 +387,7 @@ public Parameters(Parameters p) {
            super(p);
            weightVector = p.weightVector;
            learningRate = p.learningRate;
+            lossFunction = p.lossFunction;
        }
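The two branches of the new `learn` method implement two classic updates: least mean squares, w ← w + η(y − wᵀx)x, and the perceptron-style hinge update, which fires only when y·wᵀx ≤ 1. Below is a self-contained sketch of both rules on plain `double[]` vectors; the `SgdUpdateDemo` helper class is hypothetical and does not use the `SparseWeightVector` API:

```java
// Minimal sketch of the LMS and hinge updates used above (hypothetical helper class).
public class SgdUpdateDemo {
    static double learningRate = 0.1;

    // LMS (squared error): w += eta * (y - (w.x + b)) * x
    static void lmsUpdate(double[] w, double[] bias, double[] x, double y) {
        double wtx = dot(w, x) + bias[0];
        double multiplier = learningRate * (y - wtx);
        for (int i = 0; i < w.length; i++) w[i] += multiplier * x[i];
        bias[0] += multiplier;
    }

    // Hinge: update only when the example is inside the margin, y * (w.x + b) <= 1
    static void hingeUpdate(double[] w, double[] bias, double[] x, double y) {
        double wtx = dot(w, x) + bias[0];
        if (y * wtx <= 1) {
            double multiplier = learningRate * y;
            for (int i = 0; i < w.length; i++) w[i] += multiplier * x[i];
            bias[0] += multiplier;
        }
    }

    static double dot(double[] a, double[] b) {
        double s = 0;
        for (int i = 0; i < a.length; i++) s += a[i] * b[i];
        return s;
    }

    public static void main(String[] args) {
        double[] w = {0, 0};
        double[] bias = {0};  // one-element array so the bias stays mutable
        hingeUpdate(w, bias, new double[] {1, 2}, 1);
        // y * wtx = 0 <= 1, so w becomes {0.1, 0.2} and bias becomes 0.1
        System.out.println(w[0] + ", " + w[1] + ", " + bias[0]);
    }
}
```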
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
new file mode 100644
index 00000000..298103d1
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescentCL.java
@@ -0,0 +1,421 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.lbjava.learn;
+
+import edu.illinois.cs.cogcomp.lbjava.classify.Feature;
+import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector;
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream;
+import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream;
+import java.io.PrintStream;
+import java.util.Objects;
+
+/**
+ * Stochastic Gradient Descent learning algorithm for classification.
+ *
+ * There are two user-configurable loss functions: hinge and least mean square. The default is
+ * least mean square, "lms".
+ *
+ * @author Yiming Jiang
+ */
+public class StochasticGradientDescentCL extends LinearThresholdUnit {
+    /** Default value for {@link #learningRate}. */
+    public static final double defaultLearningRate = 0.1;
+    /** Default for {@link #weightVector}. */
+    public static final SparseWeightVector defaultWeightVector = new SparseWeightVector();
+    /** Default loss function */
+    public static final String defaultLossFunction = "lms";
+
+
+    /** The hypothesis vector; default {@link #defaultWeightVector}. */
+    protected SparseWeightVector weightVector;
+    /**
+     * The bias is stored here rather than as an element of the weight vector.
+     **/
+    protected double bias;
+    /**
+     * The rate at which weights are updated; default {@link #defaultLearningRate}.
+     **/
+    protected double learningRate;
+    /**
+     * The name of the loss function
+     */
+    protected String lossFunction;
+    /**
+     * Boolean flag for loss function
+     */
+    protected boolean isLMS;
+
+    /**
+     * The learning rate takes the default value, while the name of the classifier gets the empty
+     * string.
+     **/
+    public StochasticGradientDescentCL() {
+        this("");
+    }
+
+    /**
+     * Sets the learning rate to the specified value, while the name of the classifier gets the
+     * empty string.
+     *
+     * @param r The desired learning rate value.
+     **/
+    public StochasticGradientDescentCL(double r) {
+        this("", r);
+    }
+
+    /**
+     * Initializing constructor. Sets all member variables to their associated settings in the
+     * {@link StochasticGradientDescentCL.Parameters} object.
+     *
+     * @param p The settings of all parameters.
+     **/
+    public StochasticGradientDescentCL(Parameters p) {
+        this("", p);
+    }
+
+    /**
+     * The learning rate takes the default value.
+     *
+     * @param n The name of the classifier.
+     **/
+    public StochasticGradientDescentCL(String n) {
+        this(n, defaultLearningRate);
+    }
+
+    /**
+     * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}.
+     *
+     * @param n The name of the classifier.
+     * @param r The desired learning rate value.
+     **/
+    public StochasticGradientDescentCL(String n, double r) {
+        super(n);
+        Parameters p = new Parameters();
+        p.learningRate = r;
+        setParameters(p);
+    }
+
+    /**
+     * Initializing constructor. Sets all member variables to their associated settings in the
+     * {@link StochasticGradientDescentCL.Parameters} object.
+     *
+     * @param n The name of the classifier.
+     * @param p The settings of all parameters.
+     **/
+    public StochasticGradientDescentCL(String n, Parameters p) {
+        super(n);
+        setParameters(p);
+    }
+
+
+    /**
+     * Sets the values of parameters that control the behavior of this learning algorithm.
+     *
+     * @param p The parameters.
+     **/
+    public void setParameters(Parameters p) {
+        weightVector = p.weightVector;
+        learningRate = p.learningRate;
+        lossFunction = p.lossFunction;
+        if (Objects.equals(p.lossFunction, "lms")) {
+            isLMS = true;
+        } else if (Objects.equals(p.lossFunction, "hinge")) {
+            isLMS = false;
+        } else {
+            System.err.println("Undefined loss function; use \"lms\" or \"hinge\".");
+            System.exit(-1);
+        }
+    }
+
+
+    /**
+     * Retrieves the parameters that are set in this learner.
+     *
+     * @return An object containing all the values of the parameters that control the behavior of
+     *         this learning algorithm.
+     **/
+    public Learner.Parameters getParameters() {
+        Parameters p = new Parameters(super.getParameters());
+        p.weightVector = weightVector;
+        p.learningRate = learningRate;
+        p.lossFunction = lossFunction;
+        return p;
+    }
+
+
+    /**
+     * Returns the current value of the {@link #learningRate} variable.
+     *
+     * @return The value of the {@link #learningRate} variable.
+     **/
+    public double getLearningRate() {
+        return learningRate;
+    }
+
+
+    /**
+     * Sets the {@link #learningRate} member variable to the specified value.
+     *
+     * @param t The new value for {@link #learningRate}.
+     **/
+    public void setLearningRate(double t) {
+        learningRate = t;
+    }
+
+    public String getLossFunction() {
+        return lossFunction;
+    }
+
+
+    /** Resets the weight vector to all zeros. */
+    public void forget() {
+        super.forget();
+        weightVector = weightVector.emptyClone();
+        bias = 0;
+    }
+
+    /** Inherited unused method from LTU class */
+    @Override
+    public void promote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+    }
+
+    /** Inherited unused method from LTU class */
+    @Override
+    public void demote(int[] exampleFeatures, double[] exampleValues, double rate) {
+
+    }
+
+
+    /**
+     * Trains the learning algorithm given an object as an example.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @param exampleLabels The example's label(s).
+     * @param labelValues The labels' values.
+     **/
+    public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels,
+            double[] labelValues) {
+        assert exampleLabels.length == 1 : "Example must have a single label.";
+
+        double labelValue = 1;
+        if (exampleLabels[0] == 1) {
+            labelValue = 1;
+        } else if (exampleLabels[0] == 0) {
+            labelValue = -1;
+        }
+
+        double wtx = weightVector.dot(exampleFeatures, exampleValues) + bias;
+
+        learnUpdate(exampleFeatures, exampleValues, labelValue, wtx);
+    }
+
+    void learnUpdate(int[] exampleFeatures, double[] exampleValues, double labelValue, double wtx) {
+        if (isLMS) {
+            double multiplier = learningRate * (labelValue - wtx);
+            weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+            bias += multiplier;
+        } else {
+            if (labelValue * wtx <= 1) {
+                double multiplier = learningRate * labelValue;
+                weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier);
+                bias += multiplier;
+            }
+        }
+    }
+
+
+    /**
+     * Since this algorithm returns a discrete feature, it does not return scores.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return null
+     **/
+    public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
+        return null;
+    }
+
+
+    /**
+     * Returns the classification of the given example as a single feature instead of a
+     * {@link FeatureVector}.
+     *
+     * @param f The features array.
+     * @param v The values array.
+     * @return The classification of the example as a feature.
+     **/
+    public Feature featureValue(int[] f, double[] v) {
+        int index = score(f, v) >= 0 ? 1 : 0;
+        return predictions.get(index);
+    }
+
+
+    /**
+     * Simply computes the dot product of the weight vector and the example.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return The computed real value.
+     **/
+    public double score(int[] exampleFeatures, double[] exampleValues) {
+        return weightVector.dot(exampleFeatures, exampleValues) + bias;
+    }
+
+
+    /**
+     * Returns the classification of the given example as a {@link FeatureVector} containing the
+     * single feature returned by {@link #featureValue(int[], double[])}.
+     *
+     * @param exampleFeatures The example's array of feature indices.
+     * @param exampleValues The example's array of feature values.
+     * @return The computed feature (in a vector).
+     **/
+    public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
+        return new FeatureVector(featureValue(exampleFeatures, exampleValues));
+    }
+
+
+    /**
+     * Writes the algorithm's internal representation as text. In the first line of output, the
+     * name of the classifier is printed, followed by {@link #learningRate} and {@link #bias}.
+     *
+     * @param out The output stream.
+     **/
+    public void write(PrintStream out) {
+        out.println(name + ": " + learningRate + ", " + bias);
+        if (lexicon.size() == 0)
+            weightVector.write(out);
+        else
+            weightVector.write(out, lexicon);
+    }
+
+
+    /**
+     * Writes the learned function's internal representation in binary form.
+     *
+     * @param out The output stream.
+     **/
+    public void write(ExceptionlessOutputStream out) {
+        super.write(out);
+        out.writeDouble(learningRate);
+        out.writeDouble(bias);
+        weightVector.write(out);
+    }
+
+
+    /**
+     * Reads the binary representation of a learner with this object's run-time type, overwriting
+     * any and all learned or manually specified parameters as well as the label lexicon but
+     * without modifying the feature lexicon.
+     *
+     * @param in The input stream.
+     **/
+    public void read(ExceptionlessInputStream in) {
+        super.read(in);
+        learningRate = in.readDouble();
+        bias = in.readDouble();
+        weightVector = SparseWeightVector.readWeightVector(in);
+    }
+
+
+    /** Returns a deep clone of this learning algorithm. */
+    public Object clone() {
+        StochasticGradientDescentCL clone = null;
+
+        try {
+            clone = (StochasticGradientDescentCL) super.clone();
+        } catch (Exception e) {
+            System.err.println("Error cloning StochasticGradientDescentCL: " + e);
+            System.exit(1);
+        }
+
+        clone.weightVector = (SparseWeightVector) weightVector.clone();
+        return clone;
+    }
+
+
+    /**
+     * Simply a container for all of {@link StochasticGradientDescentCL}'s configurable
+     * parameters. Using instances of this class should make code more readable and constructors
+     * less complicated.
+     *
+     * @author Nick Rizzolo
+     **/
+    public static class Parameters extends Learner.Parameters {
+        /**
+         * The hypothesis vector; default {@link StochasticGradientDescentCL#defaultWeightVector}.
+         **/
+        public SparseWeightVector weightVector;
+        /**
+         * The rate at which weights are updated; default {@link #defaultLearningRate}.
+         **/
+        public double learningRate;
+        /**
+         * The name of the loss function
+         */
+        public String lossFunction;
+
+
+        /** Sets all the default values. */
+        public Parameters() {
+            weightVector = (SparseWeightVector) defaultWeightVector.clone();
+            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
+        }
+
+
+        /**
+         * Sets the parameters from the parent's parameters object, giving defaults to all
+         * parameters declared in this object.
+         **/
+        public Parameters(Learner.Parameters p) {
+            super(p);
+            weightVector = (SparseWeightVector) defaultWeightVector.clone();
+            learningRate = defaultLearningRate;
+            lossFunction = defaultLossFunction;
+        }
+
+
+        /** Copy constructor. */
+        public Parameters(Parameters p) {
+            super(p);
+            weightVector = p.weightVector;
+            learningRate = p.learningRate;
+            lossFunction = p.lossFunction;
+        }
+
+
+        /**
+         * Calls the appropriate {@code Learner.setParameters(Parameters)} method for this
+         * {@code Parameters} object.
+         *
+         * @param l The learner whose parameters will be set.
+         **/
+        public void setParameters(Learner l) {
+            ((StochasticGradientDescentCL) l).setParameters(this);
+        }
+
+
+        /**
+         * Creates a string representation of these parameters in which only those parameters that
+         * differ from their default values are mentioned.
+         **/
+        public String nonDefaultString() {
+            String result = super.nonDefaultString();
+
+            if (learningRate != StochasticGradientDescentCL.defaultLearningRate)
+                result += ", learningRate = " + learningRate;
+            if (!Objects.equals(lossFunction, StochasticGradientDescentCL.defaultLossFunction))
+                result += ", lossFunction = " + lossFunction;
+
+            if (result.startsWith(", "))
+                result = result.substring(2);
+            return result;
+        }
+    }
+}
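Finally, a hedged usage sketch for the new classifier, configuring the hinge loss through the `Parameters` object introduced above. The feature arrays and learning rate are invented for illustration; in a real LBJava pipeline the learner would normally be generated from an `.lbj` specification and trained by the compiler's training machinery rather than called directly like this:

```java
// Hypothetical configuration sketch; not the standard LBJava training path.
import edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescentCL;

public class SgdClUsageSketch {
    public static void main(String[] args) {
        // Configure the learner: hinge loss instead of the "lms" default.
        StochasticGradientDescentCL.Parameters p = new StochasticGradientDescentCL.Parameters();
        p.learningRate = 0.05;     // overrides defaultLearningRate (0.1)
        p.lossFunction = "hinge";  // overrides defaultLossFunction ("lms")
        StochasticGradientDescentCL learner = new StochasticGradientDescentCL("demo", p);

        // learn() maps label index 1 -> +1 and 0 -> -1, then applies the hinge
        // update because y * (w.x + b) = 0 <= 1 on the first example.
        int[] features = {0, 1};
        double[] values = {1.0, 2.0};
        learner.learn(features, values, new int[] {1}, new double[] {1.0});

        // w = {0.05, 0.10}, bias = 0.05, so score = 0.05 + 0.20 + 0.05 = 0.30.
        System.out.println(learner.score(features, values));
    }
}
```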