diff --git a/mustard-experiments/src/main/java/org/data2semantics/mustard/experiments/cluster/LocalExecutor.java b/mustard-experiments/src/main/java/org/data2semantics/mustard/experiments/cluster/LocalExecutor.java index e055c33..3c95434 100644 --- a/mustard-experiments/src/main/java/org/data2semantics/mustard/experiments/cluster/LocalExecutor.java +++ b/mustard-experiments/src/main/java/org/data2semantics/mustard/experiments/cluster/LocalExecutor.java @@ -7,11 +7,11 @@ public class LocalExecutor { // parameter settings file - private static final String parmsFile = "../src/main/java/org/data2semantics/mustard/experiments/JWS2015/parms.txt"; + private static final String parmsFile = "../../src/main/java/org/data2semantics/mustard/experiments/JWS2015/parms.txt"; //private static final String parmsFile = "../src/main/java/org/data2semantics/mustard/experiments/JWS2015/parms_minfreq.txt"; // regular affiliation prediction - private static final String prefix = "-dataset AIFB -file ../datasets/aifb-fixed_complete.n3"; + private static final String prefix = "-dataset AIFB -file ../../datasets/aifb-fixed_complete.n3"; // regular affiliation prediction with minfreq //private static final String prefix = "-leaveRootLabel true -dataset AIFB -file ../datasets/aifb-fixed_complete.n3"; diff --git a/mustard-kernels/src/main/java/org/data2semantics/mustard/kernels/KernelUtils.java b/mustard-kernels/src/main/java/org/data2semantics/mustard/kernels/KernelUtils.java index 902f446..16b77b1 100644 --- a/mustard-kernels/src/main/java/org/data2semantics/mustard/kernels/KernelUtils.java +++ b/mustard-kernels/src/main/java/org/data2semantics/mustard/kernels/KernelUtils.java @@ -16,6 +16,48 @@ public class KernelUtils { public static final String ROOTID = "ROOT1337"; // Special root label used in some kernels + /** + * extract a subset train (square) kernel matrix from a larger (square) kernel matrix + * + * @param kernel + * @param startIdx, begin index inclusive + * @param endIdx, end 
index exclusive + * @return train kernel matrix of size: (endIdx-startIdx) x (endIdx-startIdx) + */ + public static double[][] trainSubset(double[][] kernel, int startIdx, int endIdx) { + double[][] ss = new double[endIdx - startIdx][endIdx - startIdx]; + + for (int i = startIdx; i < endIdx; i++) { + for (int j = startIdx; j < endIdx; j++) { + ss[i - startIdx][j - startIdx] = kernel[i][j]; + } + } + return ss; + } + + /** + * extract a subset test (rectangle) kernel matrix from a larger (square) kernel matrix + * Each row of this matrix represents a test instance, each column a train instance (train columns keep their original order, with the test range removed). + * + * @param kernel + * @param startIdx, begin index of the test instances, inclusive + * @param endIdx, end index of the test instances, exclusive + * @return test kernel matrix of size: (endIdx - startIdx) x (kernel.length - (endIdx-startIdx)) + */ + public static double[][] testSubset(double[][] kernel, int startIdx, int endIdx) { + double[][] ss = new double[endIdx - startIdx][kernel.length - (endIdx-startIdx)]; + + for (int i = startIdx; i < endIdx; i++) { + for (int j = 0; j < startIdx; j++) { + ss[i - startIdx][j] = kernel[i][j]; + } + for (int j = endIdx; j < kernel.length; j++) { + ss[i - startIdx][j - (endIdx - startIdx)] = kernel[i][j]; + } + } + return ss; + } + /** * returns a copy of the kernel matrix. 
* @@ -52,24 +94,6 @@ public static double[][] shuffle(double[][] kernel, long seed) { return convert2DoublePrimitives(kernelDouble); } - /** - * Convert an array of SparseVectors to a kernel matrix, by computing the dot product of all the SparseVectors - * - * @param featureVectors, an array of SparseVectors - * @return a 2d array of doubles, the kernel matrix - */ - public static double[][] featureVectors2Kernel(SparseVector[] featureVectors) { - double[][] kernel = initMatrix(featureVectors.length, featureVectors.length); - - for (int i = 0; i < featureVectors.length; i++) { - for (int j = i; j < featureVectors.length; j++) { - kernel[i][j] = featureVectors[i].dot(featureVectors[j]); - kernel[j][i] = kernel[i][j]; - } - } - return kernel; - } - /** * Convert an array of SparseVectors to binary SparseVectors, i.e. only 0 or 1 values. * diff --git a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEAR.java b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEAR.java index 771ca74..fcd4eca 100644 --- a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEAR.java +++ b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEAR.java @@ -58,7 +58,7 @@ private static LibLINEARModel trainLinearModel(Problem prob, LibLINEARParameters } private static LibLINEARModel trainLinearModel(Map probs, LibLINEARParameters params) { - if (!params.isVerbose()) { + if (params.getVerbosity() != LibLINEARParameters.VERBOSITY_FULL) { Linear.disableDebugOutput(); } @@ -133,7 +133,9 @@ public int compare(Kernel o1, Kernel o2) { label = bestSetting.getLabel(); } - System.out.println("Trained Linear SVM for " + label + ", with C: " + bestC + " and P: " + bestP); + if (params.getVerbosity() == LibLINEARParameters.VERBOSITY_FULL || params.getVerbosity() == LibLINEARParameters.VERBOSITY_DEFAULT) { + System.out.println("Trained Linear SVM for " + label + ", with C: " + 
bestC + " and P: " + bestP); + } double avg = 0; for (Feature[] v : probs.get(bestSetting).x) { @@ -141,7 +143,9 @@ public int compare(Kernel o1, Kernel o2) { } avg /= probs.get(bestSetting).x.length; - System.out.println("#instances:" + probs.get(bestSetting).l + ", #features: " + probs.get(bestSetting).n + ", #avg-non-zero: " + avg); + if (params.getVerbosity() == LibLINEARParameters.VERBOSITY_FULL || params.getVerbosity() == LibLINEARParameters.VERBOSITY_DEFAULT) { + System.out.println("#instances:" + probs.get(bestSetting).l + ", #features: " + probs.get(bestSetting).n + ", #avg-non-zero: " + avg); + } return model; } diff --git a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEARParameters.java b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEARParameters.java index 7f97da0..14da09e 100644 --- a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEARParameters.java +++ b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/liblinear/LibLINEARParameters.java @@ -14,6 +14,10 @@ public class LibLINEARParameters { public static final int SVR_PRIMAL = 4; public static final int LR_DUAL = 5; public static final int LR_PRIMAL = 6; + + public static final int VERBOSITY_NONE = 0; + public static final int VERBOSITY_DEFAULT = 1; + public static final int VERBOSITY_FULL = 2; private int[] weightLabels; private double[] weights; @@ -21,7 +25,7 @@ public class LibLINEARParameters { private Parameter params; private double[] cs; private double[] ps; - private boolean verbose; + private int verbose; private double bias; private boolean doCrossValidation; @@ -67,7 +71,7 @@ public LibLINEARParameters(int algorithm) { break; } - verbose = false; + verbose = VERBOSITY_DEFAULT; bias = -1; doCrossValidation = true; doWeightLabels = false; @@ -94,8 +98,8 @@ public void setCs(double[] itParams) { this.cs = itParams; } - public void setVerbose(boolean verbose) { - 
this.verbose = verbose; + public void setVerbosity(int verbosity) { + this.verbose = verbosity; } public void setBias(double bias) { @@ -141,7 +145,7 @@ public double getEps() { return eps; } - public boolean isVerbose() { + public int getVerbosity() { return verbose; } diff --git a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVM.java b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVM.java index 6eaf61d..e84c077 100644 --- a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVM.java +++ b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVM.java @@ -111,7 +111,7 @@ public static LibSVMModel trainSVMModel(double[][] kernel, double[] target, LibS } private static LibSVMModel trainSVMModel(Map svmProbs, double[] target, LibSVMParameters params) { - if (!params.isVerbose()) { + if (params.getVerbosity() != LibSVMParameters.VERBOSITY_FULL) { setNoOutput(); } @@ -173,7 +173,9 @@ public int compare(Kernel o1, Kernel o2) { label = bestSetting.getLabel(); } - System.out.println("Trained SVM for " + label + ", with C: " + bestC + " and P: " + bestP); + if (params.getVerbosity() == LibSVMParameters.VERBOSITY_DEFAULT || params.getVerbosity() == LibSVMParameters.VERBOSITY_FULL) { + System.out.println("Trained SVM for " + label + ", with C: " + bestC + " and P: " + bestP); + } return model; } diff --git a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVMParameters.java b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVMParameters.java index 963f49e..14612ec 100644 --- a/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVMParameters.java +++ b/mustard-learners/src/main/java/org/data2semantics/mustard/learners/libsvm/LibSVMParameters.java @@ -22,11 +22,15 @@ public class LibSVMParameters implements Serializable { public static final int EPSILON_SVR = 
svm_parameter.EPSILON_SVR; public static final int NU_SVR = svm_parameter.NU_SVR; + public static final int VERBOSITY_NONE = 0; + public static final int VERBOSITY_DEFAULT = 1; + public static final int VERBOSITY_FULL = 2; + private svm_parameter params; private double[] itParams; private double[] ps; - private boolean verbose; + private int verbose; private int numFolds; private EvaluationFunction evalFunction; @@ -62,7 +66,7 @@ public LibSVMParameters(int algorithm) { // Weights, SVR epsilon, verbosity, evalFunction can be changed afterwards via setters params.nr_weight = 0; params.p = 0.1; - verbose = false; + verbose = VERBOSITY_DEFAULT; ps = new double[1]; ps[0] = 0.1; @@ -123,11 +127,11 @@ public double[] getItParams() { return itParams; } - public void setVerbose(boolean verbose) { - this.verbose = verbose; + public void setVerbosity(int verbosity) { + this.verbose = verbosity; } - public boolean isVerbose() { + public int getVerbosity() { return verbose; }