diff --git a/README.md b/README.md index e458b75d..56f9de3e 100644 --- a/README.md +++ b/README.md @@ -21,16 +21,19 @@ Please cite the following papers, if you use Gin for academic purposes:
- release 1: ["GI in No Time"](https://github.com/gintool/gin/blob/master/doc/gin.pdf), David R. White, 3rd International GI Workshop, GECCO Companion Material Proceedings, 2017. -Extensions: +Extensions and specific parts of Gin: -Please cite the following paper, if using the edits from -the [insert](https://github.com/gintool/gin/tree/master/src/main/java/gin/edit/insert) folder: + - "Standard" Statement/Line Edits from the [statement](https://github.com/gintool/gin/tree/master/src/main/java/gin/edit/statement) and [line](https://github.com/gintool/gin/tree/master/src/main/java/gin/edit/line) folders: +["Program Transformation Landscapes for Automated Program Modification Using Gin"](https://link.springer.com/article/10.1007/s10664-023-10344-5), Petke, J., Alexander, B., Barr, E.T., Brownlee, A. E. I., Wagner, M. & White, D.R., vol 28. 2023. + - Edits from the [insert](https://github.com/gintool/gin/tree/master/src/main/java/gin/edit/insert) folder: ["Injecting Shortcuts for Faster Running Java Code"](https://ieeexplore.ieee.org/document/9185708), Alexander E. I. Brownlee, Justyna Petke, Anna F. Rasburn, CEC 2020. - -Please cite the following paper, if using Regression Test Selection (RTS) strategies: + - Edits from the [llm](https://github.com/gintool/gin/tree/llm/src/main/java/gin/edit/llm) branch: + ["Enhancing Genetic Improvement Mutations Using Large Language Models"](https://link.springer.com/chapter/10.1007/978-3-031-48796-5_13), Alexander E. I. Brownlee, James Callan, Karine Even-Mendoza, Alina Geiger, Carol Hanna, Justyna Petke, Federica Sarro and Dominik Sobania. International Symposium on Search Based Software Engineering 2023. LNCS 14415. + - Regression Test Selection (RTS) strategies: ["Enhancing Genetic Improvement of Software with Regression Test Selection"](https://doi.org/10.1109/ICSE43902.2021.00120), Giovani Guizzo, Justyna Petke, Federica Sarro, Mark Harman, ICSE 2021. + - Profiler and associated tools: ["Comparing Apples and Oranges? 
Investigating the Consistency of CPU and Memory Profiler Results Across Multiple Java Versions"](https://doi.org/10.1007/s10515-024-00423-2), Watkinson, M., Brownlee, A.E.I. Automated Software Engineering vol 31. 2024. ## The Gin Design Philosophy @@ -205,6 +208,23 @@ unexpected hard-coded dependencies. A full example with an existing Maven project is given further below. +### Multiple profiling runs + +As sampler profiling is a stochastic process, it is also worth performing repeat runs, ideally with a reboot between runs. We have provided a tool to merge multiple profiler CSVs into a single file: gin.util.analysis.MergeProfilerFiles. This retains only hot methods appearing in at least a specified number of the repeats (set with -n; by default, all of them), and takes the union of all unit tests observed as calling a given hot method. If, for example, you were to run the following bash script: +``` +#!/bin/bash +projectnameforgin='spatial4j' + +for i in {1..20}; do + java -cp ../gin/build/gin.jar gin.util.Profiler -r 1 -p $projectnameforgin -d ./ -h ~/.sdkman/candidates/maven/current/ -o $projectnameforgin.Profiler_output_$i.csv &> $projectnameforgin.Profiler_stdoutstderr_$i.txt +done +``` +you will get a series of profiler files like spatial4j.Profiler_output_1.csv, spatial4j.Profiler_output_2.csv, etc. To aggregate them, do this: +``` +java -cp ../gin/build/gin.jar gin.util.analysis.MergeProfilerFiles -if "spatial4j.Profiler_output_*.csv" -of spatial4j.Profiler_aggregated_output.csv +``` +and you'll get a single file spatial4j.Profiler_aggregated_output.csv. By default this will contain the intersection of hot methods found across all profile runs, with each method having the union of calling unit tests discovered for it. + ## Automated test case generation for Maven and Gradle projects Gin uses [EvoSuite](http://www.evosuite.org/) to generate test cases automatically. 
Make sure test class file are @@ -270,6 +290,8 @@ Assuming EvoSuite tests were generated and original tests not removed: ``` java -cp build/gin.jar:testgeneration/evosuite-1.0.6.jar gin.util.RandomSampler -d examples/maven-simple -p my-app -m examples/maven-simple/example_profiler_results.csv -h ``` +The CSV written out by RandomSampler contains one line per unit test per patch. The utility gin.util.analysis.AggregateRandomSamplerOutput can be used to aggregate these results to one line per patch, with summary statistics indicating test pass rates and run times. + Gin also offers an implementation of the multi-objective algorithm NSGA-II for improving the execution time and memory consumption of software. diff --git a/src/main/java/gin/util/GPFix.java b/src/main/java/gin/util/GPFix.java index 3152331c..16c9c17b 100644 --- a/src/main/java/gin/util/GPFix.java +++ b/src/main/java/gin/util/GPFix.java @@ -135,7 +135,10 @@ private void setup(UnitTestResultSet results) { failing += 1; } } + if (passing == 0 && failing>0){this.multiplier = 1;} + else{ this.multiplier = (failing > 0) ? 
passing * WEIGHT / failing : 0; + } Logger.info("Currently failing tests: " + failing); Logger.info("Currently passing tests (i.e., current fitness): " + passing); this.targetFitness = passing + (this.multiplier * failing); diff --git a/src/main/java/gin/util/analysis/AggregateRandomSamplerOutput.java b/src/main/java/gin/util/analysis/AggregateRandomSamplerOutput.java index 4e6468ad..260e9fd0 100644 --- a/src/main/java/gin/util/analysis/AggregateRandomSamplerOutput.java +++ b/src/main/java/gin/util/analysis/AggregateRandomSamplerOutput.java @@ -2,13 +2,10 @@ import java.io.File; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; import org.pmw.tinylog.Logger; @@ -23,17 +20,18 @@ import com.sampullara.cli.Args; import com.sampullara.cli.Argument; - /** * RS output is one line per test - * This aggregates to one line per edit; with a count of test passes, total tests run, and total run time + * This tool aggregates to one line per edit; with a count of test passes, total tests run, and total run time */ public class AggregateRandomSamplerOutput { - @Argument(alias = "f", description = "Input: a file output by RandomSampler", required = true) + @Argument(alias = "if", description = "Input file: a CSV file output by RandomSampler", required = true) protected File inputFile; public static void main(String[] args) { + Logger.info("Random Sampler output is one line per test."); + Logger.info("This tool aggregates to one line per edit; with a count of test passes, total tests run, and total run time."); AggregateRandomSamplerOutput a = new AggregateRandomSamplerOutput(args); a.process(); } diff --git a/src/main/java/gin/util/analysis/MergeProfilerFiles.java b/src/main/java/gin/util/analysis/MergeProfilerFiles.java new file mode 100644 index 00000000..57a3ac09 --- /dev/null 
+++ b/src/main/java/gin/util/analysis/MergeProfilerFiles.java
@@ -0,0 +1,222 @@
+package gin.util.analysis;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.WildcardFileFilter;
+import org.pmw.tinylog.Logger;
+
+import com.opencsv.CSVReader;
+import com.opencsv.CSVReaderBuilder;
+import com.opencsv.CSVWriterBuilder;
+import com.opencsv.ICSVWriter;
+import com.sampullara.cli.Args;
+import com.sampullara.cli.Argument;
+
+/**
+ * Merges several Gin profiler CSV files (typically one per repeat run) into one CSV.
+ *
+ * <p>Every file uses the columns Project,Rank,Method,Count,Tests. A method is written to the
+ * output only if it occurs in at least {@code n} of the input files. Its output count is the
+ * sum of its counts over all files, its rank is its position when methods are ordered by that
+ * sum (largest first), and its tests are the union of the tests listed for it in any file.
+ */
+public class MergeProfilerFiles {
+
+    @Argument(alias = "d", description = "Root directory for filename patterns; defaults to current dir", required = false)
+    protected String dir = ".";
+
+    @Argument(alias = "if", description = "Comma separated list of filename patterns, e.g. spark.Profiler_output_*.csv or spark.Profiler_output_1.csv,spark.Profiler_output_2.csv,spark.Profiler_output_3.csv", required = true)
+    protected String inputFiles;
+
+    @Argument(alias = "of", description = "Output: e.g. spark.Profiler_output.csv", required = true)
+    protected File outputFile;
+
+    @Argument(alias = "n", description = "Only methods in at least n repeats will be included; defaults to the number of files (so keep only the intersection of hot methods)", required = false)
+    protected int n = -1;
+
+    /**
+     * Command-line entry point: parses the arguments and merges the matching profiler files.
+     *
+     * @param args command-line arguments, see the {@code @Argument} fields of this class
+     */
+    public static void main(String[] args) {
+        Logger.info("A tool to merge Gin profiler files.");
+        Logger.info("Only methods in at least n repeats will be included");
+        Logger.info("n defaults to the number of files (so keep only the intersection of hot methods)");
+        Logger.info("In the output, 'count' is sum of counts across all repeat runs; 'rank' is based on the aggregated count");
+        Logger.info("Tests for each method are all those seen over the input files (union)");
+
+        MergeProfilerFiles m = new MergeProfilerFiles(args);
+        try {
+            m.mergeProfiles();
+        } catch (IOException e) {
+            Logger.error("IOException processing files.");
+            Logger.error(e);
+        }
+    }
+
+    /**
+     * Creates a merger configured from command-line arguments.
+     *
+     * @param args command-line arguments; handed to {@code Args.parseOrExit}
+     */
+    public MergeProfilerFiles(String[] args) {
+        Args.parseOrExit(this, args);
+    }
+
+    /**
+     * Reads all input files matching {@code inputFiles} under {@code dir}, merges them and
+     * writes the result to {@code outputFile}.
+     *
+     * @throws IOException if the directory cannot be listed or a file cannot be read or written
+     */
+    public void mergeProfiles() throws IOException {
+
+        List<File> inputs = findInputFiles();
+
+        // a negative n means "all files"; n can never usefully exceed the number of files
+        if (n < 0) {
+            n = inputs.size();
+        } else {
+            n = Math.min(n, inputs.size());
+        }
+
+        Logger.info("Found " + inputs.size() + " profiler files. Keeping hot methods appearing in at least " + n + " files.");
+
+        Map<String, ProfiledMethod> methods = new HashMap<>();
+        String project = "";
+        for (File input : inputs) {
+            project = readProfile(input, methods, project);
+        }
+
+        List<ProfiledMethod> sortedMethods = new ArrayList<>();
+        for (ProfiledMethod pm : methods.values()) {
+            // one count is recorded per row, so counts.size() is the number of occurrences;
+            // ">=" (not "==") so that n below the file count keeps methods seen more often
+            if (pm.counts.size() >= n) {
+                sortedMethods.add(pm);
+            }
+        }
+
+        Collections.sort(sortedMethods, new Comparator<ProfiledMethod>() {
+            @Override
+            public int compare(ProfiledMethod arg0, ProfiledMethod arg1) {
+                int byCount = Integer.compare(arg1.getTotalCount(), arg0.getTotalCount());
+                // tie-break on the signature so that ranks do not depend on HashMap order
+                return byCount != 0 ? byCount : arg0.methodSignature.compareTo(arg1.methodSignature);
+            }
+        });
+
+        // try-with-resources so the output is flushed and closed even if writing fails
+        try (ICSVWriter writer = new CSVWriterBuilder(new FileWriter(outputFile)).build()) {
+            writer.writeNext(new String[] {"Project","Rank","Method","Count","Tests"});
+
+            int rank = 1;
+            for (ProfiledMethod pm : sortedMethods) {
+                writer.writeNext(new String[] {
+                        project,
+                        Integer.toString(rank++),
+                        pm.methodSignature,
+                        Integer.toString(pm.getTotalCount()),
+                        pm.getCSVTests()}
+                );
+            }
+        }
+
+        Logger.info("All done.");
+    }
+
+    /** Resolves the comma-separated filename patterns against {@code dir}; sorted, no duplicates. */
+    private List<File> findInputFiles() throws IOException {
+        File root = new File(dir);
+        // a set, so a file matched by two patterns is not counted as two repeats
+        Set<File> matches = new TreeSet<>();
+        for (String pattern : inputFiles.split(",")) {
+            File[] files = root.listFiles((FileFilter) new WildcardFileFilter(pattern));
+            if (files == null) {
+                // listFiles returns null when root is not a readable directory
+                throw new IOException("Cannot list files in directory: " + root);
+            }
+            matches.addAll(Arrays.asList(files));
+        }
+        return new ArrayList<>(matches);
+    }
+
+    /**
+     * Adds the rows of one profiler CSV to {@code methods}.
+     *
+     * @return the project name of the last row read, or {@code project} if there were no rows
+     */
+    private static String readProfile(File input, Map<String, ProfiledMethod> methods, String project) throws IOException {
+        try (CSVReader inCSV = new CSVReader(new FileReader(input))) {
+            inCSV.skip(1); // header row
+            for (String[] s : inCSV) {
+                // cols are: Project,Rank,Method,Count,Tests
+                if (s.length < 4) {
+                    Logger.warn("Skipping malformed row in " + input + ": " + Arrays.toString(s));
+                    continue;
+                }
+                project = s[0];
+                ProfiledMethod pm = methods.get(s[2]);
+                if (pm == null) {
+                    pm = new ProfiledMethod(s[2]);
+                    methods.put(s[2], pm);
+                }
+                pm.counts.add(Integer.parseInt(s[3]));
+                if (s.length > 4) {
+                    for (String test : s[4].split(",")) {
+                        if (!test.isEmpty()) {
+                            pm.tests.add(test);
+                        }
+                    }
+                }
+            }
+        }
+        return project;
+    }
+
+    /** Accumulated data for one method signature across all input files. */
+    private static class ProfiledMethod {
+        List<Integer> counts = new ArrayList<>();
+        Set<String> tests = new TreeSet<>();
+        String methodSignature;
+
+        public ProfiledMethod(String methodSignature) {
+            this.methodSignature = methodSignature;
+        }
+
+        /** Returns the sum of the counts recorded for this method. */
+        public int getTotalCount() {
+            int sum = 0;
+            for (Integer i : counts) {
+                sum += i;
+            }
+            return sum;
+        }
+
+        /** Returns the tests joined with commas, in sorted order. */
+        public String getCSVTests() {
+            StringBuilder rval = new StringBuilder();
+            for (String s : tests) {
+                if (rval.length() > 0) {
+                    rval.append(',');
+                }
+                rval.append(s);
+            }
+            return rval.toString();
+        }
+    }
+}