Skip to content

Commit

Permalink
Scan new versions for existing libraries (#552)
Browse files Browse the repository at this point in the history
* Scan new versions for existing libraries

* Remove redudant changes left from testing

* Fix function call after refactoring

* Fix task registration after refactoring

* Reduce number of jobs to pass github limitations

* Use simple matrix

* Use env variable to specify library version

* Find update entry based on the latest supported version

* Use jackson to parse index file when fetching latest version

* Remove unused import

* Improve workflow steps titles

* Remove suppress warnings after refactoring

* Extract github limitations as parameter of the gradle task

* Remove unused suppress

* Use different PR branch name

* Use bash instead of sh to invoke push script

* Add comments into the tryPush script

* Always extract coordinates part in the same way

* Extract gradle task for fetching newer versions

* Properly add Input and Output anotations to the updater task

* Rename function that extracts information from provided coordinates

* Use abstract getters for properties in the fetching task

* Add a doc file that explains how the scan works
  • Loading branch information
dnestoro authored Nov 12, 2024
1 parent 8a3a3d2 commit 6d740fe
Show file tree
Hide file tree
Showing 9 changed files with 492 additions and 12 deletions.
144 changes: 144 additions & 0 deletions .github/workflows/check-new-library-versions.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
name: "Check new library versions"

# Runs twice a month (cron: day 8 and day 22 at 00:00), alternating with the scheduled release workflow,
# so that there is enough time between runs to provide metadata for failing tests.
# When more scans are needed, the workflow can also be started manually (workflow_dispatch).
on:
  schedule:
    - cron: '0 0 8 * *'
    - cron: '0 0 22 * *'
  workflow_dispatch:

# Default token permissions for jobs that do not declare their own.
permissions:
  contents: write
  actions: write

# Runs that resolve to the same group string cancel the one still in progress.
concurrency:
  group: 'workflow = ${{ github.workflow }}, ref = ${{ github.event.ref }}, pr = ${{ github.event.pull_request.id }}'
  cancel-in-progress: true

jobs:
  # Prepares everything the per-library test jobs report into: the test matrix,
  # a dated branch for successful version bumps, and one issue for failures.
  get-all-libraries:
    if: github.repository == 'oracle/graalvm-reachability-metadata'
    name: "📋 Get list of all supported libraries with newer versions"
    permissions: write-all
    # NOTE(review): GitHub has announced retirement of the hosted ubuntu-20.04 image — confirm this label still resolves to a runner.
    runs-on: "ubuntu-20.04"
    timeout-minutes: 5
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      issue: ${{ steps.set-issue.outputs.issue }}
      # Branch name is computed exactly once here so later jobs cannot derive a different date.
      branch: ${{ steps.set-branch.outputs.branch }}
    steps:
      - name: "☁️ Checkout repository"
        uses: actions/checkout@v4
      - name: "🔧 Prepare environment"
        uses: graalvm/setup-graalvm@v1
        with:
          java-version: '21'
          distribution: 'graalvm'
          github-token: ${{ secrets.GITHUB_TOKEN }}
      # The `matrix` job output is read from this step; presumably the gradle task
      # writes it as a step output itself — TODO confirm in the task implementation.
      - name: "🕸️ Populate matrix"
        id: set-matrix
        run: |
          ./gradlew fetchExistingLibrariesWithNewerVersions --matrixLimit=200
      # NOTE(review): the user.email value used in this workflow looks like an obfuscated placeholder — confirm the intended address.
      - name: "🔨 Create branch"
        id: set-branch
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "Github Actions"
          branch="check-new-library-versions/$(date '+%Y-%m-%d')"
          git switch -C "$branch"
          git push origin "$branch"
          echo "branch=$branch" >> "$GITHUB_OUTPUT"
      - name: "🔨 Create issue"
        id: set-issue
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "Github Actions"
          issue_url=$(gh issue create --title "List unsupported library versions" --body "This issue lists unsupported versions of the existing libraries in the repo")
          # GITHUB_OUTPUT replaces the deprecated `::set-output` workflow command.
          echo "issue=$issue_url" >> "$GITHUB_OUTPUT"

  # One job per matrix entry: runs the newest existing tests of a library against a newer library version.
  # Matrix values are handed to the shell through environment variables instead of being
  # interpolated into the script text, so unusual characters cannot alter the commands.
  test-all-metadata:
    name: "🧪 ${{ matrix.coordinates }} (GraalVM for JDK ${{ matrix.version }} @ ${{ matrix.os }})"
    permissions: write-all
    runs-on: ${{ matrix.os }}
    timeout-minutes: 20
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    needs: get-all-libraries
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.get-all-libraries.outputs.matrix) }}
    steps:
      - name: "☁️ Checkout repository"
        uses: actions/checkout@v4
      - name: "🔧 Setup java"
        uses: actions/setup-java@v4
        with:
          distribution: 'oracle'
          java-version: '21'
      - name: "🔧 Prepare environment"
        uses: graalvm/setup-graalvm@v1
        with:
          set-java-home: 'false'
          java-version: ${{ matrix.version }}
          distribution: 'graalvm'
          github-token: ${{ secrets.GITHUB_TOKEN }}
          native-image-job-reports: 'true'
      # Derives, from "group:artifact:version" coordinates, the newest version directory under
      # tests/src/<group>/<artifact>/ and exports it (plus the matching path and coordinates) to later steps.
      - name: "Extract test path and library version"
        env:
          COORDINATES: ${{ matrix.coordinates }}
        run: |
          LIBRARY_PATH=$(echo "$COORDINATES" | cut -d ':' -f1-2 | sed 's/:/\//g')
          LATEST_VERSION=$(find "tests/src/$LIBRARY_PATH"/* -maxdepth 1 -type d | sort -V | tail -1 | cut -d '/' -f5)
          TEST_PATH="$LIBRARY_PATH/$LATEST_VERSION"
          TEST_COORDINATES=$(echo "$TEST_PATH" | tr / :)
          echo "LATEST_VERSION=$LATEST_VERSION" >> "${GITHUB_ENV}"
          echo "TEST_PATH=$TEST_PATH" >> "${GITHUB_ENV}"
          echo "TEST_COORDINATES=$TEST_COORDINATES" >> "${GITHUB_ENV}"
      - name: "Pull allowed docker images"
        run: |
          ./gradlew pullAllowedDockerImages --coordinates="$TEST_COORDINATES"
      - name: "Disable docker"
        run: |
          sudo apt-get install openbsd-inetd
          sudo bash -c "cat ./.github/workflows/discard-port.conf >> /etc/inetd.conf"
          sudo systemctl start inetd
          # -p: do not fail when the drop-in directory already exists on the runner image
          sudo mkdir -p /etc/systemd/system/docker.service.d
          sudo bash -c "cat ./.github/workflows/dockerd.service > /etc/systemd/system/docker.service.d/http-proxy.conf"
          sudo systemctl daemon-reload
          sudo systemctl restart docker
      # GVM_TCK_LV carries the library version under test (third field of the coordinates);
      # the tests themselves are the ones written for TEST_COORDINATES.
      - name: "🧪 Run '${{ env.TEST_COORDINATES }}' tests"
        env:
          COORDINATES: ${{ matrix.coordinates }}
        run: |
          GVM_TCK_LV=$(echo "$COORDINATES" | cut -d ':' -f3)
          export GVM_TCK_LV
          ./gradlew test -Pcoordinates="$TEST_COORDINATES"
      - name: "✔️ New library is supported"
        if: success()
        env:
          COORDINATES: ${{ matrix.coordinates }}
          # Branch created by get-all-libraries; exported for the push script's use.
          VERSIONS_UPDATE_BRANCH: ${{ needs.get-all-libraries.outputs.branch }}
        run: |
          bash ./.github/workflows/tryPushVersionsUpdate.sh "$COORDINATES" "$LATEST_VERSION"
      - name: "❗ New library is not supported"
        if: failure()
        env:
          COORDINATES: ${{ matrix.coordinates }}
          ISSUE_URL: ${{ needs.get-all-libraries.outputs.issue }}
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "Github Actions"
          gh issue comment "$ISSUE_URL" --body "$COORDINATES"

  # Opens the pull request from the branch the test jobs pushed to.
  process-results:
    name: "🧪 Process results"
    runs-on: "ubuntu-20.04"
    # Run even when some test jobs failed, but only if the preparation job actually
    # succeeded — otherwise (e.g. skipped on forks) there is no branch to open a PR from.
    if: ${{ always() && needs.get-all-libraries.result == 'success' }}
    needs:
      - get-all-libraries
      - test-all-metadata
    permissions: write-all
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: "☁️ Checkout repository"
        uses: actions/checkout@v4
      - name: "✏️ PR for supported versions"
        env:
          BRANCH: ${{ needs.get-all-libraries.outputs.branch }}
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "Github Actions"
          git fetch origin "$BRANCH"
          git checkout "$BRANCH"
          gh pr create --title "Update supported library versions" --body "This pull request updates supported versions of the existing libraries in the repo"
46 changes: 46 additions & 0 deletions .github/workflows/tryPushVersionsUpdate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# This script runs the addTestedVersion gradle task, which adds a new version to the tested-versions list of the proper index.json file.
# Since the script could be executed from multiple parallel jobs, we want to avoid two things here: overwriting of previous changes and merge conflicts.
# To prevent overwriting of changes that some job already created, we only push changes from the current job if we are 0 commits behind the origin branch.
# Once that is achieved, we can try to push changes.
# If the push was rejected, we remove the changes of the current job, rebase, and repeat the process until it succeeds.
#
# Usage: tryPushVersionsUpdate.sh <coordinates> <lastSupportedVersion>
#   $1  coordinates passed to addTestedVersion (also used as the commit message)
#   $2  value passed as --lastSupportedVersion
# Environment:
#   VERSIONS_UPDATE_BRANCH  optional; branch to push to. Defaults to check-new-library-versions/<today>.

set -x

# NOTE(review): the e-mail value below looks like an obfuscated placeholder — confirm the intended address.
git config --local user.email "[email protected]"
git config --local user.name "Github Actions"

# Prefer the branch name handed in by the caller so that a job starting after midnight
# still targets the branch that was created when the workflow began.
BRANCH="${VERSIONS_UPDATE_BRANCH:-check-new-library-versions/$(date '+%Y-%m-%d')}"
git fetch origin "$BRANCH"
git checkout "$BRANCH"

while true
do
    # update the list of tested versions
    ./gradlew addTestedVersion --coordinates="$1" --lastSupportedVersion="$2"

    # commit changes
    git add -u
    git commit -m "$1"

    # only push changes if we are not behind the remote branch
    if [ "$(git rev-list --count "origin/$BRANCH" --not "$BRANCH")" -eq 0 ]
    then
        # try to push changes; if the push was successful, we can exit the loop
        if git push origin "$BRANCH"
        then
            break
        fi
    fi

    # we are either behind the remote branch or the push was rejected => remove changes and rebase accepting incoming changes
    git reset --hard HEAD~1
    git fetch origin "$BRANCH"
    git rebase -X theirs "origin/$BRANCH"
done


51 changes: 51 additions & 0 deletions docs/Infrastructure/check-new-versions-of-libraries.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Check new versions of existing libraries in the repository

As the number of libraries in the repository grows fast, it is hard to track new library versions for every library manually.
Instead of doing this process manually, we provided a mechanism (through [a GitHub workflow](https://github.com/oracle/graalvm-reachability-metadata/blob/master/.github/workflows/check-new-library-versions.yml))
that automatically scans the Maven Central repository for new versions of the libraries that we currently have.

## How it works

The workflow gets triggered automatically every two weeks (alternating with the automatic release weeks). Besides that, the workflow can be triggered manually from GitHub Actions.
The whole process consists of the following parts:
* Scanning of the MavenCentral
* Running existing tests with newer versions of the library
* Creating a pull-request that updates `tested-versions` field of the `index.json` file for libraries that passed tests with a new version
* Creating an issue that lists all versions of libraries that failed their existing tests.

As a preparation for the whole process, we are creating a branch for all successful tests, and a single issue for all failed tests.

### Scanning the MavenCentral

At first, the workflow runs gradle task called `fetchExistingLibrariesWithNewerVersions`.
The task itself does the following:
1. Gets the list of all existing libraries in the repository
2. For each library, it searches for the latest tested version in the corresponding library `index.json` file
3. For the given library name, it fetches `maven-metadata.xml` file from the MavenCentral repository
4. In the fetched `maven-metadata.xml` file, it finds the position of the latest tested version (gathered in step 2) and returns all the versions after it
5. As a last step, the task returns a list of maven coordinates of libraries with newer versions (alongside the java version and os version required for testing)

### Running existing tests with newer versions

Now that we have the list of coordinates, we spawn a new job in the GitHub workflow for each coordinate in the list.
Each of the spawned jobs:
1. Extracts the following parts from the given maven coordinates:
   1. Latest version that we have tests written for
   2. Path to the latest tests we have
   3. Maven coordinates of the latest tests
2. Sets the `GVM_TCK_LV` env variable to the version we want to test. This way the executed tests will use the library version specified in the env variable.
3. Runs the latest test with `./gradlew test -Pcoordinates=<testCoordinates>` (with `testCoordinates` calculated in step 1)

### Aggregating results of the tests

Based on the outcome of the test we:
* Update the list of `tested-versions` in the proper library `index.json` file and commit changes to the previously created branch, if the test passed
* Add a comment that explains which library version cannot pass the tests, in the issue we previously created

Note: since the spawned jobs run tests in parallel, we have to make some kind of synchronization to avoid merge conflicts if two tests are populating the same `index.json` file.
The whole process of synchronization is driven by the [tryPushVersionsUpdate](https://github.com/oracle/graalvm-reachability-metadata/blob/master/.github/workflows/tryPushVersionsUpdate.sh) script.

At the end, when all jobs have finished their executions, the workflow just creates a pull-request based on a branch the jobs committed to.
As a final result, we have:
* a pull-request with updates of all new tested versions
* an issue with a list of all versions that don't work with existing metadata
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import org.graalvm.internal.tck.DockerTask
import org.graalvm.internal.tck.ConfigFilesChecker
import org.graalvm.internal.tck.ScaffoldTask
import org.graalvm.internal.tck.GrypeTask
import org.graalvm.internal.tck.TestedVersionUpdaterTask
import org.graalvm.internal.tck.harness.tasks.CheckstyleInvocationTask
import org.graalvm.internal.tck.harness.tasks.FetchExistingLibrariesWithNewerVersionsTask
import org.graalvm.internal.tck.harness.tasks.TestInvocationTask


Expand Down Expand Up @@ -161,6 +163,14 @@ Provider<Task> generateMatrixDiffCoordinates = tasks.register("generateMatrixDif
}
}

// groovy tasks
// Registers the task that the "Check new library versions" workflow invokes to find
// existing libraries for which newer versions are available.
tasks.register("fetchExistingLibrariesWithNewerVersions", FetchExistingLibrariesWithNewerVersionsTask.class) { task ->
    task.setGroup(METADATA_GROUP)
    // Description shown in the task listing; it names what this task reports
    // (only libraries with newer versions, not every library).
    task.setDescription("Returns list of coordinates of existing libraries that have newer versions")
    // The task receives the coordinates of the libraries it should check.
    task.setAllLibraryCoordinates(matchingCoordinates)
}

// java tasks
tasks.register("checkAllowedDockerImages", GrypeTask.class) { task ->
task.setDescription("Returns list of allowed docker images")
task.setGroup(METADATA_GROUP)
Expand All @@ -182,3 +192,8 @@ tasks.register("checkConfigFiles", ConfigFilesChecker.class) { task ->
task.setDescription("Checks content of config files for a new library.")
task.setGroup(METADATA_GROUP)
}

// Registers the task used by the tryPushVersionsUpdate script to add a newly
// tested version to a library's tested-versions list.
tasks.register("addTestedVersion", TestedVersionUpdaterTask.class) { updater ->
    updater.group = METADATA_GROUP
    updater.description = "Updates list of tested versions."
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,33 @@
*/
package org.graalvm.internal.tck.harness;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.graalvm.internal.tck.model.MetadataVersionsIndexEntry;
import org.gradle.api.Project;
import org.gradle.api.file.Directory;
import org.gradle.api.file.DirectoryProperty;
import org.gradle.api.file.FileSystemLocation;
import org.gradle.api.provider.Property;
import org.gradle.api.provider.Provider;
import org.gradle.process.ExecOperations;
import org.gradle.util.internal.VersionNumber;

import javax.inject.Inject;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -299,9 +301,7 @@ List<String> getMatchingCoordinates(String coordinateFilter) {
String artifactId = strings.get(1);
String version = strings.get(2);


Set<String> matchingCoordinates = new HashSet<>();

for (String directory : getMatchingMetadataDirs(groupId, artifactId)) {
Path index = metadataRoot().resolve(directory).resolve("index.json");
List<Map<String, ?>> metadataIndex = (List<Map<String, ?>>) extractJsonFile(index);
Expand All @@ -322,7 +322,8 @@ List<String> getMatchingCoordinates(String coordinateFilter) {
}
}
}
return matchingCoordinates.stream().collect(Collectors.toList());

return new ArrayList<>(matchingCoordinates);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ abstract class AbstractSubprojectTask extends DefaultTask {
// Environment variables for setting up TCK
env.put("GVM_TCK_LC", coordinates)
env.put("GVM_TCK_EXCLUDE", override.toString())
env.put("GVM_TCK_LV", version)
if (System.getenv("GVM_TCK_LV") == null) {
// we only set this env variable if user didn't specify it manually
env.put("GVM_TCK_LV", version)
}
env.put("GVM_TCK_MD", metadataDir.toAbsolutePath().toString())
env.put("GVM_TCK_TCKDIR", tckExtension.getTckRoot().get().getAsFile().toPath().toAbsolutePath().toString())
spec.environment(env)
Expand Down
Loading

0 comments on commit 6d740fe

Please sign in to comment.