-
Notifications
You must be signed in to change notification settings - Fork 19
Fix URL encoding and decoding #160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |||||
| import java.io.Serializable; | ||||||
| import java.net.URI; | ||||||
| import java.net.URISyntaxException; | ||||||
| import java.nio.ByteBuffer; | ||||||
| import java.nio.charset.StandardCharsets; | ||||||
| import java.util.Arrays; | ||||||
| import java.util.Collections; | ||||||
|
|
@@ -34,6 +35,7 @@ | |||||
| import java.util.TreeMap; | ||||||
| import java.util.function.IntPredicate; | ||||||
| import java.util.stream.Collectors; | ||||||
| import java.util.stream.IntStream; | ||||||
| import org.jspecify.annotations.Nullable; | ||||||
|
|
||||||
| /** | ||||||
|
|
@@ -53,9 +55,10 @@ | |||||
| * @since 1.0.0 | ||||||
| */ | ||||||
| public final class PackageURL implements Serializable { | ||||||
|
|
||||||
| private static final long serialVersionUID = 3243226021636427586L; | ||||||
|
|
||||||
| private static final char PERCENT_CHAR = '%'; | ||||||
|
|
||||||
| /** | ||||||
| * Constructs a new PackageURL object by parsing the specified string. | ||||||
| * | ||||||
|
|
@@ -494,35 +497,14 @@ private String canonicalize(boolean coordinatesOnly) { | |||||
| return purl.toString(); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Encodes the input in conformance with RFC 3986. | ||||||
| * | ||||||
| * @param input the String to encode | ||||||
| * @return an encoded String | ||||||
| */ | ||||||
| private String percentEncode(final String input) { | ||||||
| if (input.isEmpty()) { | ||||||
| return input; | ||||||
| } | ||||||
|
|
||||||
| StringBuilder builder = new StringBuilder(); | ||||||
| for (byte b : input.getBytes(StandardCharsets.UTF_8)) { | ||||||
| if (isUnreserved(b)) { | ||||||
| builder.append((char) b); | ||||||
| } | ||||||
| else { | ||||||
| // Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character | ||||||
| builder.append('%'); | ||||||
| builder.append(Integer.toHexString(b).toUpperCase()); | ||||||
| } | ||||||
| } | ||||||
| return builder.toString(); | ||||||
| } | ||||||
|
|
||||||
| private static boolean isUnreserved(int c) { | ||||||
| return (isValidCharForKey(c) || c == '~'); | ||||||
| } | ||||||
|
|
||||||
| private static boolean shouldEncode(int c) { | ||||||
| return !isUnreserved(c); | ||||||
| } | ||||||
|
|
||||||
| private static boolean isAlpha(int c) { | ||||||
| return (isLowerCase(c) || isUpperCase(c)); | ||||||
| } | ||||||
|
|
@@ -578,43 +560,134 @@ private static String toLowerCase(String s) { | |||||
| return new String(chars); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Optionally decodes a String, if it's encoded. If String is not encoded, | ||||||
| * method will return the original input value. | ||||||
| * | ||||||
| * @param input the value String to decode | ||||||
| * @return a decoded String | ||||||
| */ | ||||||
| private String percentDecode(final String input) { | ||||||
| final String decoded = uriDecode(input); | ||||||
| if (!decoded.equals(input)) { | ||||||
| return decoded; | ||||||
| private static int indexOfPercentChar(final byte[] bytes, final int start) { | ||||||
| return IntStream.range(start, bytes.length).filter(i -> isPercent(bytes[i])).findFirst().orElse(-1); | ||||||
| } | ||||||
|
|
||||||
| private static int indexOfUnsafeChar(final byte[] bytes, final int start) { | ||||||
| return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i])).findFirst().orElse(-1); | ||||||
| } | ||||||
|
|
||||||
| private static byte percentDecode(final byte[] bytes, final int start) { | ||||||
| if (start + 2 >= bytes.length) { | ||||||
| throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '" + new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'"); | ||||||
| } | ||||||
|
|
||||||
| int pos1 = start + 1; | ||||||
| byte b1 = bytes[pos1]; | ||||||
| int c1 = Character.digit(b1, 16); | ||||||
|
|
||||||
| if (c1 == -1) { | ||||||
| throw new ValidationException("Invalid percent encoding char 1 at offset " + pos1 + " with value '" + ((char) b1) + "'"); | ||||||
| } | ||||||
| return input; | ||||||
|
|
||||||
| int pos2 = pos1 + 1; | ||||||
| byte b2 = bytes[pos2]; | ||||||
| int c2 = Character.digit(bytes[pos2], 16); | ||||||
|
|
||||||
| if (c2 == -1) { | ||||||
| throw new ValidationException("Invalid percent encoding char 2 at offset " + pos2 + " with value '" + ((char) b2) + "'"); | ||||||
| } | ||||||
|
|
||||||
| return ((byte) ((c1 << 4) + c2)); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Decodes a percent-encoded string. | ||||||
| * | ||||||
| * @param source string to decode, not {@code null} | ||||||
| * @return A decoded string | ||||||
| * @throws NullPointerException if {@code source} is {@code null} | ||||||
| */ | ||||||
| public static String uriDecode(String source) { | ||||||
| int length = source.length(); | ||||||
| StringBuilder builder = new StringBuilder(); | ||||||
| for (int i = 0; i < length; i++) { | ||||||
| if (source.charAt(i) == '%') { | ||||||
| String str = source.substring(i + 1, i + 3); | ||||||
| char c = (char) Integer.parseInt(str, 16); | ||||||
| builder.append(c); | ||||||
| i += 2; | ||||||
| public static String percentDecode(final String source) { | ||||||
| if (source.isEmpty()) { | ||||||
| return source; | ||||||
| } | ||||||
|
|
||||||
| byte[] bytes = source.getBytes(StandardCharsets.UTF_8); | ||||||
jeremylong marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| int off = 0; | ||||||
| int idx = indexOfPercentChar(bytes, off); | ||||||
|
|
||||||
| if (idx == -1) { | ||||||
| return source; | ||||||
| } | ||||||
|
|
||||||
| ByteBuffer buffer = ByteBuffer.wrap(bytes); | ||||||
|
|
||||||
| while (true) { | ||||||
| int len = idx - off; | ||||||
|
|
||||||
| if (len > 0) { | ||||||
| buffer.put(bytes, off, len); | ||||||
| off += len; | ||||||
| } | ||||||
| else { | ||||||
| builder.append(source.charAt(i)); | ||||||
|
|
||||||
| buffer.put(percentDecode(bytes, off)); | ||||||
| off += 3; | ||||||
| idx = indexOfPercentChar(bytes, off); | ||||||
|
|
||||||
| if (idx == -1) { | ||||||
| int rem = bytes.length - off; | ||||||
|
|
||||||
| if (rem > 0) { | ||||||
| buffer.put(bytes, off, rem); | ||||||
| } | ||||||
|
|
||||||
| break; | ||||||
| } | ||||||
| } | ||||||
| return builder.toString(); | ||||||
|
|
||||||
| return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8); | ||||||
| } | ||||||
|
|
||||||
| @Deprecated | ||||||
| public String uriDecode(final String source) { | ||||||
| return source != null ? percentDecode(source) : null; | ||||||
| } | ||||||
|
|
||||||
| private static boolean isPercent(int c) { | ||||||
| return (c == PERCENT_CHAR); | ||||||
| } | ||||||
|
|
||||||
| private static byte[] percentEncode(byte b) { | ||||||
| byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); | ||||||
| byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)); | ||||||
| return new byte[] {(byte) PERCENT_CHAR, b1, b2}; | ||||||
| } | ||||||
|
|
||||||
| public static String percentEncode(final String source) { | ||||||
| if (source.isEmpty()) { | ||||||
| return source; | ||||||
| } | ||||||
|
|
||||||
| byte[] bytes = source.getBytes(StandardCharsets.UTF_8); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
The encoded URI should only contain ASCII characters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This results in incorrect behavior.
Meaning the proposed version would convert non-ASCII characters into The spec says that PURLs are ASCII strings, but it seems like all parsers except maennchen/purl accept Unicode inputs, and it's probably better to continue accepting Unicode inputs:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, the Java
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, use |
||||||
|
|
||||||
| int off = 0; | ||||||
| int idx = indexOfUnsafeChar(bytes, off); | ||||||
|
|
||||||
| if (idx == -1) { | ||||||
| return source; | ||||||
| } | ||||||
|
|
||||||
| ByteBuffer buffer = ByteBuffer.allocate(bytes.length * 3); | ||||||
|
|
||||||
| while (true) { | ||||||
| int len = idx - off; | ||||||
|
|
||||||
| if (len > 0) { | ||||||
| buffer.put(bytes, off, len); | ||||||
| off += len; | ||||||
| } | ||||||
|
|
||||||
| buffer.put(percentEncode(bytes[off++])); | ||||||
| idx = indexOfUnsafeChar(bytes, off); | ||||||
|
|
||||||
| if (idx == -1) { | ||||||
| int rem = bytes.length - off; | ||||||
|
|
||||||
| if (rem > 0) { | ||||||
| buffer.put(bytes, off, rem); | ||||||
| } | ||||||
|
|
||||||
| break; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
|
|
@@ -758,12 +831,12 @@ private void verifyTypeConstraints(String type, @Nullable String namespace, @Nul | |||||
| private String[] parsePath(final String path, final boolean isSubpath) { | ||||||
| return Arrays.stream(path.split("/")) | ||||||
| .filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment)))) | ||||||
| .map(this::percentDecode) | ||||||
| .map(PackageURL::percentDecode) | ||||||
| .toArray(String[]::new); | ||||||
| } | ||||||
|
|
||||||
| private String encodePath(final String path) { | ||||||
| return Arrays.stream(path.split("/")).map(this::percentEncode).collect(Collectors.joining("/")); | ||||||
| return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/")); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
|
|
@@ -894,5 +967,4 @@ private StandardTypes() { | |||||
|
|
||||||
| } | ||||||
| } | ||||||
|
|
||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,6 +71,31 @@ static void resetLocale() { | |
| Locale.setDefault(defaultLocale); | ||
| } | ||
|
|
||
| @Test | ||
| void validPercentEncoding() throws MalformedPackageURLException { | ||
| PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null); | ||
| assertEquals("pkg:maven/com.google.summit/[email protected]%0A", purl.toString()); | ||
| PackageURL purl2 = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5"); | ||
| assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl2.getName()); | ||
| assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString()); | ||
| } | ||
|
|
||
| @SuppressWarnings("deprecation") | ||
| @Test | ||
| void invalidPercentEncoding() throws MalformedPackageURLException { | ||
| assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/[email protected]%")); | ||
| assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/[email protected]%0")); | ||
| PackageURL purl = new PackageURL("pkg:maven/com.google.summit/[email protected]"); | ||
| Throwable t1 = assertThrows(ValidationException.class, () -> purl.uriDecode("%")); | ||
| assertEquals("Incomplete percent encoding at offset 0 with value '%'", t1.getMessage()); | ||
| Throwable t2 = assertThrows(ValidationException.class, () -> purl.uriDecode("a%0")); | ||
| assertEquals("Incomplete percent encoding at offset 1 with value '%0'", t2.getMessage()); | ||
| Throwable t3 = assertThrows(ValidationException.class, () -> purl.uriDecode("aaaa%%0A")); | ||
| assertEquals("Invalid percent encoding char 1 at offset 5 with value '%'", t3.getMessage()); | ||
| Throwable t4 = assertThrows(ValidationException.class, () -> purl.uriDecode("%0G")); | ||
| assertEquals("Invalid percent encoding char 2 at offset 2 with value 'G'", t4.getMessage()); | ||
| } | ||
|
|
||
| @Test | ||
| void constructorParsing() throws Exception { | ||
| for (int i = 0; i < json.length(); i++) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.