Skip to content

Commit c3f02af

Browse files
committed
Fix URL encoding and decoding
The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` didn't properly output two uppercase hex digits and `uriDecode` did not properly handle non-ASCII characters. Additionally, if no percent-encoding was performed, these methods will now return the original string. Fixes #150. Closes #153. Fixes #154.
1 parent dd5f743 commit c3f02af

File tree

2 files changed

+114
-63
lines changed

2 files changed

+114
-63
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 94 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.Serializable;
2727
import java.net.URI;
2828
import java.net.URISyntaxException;
29+
import java.nio.ByteBuffer;
2930
import java.nio.charset.StandardCharsets;
3031
import java.util.Arrays;
3132
import java.util.Collections;
@@ -34,6 +35,7 @@
3435
import java.util.TreeMap;
3536
import java.util.function.IntPredicate;
3637
import java.util.stream.Collectors;
38+
import java.util.stream.IntStream;
3739
import org.jspecify.annotations.Nullable;
3840

3941
/**
@@ -53,9 +55,10 @@
5355
* @since 1.0.0
5456
*/
5557
public final class PackageURL implements Serializable {
56-
5758
private static final long serialVersionUID = 3243226021636427586L;
5859

60+
private static final char PERCENT_CHAR = '%';
61+
5962
/**
6063
* Constructs a new PackageURL object by parsing the specified string.
6164
*
@@ -494,35 +497,14 @@ private String canonicalize(boolean coordinatesOnly) {
494497
return purl.toString();
495498
}
496499

497-
/**
498-
* Encodes the input in conformance with RFC 3986.
499-
*
500-
* @param input the String to encode
501-
* @return an encoded String
502-
*/
503-
private String percentEncode(final String input) {
504-
if (input.isEmpty()) {
505-
return input;
506-
}
507-
508-
StringBuilder builder = new StringBuilder();
509-
for (byte b : input.getBytes(StandardCharsets.UTF_8)) {
510-
if (isUnreserved(b)) {
511-
builder.append((char) b);
512-
}
513-
else {
514-
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
515-
builder.append('%');
516-
builder.append(Integer.toHexString(b).toUpperCase());
517-
}
518-
}
519-
return builder.toString();
520-
}
521-
522500
private static boolean isUnreserved(int c) {
523501
return (isValidCharForKey(c) || c == '~');
524502
}
525503

504+
private static boolean shouldEncode(int c) {
505+
return !isUnreserved(c);
506+
}
507+
526508
private static boolean isAlpha(int c) {
527509
return (isLowerCase(c) || isUpperCase(c));
528510
}
@@ -578,43 +560,93 @@ private static String toLowerCase(String s) {
578560
return new String(chars);
579561
}
580562

581-
/**
582-
* Optionally decodes a String, if it's encoded. If String is not encoded,
583-
* method will return the original input value.
584-
*
585-
* @param input the value String to decode
586-
* @return a decoded String
587-
*/
588-
private String percentDecode(final String input) {
589-
final String decoded = uriDecode(input);
590-
if (!decoded.equals(input)) {
591-
return decoded;
563+
private static String percentDecode(final String source) {
564+
if (source.isEmpty()) {
565+
return source;
592566
}
593-
return input;
594-
}
595567

596-
/**
597-
* Decodes a percent-encoded string.
598-
*
599-
* @param source string to decode, not {@code null}
600-
* @return A decoded string
601-
* @throws NullPointerException if {@code source} is {@code null}
602-
*/
603-
public static String uriDecode(String source) {
604-
int length = source.length();
605-
StringBuilder builder = new StringBuilder();
568+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
569+
int percentCharCount = getPercentCharCount(bytes);
570+
571+
if (percentCharCount == 0) {
572+
return source;
573+
}
574+
575+
int length = bytes.length;
576+
int capacity = (length + percentCharCount) - (percentCharCount * 3);
577+
578+
if (capacity <= 0) {
579+
throw new ValidationException("Invalid encoding in '" + source + "'");
580+
}
581+
582+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
583+
606584
for (int i = 0; i < length; i++) {
607-
if (source.charAt(i) == '%') {
608-
String str = source.substring(i + 1, i + 3);
609-
char c = (char) Integer.parseInt(str, 16);
610-
builder.append(c);
611-
i += 2;
585+
if (buffer.position() + 1 > capacity) {
586+
throw new ValidationException("Invalid encoding in '" + source + "'");
612587
}
613-
else {
614-
builder.append(source.charAt(i));
588+
589+
int b;
590+
591+
if (bytes[i] == PERCENT_CHAR) {
592+
int b1 = Character.digit(bytes[++i], 16);
593+
int b2 = Character.digit(bytes[++i], 16);
594+
b = (byte) ((b1 << 4) + b2);
595+
} else {
596+
b = bytes[i];
615597
}
598+
599+
buffer.put((byte) b);
616600
}
617-
return builder.toString();
601+
602+
return new String(buffer.array(), StandardCharsets.UTF_8);
603+
}
604+
605+
@Deprecated
606+
public String uriDecode(final String source) {
607+
return source != null ? percentDecode(source) : null;
608+
}
609+
610+
private static int getUnsafeCharCount(final byte[] bytes) {
611+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::shouldEncode).count();
612+
}
613+
614+
private static boolean isPercent(int c) {
615+
return (c == PERCENT_CHAR);
616+
}
617+
618+
private static int getPercentCharCount(final byte[] bytes) {
619+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::isPercent).count();
620+
}
621+
622+
private static String percentEncode(final String source) {
623+
if (source.isEmpty()) {
624+
return source;
625+
}
626+
627+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
628+
int unsafeCharCount = getUnsafeCharCount(bytes);
629+
630+
if (unsafeCharCount == 0) {
631+
return source;
632+
}
633+
634+
int length = bytes.length;
635+
int capacity = (length - unsafeCharCount) + (3 * unsafeCharCount);
636+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
637+
638+
for (byte b : bytes) {
639+
if (shouldEncode(b)) {
640+
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
641+
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
642+
byte[] encoded = {(byte) PERCENT_CHAR, b1, b2};
643+
buffer.put(encoded, 0, encoded.length);
644+
} else {
645+
buffer.put(b);
646+
}
647+
}
648+
649+
return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8);
618650
}
619651

620652
/**
@@ -691,9 +723,9 @@ private void parse(final String purl) throws MalformedPackageURLException {
691723
// The 'remainder' should now consist of an optional namespace and the name
692724
index = remainder.lastIndexOf('/');
693725
if (index <= start) {
694-
this.name = validateName(percentDecode(remainder.substring(start)));
726+
this.name = validateName(uriDecode(remainder.substring(start)));
695727
} else {
696-
this.name = validateName(percentDecode(remainder.substring(index + 1)));
728+
this.name = validateName(uriDecode(remainder.substring(index + 1)));
697729
remainder = remainder.substring(0, index);
698730
this.namespace = validateNamespace(parsePath(remainder.substring(start), false));
699731
}
@@ -743,7 +775,7 @@ private void verifyTypeConstraints(String type, @Nullable String namespace, @Nul
743775
final String[] entry = value.split("=", 2);
744776
if (entry.length == 2 && !entry[1].isEmpty()) {
745777
String key = toLowerCase(entry[0]);
746-
if (map.put(key, percentDecode(entry[1])) != null) {
778+
if (map.put(key, uriDecode(entry[1])) != null) {
747779
throw new ValidationException("Duplicate package qualifier encountered. More then one value was specified for " + key);
748780
}
749781
}
@@ -758,12 +790,12 @@ private void verifyTypeConstraints(String type, @Nullable String namespace, @Nul
758790
private String[] parsePath(final String path, final boolean isSubpath) {
759791
return Arrays.stream(path.split("/"))
760792
.filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment))))
761-
.map(this::percentDecode)
793+
.map(PackageURL::percentDecode)
762794
.toArray(String[]::new);
763795
}
764796

765797
private String encodePath(final String path) {
766-
return Arrays.stream(path.split("/")).map(this::percentEncode).collect(Collectors.joining("/"));
798+
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
767799
}
768800

769801
/**
@@ -894,5 +926,4 @@ private StandardTypes() {
894926

895927
}
896928
}
897-
898929
}

src/test/java/com/github/packageurl/PackageURLTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,26 @@ static void resetLocale() {
7171
Locale.setDefault(defaultLocale);
7272
}
7373

74+
@Test
75+
void validPercentEncoding() throws MalformedPackageURLException {
76+
PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null);
77+
assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString());
78+
PackageURL purl2 = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5");
79+
assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl2.getName());
80+
assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString());
81+
}
82+
83+
@SuppressWarnings("deprecation")
84+
@Test
85+
void invalidPercentEncoding() throws MalformedPackageURLException {
86+
assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%"));
87+
assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0"));
88+
PackageURL purl = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0");
89+
assertThrows(ValidationException.class, () -> purl.uriDecode("%"));
90+
assertThrows(ValidationException.class, () -> purl.uriDecode("%0"));
91+
assertThrows(ValidationException.class, () -> purl.uriDecode("aaaa%0%"));
92+
}
93+
7494
@Test
7595
void constructorParsing() throws Exception {
7696
for (int i = 0; i < json.length(); i++) {

0 commit comments

Comments
 (0)