diff --git a/user/super/com/google/gwt/emul/java/lang/CaseMapper.java b/user/super/com/google/gwt/emul/java/lang/CaseMapper.java
index b3413a8a28..ba8bcf2324 100644
--- a/user/super/com/google/gwt/emul/java/lang/CaseMapper.java
+++ b/user/super/com/google/gwt/emul/java/lang/CaseMapper.java
@@ -23,7 +23,23 @@ public static char charToLowerCase(char c) {
}
public static char charToUpperCase(char c) {
- return String.valueOf(c).toUpperCase().charAt(0);
+ String upper = String.valueOf(c).toUpperCase();
+ return hasExtraCodePoints(upper) ? c : upper.charAt(0);
+ }
+
+ public static int intToLowerCase(int codePoint) {
+ return String.NativeString.fromCodePoint(codePoint).toLowerCase().codePointAt(0);
+ }
+
+ public static int intToUpperCase(int codePoint) {
+ String upper = String.NativeString.fromCodePoint(codePoint).toUpperCase();
+ return hasExtraCodePoints(upper) ? codePoint : upper.codePointAt(0);
+ }
+
+ // If String.toUpperCase yields more than 1 code point, Character.toUpperCase should act as
+ // identity (title-case is not supported in GWT). A lone surrogate pair is still 1 code point.
+ private static boolean hasExtraCodePoints(String str) {
+ return str.length() > (str.asNativeString().codePointAt(0) > 0xFFFF ? 2 : 1);
}
private CaseMapper() {}
diff --git a/user/super/com/google/gwt/emul/java/lang/Character.java b/user/super/com/google/gwt/emul/java/lang/Character.java
index d675b1719e..767e125a3b 100644
--- a/user/super/com/google/gwt/emul/java/lang/Character.java
+++ b/user/super/com/google/gwt/emul/java/lang/Character.java
@@ -18,32 +18,34 @@
import static javaemul.internal.InternalPreconditions.checkCriticalArgument;
import java.io.Serializable;
+import java.util.Arrays;
+
import javaemul.internal.NativeRegExp;
import javaemul.internal.annotations.HasNoSideEffects;
/**
* Wraps a native <code>char</code> as an object.
*
- * TODO(jat): many of the classification methods implemented here are not
- * correct in that they only handle ASCII characters, and many other methods
- * are not currently implemented. I think the proper approach is to introduce
- * a deferred binding parameter which substitutes an implementation using
- * a fully-correct Unicode character database, at the expense of additional
- * data being downloaded. That way developers that need the functionality
- * can get it without those who don't need it paying for it.
+ *
+ * Some methods are not possible to implement without a Unicode database,
+ * which would blow up the code size.
+ *
+ * Methods such as isLetter, isDigit, ... use the JS native API for Unicode.
+ * Their output is only consistent with JVMs that have the same Unicode support
+ * as the target browser. As of 2025, most browsers provide Unicode 16.0 support
+ * which is on par with OpenJDK 24.
+ *
+ *
+ * See the conformance table for details.
+ *
*
*
* The following methods are still not implemented -- most would require Unicode
* character db to be useful:
- * - digit / is* / to*(int codePoint)
- * - isDefined(char)
* - isIdentifierIgnorable(char)
* - isJavaIdentifierPart(char)
* - isJavaIdentifierStart(char)
* - isJavaLetter(char) -- deprecated, so probably not
* - isJavaLetterOrDigit(char) -- deprecated, so probably not
- * - isISOControl(char)
- * - isMirrored(char)
- * - isSpaceChar(char)
* - isUnicodeIdentifierPart(char)
* - isUnicodeIdentifierStart(char)
* - getDirectionality(*)
@@ -52,14 +54,6 @@
* - reverseBytes(char) -- any use for this at all in the browser?
* - toTitleCase(*)
* - all the category constants for classification
- *
- * The following do not properly handle characters outside of ASCII:
- * - digit(char c, int radix)
- * - isDigit(char c)
- * - isLetter(char c)
- * - isLetterOrDigit(char c)
- * - isLowerCase(char c)
- * - isUpperCase(char c)
*
*/
public final class Character implements Comparable, Serializable {
@@ -72,11 +66,11 @@ static class CharSequenceAdapter implements CharSequence {
private int start;
private int end;
- public CharSequenceAdapter(char[] charArray) {
+ CharSequenceAdapter(char[] charArray) {
this(charArray, 0, charArray.length);
}
- public CharSequenceAdapter(char[] charArray, int start, int end) {
+ CharSequenceAdapter(char[] charArray, int start, int end) {
this.charArray = charArray;
this.start = start;
this.end = end;
@@ -190,28 +184,31 @@ public static int compare(char x, char y) {
return x - y;
}
- /*
- * TODO: correct Unicode handling.
- */
public static int digit(char c, int radix) {
+ return digit((int) c, radix);
+ }
+
+ public static int digit(int codePoint, int radix) {
if (radix < MIN_RADIX || radix > MAX_RADIX) {
return -1;
}
-
- if (c >= '0' && c < '0' + Math.min(radix, 10)) {
- return c - '0';
- }
-
- // The offset by 10 is to re-base the alpha values
- if (c >= 'a' && c < (radix + 'a' - 10)) {
- return c - 'a' + 10;
- }
-
- if (c >= 'A' && c < (radix + 'A' - 10)) {
- return c - 'A' + 10;
+ int digit;
+ if (isDigit(codePoint)) {
+ // we don't have to list all representations of 0, if two consecutive ones are the same
+ // mod 16 we only list the first one
+ int[] zeros = {0x30, 0x966, 0xe50, 0x1946, 0x19d0, 0x11066, 0x110f0, 0x11136,
+ 0x111d0, 0x116da, 0x11730, 0x1d7ce, 0x1d7d8, 0x1d7e2, 0x1d7ec, 0x1d7f6, 0x1e140,
+ 0x1e5f1, 0x1e950};
+ int pos = Arrays.binarySearch(zeros, codePoint);
+ digit = pos >= 0 ? 0 : ((codePoint - zeros[-pos - 2]) & 0xf);
+ } else if (codePoint >= 'a' && codePoint <= 'z' || codePoint >= 'A' && codePoint <= 'Z'
+ || codePoint >= 0xff21 && codePoint <= 0xff3a
+ || codePoint >= 0xff41 && codePoint <= 0xff5a) {
+ digit = (codePoint & 0x1f) + 9;
+ } else {
+ return -1;
}
-
- return -1;
+ return digit >= radix ? -1 : digit;
}
public static char forDigit(int digit, int radix) {
@@ -234,57 +231,143 @@ public static boolean isBmpCodePoint(int codePoint) {
return codePoint >= MIN_VALUE && codePoint <= MAX_VALUE;
}
+ private static NativeRegExp definedRegex;
+
+ public static boolean isDefined(char c) {
+ return isDefined(String.valueOf(c));
+ }
+
+ public static boolean isDefined(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isDefined(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isDefined(String str) {
+ if (definedRegex == null) {
+ definedRegex = new NativeRegExp("\\P{Cn}", "u");
+ }
+ return definedRegex.test(str);
+ }
+
private static NativeRegExp digitRegex;
- /*
- * TODO: correct Unicode handling.
- */
public static boolean isDigit(char c) {
+ return isDigit(String.valueOf(c));
+ }
+
+ public static boolean isDigit(int codePoint) {
+ return isValidCodePoint(codePoint) && isDigit(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isDigit(String str) {
if (digitRegex == null) {
- digitRegex = new NativeRegExp("\\d");
+ digitRegex = new NativeRegExp("\\p{Nd}", "u");
}
- return digitRegex.test(String.valueOf(c));
+ return digitRegex.test(str);
}
public static boolean isHighSurrogate(char ch) {
return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
}
+ private static NativeRegExp ideographicRegex;
+
+ public static boolean isIdeographic(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isIdeographic(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isIdeographic(String str) {
+ if (ideographicRegex == null) {
+ ideographicRegex = new NativeRegExp("\\p{Ideographic}", "u");
+ }
+ return ideographicRegex.test(str);
+ }
+
private static NativeRegExp leterRegex;
- /*
- * TODO: correct Unicode handling.
- */
public static boolean isLetter(char c) {
+ return isLetter(String.valueOf(c));
+ }
+
+ public static boolean isLetter(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isLetter(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isLetter(String str) {
if (leterRegex == null) {
- leterRegex = new NativeRegExp("[A-Z]", "i");
+ leterRegex = new NativeRegExp("\\p{L}", "u");
}
- return leterRegex.test(String.valueOf(c));
+ return leterRegex.test(str);
}
private static NativeRegExp isLeterOrDigitRegex;
- /*
- * TODO: correct Unicode handling.
- */
public static boolean isLetterOrDigit(char c) {
+ return isLetterOrDigit(String.valueOf(c));
+ }
+
+ public static boolean isLetterOrDigit(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isLetterOrDigit(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isLetterOrDigit(String str) {
if (isLeterOrDigitRegex == null) {
- isLeterOrDigitRegex = new NativeRegExp("[A-Z\\d]", "i");
+ isLeterOrDigitRegex = new NativeRegExp("[\\p{Nd}\\p{L}]", "u");
}
- return isLeterOrDigitRegex.test(String.valueOf(c));
+ return isLeterOrDigitRegex.test(str);
}
- /*
- * TODO: correct Unicode handling.
- */
+ private static NativeRegExp lowerCaseRegex;
+
public static boolean isLowerCase(char c) {
- return toLowerCase(c) == c && isLetter(c);
+ return isLowerCase(String.valueOf(c));
+ }
+
+ public static boolean isLowerCase(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isLowerCase(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isLowerCase(String str) {
+ if (lowerCaseRegex == null) {
+ lowerCaseRegex = new NativeRegExp("\\p{Lowercase}", "u");
+ }
+ return lowerCaseRegex.test(str);
}
public static boolean isLowSurrogate(char ch) {
return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
}
+ private static NativeRegExp mirroredRegex;
+
+ public static boolean isMirrored(char c) {
+ return isMirrored(String.valueOf(c));
+ }
+
+ public static boolean isMirrored(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isMirrored(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isMirrored(String str) {
+ if (mirroredRegex == null) {
+ mirroredRegex = new NativeRegExp("\\p{Bidi_Mirrored}", "u");
+ }
+ return mirroredRegex.test(str);
+ }
+
+ public static boolean isISOControl(char ch) {
+ return ch <= '\u001F' || (ch >= '\u007F' && ch <= '\u009F');
+ }
+
+ public static boolean isISOControl(int codePoint) {
+ return codePoint <= 0x9F && (codePoint >= 0x7F || (codePoint >>> 5 == 0)); // rejects negatives
+ }
+
/**
* Deprecated - see isWhitespace(char).
*/
@@ -306,12 +389,35 @@ public static boolean isSpace(char c) {
}
}
+ private static NativeRegExp spaceRegex;
+
+ public static boolean isSpaceChar(char c) {
+ return isSpaceChar(String.valueOf(c));
+ }
+
+ public static boolean isSpaceChar(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isSpaceChar(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isSpaceChar(String str) {
+ if (spaceRegex == null) {
+ spaceRegex = new NativeRegExp("\\p{Z}", "u");
+ }
+ return spaceRegex.test(str);
+ }
+
+ public static boolean isSurrogate(char ch) {
+ return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
+ }
+
public static boolean isWhitespace(char ch) {
return isWhitespace(String.valueOf(ch));
}
public static boolean isWhitespace(int codePoint) {
- return isWhitespace(String.fromCodePoint(codePoint));
+ return isValidCodePoint(codePoint)
+ && isWhitespace(String.NativeString.fromCodePoint(codePoint));
}
private static NativeRegExp whitespaceRegex;
@@ -339,14 +445,31 @@ public static boolean isSurrogatePair(char highSurrogate, char lowSurrogate) {
public static boolean isTitleCase(char c) {
// https://www.compart.com/en/unicode/category/Lt
- return c != toUpperCase(c) && c != toLowerCase(c);
+ // here we should use the semantic of String.toUpperCase
+ return c != String.valueOf(c).toUpperCase().charAt(0) && c != toLowerCase(c);
}
- /*
- * TODO: correct Unicode handling.
- */
+ public static boolean isTitleCase(int codePoint) {
+ // as of Unicode 16 there are no title-case chars beyond 0xffff
+ return codePoint > 0 && codePoint < 0xffff && isTitleCase((char) codePoint);
+ }
+
+ private static NativeRegExp upperCaseRegex;
+
public static boolean isUpperCase(char c) {
- return toUpperCase(c) == c && isLetter(c);
+ return isUpperCase(String.valueOf(c));
+ }
+
+ public static boolean isUpperCase(int codePoint) {
+ return isValidCodePoint(codePoint)
+ && isUpperCase(String.NativeString.fromCodePoint(codePoint));
+ }
+
+ private static boolean isUpperCase(String c) {
+ if (upperCaseRegex == null) {
+ upperCaseRegex = new NativeRegExp("\\p{Uppercase}", "u");
+ }
+ return upperCaseRegex.test(c);
}
public static boolean isValidCodePoint(int codePoint) {
@@ -390,8 +513,8 @@ public static char[] toChars(int codePoint) {
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
return new char[] {
- getHighSurrogate(codePoint),
- getLowSurrogate(codePoint),
+ highSurrogate(codePoint),
+ lowSurrogate(codePoint),
};
} else {
return new char[] {
@@ -404,8 +527,8 @@ public static int toChars(int codePoint, char[] dst, int dstIndex) {
checkCriticalArgument(codePoint >= 0 && codePoint <= MAX_CODE_POINT);
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
- dst[dstIndex++] = getHighSurrogate(codePoint);
- dst[dstIndex] = getLowSurrogate(codePoint);
+ dst[dstIndex++] = highSurrogate(codePoint);
+ dst[dstIndex] = lowSurrogate(codePoint);
return 2;
} else {
dst[dstIndex] = (char) codePoint;
@@ -426,14 +549,36 @@ public static char toLowerCase(char c) {
return CaseMapper.charToLowerCase(c);
}
+ public static int toLowerCase(int codePoint) {
+ if (!isValidCodePoint(codePoint)) {
+ return codePoint; // same guard as toUpperCase(int): never pass an invalid value to JS
+ }
+ return CaseMapper.intToLowerCase(codePoint);
+ }
+
public static String toString(char x) {
return String.valueOf(x);
}
+ public static String toString(int codePoint) {
+ if (isValidCodePoint(codePoint)) {
+ return String.NativeString.fromCodePoint(codePoint);
+ } else {
+ throw new IllegalArgumentException("Invalid code point: " + codePoint);
+ }
+ }
+
public static char toUpperCase(char c) {
return CaseMapper.charToUpperCase(c);
}
+ public static int toUpperCase(int codePoint) {
+ if (!isValidCodePoint(codePoint)) {
+ return codePoint;
+ }
+ return CaseMapper.intToUpperCase(codePoint);
+ }
+
public static Character valueOf(char c) {
if (c < 128) {
return BoxedValues.get(c);
@@ -473,26 +618,26 @@ static char forDigit(int digit) {
/**
* Computes the high surrogate character of the UTF16 representation of a
- * non-BMP code point. See {@link getLowSurrogate}.
+ * non-BMP code point. See {@link #lowSurrogate}.
*
* @param codePoint requested codePoint, required to be >=
* MIN_SUPPLEMENTARY_CODE_POINT
* @return high surrogate character
*/
- static char getHighSurrogate(int codePoint) {
+ public static char highSurrogate(int codePoint) {
return (char) (MIN_HIGH_SURROGATE
+ (((codePoint - MIN_SUPPLEMENTARY_CODE_POINT) >> 10) & 1023));
}
/**
* Computes the low surrogate character of the UTF16 representation of a
- * non-BMP code point. See {@link getHighSurrogate}.
+ * non-BMP code point. See {@link #highSurrogate}.
*
* @param codePoint requested codePoint, required to be >=
* MIN_SUPPLEMENTARY_CODE_POINT
* @return low surrogate character
*/
- static char getLowSurrogate(int codePoint) {
+ public static char lowSurrogate(int codePoint) {
return (char) (MIN_LOW_SURROGATE + ((codePoint - MIN_SUPPLEMENTARY_CODE_POINT) & 1023));
}
diff --git a/user/super/com/google/gwt/emul/java/lang/String.java b/user/super/com/google/gwt/emul/java/lang/String.java
index 693f53bdc7..616be7aec0 100644
--- a/user/super/com/google/gwt/emul/java/lang/String.java
+++ b/user/super/com/google/gwt/emul/java/lang/String.java
@@ -226,17 +226,6 @@ private static Charset getCharset(String charsetName) throws UnsupportedEncoding
}
}
- static String fromCodePoint(int codePoint) {
- if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
- char hiSurrogate = Character.getHighSurrogate(codePoint);
- char loSurrogate = Character.getLowSurrogate(codePoint);
- return String.valueOf(hiSurrogate)
- + String.valueOf(loSurrogate);
- } else {
- return String.valueOf((char) codePoint);
- }
- }
-
public String() {
/*
* Call to $create(args) must be here so that the method is referenced and not
@@ -356,7 +345,7 @@ public String(StringBuilder sb) {
$create(sb);
}
- private NativeString asNativeString() {
+ NativeString asNativeString() {
return JsUtils.uncheckedCast(this);
}
@@ -469,11 +458,17 @@ public int hashCode() {
}
public int indexOf(int codePoint) {
- return indexOf(fromCodePoint(codePoint));
+ if (!Character.isValidCodePoint(codePoint)) {
+ return -1; // also covers negative values, which String.fromCodePoint rejects
+ }
+ return indexOf(NativeString.fromCodePoint(codePoint));
}
public int indexOf(int codePoint, int startIndex) {
- return indexOf(fromCodePoint(codePoint), startIndex);
+ if (!Character.isValidCodePoint(codePoint)) {
+ return -1; // also covers negative values, which String.fromCodePoint rejects
+ }
+ return indexOf(NativeString.fromCodePoint(codePoint), startIndex);
}
public int indexOf(String str) {
@@ -493,11 +488,17 @@ public boolean isEmpty() {
}
public int lastIndexOf(int codePoint) {
- return lastIndexOf(fromCodePoint(codePoint));
+ if (!Character.isValidCodePoint(codePoint)) {
+ return -1; // also covers negative values, which String.fromCodePoint rejects
+ }
+ return lastIndexOf(NativeString.fromCodePoint(codePoint));
}
public int lastIndexOf(int codePoint, int startIndex) {
- return lastIndexOf(fromCodePoint(codePoint), startIndex);
+ if (!Character.isValidCodePoint(codePoint)) {
+ return -1; // also covers negative values, which String.fromCodePoint rejects
+ }
+ return lastIndexOf(NativeString.fromCodePoint(codePoint), startIndex);
}
public int lastIndexOf(String str) {
@@ -847,10 +848,12 @@ private int cappedIndexOf(char c) {
}
@JsType(isNative = true, name = "String", namespace = "")
- private static class NativeString {
+ static class NativeString {
public static native String fromCharCode(char x);
+ public static native String fromCodePoint(int codePoint);
public int length;
public native char charCodeAt(int index);
+ public native int codePointAt(int index);
public native int indexOf(String str);
public native int indexOf(String str, int startIndex);
public native int lastIndexOf(String str);
diff --git a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
index a2ccc9e6ad..507038ad19 100644
--- a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
+++ b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
@@ -15,25 +15,33 @@
*/
package com.google.gwt.emultest.java.lang;
+import com.google.gwt.junit.DoNotRunWith;
+import com.google.gwt.junit.Platform;
import com.google.gwt.junit.client.GWTTestCase;
import java.util.Arrays;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
/**
* Tests for java.lang.Character.
*/
+@DoNotRunWith(Platform.HtmlUnitBug)
public class CharacterTest extends GWTTestCase {
+ private static final char NUM_CHARS_HANDLED = 127;
+
private static class CharSequenceAdapter implements CharSequence {
private char[] charArray;
private int start;
private int end;
- public CharSequenceAdapter(char[] charArray) {
+ CharSequenceAdapter(char[] charArray) {
this(charArray, 0, charArray.length);
}
- public CharSequenceAdapter(char[] charArray, int start, int end) {
+ CharSequenceAdapter(char[] charArray, int start, int end) {
this.charArray = charArray;
this.start = start;
this.end = end;
@@ -57,123 +65,49 @@ public java.lang.CharSequence subSequence(int start, int end) {
}
/**
- * Helper class which applies some arbitrary char mutation function
- * to a string and returns it.
+ * Helper method which counts ASCII characters matching a predicate.
*/
- public abstract class Changer {
- String original;
-
- public Changer(String o) {
- original = o;
- }
-
- public abstract char change(char c);
-
- public String changed() {
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < original.length(); i++) {
- buf.append(change(original.charAt(i)));
+ public static int countAscii(Predicate<Character> test) {
+ int count = 0;
+ for (char ch = 0; ch < NUM_CHARS_HANDLED; ch++) {
+ if (test.test(ch)) {
+ count++;
}
- return buf.toString();
}
+ return count;
}
- /**
- * Helper class which collects the set of characters which pass some
- * arbitrary boolean function.
- */
- public abstract class Judge {
- String original;
-
- public Judge(String o) {
- original = o;
- }
- public String allPass() {
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < original.length(); i++) {
- if (pass(original.charAt(i))) {
- buf.append(original.charAt(i));
- }
+ public static int countAscii(Function<Character, Character> transformer,
+ Predicate<Character> test) {
+ int count = 0;
+ for (char ch = 0; ch < NUM_CHARS_HANDLED; ch++) {
+ if (test.test(transformer.apply(ch))) {
+ count++;
}
- return buf.toString();
- }
-
- public abstract boolean pass(char c);
- }
-
- class LowerCaseJudge extends Judge {
- public LowerCaseJudge(String s) {
- super(s);
- }
-
- @Override
- public boolean pass(char c) {
- return Character.isLowerCase(c);
}
+ return count;
}
- class UpperCaseJudge extends Judge {
- public UpperCaseJudge(String s) {
- super(s);
- }
-
- @Override
- public boolean pass(char c) {
- return Character.isUpperCase(c);
+ public static int countUnicode(int[] codePoints, Predicate<Integer> test) {
+ int c = 0;
+ for (int i: codePoints) {
+ if (test.test(i)) {
+ c++;
+ }
}
+ return c;
}
- public static String allChars;
-
- public static final int NUM_CHARS_HANDLED = 127;
-
- static {
- StringBuffer b = new StringBuffer();
- for (char c = 0; c < NUM_CHARS_HANDLED; c++) {
- b.append(c);
- }
- allChars = b.toString();
- }
-
- Judge digitJudge = new Judge(allChars) {
- @Override
- public boolean pass(char c) {
- return Character.isDigit(c);
- }
- };
- Judge letterJudge = new Judge(allChars) {
- @Override
- public boolean pass(char c) {
- return Character.isLetter(c);
- }
- };
- Judge letterOrDigitJudge = new Judge(allChars) {
- @Override
- public boolean pass(char c) {
- return Character.isLetterOrDigit(c);
- }
- };
- Changer lowerCaseChanger = new Changer(allChars) {
- @Override
- public char change(char c) {
- return Character.toLowerCase(c);
- }
- };
- Judge lowerCaseJudge = new LowerCaseJudge(allChars);
- Judge spaceJudge = new Judge(allChars) {
- @Override
- @SuppressWarnings("deprecation") // Character.isSpace()
- public boolean pass(char c) {
- return Character.isSpace(c); // suppress deprecation
- }
- };
- Changer upperCaseChanger = new Changer(allChars) {
- @Override
- public char change(char c) {
- return Character.toUpperCase(c);
- }
- };
- Judge upperCaseJudge = new UpperCaseJudge(allChars);
+ int[] letters = {'a', 'z', 'A', 'Z', 0x2c6, 0x2d1, 0x10380, 0x1039d};
+ int[] digits = {'0', '9', 0x660, 0x669, 0x104a0, 0x104a9};
+ int[] spaces = {' ', '\u00a0', '\u2028'};
+ int[] controls = {0, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0x1f, 0x7f, 0x9f};
+ int[] punctuation = {'@', '.'};
+ int[] symbols = {0x2c5};
+ int[] marks = {0x659, 0x10a39, 0x10379};
+ int[] others = {-1, Character.MAX_CODE_POINT + 1};
+ int[] allCodePoints = Stream.of(letters, digits, spaces, controls, punctuation, marks,
+ symbols, others).flatMapToInt(Arrays::stream).toArray();
@Override
public String getModuleName() {
@@ -267,7 +201,7 @@ public void testConstructor() {
}
public void testDigit() {
- assertEquals("wrong number of digits", 10, digitJudge.allPass().length());
+ assertEquals("wrong number of digits", 10, countAscii(Character::isDigit));
}
public void testSurrogates() {
@@ -298,34 +232,52 @@ public void testSurrogates() {
}
public void testLetter() {
- assertEquals("wrong number of letters", 52, letterJudge.allPass().length());
+ assertEquals("wrong number of letters", 52, countAscii(Character::isLetter));
}
public void testLetterOrDigit() {
- assertEquals("wrong number of letters", 62,
- letterOrDigitJudge.allPass().length());
+ assertEquals("wrong number of letters + digits", 62,
+ countAscii(Character::isLetterOrDigit));
}
public void testLowerCase() {
assertEquals("wrong number of lowercase letters", 26,
- lowerCaseJudge.allPass().length());
+ countAscii(Character::isLowerCase));
assertEquals("wrong number of lowercase letters after toLowerCase", 52,
- new LowerCaseJudge(lowerCaseChanger.changed()).allPass().length());
+ countAscii(Character::toLowerCase, Character::isLowerCase));
}
+ @SuppressWarnings("deprecation")
public void testSpace() {
- assertEquals("wrong number of spaces", 5, spaceJudge.allPass().length());
+ assertEquals("wrong number of spaces", 5, countAscii(Character::isSpace));
}
public void testToFromDigit() {
for (int i = 0; i < 16; i++) {
assertEquals(i, Character.digit(Character.forDigit(i, 16), 16));
}
- assertEquals(1, Character.digit('1', 10));
- assertEquals('9', Character.forDigit(9, 10));
- assertEquals(-1, Character.digit('7', 6));
- assertEquals(-1, Character.digit('8', 8));
- assertEquals(-1, Character.digit('A', 10));
+ assertEquals(1, Character.digit(hideFromCompiler('1'), 10));
+ assertEquals('9', Character.forDigit(hideFromCompiler(9), 10));
+ assertEquals(-1, Character.digit(hideFromCompiler('7'), 6));
+ assertEquals(-1, Character.digit(hideFromCompiler('8'), 8));
+ assertEquals(-1, Character.digit(hideFromCompiler('A'), 10));
+ assertEquals(35, Character.digit(hideFromCompiler('Z'), 36));
+ }
+
+ public void testToFromDigitInt() {
+ int[] zeros = {48, 1632, 1776, 1984, 2406, 2534, 2662, 2790,
+ 2918, 3046, 3174, 3302, 3430, 3558, 3664, 3792, 3872, 4160, 4240, 6112, 6160, 6470, 6608,
+ 6784, 6800, 6992, 7088, 7232, 7248, 42528, 43216, 43264, 43472, 43504, 43600, 44016,
+ 65296, 66720, 69734, 69872, 69942, 70096, 70384, 70736, 70864, 71248, 71360, 71472,
+ 71904, 72784, 73040, 92768, 93008, 120782, 120792, 120802, 120812, 120822, 125264};
+
+ for (int zero: zeros) {
+ assertEquals(0, Character.digit(zero, 10));
+ }
+ assertEquals(35, Character.digit(hideFromCompiler(65338), 36));
+ assertEquals(35, Character.digit(hideFromCompiler(65370), 36));
+ assertEquals("only letters and digits have numeric value", 0,
+ countUnicode(punctuation, c -> Character.digit(c, 10) != -1));
}
@SuppressWarnings("deprecation")
@@ -472,12 +424,90 @@ public void testToString() {
public void testUpperCase() {
assertEquals("wrong number of uppercase letters", 26,
- upperCaseJudge.allPass().length());
+ countAscii(Character::isUpperCase));
assertEquals("wrong number of uppercase letters after toUpperCase", 52,
- new UpperCaseJudge(upperCaseChanger.changed()).allPass().length());
+ countAscii(Character::toUpperCase, Character::isUpperCase));
}
public void testValueOf() {
assertEquals('A', Character.valueOf('A').charValue());
}
+
+ public void testIsLetterInt() {
+ assertEquals("No other characters should be letters",
+ letters.length, countUnicode(allCodePoints, Character::isLetter));
+ assertEquals("Unicode letters should be recognized",
+ letters.length, countUnicode(letters, Character::isLetter));
+ }
+
+ public void testIsDigitInt() {
+ assertEquals("Unicode digits should be recognized",
+ digits.length, countUnicode(digits, Character::isDigit));
+ assertEquals("No other characters should be digits",
+ digits.length, countUnicode(allCodePoints, Character::isDigit));
+ }
+
+ public void testIsDigitOrLetterInt() {
+ assertEquals("Unicode digits should be recognized",
+ digits.length, countUnicode(digits, Character::isLetterOrDigit));
+ assertEquals("Unicode letters should be recognized",
+ letters.length, countUnicode(letters, Character::isLetterOrDigit));
+ assertEquals("No other characters should match letter or digit",
+ digits.length + letters.length,
+ countUnicode(allCodePoints, Character::isLetterOrDigit));
+ }
+
+ public void testIsSpaceCharInt() {
+ assertEquals("Unicode spaces should be recognized",
+ spaces.length, countUnicode(spaces, Character::isSpaceChar));
+ assertEquals("No other characters should match space",
+ spaces.length,
+ countUnicode(allCodePoints, Character::isSpaceChar));
+ }
+
+ public void testIsDefined() {
+ assertEquals("Should recognize defined characters",
+ allCodePoints.length - others.length,
+ countUnicode(allCodePoints, Character::isDefined));
+ assertEquals("No other characters should be defined",
+ 0,
+ countUnicode(others, Character::isDefined));
+ }
+
+ public void testIsISOControl() {
+ assertTrue(Character.isISOControl(hideFromCompiler((char) 0)));
+ assertTrue(Character.isISOControl(hideFromCompiler((char) 0x1f)));
+ assertFalse(Character.isISOControl(hideFromCompiler((char) 0x20)));
+ assertFalse(Character.isISOControl(hideFromCompiler((char) 0x7E)));
+ assertTrue(Character.isISOControl(hideFromCompiler((char) 0x7F)));
+ assertTrue(Character.isISOControl(hideFromCompiler((char) 0x9F)));
+ assertFalse(Character.isISOControl(hideFromCompiler((char) 0xA0)));
+ }
+
+ public void testIsISOControlInt() {
+ assertTrue(Character.isISOControl(hideFromCompiler(0)));
+ assertTrue(Character.isISOControl(hideFromCompiler(0x1f)));
+ assertFalse(Character.isISOControl(hideFromCompiler(0x20)));
+ assertFalse(Character.isISOControl(hideFromCompiler(0x7E)));
+ assertTrue(Character.isISOControl(hideFromCompiler(0x7F)));
+ assertTrue(Character.isISOControl(hideFromCompiler(0x9F)));
+ assertFalse(Character.isISOControl(hideFromCompiler(0xA0)));
+ }
+
+ public void testIsSurrogate() {
+ assertFalse(Character.isSurrogate(hideFromCompiler((char) 0)));
+ assertTrue(Character.isSurrogate(hideFromCompiler(Character.MIN_HIGH_SURROGATE)));
+ assertTrue(Character.isSurrogate(hideFromCompiler(Character.MAX_HIGH_SURROGATE)));
+ assertTrue(Character.isSurrogate(hideFromCompiler(Character.MIN_LOW_SURROGATE)));
+ assertTrue(Character.isSurrogate(hideFromCompiler(Character.MAX_LOW_SURROGATE)));
+ assertFalse(Character.isSurrogate(hideFromCompiler((char) (Character.MAX_LOW_SURROGATE + 1))));
+ }
+
+ protected <T> T hideFromCompiler(T value) {
+ if (Math.random() < -1) {
+ // Can never happen, but fools the compiler enough not to optimize this call.
+ fail();
+ }
+ return value;
+ }
}
diff --git a/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java b/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java
index fc58bdcbde..66042a2f58 100644
--- a/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java
+++ b/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java
@@ -137,7 +137,7 @@ public void testHashCode() {
for (int j = 0; j < str.length(); ++j) {
char ch = str.charAt(j);
assertTrue("Bad character '" + ch + "' (U+0" + Integer.toHexString(ch)
- + ")", ch == '-' || ch == ' ' || Character.isDigit(ch));
+ + ")", ch == '-' || ch == ' ' || (ch >= '0' && ch <= '9'));
}
}
// verify the hash codes are constant for a given string
diff --git a/user/test/com/google/gwt/emultest/java/lang/StringTest.java b/user/test/com/google/gwt/emultest/java/lang/StringTest.java
index ea12fee74c..a1ab8f7612 100644
--- a/user/test/com/google/gwt/emultest/java/lang/StringTest.java
+++ b/user/test/com/google/gwt/emultest/java/lang/StringTest.java
@@ -513,7 +513,7 @@ public void testHashCode() {
for (int j = 0; j < str.length(); ++j) {
char ch = str.charAt(j);
assertTrue("Bad character '" + ch + "' (U+0" + Integer.toHexString(ch)
- + ")", ch == '-' || ch == ' ' || Character.isDigit(ch));
+ + ")", ch == '-' || ch == ' ' || (ch >= '0' && ch <= '9'));
}
// get hashes again to verify the values are constant for a given string
diff --git a/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java b/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java
index d34fed2061..3da0287d72 100644
--- a/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java
+++ b/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java
@@ -36,7 +36,7 @@ public void testSupported() {
int idx = ua.indexOf("firefox/") + 8;
int endIdx = idx;
int len = ua.length();
- while (endIdx < len && Character.isDigit(ua.charAt(endIdx))) {
+ while (endIdx < len && '0' <= ua.charAt(endIdx) && '9' >= ua.charAt(endIdx)) {
endIdx++;
}
int majorVers = Integer.parseInt(ua.substring(idx, endIdx), 10);