diff --git a/user/super/com/google/gwt/emul/java/lang/CaseMapper.java b/user/super/com/google/gwt/emul/java/lang/CaseMapper.java index b3413a8a28..ba8bcf2324 100644 --- a/user/super/com/google/gwt/emul/java/lang/CaseMapper.java +++ b/user/super/com/google/gwt/emul/java/lang/CaseMapper.java @@ -23,7 +23,23 @@ public static char charToLowerCase(char c) { } public static char charToUpperCase(char c) { - return String.valueOf(c).toUpperCase().charAt(0); + String upper = String.valueOf(c).toUpperCase(); + return hasExtraCodePoints(upper) ? c : upper.charAt(0); + } + + public static int intToLowerCase(int codePoint) { + return String.NativeString.fromCodePoint(codePoint).toLowerCase().codePointAt(0); + } + + public static int intToUpperCase(int codePoint) { + String upper = String.NativeString.fromCodePoint(codePoint).toUpperCase(); + return hasExtraCodePoints(upper) ? codePoint : upper.codePointAt(0); + } + + // If String.toUpperCase yields more than 1 code point (a surrogate pair counts as 1), + // Character.toUpperCase should act as identity or title-case conversion (unsupported in GWT). + private static boolean hasExtraCodePoints(String str) { + return str.length() > (str.asNativeString().codePointAt(0) > 0xFFFF ? 2 : 1); } private CaseMapper() {} diff --git a/user/super/com/google/gwt/emul/java/lang/Character.java b/user/super/com/google/gwt/emul/java/lang/Character.java index d675b1719e..767e125a3b 100644 --- a/user/super/com/google/gwt/emul/java/lang/Character.java +++ b/user/super/com/google/gwt/emul/java/lang/Character.java @@ -18,32 +18,34 @@ import static javaemul.internal.InternalPreconditions.checkCriticalArgument; import java.io.Serializable; +import java.util.Arrays; + import javaemul.internal.NativeRegExp; import javaemul.internal.annotations.HasNoSideEffects; /** * Wraps a native char as an object. * - * TODO(jat): many of the classification methods implemented here are not - * correct in that they only handle ASCII characters, and many other methods - * are not currently implemented. 
I think the proper approach is to introduce * a deferred binding parameter which substitutes an implementation using - * a fully-correct Unicode character database, at the expense of additional - * data being downloaded. That way developers that need the functionality - * can get it without those who don't need it paying for it. + *

Some methods are not possible to implement without a Unicode database, + * which would blow up the code size.

+ * + *

Methods such as isLetter, isDigit, ... use the JS native API for Unicode. + * Their output is only consistent with JVMs that have the same Unicode support + * as the target browser. As of 2025, most browsers provide Unicode 16.0 support + * which is on par with OpenJDK 24.

+ * + * + * See the conformance table for details. + * * *
  * The following methods are still not implemented -- most would require Unicode
  * character db to be useful:
- *  - digit / is* / to*(int codePoint)
- *  - isDefined(char)
  *  - isIdentifierIgnorable(char)
  *  - isJavaIdentifierPart(char)
  *  - isJavaIdentifierStart(char)
  *  - isJavaLetter(char) -- deprecated, so probably not
  *  - isJavaLetterOrDigit(char) -- deprecated, so probably not
- *  - isISOControl(char)
- *  - isMirrored(char)
- *  - isSpaceChar(char)
  *  - isUnicodeIdentifierPart(char)
  *  - isUnicodeIdentifierStart(char)
  *  - getDirectionality(*)
@@ -52,14 +54,6 @@
  *  - reverseBytes(char) -- any use for this at all in the browser?
  *  - toTitleCase(*)
  *  - all the category constants for classification
- *
- * The following do not properly handle characters outside of ASCII:
- *  - digit(char c, int radix)
- *  - isDigit(char c)
- *  - isLetter(char c)
- *  - isLetterOrDigit(char c)
- *  - isLowerCase(char c)
- *  - isUpperCase(char c)
  * 
*/ public final class Character implements Comparable, Serializable { @@ -72,11 +66,11 @@ static class CharSequenceAdapter implements CharSequence { private int start; private int end; - public CharSequenceAdapter(char[] charArray) { + CharSequenceAdapter(char[] charArray) { this(charArray, 0, charArray.length); } - public CharSequenceAdapter(char[] charArray, int start, int end) { + CharSequenceAdapter(char[] charArray, int start, int end) { this.charArray = charArray; this.start = start; this.end = end; @@ -190,28 +184,31 @@ public static int compare(char x, char y) { return x - y; } - /* - * TODO: correct Unicode handling. - */ public static int digit(char c, int radix) { + return digit((int) c, radix); + } + + public static int digit(int codePoint, int radix) { if (radix < MIN_RADIX || radix > MAX_RADIX) { return -1; } - - if (c >= '0' && c < '0' + Math.min(radix, 10)) { - return c - '0'; - } - - // The offset by 10 is to re-base the alpha values - if (c >= 'a' && c < (radix + 'a' - 10)) { - return c - 'a' + 10; - } - - if (c >= 'A' && c < (radix + 'A' - 10)) { - return c - 'A' + 10; + int digit; + if (isDigit(codePoint)) { + // we don't have to list all representations of 0, if two consecutive ones are the same + // mod 16 we only list the first one + int[] zeros = {0x30, 0x966, 0xe50, 0x1946, 0x19d0, 0x11066, 0x110f0, 0x11136, + 0x111d0, 0x116da, 0x11730, 0x1d7ce, 0x1d7d8, 0x1d7e2, 0x1d7ec, 0x1d7f6, 0x1e140, + 0x1e5f1, 0x1e950}; + int pos = Arrays.binarySearch(zeros, codePoint); + digit = pos >= 0 ? 0 : ((codePoint - zeros[-pos - 2]) & 0xf); + } else if (codePoint >= 'a' && codePoint <= 'z' || codePoint >= 'A' && codePoint <= 'Z' + || codePoint >= 0xff21 && codePoint <= 0xff3a + || codePoint >= 0xff41 && codePoint <= 0xff5a) { + digit = (codePoint & 0x1f) + 9; + } else { + return -1; } - - return -1; + return digit >= radix ? 
-1 : digit; } public static char forDigit(int digit, int radix) { @@ -234,57 +231,143 @@ public static boolean isBmpCodePoint(int codePoint) { return codePoint >= MIN_VALUE && codePoint <= MAX_VALUE; } + private static NativeRegExp definedRegex; + + public static boolean isDefined(char c) { + return isDefined(String.valueOf(c)); + } + + public static boolean isDefined(int codePoint) { + return isValidCodePoint(codePoint) + && isDefined(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isDefined(String str) { + if (definedRegex == null) { + definedRegex = new NativeRegExp("\\P{Cn}", "u"); + } + return definedRegex.test(str); + } + private static NativeRegExp digitRegex; - /* - * TODO: correct Unicode handling. - */ public static boolean isDigit(char c) { + return isDigit(String.valueOf(c)); + } + + public static boolean isDigit(int codePoint) { + return isValidCodePoint(codePoint) && isDigit(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isDigit(String str) { if (digitRegex == null) { - digitRegex = new NativeRegExp("\\d"); + digitRegex = new NativeRegExp("\\p{Nd}", "u"); } - return digitRegex.test(String.valueOf(c)); + return digitRegex.test(String.valueOf(str)); } public static boolean isHighSurrogate(char ch) { return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; } + private static NativeRegExp ideographicRegex; + + public static boolean isIdeographic(int codePoint) { + return isValidCodePoint(codePoint) + && isIdeographic(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isIdeographic(String str) { + if (ideographicRegex == null) { + ideographicRegex = new NativeRegExp("\\p{Ideographic}", "u"); + } + return ideographicRegex.test(str); + } + private static NativeRegExp leterRegex; - /* - * TODO: correct Unicode handling. 
- */ public static boolean isLetter(char c) { + return isLetter(String.valueOf(c)); + } + + public static boolean isLetter(int codePoint) { + return isValidCodePoint(codePoint) + && isLetter(String.NativeString.fromCodePoint(codePoint)); + } + + public static boolean isLetter(String str) { if (leterRegex == null) { - leterRegex = new NativeRegExp("[A-Z]", "i"); + leterRegex = new NativeRegExp("\\p{L}", "u"); } - return leterRegex.test(String.valueOf(c)); + return leterRegex.test(str); } private static NativeRegExp isLeterOrDigitRegex; - /* - * TODO: correct Unicode handling. - */ public static boolean isLetterOrDigit(char c) { + return isLetterOrDigit(String.valueOf(c)); + } + + public static boolean isLetterOrDigit(int codePoint) { + return isValidCodePoint(codePoint) + && isLetterOrDigit(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isLetterOrDigit(String str) { if (isLeterOrDigitRegex == null) { - isLeterOrDigitRegex = new NativeRegExp("[A-Z\\d]", "i"); + isLeterOrDigitRegex = new NativeRegExp("[\\p{Nd}\\p{L}]", "u"); } - return isLeterOrDigitRegex.test(String.valueOf(c)); + return isLeterOrDigitRegex.test(str); } - /* - * TODO: correct Unicode handling. 
- */ + private static NativeRegExp lowerCaseRegex; + public static boolean isLowerCase(char c) { - return toLowerCase(c) == c && isLetter(c); + return isLowerCase(String.valueOf(c)); + } + + public static boolean isLowerCase(int codePoint) { + return isValidCodePoint(codePoint) + && isLowerCase(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isLowerCase(String str) { + if (lowerCaseRegex == null) { + lowerCaseRegex = new NativeRegExp("\\p{Lowercase}", "u"); + } + return lowerCaseRegex.test(str); } public static boolean isLowSurrogate(char ch) { return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; } + private static NativeRegExp mirroredRegex; + + public static boolean isMirrored(char c) { + return isMirrored(String.valueOf(c)); + } + + public static boolean isMirrored(int codePoint) { + return isValidCodePoint(codePoint) + && isMirrored(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isMirrored(String str) { + if (mirroredRegex == null) { + mirroredRegex = new NativeRegExp("\\p{Bidi_Mirrored}", "u"); + } + return mirroredRegex.test(str); + } + + public static boolean isISOControl(char ch) { + return ch <= '\u001F' || (ch >= '\u007F' && ch <= '\u009F'); + } + + public static boolean isISOControl(int codePoint) { + return isBmpCodePoint(codePoint) && isISOControl((char) codePoint); // negatives are not controls + } + /** * Deprecated - see isWhitespace(char). 
*/ @@ -306,12 +389,35 @@ public static boolean isSpace(char c) { } } + private static NativeRegExp spaceRegex; + + public static boolean isSpaceChar(char c) { + return isSpaceChar(String.valueOf(c)); + } + + public static boolean isSpaceChar(int codePoint) { + return isValidCodePoint(codePoint) + && isSpaceChar(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isSpaceChar(String str) { + if (spaceRegex == null) { + spaceRegex = new NativeRegExp("\\p{Z}", "u"); + } + return spaceRegex.test(str); + } + + public static boolean isSurrogate(char ch) { + return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; + } + public static boolean isWhitespace(char ch) { return isWhitespace(String.valueOf(ch)); } public static boolean isWhitespace(int codePoint) { - return isWhitespace(String.fromCodePoint(codePoint)); + return isValidCodePoint(codePoint) + && isWhitespace(String.NativeString.fromCodePoint(codePoint)); } private static NativeRegExp whitespaceRegex; @@ -339,14 +445,31 @@ public static boolean isSurrogatePair(char highSurrogate, char lowSurrogate) { public static boolean isTitleCase(char c) { // https://www.compart.com/en/unicode/category/Lt - return c != toUpperCase(c) && c != toLowerCase(c); + // here we should use the semantic of String.toUpperCase + return c != String.valueOf(c).toUpperCase().charAt(0) && c != toLowerCase(c); } - /* - * TODO: correct Unicode handling. 
- */ + public static boolean isTitleCase(int codePoint) { + // as of Unicode 16 there are no title-case chars beyond 0xffff + return codePoint > 0 && codePoint < 0xffff && isTitleCase((char) codePoint); + } + + private static NativeRegExp upperCaseRegex; + public static boolean isUpperCase(char c) { - return toUpperCase(c) == c && isLetter(c); + return isUpperCase(String.valueOf(c)); + } + + public static boolean isUpperCase(int codePoint) { + return isValidCodePoint(codePoint) + && isUpperCase(String.NativeString.fromCodePoint(codePoint)); + } + + private static boolean isUpperCase(String c) { + if (upperCaseRegex == null) { + upperCaseRegex = new NativeRegExp("\\p{Uppercase}", "u"); + } + return upperCaseRegex.test(c); } public static boolean isValidCodePoint(int codePoint) { @@ -390,8 +513,8 @@ public static char[] toChars(int codePoint) { if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { return new char[] { - getHighSurrogate(codePoint), - getLowSurrogate(codePoint), + highSurrogate(codePoint), + lowSurrogate(codePoint), }; } else { return new char[] { @@ -404,8 +527,8 @@ public static int toChars(int codePoint, char[] dst, int dstIndex) { checkCriticalArgument(codePoint >= 0 && codePoint <= MAX_CODE_POINT); if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { - dst[dstIndex++] = getHighSurrogate(codePoint); - dst[dstIndex] = getLowSurrogate(codePoint); + dst[dstIndex++] = highSurrogate(codePoint); + dst[dstIndex] = lowSurrogate(codePoint); return 2; } else { dst[dstIndex] = (char) codePoint; @@ -426,14 +549,36 @@ public static char toLowerCase(char c) { return CaseMapper.charToLowerCase(c); } + public static int toLowerCase(int codePoint) { + if (!isValidCodePoint(codePoint)) { // also rejects negatives, like toUpperCase(int) + return codePoint; + } + return CaseMapper.intToLowerCase(codePoint); + } + public static String toString(char x) { return String.valueOf(x); } + public static String toString(int codePoint) { + if (isValidCodePoint(codePoint)) { + return String.NativeString.fromCodePoint(codePoint); + } else 
{ + throw new IllegalArgumentException("Invalid code point: " + codePoint); + } + } + public static char toUpperCase(char c) { return CaseMapper.charToUpperCase(c); } + public static int toUpperCase(int codePoint) { + if (!isValidCodePoint(codePoint)) { + return codePoint; + } + return CaseMapper.intToUpperCase(codePoint); + } + public static Character valueOf(char c) { if (c < 128) { return BoxedValues.get(c); @@ -473,26 +618,26 @@ static char forDigit(int digit) { /** * Computes the high surrogate character of the UTF16 representation of a - * non-BMP code point. See {@link getLowSurrogate}. + * non-BMP code point. See {@link #lowSurrogate}. * * @param codePoint requested codePoint, required to be >= * MIN_SUPPLEMENTARY_CODE_POINT * @return high surrogate character */ - static char getHighSurrogate(int codePoint) { + public static char highSurrogate(int codePoint) { return (char) (MIN_HIGH_SURROGATE + (((codePoint - MIN_SUPPLEMENTARY_CODE_POINT) >> 10) & 1023)); } /** * Computes the low surrogate character of the UTF16 representation of a - * non-BMP code point. See {@link getHighSurrogate}. + * non-BMP code point. See {@link #highSurrogate}. 
* * @param codePoint requested codePoint, required to be >= * MIN_SUPPLEMENTARY_CODE_POINT * @return low surrogate character */ - static char getLowSurrogate(int codePoint) { + public static char lowSurrogate(int codePoint) { return (char) (MIN_LOW_SURROGATE + ((codePoint - MIN_SUPPLEMENTARY_CODE_POINT) & 1023)); } diff --git a/user/super/com/google/gwt/emul/java/lang/String.java b/user/super/com/google/gwt/emul/java/lang/String.java index 693f53bdc7..616be7aec0 100644 --- a/user/super/com/google/gwt/emul/java/lang/String.java +++ b/user/super/com/google/gwt/emul/java/lang/String.java @@ -226,17 +226,6 @@ private static Charset getCharset(String charsetName) throws UnsupportedEncoding } } - static String fromCodePoint(int codePoint) { - if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) { - char hiSurrogate = Character.getHighSurrogate(codePoint); - char loSurrogate = Character.getLowSurrogate(codePoint); - return String.valueOf(hiSurrogate) - + String.valueOf(loSurrogate); - } else { - return String.valueOf((char) codePoint); - } - } - public String() { /* * Call to $create(args) must be here so that the method is referenced and not @@ -356,7 +345,7 @@ public String(StringBuilder sb) { $create(sb); } - private NativeString asNativeString() { + NativeString asNativeString() { return JsUtils.uncheckedCast(this); } @@ -469,11 +458,17 @@ public int hashCode() { } public int indexOf(int codePoint) { - return indexOf(fromCodePoint(codePoint)); + if (codePoint > Character.MAX_CODE_POINT) { + return -1; + } + return indexOf(NativeString.fromCodePoint(codePoint)); } public int indexOf(int codePoint, int startIndex) { - return indexOf(fromCodePoint(codePoint), startIndex); + if (codePoint > Character.MAX_CODE_POINT) { + return -1; + } + return indexOf(NativeString.fromCodePoint(codePoint), startIndex); } public int indexOf(String str) { @@ -493,11 +488,17 @@ public boolean isEmpty() { } public int lastIndexOf(int codePoint) { - return 
lastIndexOf(fromCodePoint(codePoint)); + if (codePoint > Character.MAX_CODE_POINT) { + return -1; + } + return lastIndexOf(NativeString.fromCodePoint(codePoint)); } public int lastIndexOf(int codePoint, int startIndex) { - return lastIndexOf(fromCodePoint(codePoint), startIndex); + if (codePoint > Character.MAX_CODE_POINT) { + return -1; + } + return lastIndexOf(NativeString.fromCodePoint(codePoint), startIndex); } public int lastIndexOf(String str) { @@ -847,10 +848,12 @@ private int cappedIndexOf(char c) { } @JsType(isNative = true, name = "String", namespace = "") - private static class NativeString { + static class NativeString { public static native String fromCharCode(char x); + public static native String fromCodePoint(int codePoint); public int length; public native char charCodeAt(int index); + public native int codePointAt(int index); public native int indexOf(String str); public native int indexOf(String str, int startIndex); public native int lastIndexOf(String str); diff --git a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java index a2ccc9e6ad..507038ad19 100644 --- a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java +++ b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java @@ -15,25 +15,33 @@ */ package com.google.gwt.emultest.java.lang; +import com.google.gwt.junit.DoNotRunWith; +import com.google.gwt.junit.Platform; import com.google.gwt.junit.client.GWTTestCase; import java.util.Arrays; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Stream; /** * Tests for java.lang.Character. 
*/ +@DoNotRunWith(Platform.HtmlUnitBug) public class CharacterTest extends GWTTestCase { + private static final char NUM_CHARS_HANDLED = 127; + private static class CharSequenceAdapter implements CharSequence { private char[] charArray; private int start; private int end; - public CharSequenceAdapter(char[] charArray) { + CharSequenceAdapter(char[] charArray) { this(charArray, 0, charArray.length); } - public CharSequenceAdapter(char[] charArray, int start, int end) { + CharSequenceAdapter(char[] charArray, int start, int end) { this.charArray = charArray; this.start = start; this.end = end; @@ -57,123 +65,49 @@ public java.lang.CharSequence subSequence(int start, int end) { } /** - * Helper class which applies some arbitrary char mutation function - * to a string and returns it. + * Helper method which counts ASCII characters matching a predicate. */ - public abstract class Changer { - String original; - - public Changer(String o) { - original = o; - } - - public abstract char change(char c); - - public String changed() { - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < original.length(); i++) { - buf.append(change(original.charAt(i))); + public static int countAscii(Predicate test) { + int count = 0; + for (char ch = 0; ch < NUM_CHARS_HANDLED; ch++) { + if (test.test(ch)) { + count++; } - return buf.toString(); } + return count; } - /** - * Helper class which collects the set of characters which pass some - * arbitrary boolean function. 
- */ - public abstract class Judge { - String original; - - public Judge(String o) { - original = o; - } - public String allPass() { - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < original.length(); i++) { - if (pass(original.charAt(i))) { - buf.append(original.charAt(i)); - } + public static int countAscii(Function transformer, + Predicate test) { + int count = 0; + for (char ch = 0; ch < NUM_CHARS_HANDLED; ch++) { + if (test.test(transformer.apply(ch))) { + count++; } - return buf.toString(); - } - - public abstract boolean pass(char c); - } - - class LowerCaseJudge extends Judge { - public LowerCaseJudge(String s) { - super(s); - } - - @Override - public boolean pass(char c) { - return Character.isLowerCase(c); } + return count; } - class UpperCaseJudge extends Judge { - public UpperCaseJudge(String s) { - super(s); - } - - @Override - public boolean pass(char c) { - return Character.isUpperCase(c); + public static int countUnicode(int[] codePoints, Predicate test) { + int c = 0; + for (int i: codePoints) { + if (test.test(i)) { + c++; + } } + return c; } - public static String allChars; - - public static final int NUM_CHARS_HANDLED = 127; - - static { - StringBuffer b = new StringBuffer(); - for (char c = 0; c < NUM_CHARS_HANDLED; c++) { - b.append(c); - } - allChars = b.toString(); - } - - Judge digitJudge = new Judge(allChars) { - @Override - public boolean pass(char c) { - return Character.isDigit(c); - } - }; - Judge letterJudge = new Judge(allChars) { - @Override - public boolean pass(char c) { - return Character.isLetter(c); - } - }; - Judge letterOrDigitJudge = new Judge(allChars) { - @Override - public boolean pass(char c) { - return Character.isLetterOrDigit(c); - } - }; - Changer lowerCaseChanger = new Changer(allChars) { - @Override - public char change(char c) { - return Character.toLowerCase(c); - } - }; - Judge lowerCaseJudge = new LowerCaseJudge(allChars); - Judge spaceJudge = new Judge(allChars) { - @Override - 
@SuppressWarnings("deprecation") // Character.isSpace() - public boolean pass(char c) { - return Character.isSpace(c); // suppress deprecation - } - }; - Changer upperCaseChanger = new Changer(allChars) { - @Override - public char change(char c) { - return Character.toUpperCase(c); - } - }; - Judge upperCaseJudge = new UpperCaseJudge(allChars); + int[] letters = {'a', 'z', 'A', 'Z', 0x2c6, 0x2d1, 0x10380, 0x1039d}; + int[] digits = {'0', '9', 0x660, 0x669, 0x104a0, 0x104a9}; + int[] spaces = {' ', '\u00a0', '\u2028'}; + int[] controls = {0, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0x1f, 0x7f, 0x9f}; + int[] punctuation = {'@', '.'}; + int[] symbols = {0x2c5}; + int[] marks = {0x659, 0x10a39, 0x10379}; + int[] others = {-1, Character.MAX_CODE_POINT + 1}; + int[] allCodePoints = Stream.of(letters, digits, spaces, controls, punctuation, marks, + symbols, others).flatMapToInt(Arrays::stream).toArray(); @Override public String getModuleName() { @@ -267,7 +201,7 @@ public void testConstructor() { } public void testDigit() { - assertEquals("wrong number of digits", 10, digitJudge.allPass().length()); + assertEquals("wrong number of digits", 10, countAscii(Character::isDigit)); } public void testSurrogates() { @@ -298,34 +232,52 @@ public void testSurrogates() { } public void testLetter() { - assertEquals("wrong number of letters", 52, letterJudge.allPass().length()); + assertEquals("wrong number of letters", 52, countAscii(Character::isLetter)); } public void testLetterOrDigit() { - assertEquals("wrong number of letters", 62, - letterOrDigitJudge.allPass().length()); + assertEquals("wrong number of letters + digits", 62, + countAscii(Character::isLetterOrDigit)); } public void testLowerCase() { assertEquals("wrong number of lowercase letters", 26, - lowerCaseJudge.allPass().length()); + countAscii(Character::isLowerCase)); assertEquals("wrong number of lowercase letters after toLowerCase", 52, - new LowerCaseJudge(lowerCaseChanger.changed()).allPass().length()); + 
countAscii(Character::toLowerCase, Character::isLowerCase)); } + @SuppressWarnings("deprecation") public void testSpace() { - assertEquals("wrong number of spaces", 5, spaceJudge.allPass().length()); + assertEquals("wrong number of spaces", 5, countAscii(Character::isSpace)); } public void testToFromDigit() { for (int i = 0; i < 16; i++) { assertEquals(i, Character.digit(Character.forDigit(i, 16), 16)); } - assertEquals(1, Character.digit('1', 10)); - assertEquals('9', Character.forDigit(9, 10)); - assertEquals(-1, Character.digit('7', 6)); - assertEquals(-1, Character.digit('8', 8)); - assertEquals(-1, Character.digit('A', 10)); + assertEquals(1, Character.digit(hideFromCompiler('1'), 10)); + assertEquals('9', Character.forDigit(hideFromCompiler(9), 10)); + assertEquals(-1, Character.digit(hideFromCompiler('7'), 6)); + assertEquals(-1, Character.digit(hideFromCompiler('8'), 8)); + assertEquals(-1, Character.digit(hideFromCompiler('A'), 10)); + assertEquals(35, Character.digit(hideFromCompiler('Z'), 36)); + } + + public void testToFromDigitInt() { + int[] zeros = {48, 1632, 1776, 1984, 2406, 2534, 2662, 2790, + 2918, 3046, 3174, 3302, 3430, 3558, 3664, 3792, 3872, 4160, 4240, 6112, 6160, 6470, 6608, + 6784, 6800, 6992, 7088, 7232, 7248, 42528, 43216, 43264, 43472, 43504, 43600, 44016, + 65296, 66720, 69734, 69872, 69942, 70096, 70384, 70736, 70864, 71248, 71360, 71472, + 71904, 72784, 73040, 92768, 93008, 120782, 120792, 120802, 120812, 120822, 125264}; + + for (int zero: zeros) { + assertEquals(0, Character.digit(zero, 10)); + } + assertEquals(35, Character.digit(hideFromCompiler(65338), 36)); + assertEquals(35, Character.digit(hideFromCompiler(65370), 36)); + assertEquals("only letters and digits have numeric value", 0, + countUnicode(punctuation, c -> Character.digit(c, 10) != -1)); } @SuppressWarnings("deprecation") @@ -472,12 +424,90 @@ public void testToString() { public void testUpperCase() { assertEquals("wrong number of uppercase letters", 26, - 
upperCaseJudge.allPass().length()); + countAscii(Character::isUpperCase)); assertEquals("wrong number of uppercase letters after toUpperCase", 52, - new UpperCaseJudge(upperCaseChanger.changed()).allPass().length()); + countAscii(Character::toUpperCase, Character::isUpperCase)); } public void testValueOf() { assertEquals('A', Character.valueOf('A').charValue()); } + + public void testIsLetterInt() { + assertEquals("No other characters should be letters", + letters.length, countUnicode(allCodePoints, Character::isLetter)); + assertEquals("Unicode letters should be recognized", + letters.length, countUnicode(letters, Character::isLetter)); + } + + public void testIsDigitInt() { + assertEquals("Unicode digits should be recognized", + digits.length, countUnicode(digits, Character::isDigit)); + assertEquals("No other characters should be digits", + digits.length, countUnicode(allCodePoints, Character::isDigit)); + } + + public void testIsDigitOrLetterInt() { + assertEquals("Unicode digits should be recognized", + digits.length, countUnicode(digits, Character::isLetterOrDigit)); + assertEquals("Unicode letters should be recognized", + letters.length, countUnicode(letters, Character::isLetterOrDigit)); + assertEquals("No other characters should match letter or digit", + digits.length + letters.length, + countUnicode(allCodePoints, Character::isLetterOrDigit)); + } + + public void testIsSpaceCharInt() { + assertEquals("Unicode spaces should be recognized", + spaces.length, countUnicode(spaces, Character::isSpaceChar)); + assertEquals("No other characters should match space", + spaces.length, + countUnicode(allCodePoints, Character::isSpaceChar)); + } + + public void testIsDefined() { + assertEquals("Should recognize defined characters", + allCodePoints.length - others.length, + countUnicode(allCodePoints, Character::isDefined)); + assertEquals("No other characters should be defined", + 0, + countUnicode(others, Character::isDefined)); + } + + public void testIsISOControl() { 
+ assertTrue(Character.isISOControl(hideFromCompiler((char) 0))); + assertTrue(Character.isISOControl(hideFromCompiler((char) 0x1f))); + assertFalse(Character.isISOControl(hideFromCompiler((char) 0x20))); + assertFalse(Character.isISOControl(hideFromCompiler((char) 0x7E))); + assertTrue(Character.isISOControl(hideFromCompiler((char) 0x7F))); + assertTrue(Character.isISOControl(hideFromCompiler((char) 0x9F))); + assertFalse(Character.isISOControl(hideFromCompiler((char) 0xA0))); + } + + public void testIsISOControlInt() { + assertTrue(Character.isISOControl(hideFromCompiler(0))); + assertTrue(Character.isISOControl(hideFromCompiler(0x1f))); + assertFalse(Character.isISOControl(hideFromCompiler(0x20))); + assertFalse(Character.isISOControl(hideFromCompiler(0x7E))); + assertTrue(Character.isISOControl(hideFromCompiler(0x7F))); + assertTrue(Character.isISOControl(hideFromCompiler(0x9F))); + assertFalse(Character.isISOControl(hideFromCompiler(0xA0))); + } + + public void testIsSurrogate() { + assertFalse(Character.isSurrogate(hideFromCompiler((char) 0))); + assertTrue(Character.isSurrogate(hideFromCompiler(Character.MIN_HIGH_SURROGATE))); + assertTrue(Character.isSurrogate(hideFromCompiler(Character.MAX_HIGH_SURROGATE))); + assertTrue(Character.isSurrogate(hideFromCompiler(Character.MIN_LOW_SURROGATE))); + assertTrue(Character.isSurrogate(hideFromCompiler(Character.MAX_LOW_SURROGATE))); + assertFalse(Character.isSurrogate(hideFromCompiler((char) (Character.MAX_LOW_SURROGATE + 1)))); + } + + protected T hideFromCompiler(T value) { + if (Math.random() < -1) { + // Can never happen, but fools the compiler enough not to optimize this call. 
+ fail(); + } + return value; + } } diff --git a/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java b/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java index fc58bdcbde..66042a2f58 100644 --- a/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java +++ b/user/test/com/google/gwt/emultest/java/lang/CompilerConstantStringTest.java @@ -137,7 +137,7 @@ public void testHashCode() { for (int j = 0; j < str.length(); ++j) { char ch = str.charAt(j); assertTrue("Bad character '" + ch + "' (U+0" + Integer.toHexString(ch) - + ")", ch == '-' || ch == ' ' || Character.isDigit(ch)); + + ")", ch == '-' || ch == ' ' || (ch >= '0' && ch <= '9')); } } // verify the hash codes are constant for a given string diff --git a/user/test/com/google/gwt/emultest/java/lang/StringTest.java b/user/test/com/google/gwt/emultest/java/lang/StringTest.java index ea12fee74c..a1ab8f7612 100644 --- a/user/test/com/google/gwt/emultest/java/lang/StringTest.java +++ b/user/test/com/google/gwt/emultest/java/lang/StringTest.java @@ -513,7 +513,7 @@ public void testHashCode() { for (int j = 0; j < str.length(); ++j) { char ch = str.charAt(j); assertTrue("Bad character '" + ch + "' (U+0" + Integer.toHexString(ch) - + ")", ch == '-' || ch == ' ' || Character.isDigit(ch)); + + ")", ch == '-' || ch == ' ' || (ch >= '0' && ch <= '9')); } // get hashes again to verify the values are constant for a given string diff --git a/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java b/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java index d34fed2061..3da0287d72 100644 --- a/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java +++ b/user/test/com/google/gwt/typedarrays/client/ClientSupportTest.java @@ -36,7 +36,7 @@ public void testSupported() { int idx = ua.indexOf("firefox/") + 8; int endIdx = idx; int len = ua.length(); - while (endIdx < len && Character.isDigit(ua.charAt(endIdx))) { + while (endIdx < 
len && '0' <= ua.charAt(endIdx) && '9' >= ua.charAt(endIdx)) { endIdx++; } int majorVers = Integer.parseInt(ua.substring(idx, endIdx), 10);