Skip to content

Commit 7ca2baa

Browse files
committed
Fix #71 and #233 (support for binary, octal, hex numbers)
1 parent 88c2880 commit 7ca2baa

File tree

3 files changed

+279
-40
lines changed

3 files changed

+279
-40
lines changed

release-notes/VERSION-2.x

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Modules:
1010

1111
2.12.0 (not yet released)
1212

13+
#71: (yaml) Hex number as an entry of an Object causing problem(s) with binding to POJO
1314
#175: (yaml) Add `YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR` to indent by 2 spaces
1415
(requested by Jesper N; fix contributed by Damian S)
1516
#199: (csv) Empty Lists can only be String-typed in CSV
@@ -20,9 +21,10 @@ Modules:
2021
#226: (yaml) Quote 'y'/'Y'/'n'/'N' as names too (to avoid problems with Boolean keys)
2122
(requested by pnepywoda@github)
2223
#229: (yaml) Allow configuring the way "must quote" is determined for property names, String values
23-
#231: Typed object with anchor throws Already had POJO for id (note: actual
24+
#231: (yaml) Typed object with anchor throws Already had POJO for id (note: actual
2425
fix in `jackson-annotations`)
2526
(reported by almson@github)
27+
#233: (yaml) Support decoding Binary, Octal and Hex numbers as integers
2628
- Add configurability of "YAML version generator is to follow" via "YAMLFactory.builder()"
2729
- SnakeYAML 1.26 -> 1.27
2830
- Add Gradle Module Metadata (https://blog.gradle.org/alignment-with-gradle-module-metadata)

yaml/src/main/java/com/fasterxml/jackson/dataformat/yaml/YAMLParser.java

+176-37
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import java.io.*;
44
import java.math.BigDecimal;
55
import java.math.BigInteger;
6-
import java.util.regex.Pattern;
76

87
import org.yaml.snakeyaml.error.Mark;
98
import org.yaml.snakeyaml.events.*;
@@ -67,12 +66,10 @@ private Feature(boolean defaultState) {
6766
// note: does NOT include '0', handled separately
6867
// private final static Pattern PATTERN_INT = Pattern.compile("-?[1-9][0-9]*");
6968

70-
/**
71-
* We will use pattern that is bit stricter than YAML definition,
72-
* but we will still allow things like extra '_' in there.
73-
*/
74-
private final static Pattern PATTERN_FLOAT = Pattern.compile(
75-
"[-+]?([0-9][0-9_]*)?\\.[0-9]*([eE][-+][0-9]+)?");
69+
// 22-Nov-2020, tatu: Not needed as of 2.12 since SnakeYAML tags
70+
// doubles correctly
71+
// private final static Pattern PATTERN_FLOAT = Pattern.compile(
72+
// "[-+]?([0-9][0-9_]*)?\\.[0-9]*([eE][-+][0-9]+)?");
7673

7774
/*
7875
/**********************************************************************
@@ -603,25 +600,16 @@ protected Boolean _matchYAMLBoolean(String value, int len)
603600

604601
protected JsonToken _decodeNumberScalar(String value, final int len)
605602
{
606-
if ("0".equals(value)) { // special case for regexp (can't take minus etc)
607-
_numberNegative = false;
608-
_numberInt = 0;
609-
_numTypesValid = NR_INT;
610-
return JsonToken.VALUE_NUMBER_INT;
611-
}
612-
/* 05-May-2012, tatu: Turns out this is a hot spot; so let's write it
613-
* out and avoid regexp overhead...
614-
*/
603+
// 05-May-2012, tatu: Turns out this is a hot spot; so let's write it
604+
// out and avoid regexp overhead...
605+
615606
//if (PATTERN_INT.matcher(value).matches()) {
616607
int i;
617-
char sign = value.charAt(0);
618-
if (sign == '-') {
608+
char ch = value.charAt(0);
609+
if (ch == '-') {
619610
_numberNegative = true;
620-
if (len == 1) {
621-
return null;
622-
}
623611
i = 1;
624-
} else if (sign == '+') {
612+
} else if (ch == '+') {
625613
_numberNegative = false;
626614
if (len == 1) {
627615
return null;
@@ -631,42 +619,177 @@ protected JsonToken _decodeNumberScalar(String value, final int len)
631619
_numberNegative = false;
632620
i = 0;
633621
}
634-
// !!! 11-Jan-2018, tatu: Should check for binary/octal/hex/sexagesimal
635-
// as per http://yaml.org/type/int.html
622+
if (len == i) { // should not occur but play it safe
623+
return null;
624+
}
625+
// Next: either "0" ("-0" and "+0" also accepted), or non-decimal. So:
626+
if (value.charAt(i) == '0') {
627+
if (++i == len) {
628+
// can leave "_numberNegative" as is, does not matter
629+
_numberInt = 0;
630+
_numTypesValid = NR_INT;
631+
return JsonToken.VALUE_NUMBER_INT;
632+
}
633+
ch = value.charAt(i);
634+
635+
switch (ch) {
636+
case 'b': case 'B': // binary
637+
return _decodeNumberIntBinary(value, i+1, len, _numberNegative);
638+
case 'x': case 'X': // hex
639+
return _decodeNumberIntHex(value, i+1, len, _numberNegative);
640+
case '0': case '1': case '2': case '3': case '4':
641+
case '5': case '6': case '7': case '8': case '9':
642+
case '_':
643+
return _decodeNumberIntOctal(value, i, len, _numberNegative);
644+
default:
645+
}
646+
// should never occur, but in abundance of caution, let's not
647+
// throw exception but just return as String
648+
return JsonToken.VALUE_STRING;
649+
}
650+
651+
// 23-Nov-2020, tatu: will now check and support all formats EXCEPT
652+
// for 60-base; 60-base is trickier not just because decoding gets
653+
// more involved but also because it can accidentally "detect" values
654+
// that are most likely expressing either Times or IP numbers.
655+
656+
boolean underscores = false;
636657

637-
int underscores = 0;
638658
while (true) {
639659
int c = value.charAt(i);
640660
if (c > '9' || c < '0') {
641-
if (c != '_') {
661+
if (c == '_') {
662+
underscores = true;
663+
} else {
642664
break;
643665
}
644-
++underscores;
645666
}
646667
if (++i == len) {
647668
_numTypesValid = 0;
648-
if (underscores > 0) {
649-
return _cleanYamlInt(_textValue);
669+
if (underscores) {
670+
return _cleanYamlInt(value);
650671
}
651672
_cleanedTextValue = _textValue;
652673
return JsonToken.VALUE_NUMBER_INT;
653674
}
654675
}
655-
if (PATTERN_FLOAT.matcher(value).matches()) {
656-
_numTypesValid = 0;
657-
return _cleanYamlFloat(_textValue);
658-
}
676+
// 22-Nov-2020, tatu: Should not be needed; SnakeYAML does not
677+
// tag things this way...
678+
// if (PATTERN_FLOAT.matcher(value).matches()) {
679+
// _numTypesValid = 0;
680+
// return _cleanYamlFloat(_textValue);
681+
// }
659682

660683
// 25-Aug-2016, tatu: If we can't actually match it to valid number,
661-
// consider String; better than claiming there's not toekn
684+
// consider String; better than claiming there's no token
662685
return JsonToken.VALUE_STRING;
663-
}
686+
}
664687

665-
protected JsonToken _decodeIntWithUnderscores(String value, final int len)
688+
// @since 2.12
689+
protected JsonToken _decodeNumberIntBinary(final String value, int i, final int origLen,
690+
boolean negative)
666691
{
692+
final String cleansed = _cleanUnderscores(value, i, origLen);
693+
int digitLen = cleansed.length();
694+
695+
if (digitLen <= 31) {
696+
int v = Integer.parseInt(cleansed, 2);
697+
if (negative) {
698+
v = -v;
699+
}
700+
_numberInt = v;
701+
_numTypesValid = NR_INT;
702+
return JsonToken.VALUE_NUMBER_INT;
703+
}
704+
if (digitLen <= 63) {
705+
return _decodeFromLong(Long.parseLong(cleansed, 2), negative,
706+
(digitLen == 32));
707+
}
708+
return _decodeFromBigInteger(new BigInteger(cleansed, 2), negative);
709+
}
710+
711+
// @since 2.12
712+
protected JsonToken _decodeNumberIntOctal(final String value, int i, final int origLen,
713+
boolean negative)
714+
{
715+
final String cleansed = _cleanUnderscores(value, i, origLen);
716+
int digitLen = cleansed.length();
717+
718+
if (digitLen <= 10) { // 30 bits
719+
int v = Integer.parseInt(cleansed, 8);
720+
if (negative) {
721+
v = -v;
722+
}
723+
_numberInt = v;
724+
_numTypesValid = NR_INT;
725+
return JsonToken.VALUE_NUMBER_INT;
726+
}
727+
if (digitLen <= 21) { // 63 bits
728+
return _decodeFromLong(Long.parseLong(cleansed, 8), negative, false);
729+
}
730+
return _decodeFromBigInteger(new BigInteger(cleansed, 8), negative);
731+
}
732+
733+
// @since 2.12
734+
protected JsonToken _decodeNumberIntHex(final String value, int i, final int origLen,
735+
boolean negative)
736+
{
737+
final String cleansed = _cleanUnderscores(value, i, origLen);
738+
int digitLen = cleansed.length();
739+
740+
if (digitLen <= 7) { // 28 bits
741+
int v = Integer.parseInt(cleansed, 16);
742+
if (negative) {
743+
v = -v;
744+
}
745+
_numberInt = v;
746+
_numTypesValid = NR_INT;
747+
return JsonToken.VALUE_NUMBER_INT;
748+
}
749+
if (digitLen <= 15) { // 60 bits
750+
return _decodeFromLong(Long.parseLong(cleansed, 16), negative,
751+
(digitLen == 8));
752+
}
753+
return _decodeFromBigInteger(new BigInteger(cleansed, 16), negative);
754+
}
755+
756+
private JsonToken _decodeFromLong(long unsignedValue, boolean negative,
757+
boolean checkIfInt)
758+
{
759+
long actualValue;
760+
761+
if (negative) {
762+
actualValue = -unsignedValue;
763+
if (checkIfInt && (actualValue >= MIN_INT_L)) {
764+
_numberInt = (int) actualValue;
765+
_numTypesValid = NR_INT;
766+
return JsonToken.VALUE_NUMBER_INT;
767+
}
768+
} else {
769+
if (checkIfInt && (unsignedValue < MAX_INT_L)) {
770+
_numberInt = (int) unsignedValue;
771+
_numTypesValid = NR_INT;
772+
return JsonToken.VALUE_NUMBER_INT;
773+
}
774+
actualValue = unsignedValue;
775+
}
776+
_numberLong = actualValue;
777+
_numTypesValid = NR_LONG;
667778
return JsonToken.VALUE_NUMBER_INT;
668779
}
669-
780+
781+
private JsonToken _decodeFromBigInteger(BigInteger unsignedValue, boolean negative)
782+
{
783+
// Should we check for bounds here too? Let's not bother yet
784+
if (negative) {
785+
_numberBigInt = unsignedValue.negate();
786+
} else {
787+
_numberBigInt = unsignedValue;
788+
}
789+
_numTypesValid = NR_BIGINT;
790+
return JsonToken.VALUE_NUMBER_INT;
791+
}
792+
670793
/*
671794
/**********************************************************
672795
/* String value handling
@@ -923,6 +1046,22 @@ private JsonToken _cleanYamlInt(String str)
9231046
return JsonToken.VALUE_NUMBER_INT;
9241047
}
9251048

1049+
private String _cleanUnderscores(String str, int i, final int len)
1050+
{
1051+
final StringBuilder sb = new StringBuilder(len);
1052+
for (; i < len; ++i) {
1053+
char ch = str.charAt(i);
1054+
if (ch != '_') {
1055+
sb.append(ch);
1056+
}
1057+
}
1058+
// tiny optimization: if nothing was trimmed, return String
1059+
if (sb.length() == len) {
1060+
return str;
1061+
}
1062+
return sb.toString();
1063+
}
1064+
9261065
private JsonToken _cleanYamlFloat(String str)
9271066
{
9281067
// Here we do NOT yet know whether we might have underscores so check

0 commit comments

Comments
 (0)