Skip to content

Commit a64561a

Browse files
committed
Fixed #548
1 parent 580c987 commit a64561a

File tree

3 files changed

+67
-35
lines changed

3 files changed

+67
-35
lines changed

release-notes/CREDITS-2.x

+2
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ Alex Rebert (alpire@github)
171171
* Reported #547: `CharsToNameCanonicalizer`: Internal error on `SymbolTable.rehash()` with high
172172
number of hash collisions
173173
(2.10.0)
174+
* Reported #548: `ByteQuadsCanonicalizer`: `ArrayIndexOutOfBoundsException` in `addName`
175+
(2.10.0)
174176
175177
Philippe Marschall (marschall@github)
176178
* Requested #480: `SerializableString` value can not directly render to Writer

release-notes/VERSION-2.x

+2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ JSON library.
3737
#547: `CharsToNameCanonicalizer`: Internal error on `SymbolTable.rehash()` with high
3838
number of hash collisions
3939
(reported by Alex R)
40+
#548: `ByteQuadsCanonicalizer`: `ArrayIndexOutOfBoundsException` in `addName`
41+
(reported by Alex R)
4042

4143
2.9.10 (not yet released)
4244

src/main/java/com/fasterxml/jackson/core/sym/ByteQuadsCanonicalizer.java

+63-35
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@
1111
* memory access due to flattening of name quad data.
1212
* Performance improvement modest for simple JSON document data binding (maybe 3%),
1313
* but should help more for larger symbol tables, or for binary formats like Smile.
14+
*<p>
15+
* Hash area is divided into 4 sections:
16+
*<ol>
17+
* <li>Primary area (1/2 of total size), direct match from hash (LSB)</li>
18+
* <li>Secondary area (1/4 of total size), match from {@code hash (LSB) >> 1}</li>
19+
* <li>Tertiary area (1/8 of total size), match from {@code hash (LSB) >> 2}</li>
20+
* <li>Spill-over area (remaining 1/8) with linear scan, insertion order</li>
21+
* </ol>
23+
* and within every area, entries are 4 {@code int}s, where 1 - 3 ints contain 1 - 12
24+
* UTF-8 encoded bytes of name (null-padded), and last int is offset in
25+
* {@code _names} that contains actual name Strings.
1426
*
1527
* @since 2.6
1628
*/
@@ -176,12 +188,6 @@ public final class ByteQuadsCanonicalizer
176188
*/
177189
private int _longNameOffset;
178190

179-
/**
180-
* This flag is set if, after adding a new entry, it is deemed
181-
* that a rehash is warranted if any more entries are to be added.
182-
*/
183-
private transient boolean _needRehash;
184-
185191
/*
186192
/**********************************************************
187193
/* Sharing, versioning
@@ -266,7 +272,6 @@ private ByteQuadsCanonicalizer(ByteQuadsCanonicalizer parent, boolean intern,
266272
_longNameOffset = state.longNameOffset;
267273

268274
// and then set other state to reflect sharing status
269-
_needRehash = false;
270275
_hashShared = true;
271276
}
272277

@@ -317,7 +322,7 @@ public void release()
317322
{
318323
// we will try to merge if child table has new entries
319324
// 28-Jul-2019, tatu: From [core#548]: do not share if immediate rehash needed
320-
if ((_parent != null) && maybeDirty() && !_needRehash) {
325+
if ((_parent != null) && maybeDirty()) {
321326
_parent.mergeChild(new TableInfo(this));
322327
// Let's also mark this instance as dirty, so that just in
323328
// case release was too early, there's no corruption of possibly shared data.
@@ -768,7 +773,6 @@ public String addName(String name, int q1) {
768773
_hashArea[offset+3] = 1;
769774
_names[offset >> 2] = name;
770775
++_count;
771-
_verifyNeedForRehash();
772776
return name;
773777
}
774778

@@ -784,7 +788,6 @@ public String addName(String name, int q1, int q2) {
784788
_hashArea[offset+3] = 2;
785789
_names[offset >> 2] = name;
786790
++_count;
787-
_verifyNeedForRehash();
788791
return name;
789792
}
790793

@@ -800,7 +803,6 @@ public String addName(String name, int q1, int q2, int q3) {
800803
_hashArea[offset+3] = 3;
801804
_names[offset >> 2] = name;
802805
++_count;
803-
_verifyNeedForRehash();
804806
return name;
805807
}
806808

@@ -851,33 +853,15 @@ public String addName(String name, int[] q, int qlen)
851853

852854
// and finally; see if we really should rehash.
853855
++_count;
854-
_verifyNeedForRehash();
855856
return name;
856857
}
857858

858-
private void _verifyNeedForRehash() {
859-
// Yes if above 80%, or above 50% AND have ~1% spill-overs
860-
if (_count > (_hashSize >> 1)) { // over 50%
861-
int spillCount = (_spilloverEnd - _spilloverStart()) >> 2;
862-
if ((spillCount > (1 + _count >> 7))
863-
|| (_count > (_hashSize * 0.80))) {
864-
_needRehash = true;
865-
}
866-
}
867-
}
868-
869859
private void _verifySharing()
870860
{
871861
if (_hashShared) {
872862
_hashArea = Arrays.copyOf(_hashArea, _hashArea.length);
873863
_names = Arrays.copyOf(_names, _names.length);
874864
_hashShared = false;
875-
// 09-Sep-2015, tatu: As per [jackson-core#216], also need to ensure
876-
// we rehash as needed, as need-rehash flag is not copied from parent
877-
_verifyNeedForRehash();
878-
}
879-
if (_needRehash) {
880-
rehash();
881865
}
882866
}
883867

@@ -886,14 +870,20 @@ private void _verifySharing()
886870
*/
887871
private int _findOffsetForAdd(int hash)
888872
{
889-
// first, check the primary:
873+
// first, check the primary: if slot found, no need for resize
890874
int offset = _calcOffset(hash);
891875
final int[] hashArea = _hashArea;
892876
if (hashArea[offset+3] == 0) {
893877
//System.err.printf(" PRImary slot #%d, hash %X\n", (offset>>2), hash & 0x7F);
894878
return offset;
895879
}
896-
// then secondary
880+
881+
// Otherwise let's see if we are due resize():
882+
if (_checkNeedForRehash()) {
883+
return _resizeAndFindOffsetForAdd(hash);
884+
}
885+
886+
// If not, proceed with secondary slot
897887
int offset2 = _secondaryStart + ((offset >> 3) << 2);
898888
if (hashArea[offset2+3] == 0) {
899889
//System.err.printf(" SECondary slot #%d (start x%X), hash %X\n",(offset >> 3), _secondaryStart, (hash & 0x7F));
@@ -927,13 +917,52 @@ private int _findOffsetForAdd(int hash)
927917
if (_failOnDoS) {
928918
_reportTooManyCollisions();
929919
}
930-
// and if we didn't fail, we'll simply force rehash for next add
931-
// (which, in turn, may double up or nuke contents, depending on size etc)
932-
_needRehash = true;
920+
return _resizeAndFindOffsetForAdd(hash);
921+
}
922+
return offset;
923+
}
924+
925+
// @since 2.10
926+
private int _resizeAndFindOffsetForAdd(int hash)
927+
{
928+
// First things first: we need to resize+rehash (or, if too big, nuke contents)
929+
rehash();
930+
931+
// Copy of main _findOffsetForAdd, minus the resize check: we just resized, so a second resize cannot be needed
932+
int offset = _calcOffset(hash);
933+
final int[] hashArea = _hashArea;
934+
if (hashArea[offset+3] == 0) {
935+
return offset;
936+
}
937+
int offset2 = _secondaryStart + ((offset >> 3) << 2);
938+
if (hashArea[offset2+3] == 0) {
939+
return offset2;
940+
}
941+
offset2 = _tertiaryStart + ((offset >> (_tertiaryShift + 2)) << _tertiaryShift);
942+
final int bucketSize = (1 << _tertiaryShift);
943+
for (int end = offset2 + bucketSize; offset2 < end; offset2 += 4) {
944+
if (hashArea[offset2+3] == 0) {
945+
return offset2;
946+
}
933947
}
948+
offset = _spilloverEnd;
949+
_spilloverEnd += 4;
934950
return offset;
935951
}
936952

953+
// Helper method for checking if we should simply rehash() before add
954+
private boolean _checkNeedForRehash() {
955+
// Yes if above 80%, or above 50% AND have ~1% spill-overs
956+
if (_count > (_hashSize >> 1)) { // over 50%
957+
int spillCount = (_spilloverEnd - _spilloverStart()) >> 2;
958+
if ((spillCount > (1 + _count >> 7))
959+
|| (_count > (_hashSize * 0.80))) {
960+
return true;
961+
}
962+
}
963+
return false;
964+
}
965+
937966
private int _appendLongName(int[] quads, int qlen)
938967
{
939968
int start = _longNameOffset;
@@ -1061,7 +1090,6 @@ public int calcHash(int[] q, int qlen)
10611090

10621091
private void rehash()
10631092
{
1064-
_needRehash = false;
10651093
// Note: since we'll make copies, no need to unshare, can just mark as such:
10661094
_hashShared = false;
10671095

0 commit comments

Comments
 (0)