* memory access due to flattening of name quad data.
* Performance improvement modest for simple JSON document data binding (maybe 3%),
* but should help more for larger symbol tables, or for binary formats like Smile.
+ *<p>
+ * Hash area is divided into 4 sections:
+ *<ol>
+ * <li>Primary area (1/2 of total size), direct match from hash (LSB)</li>
+ * <li>Secondary area (1/4 of total size), match from {@code hash (LSB) >> 1}</li>
+ * <li>Tertiary area (1/8 of total size), match from {@code hash (LSB) >> 2}</li>
+ * <li>Spill-over area (remaining 1/8) with linear scan, insertion order</li>
+ * </ol>
+ * and within every area, entries are 4 {@code int}s, where 1 - 3 ints contain 1 - 12
+ * UTF-8 encoded bytes of name (null-padded), and last int is offset in
+ * {@code _names} that contains actual name Strings.
*
* @since 2.6
*/
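To make the layout described in the Javadoc above concrete, here is a small self-contained sketch (illustrative only, not jackson-core code; the class name, `hashSize`, and the printed values are invented for this example) that computes the four area boundaries and the primary/secondary/tertiary candidate offsets for a hash, assuming `hashSize` primary slots and 4-int entries:

```java
// Simplified model of the 4-way hash area split described in the Javadoc above.
// All names here are illustrative, not jackson-core API.
public class AreaLayoutSketch {
    public static void main(String[] args) {
        int hashSize = 256;                         // number of primary slots (power of 2)
        int entryInts = 4;                          // every entry is 4 ints

        int primaryInts   = hashSize * entryInts;   // 1/2 of total area
        int secondaryInts = primaryInts >> 1;       // 1/4 of total area
        int tertiaryInts  = primaryInts >> 2;       // 1/8 of total area
        int spilloverInts = primaryInts >> 2;       // remaining 1/8

        int totalInts      = primaryInts + secondaryInts + tertiaryInts + spilloverInts;
        int secondaryStart = primaryInts;
        int tertiaryStart  = secondaryStart + secondaryInts;
        int spilloverStart = tertiaryStart + tertiaryInts;

        // Candidate slot offsets for one hash, mirroring the LSB / >>1 / >>2 idea.
        // (The real table scans a small bucket in the tertiary area and falls back
        // to a linear scan of the spill-over area; this shows only the direct mapping.)
        int hash = 0xBEEF;
        int primaryOffset   = (hash & (hashSize - 1)) << 2;
        int secondaryOffset = secondaryStart + (((hash >> 1) & ((hashSize >> 1) - 1)) << 2);
        int tertiaryOffset  = tertiaryStart  + (((hash >> 2) & ((hashSize >> 2) - 1)) << 2);

        System.out.printf("total ints=%d, secondary@%d, tertiary@%d, spillover@%d%n",
                totalInts, secondaryStart, tertiaryStart, spilloverStart);
        System.out.printf("candidates: primary=%d secondary=%d tertiary=%d%n",
                primaryOffset, secondaryOffset, tertiaryOffset);
    }
}
```

The real table also groups tertiary slots into buckets (via `_tertiaryShift`, visible in the hunks below); the sketch only shows the direct-mapped part of the arithmetic.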
@@ -176,12 +188,6 @@ public final class ByteQuadsCanonicalizer
*/
private int _longNameOffset;

- /**
- * This flag is set if, after adding a new entry, it is deemed
- * that a rehash is warranted if any more entries are to be added.
- */
- private transient boolean _needRehash;
-
/*
/**********************************************************
/* Sharing, versioning
@@ -266,7 +272,6 @@ private ByteQuadsCanonicalizer(ByteQuadsCanonicalizer parent, boolean intern,
_longNameOffset = state.longNameOffset;

// and then set other state to reflect sharing status
- _needRehash = false;
_hashShared = true;
}
@@ -317,7 +322,7 @@ public void release()
{
// we will try to merge if child table has new entries
// 28-Jul-2019, tatu: From [core#548]: do not share if immediate rehash needed
- if ((_parent != null) && maybeDirty() && !_needRehash) {
+ if ((_parent != null) && maybeDirty()) {
_parent.mergeChild(new TableInfo(this));
// Let's also mark this instance as dirty, so that just in
// case release was too early, there's no corruption of possibly shared data.
@@ -768,7 +773,6 @@ public String addName(String name, int q1) {
_hashArea[offset+3] = 1;
_names[offset >> 2] = name;
++_count;
- _verifyNeedForRehash();
return name;
}
@@ -784,7 +788,6 @@ public String addName(String name, int q1, int q2) {
_hashArea[offset+3] = 2;
_names[offset >> 2] = name;
++_count;
- _verifyNeedForRehash();
return name;
}
@@ -800,7 +803,6 @@ public String addName(String name, int q1, int q2, int q3) {
_hashArea[offset+3] = 3;
_names[offset >> 2] = name;
++_count;
- _verifyNeedForRehash();
return name;
}
@@ -851,33 +853,15 @@ public String addName(String name, int[] q, int qlen)
// and finally; see if we really should rehash.
++_count;
- _verifyNeedForRehash();
return name;
}

- private void _verifyNeedForRehash() {
- // Yes if above 80%, or above 50% AND have ~1% spill-overs
- if (_count > (_hashSize >> 1)) { // over 50%
- int spillCount = (_spilloverEnd - _spilloverStart()) >> 2;
- if ((spillCount > (1 + _count >> 7))
- || (_count > (_hashSize * 0.80))) {
- _needRehash = true;
- }
- }
- }
-
private void _verifySharing()
{
if (_hashShared) {
_hashArea = Arrays.copyOf(_hashArea, _hashArea.length);
_names = Arrays.copyOf(_names, _names.length);
_hashShared = false;
- // 09-Sep-2015, tatu: As per [jackson-core#216], also need to ensure
- // we rehash as needed, as need-rehash flag is not copied from parent
- _verifyNeedForRehash();
- }
- if (_needRehash) {
- rehash();
}
}
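With the rehash-on-unshare logic removed, `_verifySharing()` is left as a pure copy-on-write step: a child table keeps borrowing the parent's arrays until its first mutation, then takes private copies. A minimal sketch of that pattern (illustrative only, not the library's class):

```java
import java.util.Arrays;

// Minimal copy-on-write illustration of the step _verifySharing() performs.
public class CopyOnWriteSketch {
    private int[] hashArea;
    private String[] names;
    private boolean hashShared;

    CopyOnWriteSketch(int[] sharedHashArea, String[] sharedNames) {
        this.hashArea = sharedHashArea;   // borrowed from parent, must not be mutated
        this.names = sharedNames;
        this.hashShared = true;
    }

    private void verifySharing() {
        if (hashShared) {
            hashArea = Arrays.copyOf(hashArea, hashArea.length);
            names = Arrays.copyOf(names, names.length);
            hashShared = false;           // later mutations touch private copies only
        }
    }

    void put(int slot, String name) {
        verifySharing();                  // unshare before the first write
        names[slot] = name;
    }
}
```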
@@ -886,14 +870,20 @@ private void _verifySharing()
*/
private int _findOffsetForAdd(int hash)
{
- // first, check the primary:
+ // first, check the primary: if slot found, no need for resize
int offset = _calcOffset(hash);
final int[] hashArea = _hashArea;
if (hashArea[offset+3] == 0) {
//System.err.printf(" PRImary slot #%d, hash %X\n", (offset>>2), hash & 0x7F);
return offset;
}
- // then secondary
+
+ // Otherwise let's see if we are due resize():
+ if (_checkNeedForRehash()) {
+ return _resizeAndFindOffsetForAdd(hash);
+ }
+
+ // If not, proceed with secondary slot
int offset2 = _secondaryStart + ((offset >> 3) << 2);
if (hashArea[offset2+3] == 0) {
//System.err.printf(" SECondary slot #%d (start x%X), hash %X\n",(offset >> 3), _secondaryStart, (hash & 0x7F));
@@ -927,13 +917,52 @@ private int _findOffsetForAdd(int hash)
if (_failOnDoS) {
_reportTooManyCollisions();
}
- // and if we didn't fail, we'll simply force rehash for next add
- // (which, in turn, may double up or nuke contents, depending on size etc)
- _needRehash = true;
+ return _resizeAndFindOffsetForAdd(hash);
+ }
+ return offset;
+ }
+
+ // @since 2.10
+ private int _resizeAndFindOffsetForAdd(int hash)
+ {
+ // First things first: we need to resize+rehash (or, if too big, nuke contents)
+ rehash();
+
+ // Copy of main _findOffsetForAdd except for checks to resize: can not be needed
+ int offset = _calcOffset(hash);
+ final int[] hashArea = _hashArea;
+ if (hashArea[offset+3] == 0) {
+ return offset;
+ }
+ int offset2 = _secondaryStart + ((offset >> 3) << 2);
+ if (hashArea[offset2+3] == 0) {
+ return offset2;
+ }
+ offset2 = _tertiaryStart + ((offset >> (_tertiaryShift + 2)) << _tertiaryShift);
+ final int bucketSize = (1 << _tertiaryShift);
+ for (int end = offset2 + bucketSize; offset2 < end; offset2 += 4) {
+ if (hashArea[offset2+3] == 0) {
+ return offset2;
+ }
}
+ offset = _spilloverEnd;
+ _spilloverEnd += 4;
return offset;
}

+ // Helper method for checking if we should simply rehash() before add
+ private boolean _checkNeedForRehash() {
+ // Yes if above 80%, or above 50% AND have ~1% spill-overs
+ if (_count > (_hashSize >> 1)) { // over 50%
+ int spillCount = (_spilloverEnd - _spilloverStart()) >> 2;
+ if ((spillCount > (1 + _count >> 7))
+ || (_count > (_hashSize * 0.80))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private int _appendLongName(int[] quads, int qlen)
{
int start = _longNameOffset;
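The new `_checkNeedForRehash()` helper added in the hunk above encodes the resize policy as two thresholds: rehash when the table is over 80% full, or over 50% full with spill-over entries exceeding roughly 1% of current entries. A standalone restatement of the predicate (the `needsRehash` helper and sample numbers are hypothetical, not jackson-core API) makes the cut-offs easy to check by hand:

```java
// Standalone restatement of the _checkNeedForRehash() thresholds, for verification only.
public class RehashThresholdSketch {
    // rehash if load > 80%, or if > 50% full AND spill-overs exceed ~1% of entries
    static boolean needsRehash(int count, int hashSize, int spillCount) {
        if (count > (hashSize >> 1)) {                    // over 50% full
            if ((spillCount > (1 + count >> 7))           // parses as (1 + count) >> 7
                    || (count > (hashSize * 0.80))) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(needsRehash(100, 256, 0));   // false: only ~39% full
        System.out.println(needsRehash(210, 256, 0));   // true: above the 80% bound
        System.out.println(needsRehash(150, 256, 4));   // true: >50% full with spill-overs
    }
}
```

Note that `1 + _count >> 7` parses as `(1 + _count) >> 7`, i.e. about `count / 128`, which matches the "~1% spill-overs" comment in the code.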
@@ -1061,7 +1090,6 @@ public int calcHash(int[] q, int qlen)
private void rehash()
{
- _needRehash = false;
// Note: since we'll make copies, no need to unshare, can just mark as such:
_hashShared = false;