@@ -29,21 +29,22 @@ static class JsonNode {
2929 private final String [] emptyResult ;
3030 private JsonNode ptr ;
3131 private byte [] buffer ;
32- private final int targetParseNum ;
32+ private final int expectParseCols ;
33+ // every time json string is processed, currentVersion will be incremented by 1
3334 private long currentVersion = 0 ;
3435 // pruning: parsing stops early once parseCols reaches expectParseCols (every requested column has been found)
35- private long alreadyProcessedCols = 0 ;
36+ private long parseCols = 0 ;
3637
3738 public SimdJsonParser2 (String ... args ) {
3839 parser = new SimdJsonParser ();
39- targetParseNum = args .length ;
40- row = new JsonNode [targetParseNum ];
41- result = new String [targetParseNum ];
42- emptyResult = new String [targetParseNum ];
40+ expectParseCols = args .length ;
41+ row = new JsonNode [expectParseCols ];
42+ result = new String [expectParseCols ];
43+ emptyResult = new String [expectParseCols ];
4344 for (int i = 0 ; i < args .length ; i ++) {
4445 emptyResult [i ] = null ;
4546 }
46- for (int i = 0 ; i < targetParseNum ; i ++) {
47+ for (int i = 0 ; i < expectParseCols ; i ++) {
4748 JsonNode cur = root ;
4849 String [] paths = args [i ].split ("\\ ." );
4950 for (int j = 0 ; j < paths .length ; j ++) {
@@ -65,7 +66,7 @@ public String[] parse(byte[] buffer, int len) {
6566 if (buffer == null || buffer .length == 0 ) {
6667 return emptyResult ;
6768 }
68- this .alreadyProcessedCols = 0 ;
69+ this .parseCols = 0 ;
6970 this .currentVersion ++;
7071 this .ptr = root ;
7172 this .buffer = buffer ;
@@ -84,22 +85,34 @@ public String[] parse(byte[] buffer, int len) {
8485 return getResult ();
8586 }
8687
87- private void parseElement (String fieldName ) {
88- if (fieldName == null ) {
89- int start = bitIndexes .advance ();
90- int realEnd = bitIndexes .advance ();
91- while (realEnd > start ) {
92- if (buffer [--realEnd ] == '"' ) {
93- break ;
94- }
95- }
96- fieldName = new String (buffer , start + 1 , realEnd - start - 1 );
88+ private String parseField () {
89+ int start = bitIndexes .advance ();
90+ int next = bitIndexes .peek ();
91+ String field = new String (buffer , start , next - start ).trim ();
92+ if ("null" .equalsIgnoreCase (field )) {
93+ return null ;
94+ }
95+ // field type is string or type is decimal
96+ if (field .startsWith ("\" " )) {
97+ field = field .substring (1 , field .length () - 1 );
98+ }
99+ return field ;
100+ }
101+
102+ private void parseElement (String expectFieldName ) {
103+ if (parseCols >= expectParseCols ) {
104+ return ;
105+ }
106+ // if expectFieldName is null, parent is map, else is list
107+ if (expectFieldName == null ) {
108+ expectFieldName = parseField ();
109+ bitIndexes .advance (); // skip :
97110 }
98- if (!ptr .getChildren ().containsKey (fieldName )) {
111+ if (!ptr .getChildren ().containsKey (expectFieldName )) {
99112 skip (false );
100113 return ;
101114 }
102- ptr = ptr .getChildren ().get (fieldName );
115+ ptr = ptr .getChildren ().get (expectFieldName );
103116 switch (buffer [bitIndexes .peek ()]) {
104117 case '{' -> {
105118 parseMap ();
@@ -110,7 +123,7 @@ private void parseElement(String fieldName) {
110123 default -> {
111124 ptr .setValue (skip (true ));
112125 ptr .setVersion (currentVersion );
113- ++alreadyProcessedCols ;
126+ ++parseCols ;
114127 }
115128 }
116129 ptr = ptr .getParent ();
@@ -120,12 +133,12 @@ private void parseMap() {
120133 if (ptr .getChildren () == null ) {
121134 ptr .setValue (skip (true ));
122135 ptr .setVersion (currentVersion );
123- ++alreadyProcessedCols ;
136+ ++parseCols ;
124137 return ;
125138 }
126139 ptr .setStart (bitIndexes .peek ());
127140 bitIndexes .advance ();
128- while (bitIndexes .hasNext () && buffer [bitIndexes .peek ()] != '}' && alreadyProcessedCols < targetParseNum ) {
141+ while (bitIndexes .hasNext () && buffer [bitIndexes .peek ()] != '}' && parseCols <= expectParseCols ) {
129142 parseElement (null );
130143 if (buffer [bitIndexes .peek ()] == ',' ) {
131144 bitIndexes .advance ();
@@ -135,7 +148,7 @@ private void parseMap() {
135148 if (ptr .isLeaf ()) {
136149 ptr .setValue (new String (buffer , ptr .getStart (), ptr .getEnd () - ptr .getStart () + 1 ));
137150 ptr .setVersion (currentVersion );
138- ++alreadyProcessedCols ;
151+ ++parseCols ;
139152 }
140153 bitIndexes .advance ();
141154 }
@@ -144,13 +157,13 @@ private void parseList() {
144157 if (ptr .getChildren () == null ) {
145158 ptr .setValue (skip (true ));
146159 ptr .setVersion (currentVersion );
147- ++alreadyProcessedCols ;
160+ ++parseCols ;
148161 return ;
149162 }
150163 ptr .setStart (bitIndexes .peek ());
151164 bitIndexes .advance ();
152165 int i = 0 ;
153- while (bitIndexes .hasNext () && buffer [bitIndexes .peek ()] != ']' && alreadyProcessedCols < targetParseNum ) {
166+ while (bitIndexes .hasNext () && buffer [bitIndexes .peek ()] != ']' && parseCols <= expectParseCols ) {
154167 parseElement ("" + i );
155168 if (buffer [bitIndexes .peek ()] == ',' ) {
156169 bitIndexes .advance ();
@@ -161,7 +174,7 @@ private void parseList() {
161174 if (ptr .isLeaf ()) {
162175 ptr .setValue (new String (buffer , ptr .getStart (), ptr .getEnd () - ptr .getStart () + 1 ));
163176 ptr .setVersion (currentVersion );
164- ++alreadyProcessedCols ;
177+ ++parseCols ;
165178 }
166179 bitIndexes .advance ();
167180 }
@@ -198,32 +211,14 @@ private String skip(boolean retainValue) {
198211 bitIndexes .advance ();
199212 return retainValue ? new String (buffer , start , end - start + 1 ) : null ;
200213 }
201- case '"' -> {
202- bitIndexes .advance ();
203- int realEnd = bitIndexes .peek ();
204- while (realEnd > start ) {
205- if (buffer [--realEnd ] == '"' ) {
206- break ;
207- }
208- }
209- return retainValue ? new String (buffer , start + 1 , realEnd - start - 1 ) : null ;
210- }
211214 default -> {
212- bitIndexes .advance ();
213- int realEnd = bitIndexes .peek ();
214- while (realEnd >= start ) {
215- --realEnd ;
216- if (buffer [realEnd ] >= '0' && buffer [realEnd ] <= '9' ) {
217- break ;
218- }
219- }
220- return retainValue ? new String (buffer , start , realEnd - start + 1 ) : null ;
215+ return parseField ();
221216 }
222217 }
223218 }
224219
225220 private String [] getResult () {
226- for (int i = 0 ; i < targetParseNum ; i ++) {
221+ for (int i = 0 ; i < expectParseCols ; i ++) {
227222 if (row [i ].getVersion () < currentVersion ) {
228223 result [i ] = null ;
229224 continue ;
0 commit comments