@@ -12,7 +12,7 @@ struct variant2tuple<std::variant<As...>> { using type = std::tuple<As...>; };
12
12
// Re: printing
13
13
// -----------------------------------------------------------------------------
14
14
15
- // name: general
15
+ // name: specific cases
16
16
// JSON_NAME(type): defines an overload name(const type &) that returns the
// spelling of `type`, exactly as written at the macro's point of use, as a
// std::string.
// Note: there must be no whitespace between the macro's name and its '('.
// With whitespace, the preprocessor would define an object-like macro instead
// of a function-like macro.
#define JSON_NAME(type) \
   inline std::string name(const type &) { return #type; }
18
18
@@ -39,6 +39,17 @@ JSON_NAME(literal)
39
39
40
40
#undef JSON_NAME
41
41
42
+ // name: for atom
43
+ // Remark: If preprocessor directives are such that atom<T> == T, then name(),
44
+ // as defined below, without the class = require<> SFINAE, would be infinitely
45
+ // recursive. It presumably wouldn't be called, because, above, we define each
46
+ // case we intend to use. To be exceedingly proper, though, we have the SFINAE.
47
+ template <class T , class = require<!same<atom<T>,T>>>
48
+ std::string name (const atom<T> &)
49
+ {
50
+ return name (T ());
51
+ }
52
+
42
53
// name: for number
43
54
inline std::string name (const number &n)
44
55
{
@@ -48,7 +59,7 @@ inline std::string name(const number &n)
48
59
);
49
60
}
50
61
51
- // detail:: print, for array and object
62
+ // print, for array and object
52
63
template <class ACTION , class T >
53
64
void print (
54
65
const T &obj, std::ostream &os,
@@ -62,7 +73,7 @@ void print(
62
73
os << std::setw (0 ) << str + ch + (colors ? json::color::reset : " " );
63
74
}
64
75
65
- // detail:: print, for other types
76
+ // print, for other types
66
77
template <class ACTION , class T >
67
78
void print (
68
79
const T &obj, std::ostream &os, const std::string &content,
@@ -147,7 +158,7 @@ inline void expect(
147
158
// nocasecmp
148
159
// Case-insensitive std::string comparison.
149
160
// The old C language strcasecmp() is nonstandard. A modern, true caseless
150
- // std::string comparison would depend on, e.g., locale; but the following
161
+ // std::string comparison would depend on, e.g., locale, but the following
151
162
// should suffice for our purposes.
152
163
inline bool nocasecmp (const std::string &one, const std::string &two)
153
164
{
@@ -207,4 +218,102 @@ literal many(
207
218
return literal (flags & literal::self ? text+suffix : " " );
208
219
} // many
209
220
221
+
222
+ // -----------------------------------------------------------------------------
223
+ // Support for reading string escapes of the form \u####
224
+ // -----------------------------------------------------------------------------
225
+
226
+ // ------------------------
227
+ // Constants, functions
228
+ // ------------------------
229
+
230
// Ranges for surrogates.
// Remark: each of [himin..himax] and [lomin..lomax]
// has 1024 values inclusive. Also, lomin == himax+1.
inline constexpr int himin = 0xD800; // 0b 11011 000 00000000
inline constexpr int himax = 0xDBFF; // 0b 11011 011 11111111
inline constexpr int lomin = 0xDC00; // 0b 11011 100 00000000
inline constexpr int lomax = 0xDFFF; // 0b 11011 111 11111111

// Correction term for combining a surrogate pair (h,l) into one code point:
//    (h << 10) + l - tweak == 0x10000 + ((h-himin) << 10) + (l-lomin)
inline constexpr unsigned tweak =
   unsigned(himin << 10) + unsigned(lomin) - 0x10000u;

// High/low surrogate?
// These are constexpr so that they can be used in constant expressions too.
constexpr bool hi(const int p) noexcept { return himin <= p && p <= himax; }
constexpr bool lo(const int p) noexcept { return lomin <= p && p <= lomax; }
243
+
244
+ // ------------------------
245
+ // codepoint
246
+ // ------------------------
247
+
248
+ // \uabcd ==> (a << 12) + (b << 8) + (c << 4) + (d << 0)
249
+ inline int codepoint (
250
+ const std::string &context, std::istream &is,
251
+ std::string &token // representation of hex number, for diagnostic printing
252
+ ) {
253
+ int ret = 0 , ch;
254
+ token = " \\ u" ;
255
+ for (const unsigned shift : { 12u , 8u , 4u , 0u }) {
256
+ if ((ch = is.get ()) == EOF)
257
+ error (context +
258
+ " Expected 4-character hex code; reached EOF instead." , &is);
259
+ else if (' 0' <= ch && ch <= ' 9' ) ret += int ((unsigned (ch)-48u ) << shift);
260
+ else if (' A' <= ch && ch <= ' F' ) ret += int ((unsigned (ch)-55u ) << shift);
261
+ else if (' a' <= ch && ch <= ' f' ) ret += int ((unsigned (ch)-87u ) << shift);
262
+ else
263
+ error (context +
264
+ " Invalid hex digit found while reading \\ u####." , &is);
265
+ token += ch;
266
+ }
267
+ return ret;
268
+ }
269
+
270
+ // ------------------------
271
+ // unicode
272
+ // ------------------------
273
+
274
+ inline void unicode (
275
+ const std::string &context, std::istream &is,
276
+ std::string &str
277
+ ) {
278
+ std::string one, two;
279
+ int first = codepoint (context,is,one), second;
280
+
281
+ static const std::string
282
+ hi_before_lo = " A high surrogate must precede the low surrogate" ,
283
+ lo_follow_hi = " A low surrogate must follow the high surrogate" ;
284
+
285
+ if (lo (first))
286
+ error (context + hi_before_lo + " " + one + " .\n " +
287
+ " There was no such high surrogate." , &is);
288
+ if (hi (first)) {
289
+ if (is.get () != ' \\ ' || is.get () != ' u' )
290
+ error (context + lo_follow_hi + " " + one + " .\n " +
291
+ " There is no such low surrogate." , &is);
292
+ if (!lo (second = codepoint (context,is,two)))
293
+ error (context + lo_follow_hi + " " + one + " .\n " +
294
+ two + " is not a low surrogate." , &is);
295
+ first = (unsigned (first) << 10u ) + unsigned (second) - tweak;
296
+ }
297
+
298
+ if (first <= 127 ) {
299
+ // 0bbbbbbb (ASCII case)
300
+ str += int (first);
301
+ } else if (first <= 2047 ) {
302
+ // 110bbbbb 10bbbbbb
303
+ str += int (0b11000000u | ((unsigned (first) >> 6u ) ));
304
+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
305
+ } else if (first <= 65535 ) {
306
+ // 1110bbbb 10bbbbbb 10bbbbbb
307
+ str += int (0b11100000u | ((unsigned (first) >> 12u ) ));
308
+ str += int (0b10000000u | ((unsigned (first) >> 6u ) & 0b00111111u ));
309
+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
310
+ } else {
311
+ // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
312
+ str += int (0b11110000u | ((unsigned (first) >> 18u ) ));
313
+ str += int (0b10000000u | ((unsigned (first) >> 12u ) & 0b00111111u ));
314
+ str += int (0b10000000u | ((unsigned (first) >> 6u ) & 0b00111111u ));
315
+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
316
+ }
317
+ }
318
+
210
319
} // namespace detail
0 commit comments