1
|
#include "Core.h"
|
2
|
|
3
|
namespace Upp {
|
4
|
|
5
|
#define LLOG(x)
|
6
|
#define LTIMING(x)
|
7
|
|
8
|
void CParser::ThrowError(const char *s) {
|
9
|
LLOG("CParser::Error: " << s);
|
10
|
LLOG(~String(term, min((int)strlen((const char *)term), 512)));
|
11
|
Pos pos = GetPos();
|
12
|
Error err(fn + Format("(%d,%d): ", line, pos.GetColumn()) + s);
|
13
|
|
14
|
throw err;
|
15
|
}
|
16
|
|
17
|
// Switches comment skipping on or off and returns *this for chaining.
// The cursor is moved back to `wspc` and whitespace is skipped again so the
// new setting applies to the text in front of the current token
// (NOTE(review): `wspc` presumably marks where the last skipped run began - confirm in the header).
CParser& CParser::SkipComments(bool b)
{
	skipcomments = b;

	// Re-scan from the remembered whitespace start under the new setting.
	term = wspc;
	Spaces0();

	return *this;
}
|
24
|
|
25
|
// Switches support for nested /* ... */ comments on or off and returns *this
// for chaining. The cursor is moved back to `wspc` and whitespace is skipped
// again so the new setting applies to the text in front of the current token.
CParser& CParser::NestComments(bool b)
{
	nestcomments = b;

	// Re-scan from the remembered whitespace start under the new setting.
	term = wspc;
	Spaces0();

	return *this;
}
|
32
|
|
33
|
bool CParser::Spaces0() {
|
34
|
LTIMING("Spaces");
|
35
|
if((byte)*term > ' ' &&
|
36
|
!(term[0] == '/' && term[1] == '/') &&
|
37
|
!(term[0] == '/' && term[1] == '*'))
|
38
|
return false;
|
39
|
for(;;) {
|
40
|
if(*term == LINEINFO_ESC) {
|
41
|
term++;
|
42
|
fn.Clear();
|
43
|
while(*term) {
|
44
|
if(*term == LINEINFO_ESC) {
|
45
|
++term;
|
46
|
break;
|
47
|
}
|
48
|
if(*term == '\3') {
|
49
|
line = atoi(++term);
|
50
|
while(*term) {
|
51
|
if(*term == LINEINFO_ESC) {
|
52
|
++term;
|
53
|
break;
|
54
|
}
|
55
|
term++;
|
56
|
}
|
57
|
break;
|
58
|
}
|
59
|
fn.Cat(*term++);
|
60
|
}
|
61
|
continue;
|
62
|
}
|
63
|
else
|
64
|
if(term[0] == '/' && term[1] == '/' && skipcomments) {
|
65
|
term += 2;
|
66
|
while(*term && *term != '\n')
|
67
|
term++;
|
68
|
}
|
69
|
else
|
70
|
if(term[0] == '/' && term[1] == '*' && skipcomments) {
|
71
|
if(nestcomments == false) {
|
72
|
term += 2;
|
73
|
while(*term) {
|
74
|
if(term[0] == '*' && term[1] == '/') {
|
75
|
term += 2;
|
76
|
break;
|
77
|
}
|
78
|
if(*term++ == '\n') line++;
|
79
|
}
|
80
|
}
|
81
|
else {
|
82
|
int count = 1;
|
83
|
term += 2;
|
84
|
while(*term) {
|
85
|
if(term[0] == '*' && term[1] == '/') {
|
86
|
term += 2;
|
87
|
count--;
|
88
|
if (count == 0)
|
89
|
break;
|
90
|
}
|
91
|
else if(term[0] == '/' && term[1] == '*')
|
92
|
count++;
|
93
|
|
94
|
if(*term++ == '\n') line++;
|
95
|
}
|
96
|
}
|
97
|
}
|
98
|
if(!*term) break;
|
99
|
if((byte)*term > ' ') break;
|
100
|
if(*term == '\n') {
|
101
|
line++;
|
102
|
lineptr = term + 1;
|
103
|
}
|
104
|
term++;
|
105
|
}
|
106
|
return true;
|
107
|
}
|
108
|
|
109
|
// Builds an embeddable line-info record:
//   LINEINFO_ESC file '\3' line '\3' column LINEINFO_ESC
// Spaces0() consumes such records to restore the file name and line number.
String CParser::LineInfoComment(const String& file, int line, int column)
{
	String record;
	record << (char)LINEINFO_ESC << file << '\3'
	       << line << '\3' << column << (char)LINEINFO_ESC;
	return record;
}
|
114
|
|
115
|
// Returns a line-info record (see LineInfoComment) describing the current
// file name, line and column; `tabsize` is forwarded to GetColumn.
String CParser::GetLineInfoComment(int tabsize) const
{
	const int current_line = GetLine();
	const int current_column = GetColumn(tabsize);
	return LineInfoComment(GetFileName(), current_line, current_column);
}
|
119
|
|
120
|
const char *CParser::IsId0(const char *s) const {
|
121
|
const char *t = term + 1;
|
122
|
s++;
|
123
|
while(*s) {
|
124
|
if(*t != *s)
|
125
|
return NULL;
|
126
|
t++;
|
127
|
s++;
|
128
|
}
|
129
|
return IsAlNum(*t) || *t == '_' ? NULL : t;
|
130
|
}
|
131
|
|
132
|
bool CParser::Id0(const char *s) {
|
133
|
LTIMING("Id");
|
134
|
const char *t = IsId0(s);
|
135
|
if(!t)
|
136
|
return false;
|
137
|
term = t;
|
138
|
DoSpaces();
|
139
|
return true;
|
140
|
}
|
141
|
|
142
|
void CParser::PassId(const char *s) throw(Error) {
|
143
|
LTIMING("PassId");
|
144
|
if(!Id(s))
|
145
|
ThrowError(String("missing '") + s + "\'");
|
146
|
}
|
147
|
|
148
|
void CParser::PassChar(char c) throw(Error) {
|
149
|
LTIMING("PassChar");
|
150
|
if(!Char(c))
|
151
|
ThrowError(String("missing '") + c + "\'");
|
152
|
}
|
153
|
|
154
|
void CParser::PassChar2(char c1, char c2) throw(Error) {
|
155
|
LTIMING("PassChar2");
|
156
|
if(!Char2(c1, c2))
|
157
|
ThrowError(String("missing '") + c1 + c2 + "\'");
|
158
|
}
|
159
|
|
160
|
void CParser::PassChar3(char c1, char c2, char c3) throw(Error) {
|
161
|
LTIMING("PassChar3");
|
162
|
if(!Char3(c1, c2, c3))
|
163
|
ThrowError(String("missing '") + c1 + c2 + c3 + "\'");
|
164
|
}
|
165
|
|
166
|
// Reads the identifier at the cursor and skips following whitespace.
// Throws Error ("missing id") when IsId() reports no identifier.
// The identifier extends over the longest run of iscid() characters.
String CParser::ReadId() throw(Error) {
	LTIMING("ReadId");
	if(!IsId())
		ThrowError("missing id");
	const char *b = term;
	const char *p = b;
	while(iscid(*p))
		p++;
	term = p;
	DoSpaces();
	// Copy straight from the input buffer; no intermediate String is needed.
	return String(b, (int)(p - b));
}
|
179
|
|
180
|
// Reads an identifier that may carry template arguments and scope
// qualifiers, e.g. "Ns::Vector<int, String>", then skips following whitespace.
// Accepted characters: alphanumerics, '_', '<', '>', ':' anywhere, and ',' or
// ' ' only while inside angle brackets (lvl > 0).
// Throws Error ("missing id") when IsId() reports no identifier.
String CParser::ReadIdt() throw(Error) {
	if(!IsId())
		ThrowError("missing id");
	StringBuffer result;
	int lvl = 0; // current '<' nesting depth
	for(;;) {
		const char c = *term;
		const bool always_ok = IsAlNum(c) || c == '_' || c == '<' || c == '>' || c == ':';
		const bool ok_inside = lvl > 0 && (c == ',' || c == ' ');
		if(!always_ok && !ok_inside)
			break;
		if(c == '<')
			lvl++;
		if(c == '>')
			lvl--;
		result.Cat(c);
		term++;
	}
	DoSpaces();
	return result;
}
|
194
|
|
195
|
bool CParser::IsInt() const {
|
196
|
LTIMING("IsInt");
|
197
|
const char *t = term;
|
198
|
if(*t == '-' || *t == '+') {
|
199
|
t++;
|
200
|
while(*t <= ' ')
|
201
|
t++;
|
202
|
}
|
203
|
return IsDigit(*t);
|
204
|
}
|
205
|
|
206
|
int CParser::Sgn()
|
207
|
{
|
208
|
int sign = 1;
|
209
|
if(*term == '-') {
|
210
|
sign = -1;
|
211
|
term++;
|
212
|
}
|
213
|
else
|
214
|
if(*term == '+')
|
215
|
term++;
|
216
|
Spaces();
|
217
|
return sign;
|
218
|
}
|
219
|
|
220
|
int CParser::ReadInt() throw(Error) {
|
221
|
LTIMING("ReadInt");
|
222
|
int sign = Sgn();
|
223
|
uint32 n = ReadNumber(10);
|
224
|
if(sign > 0 ? n > INT_MAX : n > (uint32)INT_MAX + 1)
|
225
|
ThrowError("number is too big");
|
226
|
return sign * (int)n;
|
227
|
}
|
228
|
|
229
|
int CParser::ReadInt(int min, int max) throw(Error)
|
230
|
{
|
231
|
int n = ReadInt();
|
232
|
if(n < min || n > max)
|
233
|
ThrowError("number is out of range");
|
234
|
return n;
|
235
|
}
|
236
|
|
237
|
// Reads a signed decimal 64-bit integer (optional sign, then digits).
// Throws Error when no number is present (from ReadNumber64) or when the
// magnitude exceeds the int64 range ("number is too big").
int64 CParser::ReadInt64() throw(Error)
{
	LTIMING("ReadInt64");
	const int sign = Sgn();
	const uint64 magnitude = ReadNumber64(10);
	const bool too_big = sign > 0 ? magnitude > INT64_MAX
	                              : magnitude > (uint64)INT64_MAX + 1;
	if(too_big)
		ThrowError("number is too big");
	// The product is formed in the unsigned 64-bit type of `magnitude` and
	// converted to int64 on return.
	return sign * magnitude;
}
|
246
|
|
247
|
// Reads a signed decimal 64-bit integer and verifies min <= value <= max.
// Throws Error ("number is out of range") otherwise, in addition to the
// errors of ReadInt64().
int64 CParser::ReadInt64(int64 min, int64 max) throw(Error)
{
	const int64 value = ReadInt64();
	const bool in_range = min <= value && value <= max;
	if(!in_range)
		ThrowError("number is out of range");
	return value;
}
|
254
|
|
255
|
bool CParser::IsNumber(int base) const
|
256
|
{
|
257
|
if(IsDigit(*term))
|
258
|
return true;
|
259
|
int q = ToUpper(*term) - 'A';
|
260
|
return q >= 0 && q < base - 10;
|
261
|
}
|
262
|
|
263
|
// Reads an unsigned number written in `base` and skips following whitespace.
// Throws Error ("missing number") when the cursor is not on a valid digit and
// Error ("number is too big") when the value does not fit into uint32.
uint32 CParser::ReadNumber(int base) throw(Error)
{
	LTIMING("ReadNumber");
	int q = ctoi(*term);
	if(q < 0 || q >= base)
		ThrowError("missing number");
	// From here on base >= 1, because a digit 0 <= q < base exists.
	const uint32 maxval = ~(uint32)0;
	uint32 n = 0;
	for(;;) {
		int c = ctoi(*term);
		if(c < 0 || c >= base)
			break;
		// Check before multiplying: base * n + c fits exactly when
		// n <= (maxval - c) / base. Comparing the wrapped result with the old
		// value is not sufficient, since a wrapped product can still be larger.
		if(n > (maxval - c) / base)
			ThrowError("number is too big");
		n = base * n + c;
		term++;
	}
	DoSpaces();
	return n;
}
|
283
|
|
284
|
// Reads an unsigned 64-bit number written in `base` and skips following
// whitespace. Throws Error ("missing number") when the cursor is not on a
// valid digit and Error ("number is too big") when the value does not fit
// into uint64.
uint64 CParser::ReadNumber64(int base) throw(Error)
{
	LTIMING("ReadNumber64");
	int q = ctoi(*term);
	if(q < 0 || q >= base)
		ThrowError("missing number");
	// From here on base >= 1, because a digit 0 <= q < base exists.
	const uint64 maxval = ~(uint64)0;
	uint64 n = 0;
	for(;;) {
		int c = ctoi(*term);
		if(c < 0 || c >= base)
			break;
		// Check before multiplying: base * n + c fits exactly when
		// n <= (maxval - c) / base. Comparing the wrapped result with the old
		// value is not sufficient, since a wrapped product can still be larger.
		if(n > (maxval - c) / base)
			ThrowError("number is too big");
		n = base * n + c;
		term++;
	}
	DoSpaces();
	return n;
}
|
304
|
|
305
|
double CParser::ReadDouble() throw(Error)
|
306
|
{
|
307
|
LTIMING("ReadDouble");
|
308
|
int sign = Sgn();
|
309
|
if(!IsDigit(*term) && *term != '.')
|
310
|
ThrowError("missing number");
|
311
|
double n = 0;
|
312
|
while(IsDigit(*term))
|
313
|
n = 10 * n + *term++ - '0';
|
314
|
if(Char('.')) {
|
315
|
double q = 1;
|
316
|
while(IsDigit(*term)) {
|
317
|
q = q / 10;
|
318
|
n += q * (*term++ - '0');
|
319
|
}
|
320
|
}
|
321
|
if(Char('e') || Char('E'))
|
322
|
n *= pow(10.0, ReadInt());
|
323
|
DoSpaces();
|
324
|
n = sign * n;
|
325
|
if(!IsFin(n))
|
326
|
ThrowError("number is too big");
|
327
|
return n;
|
328
|
}
|
329
|
|
330
|
String CParser::ReadOneString(int delim, bool chkend) throw(Error) {
|
331
|
if(!IsChar(delim))
|
332
|
ThrowError("missing string");
|
333
|
term++;
|
334
|
StringBuffer result;
|
335
|
for(;;) {
|
336
|
if(*term == delim) {
|
337
|
term++;
|
338
|
DoSpaces();
|
339
|
return result;
|
340
|
}
|
341
|
else
|
342
|
if(*term == '\\') {
|
343
|
switch(*++term) {
|
344
|
case 'a': result.Cat('\a'); term++; break;
|
345
|
case 'b': result.Cat('\b'); term++; break;
|
346
|
case 't': result.Cat('\t'); term++; break;
|
347
|
case 'v': result.Cat('\v'); term++; break;
|
348
|
case 'n': result.Cat('\n'); term++; break;
|
349
|
case 'r': result.Cat('\r'); term++; break;
|
350
|
case 'f': result.Cat('\f'); term++; break;
|
351
|
case 'x': {
|
352
|
int hex = 0;
|
353
|
if(IsXDigit(*++term)) {
|
354
|
hex = ctoi(*term);
|
355
|
if(IsXDigit(*++term)) {
|
356
|
hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
|
357
|
term++;
|
358
|
}
|
359
|
}
|
360
|
result.Cat(hex);
|
361
|
break;
|
362
|
}
|
363
|
case 'u':
|
364
|
if(uescape) {
|
365
|
int hex = 0;
|
366
|
if(IsXDigit(*++term)) {
|
367
|
hex = ctoi(*term);
|
368
|
if(IsXDigit(*++term)) {
|
369
|
hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
|
370
|
if(IsXDigit(*++term)) {
|
371
|
hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
|
372
|
if(IsXDigit(*++term)) {
|
373
|
hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
|
374
|
term++;
|
375
|
}
|
376
|
}
|
377
|
}
|
378
|
}
|
379
|
result.Cat(WString(hex, 1).ToString());
|
380
|
}
|
381
|
else
|
382
|
result.Cat(*term++);
|
383
|
break;
|
384
|
default:
|
385
|
if(*term >= '0' && *term <= '7') {
|
386
|
int oct = *term++ - '0';
|
387
|
if(*term >= '0' && *term <= '7')
|
388
|
oct = 8 * oct + *term++ - '0';
|
389
|
if(*term >= '0' && *term <= '7')
|
390
|
oct = 8 * oct + *term++ - '0';
|
391
|
result.Cat(oct);
|
392
|
}
|
393
|
else
|
394
|
result.Cat(*term++);
|
395
|
break;
|
396
|
}
|
397
|
}
|
398
|
else {
|
399
|
if((byte)*term < ' ' && *term != '\t') {
|
400
|
if(chkend) {
|
401
|
ThrowError("Unterminated string");
|
402
|
return result;
|
403
|
}
|
404
|
if(*term == '\0')
|
405
|
return result;
|
406
|
}
|
407
|
result.Cat(*term++);
|
408
|
}
|
409
|
}
|
410
|
DoSpaces();
|
411
|
return result;
|
412
|
}
|
413
|
|
414
|
// Reads one double-quoted string literal; see ReadOneString(int, bool) for
// escape handling and the meaning of `chkend`.
String CParser::ReadOneString(bool chkend) throw(Error)
{
	// Delegate with the double-quote character as delimiter.
	return ReadOneString('\"', chkend);
}
|
418
|
|
419
|
// Reads one or more adjacent string literals enclosed in `delim` and returns
// their concatenation. `chkend` is forwarded to ReadOneString.
String CParser::ReadString(int delim, bool chkend) throw(Error) {
	LTIMING("ReadString");
	String result;
	for(;;) {
		result.Cat(ReadOneString(delim, chkend));
		// Keep going while another literal follows directly.
		if(!IsChar(delim))
			return result;
	}
}
|
427
|
|
428
|
// Reads one or more adjacent double-quoted string literals and returns their
// concatenation; see ReadString(int, bool).
String CParser::ReadString(bool chkend) throw(Error)
{
	// Delegate with the double-quote character as delimiter.
	return ReadString('\"', chkend);
}
|
432
|
|
433
|
char CParser::GetChar()
|
434
|
{
|
435
|
char c = *term++;
|
436
|
if(c == '\n')
|
437
|
line++;
|
438
|
return c;
|
439
|
}
|
440
|
|
441
|
void CParser::SkipTerm()
|
442
|
{
|
443
|
LTIMING("SkipTerm");
|
444
|
if(IsId())
|
445
|
while(iscid(*term))
|
446
|
term++;
|
447
|
else
|
448
|
if(IsNumber())
|
449
|
while(IsDigit(*term))
|
450
|
term++;
|
451
|
else
|
452
|
if(IsString())
|
453
|
ReadString();
|
454
|
else
|
455
|
if(IsChar('\''))
|
456
|
ReadString('\'', false);
|
457
|
else
|
458
|
if(*term) {
|
459
|
if(*term == '\n')
|
460
|
line++;
|
461
|
term++;
|
462
|
}
|
463
|
DoSpaces();
|
464
|
}
|
465
|
|
466
|
// Captures the current parsing state (cursor, whitespace start, line start,
// line number and file name) so it can later be restored with SetPos.
CParser::Pos CParser::GetPos() const
{
	Pos snapshot;
	snapshot.ptr = term;
	snapshot.wspc = wspc;
	snapshot.lineptr = lineptr;
	snapshot.line = line;
	snapshot.fn = fn;
	return snapshot;
}
|
476
|
|
477
|
int CParser::Pos::GetColumn(int tabsize) const
|
478
|
{
|
479
|
int pos = 1;
|
480
|
for(const char *s = lineptr; s < ptr; s++) {
|
481
|
if(*s == CParser::LINEINFO_ESC) {
|
482
|
s++;
|
483
|
while(s < ptr && *s != CParser::LINEINFO_ESC)
|
484
|
if(*s++ == '\3')
|
485
|
pos = atoi(s);
|
486
|
}
|
487
|
else
|
488
|
if(*s == '\t')
|
489
|
pos = (pos + tabsize - 1) / tabsize * tabsize + 1;
|
490
|
else
|
491
|
pos++;
|
492
|
}
|
493
|
return pos;
|
494
|
}
|
495
|
|
496
|
int CParser::GetColumn(int tabsize) const
|
497
|
{
|
498
|
return GetPos().GetColumn(tabsize);
|
499
|
}
|
500
|
|
501
|
// Restores a parsing state previously captured with GetPos. When whitespace
// skipping is enabled, whitespace at the restored cursor is skipped again.
void CParser::SetPos(const CParser::Pos& p)
{
	LLOG("SetPos " << p.fn << ":" << p.line);
	term = p.ptr;
	wspc = p.wspc;
	lineptr = p.lineptr;
	line = p.line;
	fn = p.fn;
	if(skipspaces)
		DoSpaces();
}
|
512
|
|
513
|
// Starts parsing the zero-terminated text at `ptr` as line 1 with default
// options (skip whitespace and comments, no nested comments, no \u escapes)
// and skips leading whitespace.
CParser::CParser(const char *ptr)
: term(ptr), wspc(ptr), lineptr(ptr)
{
	line = 1;
	skipspaces = true;
	skipcomments = true;
	nestcomments = false;
	uescape = false;
	Spaces();
}
|
522
|
|
523
|
// Starts parsing the zero-terminated text at `ptr`, reporting positions
// relative to file name `fn` and starting line `line`. Uses default options
// (skip whitespace and comments, no nested comments, no \u escapes) and skips
// leading whitespace.
CParser::CParser(const char *ptr, const char *fn, int line)
: term(ptr), wspc(ptr), lineptr(ptr), line(line), fn(fn)
{
	skipspaces = true;
	skipcomments = true;
	nestcomments = false;
	uescape = false;
	Spaces();
}
|
531
|
|
532
|
// Creates a parser with no input attached (all text pointers NULL, line 0)
// and default options. Attach input with Set() before parsing.
CParser::CParser()
{
	term = NULL;
	lineptr = NULL;
	wspc = NULL;
	line = 0;
	skipspaces = true;
	skipcomments = true;
	nestcomments = false;
	uescape = false;
}
|
540
|
|
541
|
void CParser::Set(const char *_ptr, const char *_fn, int _line)
|
542
|
{
|
543
|
term = lineptr = wspc = _ptr;
|
544
|
fn = _fn;
|
545
|
line = _line;
|
546
|
if(skipspaces)
|
547
|
Spaces();
|
548
|
LLOG("Set " << fn << ":" << line);
|
549
|
}
|
550
|
|
551
|
void CParser::Set(const char *_ptr)
|
552
|
{
|
553
|
Set(_ptr, "", 1);
|
554
|
}
|
555
|
|
556
|
// Breaks a C string literal being built in `t` across source lines: closes
// the current literal, emits CR LF and the optional line prefix, and opens a
// new literal. `pl` receives the length of `t` at the start of the new
// literal's content, used by the caller to measure the line length.
inline void NextCStringLine(String& t, const char *linepfx, int& pl)
{
	t << "\"\r\n";
	if(linepfx)
		t << linepfx;
	t << "\"";
	pl = t.GetLength();
}
|
561
|
|
562
|
// Returns the uppercase hexadecimal digit character for the low four bits of `c`.
inline int HexDigit(int c)
{
	static const char digits[] = "0123456789ABCDEF";
	return digits[c & 0xF];
}
|
566
|
|
567
|
String AsCString(const char *s, const char *lim, int linemax, const char *linepfx, dword flags)
|
568
|
{
|
569
|
String t;
|
570
|
t.Cat('\"');
|
571
|
int pl = 0;
|
572
|
bool wasspace = false;
|
573
|
while(s < lim) {
|
574
|
if(t.GetLength() - pl > linemax && (!(flags & ASCSTRING_SMART) || wasspace))
|
575
|
NextCStringLine(t, linepfx, pl);
|
576
|
wasspace = false;
|
577
|
switch(*s) {
|
578
|
case '\a': t.Cat("\\a"); break;
|
579
|
case '\b': t.Cat("\\b"); break;
|
580
|
case '\f': t.Cat("\\f"); break;
|
581
|
case '\t': t.Cat("\\t"); break;
|
582
|
case '\v': t.Cat("\\v"); break;
|
583
|
case '\r': t.Cat("\\r"); break;
|
584
|
case '\"': t.Cat("\\\""); break;
|
585
|
case '\\': t.Cat("\\\\"); break;
|
586
|
case '\n': t.Cat("\\n"); wasspace = true; break;
|
587
|
default:
|
588
|
if(byte(*s) < 32 || (byte)*s >= 0x7f && (flags & ASCSTRING_OCTALHI) || (byte)*s == 0xff || (byte)*s == 0x7f) {
|
589
|
if(flags & ASCSTRING_JSON) {
|
590
|
char h[6];
|
591
|
int q = (byte)*s;
|
592
|
h[0] = '\\';
|
593
|
h[1] = 'u';
|
594
|
h[2] = '0';
|
595
|
h[3] = '0';
|
596
|
h[4] = HexDigit(q >> 4);
|
597
|
h[5] = HexDigit(q);
|
598
|
t.Cat(h, 6);
|
599
|
}
|
600
|
else {
|
601
|
char h[4];
|
602
|
int q = (byte)*s;
|
603
|
h[0] = '\\';
|
604
|
h[1] = (3 & (q >> 6)) + '0';
|
605
|
h[2] = (7 & (q >> 3)) + '0';
|
606
|
h[3] = (7 & q) + '0';
|
607
|
t.Cat(h, 4);
|
608
|
}
|
609
|
}
|
610
|
else {
|
611
|
t.Cat(*s);
|
612
|
if(*s == ' ')
|
613
|
wasspace = true;
|
614
|
}
|
615
|
break;
|
616
|
}
|
617
|
s++;
|
618
|
}
|
619
|
t.Cat('\"');
|
620
|
return t;
|
621
|
}
|
622
|
|
623
|
// Converts the zero-terminated string `s` into an escaped string literal;
// see AsCString(s, lim, ...) for the meaning of the remaining parameters.
String AsCString(const char *s, int linemax, const char *linepfx, dword flags)
{
	const char *lim = s + strlen(s);
	return AsCString(s, lim, linemax, linepfx, flags);
}
|
627
|
|
628
|
// Converts the whole content of `s` (which may contain zero bytes, as the end
// is taken from s.End()) into an escaped string literal; see
// AsCString(s, lim, ...) for the meaning of the remaining parameters.
String AsCString(const String& s, int linemax, const char *linepfx, dword flags)
{
	const char *begin = s;
	const char *lim = s.End();
	return AsCString(begin, lim, linemax, linepfx, flags);
}
|
632
|
|
633
|
}
|