// parser.cpp
//
// cbpporter, 01/03/2017 12:33 PM
//
// Download (12.2 KB)

 
1
#include "Core.h"
2

    
3
namespace Upp {
4

    
5
// File-local logging hook. As written it expands to nothing, so its argument
// is never evaluated; the trailing comment shows the DLOG form it can be
// switched to.
#define LLOG(x)    // DLOG(x)
6
// File-local timing hook. As written it expands to nothing; the trailing
// comment shows the RTIMING form it can be switched to.
#define LTIMING(x) // RTIMING(x)
7

    
8
void CParser::ThrowError(const char *s) {
9
        LLOG("CParser::Error: " << s);
10
        LLOG(~String(term, min((int)strlen((const char *)term), 512)));
11
        Pos pos = GetPos();
12
        Error err(fn + Format("(%d,%d): ", line, pos.GetColumn()) + s);
13
//        err.term = (const char *)term;
14
        throw err;
15
}
16

    
17
// Enables or disables comment skipping. The read position is moved back to
// wspc and the skip is redone so the new setting applies immediately.
// Returns *this to allow call chaining.
CParser& CParser::SkipComments(bool b)
{
	term = wspc;
	skipcomments = b;
	Spaces0();      // rescan from wspc with the updated flag
	return *this;
}
24

    
25
// Enables or disables nesting of /* */ comments. The read position is moved
// back to wspc and the skip is redone so the new setting applies immediately.
// Returns *this to allow call chaining.
CParser& CParser::NestComments(bool b)
{
	term = wspc;
	nestcomments = b;
	Spaces0();      // rescan from wspc with the updated flag
	return *this;
}
32

    
33
bool CParser::Spaces0() {
34
        LTIMING("Spaces");
35
        if((byte)*term > ' ' &&
36
           !(term[0] == '/' && term[1] == '/') &&
37
           !(term[0] == '/' && term[1] == '*'))
38
                return false;
39
        for(;;) {
40
                if(*term == LINEINFO_ESC) {
41
                        term++;
42
                        fn.Clear();
43
                        while(*term) {
44
                                if(*term == LINEINFO_ESC) {
45
                                        ++term;
46
                                        break;
47
                                }
48
                                if(*term == '\3') {
49
                                        line = atoi(++term);
50
                                        while(*term) {
51
                                                if(*term == LINEINFO_ESC) {
52
                                                        ++term;
53
                                                        break;
54
                                                }
55
                                                term++;
56
                                        }
57
                                        break;
58
                                }
59
                                fn.Cat(*term++);
60
                        }
61
                        continue;
62
                }
63
                else
64
                if(term[0] == '/' && term[1] == '/' && skipcomments) {
65
                        term += 2;
66
                        while(*term && *term != '\n')
67
                                term++;
68
                }
69
                else
70
                if(term[0] == '/' && term[1] == '*' && skipcomments) {
71
                        if(nestcomments == false) {
72
                                term += 2;
73
                                while(*term) {
74
                                        if(term[0] == '*' && term[1] == '/') {
75
                                                term += 2;
76
                                                break;
77
                                        }
78
                                        if(*term++ == '\n') line++;
79
                                }
80
                        }
81
                        else {
82
                                int count = 1;
83
                                term += 2;
84
                                while(*term) {
85
                                        if(term[0] == '*' && term[1] == '/') {
86
                                                term += 2;
87
                                                count--;
88
                                                if (count == 0)
89
                                                        break;
90
                                        }
91
                                        else if(term[0] == '/' && term[1] == '*')
92
                                                count++;
93
                                        
94
                                        if(*term++ == '\n') line++;
95
                                }
96
                        }
97
                }
98
                if(!*term) break;
99
                if((byte)*term > ' ') break;
100
                if(*term == '\n') {
101
                        line++;
102
                        lineptr = term + 1;
103
                }
104
                term++;
105
        }
106
        return true;
107
}
108

    
109
// Builds a line-info record: LINEINFO_ESC, file, \3, line, \3, column,
// LINEINFO_ESC.
String CParser::LineInfoComment(const String& file, int line, int column)
{
	String record;
	record << (char)LINEINFO_ESC << file;
	record << '\3' << line;
	record << '\3' << column;
	record << (char)LINEINFO_ESC;
	return record;
}
114

    
115
// Returns the line-info record for the current file name, line and column
// (the column is computed with the given tab size).
String CParser::GetLineInfoComment(int tabsize) const
{
	const int column = GetColumn(tabsize);
	return LineInfoComment(GetFileName(), GetLine(), column);
}
119

    
120
const char *CParser::IsId0(const char *s) const {
121
        const char *t = term + 1;
122
        s++;
123
        while(*s) {
124
                if(*t != *s)
125
                        return NULL;
126
                t++;
127
                s++;
128
        }
129
        return IsAlNum(*t) || *t == '_' ? NULL : t;
130
}
131

    
132
bool CParser::Id0(const char *s) {
133
        LTIMING("Id");
134
        const char *t = IsId0(s);
135
        if(!t)
136
                return false;
137
        term = t;
138
        DoSpaces();
139
        return true;
140
}
141

    
142
void CParser::PassId(const char *s) throw(Error) {
143
        LTIMING("PassId");
144
        if(!Id(s))
145
                ThrowError(String("missing '") + s + "\'");
146
}
147

    
148
void CParser::PassChar(char c) throw(Error) {
149
        LTIMING("PassChar");
150
        if(!Char(c))
151
                ThrowError(String("missing '") + c + "\'");
152
}
153

    
154
void CParser::PassChar2(char c1, char c2) throw(Error) {
155
        LTIMING("PassChar2");
156
        if(!Char2(c1, c2))
157
                ThrowError(String("missing '") + c1 + c2 + "\'");
158
}
159

    
160
void CParser::PassChar3(char c1, char c2, char c3) throw(Error) {
161
        LTIMING("PassChar3");
162
        if(!Char3(c1, c2, c3))
163
                ThrowError(String("missing '") + c1 + c2 + c3 + "\'");
164
}
165

    
166
// Reads an identifier: raises "missing id" when IsId() is false, otherwise
// consumes the run of iscid() characters, calls DoSpaces() and returns the
// consumed text.
String CParser::ReadId() throw(Error) {
	LTIMING("ReadId");
	if(!IsId())
		ThrowError("missing id");
	const char *b = term;
	const char *p = b;
	while(iscid(*p))
		p++;
	term = p;
	DoSpaces();
	return String(b, (int)(p - b));
}
179

    
180
// Reads an identifier that may contain '<', '>' and ':' in addition to
// alphanumerics and '_'. While at least one '<' is open (depth > 0),
// ',' and ' ' are accepted as well. Raises "missing id" when IsId() is
// false. Calls DoSpaces() after the identifier.
String CParser::ReadIdt() throw(Error) {
	if(!IsId())
		ThrowError("missing id");
	StringBuffer out;
	int depth = 0;
	for(;;) {
		const char c = *term;
		bool part = IsAlNum(c) || c == '_' || c == '<' || c == '>' || c == ':';
		if(!part)
			part = (c == ',' || c == ' ') && depth > 0;
		if(!part)
			break;
		if(c == '<') depth++;
		if(c == '>') depth--;
		out.Cat(c);
		term++;
	}
	DoSpaces();
	return out;
}
194

    
195
bool CParser::IsInt() const {
196
        LTIMING("IsInt");
197
        const char *t = term;
198
        if(*t == '-' || *t == '+') {
199
                t++;
200
                while(*t <= ' ')
201
                        t++;
202
        }
203
        return IsDigit(*t);
204
}
205

    
206
int  CParser::Sgn()
207
{
208
        int sign = 1;
209
        if(*term == '-') {
210
                sign = -1;
211
                term++;
212
        }
213
        else
214
        if(*term == '+')
215
                term++;
216
        Spaces();
217
        return sign;
218
}
219

    
220
int  CParser::ReadInt() throw(Error) {
221
        LTIMING("ReadInt");
222
        int sign = Sgn();
223
        uint32 n = ReadNumber(10);
224
        if(sign > 0 ? n > INT_MAX : n > (uint32)INT_MAX + 1)
225
                ThrowError("number is too big");
226
        return sign * (int)n;
227
}
228

    
229
int CParser::ReadInt(int min, int max) throw(Error)
230
{
231
        int n = ReadInt();
232
        if(n < min || n > max)
233
                ThrowError("number is out of range");
234
        return n;
235
}
236

    
237
// Reads an optionally signed decimal 64-bit integer.
// Raises "number is too big" when the magnitude exceeds INT64_MAX for a
// positive value or INT64_MAX + 1 for a negative one.
int64 CParser::ReadInt64() throw(Error)
{
	LTIMING("ReadInt64");
	const int sign = Sgn();
	const uint64 magnitude = ReadNumber64(10);
	const uint64 limit = sign > 0 ? (uint64)INT64_MAX : (uint64)INT64_MAX + 1;
	if(magnitude > limit)
		ThrowError("number is too big");
	return sign * magnitude;
}
246

    
247
// Reads a 64-bit integer with ReadInt64() and raises
// "number is out of range" unless it lies within [min, max].
int64 CParser::ReadInt64(int64 min, int64 max) throw(Error)
{
	const int64 value = ReadInt64();
	if(value >= min && value <= max)
		return value;
	ThrowError("number is out of range");
	return value;
}
254

    
255
bool CParser::IsNumber(int base) const
256
{
257
        if(IsDigit(*term))
258
                return true;
259
        int q = ToUpper(*term) - 'A';
260
        return q >= 0 && q < base - 10;
261
}
262

    
263
// Reads an unsigned number in the given base and calls DoSpaces().
// Raises "missing number" when the current character is not a digit of
// that base and "number is too big" when the value does not fit into
// 32 bits.
uint32  CParser::ReadNumber(int base) throw(Error)
{
	LTIMING("ReadNumber");
	int q = ctoi(*term);
	if(q < 0 || q >= base)
		ThrowError("missing number");
	// From here on base >= 1, otherwise the check above would have thrown.
	const uint32 limit = ~(uint32)0;
	uint32 n = 0;
	for(;;) {
		int c = ctoi(*term);
		if(c < 0 || c >= base)
			break;
		// Test before multiplying: comparing the wrapped result with the old
		// value misses overflows that wrap around to a larger number.
		if(n > (limit - (uint32)c) / (uint32)base)
			ThrowError("number is too big");
		n = (uint32)base * n + (uint32)c;
		term++;
	}
	DoSpaces();
	return n;
}
283

    
284
// Reads an unsigned number in the given base and calls DoSpaces().
// Raises "missing number" when the current character is not a digit of
// that base and "number is too big" when the value does not fit into
// 64 bits.
uint64  CParser::ReadNumber64(int base) throw(Error)
{
	LTIMING("ReadNumber");
	int q = ctoi(*term);
	if(q < 0 || q >= base)
		ThrowError("missing number");
	// From here on base >= 1, otherwise the check above would have thrown.
	const uint64 limit = ~(uint64)0;
	uint64 n = 0;
	for(;;) {
		int c = ctoi(*term);
		if(c < 0 || c >= base)
			break;
		// Test before multiplying: comparing the wrapped result with the old
		// value misses overflows that wrap around to a larger number.
		if(n > (limit - (uint64)c) / (uint64)base)
			ThrowError("number is too big");
		n = (uint64)base * n + (uint64)c;
		term++;
	}
	DoSpaces();
	return n;
}
304

    
305
double CParser::ReadDouble() throw(Error)
306
{
307
        LTIMING("ReadDouble");
308
        int sign = Sgn();
309
        if(!IsDigit(*term) && *term != '.')
310
                ThrowError("missing number");
311
        double n = 0;
312
        while(IsDigit(*term))
313
                n = 10 * n + *term++ - '0';
314
        if(Char('.')) {
315
                double q = 1;
316
                while(IsDigit(*term)) {
317
                        q = q / 10;
318
                        n += q * (*term++ - '0');
319
                }
320
        }
321
        if(Char('e') || Char('E'))
322
                n *= pow(10.0, ReadInt());
323
        DoSpaces();
324
        n = sign * n;
325
        if(!IsFin(n))
326
                ThrowError("number is too big");
327
        return n;
328
}
329

    
330
String CParser::ReadOneString(int delim, bool chkend) throw(Error) {
331
        if(!IsChar(delim))
332
                ThrowError("missing string");
333
        term++;
334
        StringBuffer result;
335
        for(;;) {
336
                if(*term == delim) {
337
                        term++;
338
                        DoSpaces();
339
                        return result;
340
                }
341
                else
342
                if(*term == '\\') {
343
                        switch(*++term) {
344
                        case 'a': result.Cat('\a'); term++; break;
345
                        case 'b': result.Cat('\b'); term++; break;
346
                        case 't': result.Cat('\t'); term++; break;
347
                        case 'v': result.Cat('\v'); term++; break;
348
                        case 'n': result.Cat('\n'); term++; break;
349
                        case 'r': result.Cat('\r'); term++; break;
350
                        case 'f': result.Cat('\f'); term++; break;
351
                        case 'x': {
352
                                int hex = 0;
353
                                if(IsXDigit(*++term)) {
354
                                        hex = ctoi(*term);
355
                                        if(IsXDigit(*++term)) {
356
                                                hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
357
                                                term++;
358
                                        }
359
                                }
360
                                result.Cat(hex);
361
                                break;
362
                        }
363
                        case 'u':
364
                                if(uescape) {
365
                                        int hex = 0;
366
                                        if(IsXDigit(*++term)) {
367
                                                hex = ctoi(*term);
368
                                                if(IsXDigit(*++term)) {
369
                                                        hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
370
                                                        if(IsXDigit(*++term)) {
371
                                                                hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
372
                                                                if(IsXDigit(*++term)) {
373
                                                                        hex = 16 * hex + (*term >= 'A' ? ToUpper(*term) - 'A' + 10 : *term - '0');
374
                                                                        term++;
375
                                                                }
376
                                                        }
377
                                                }
378
                                        }
379
                                        result.Cat(WString(hex, 1).ToString());
380
                                }
381
                                else
382
                                        result.Cat(*term++);
383
                                break;
384
                        default:
385
                                if(*term >= '0' && *term <= '7') {
386
                                        int oct = *term++ - '0';
387
                                        if(*term >= '0' && *term <= '7')
388
                                                oct = 8 * oct + *term++ - '0';
389
                                        if(*term >= '0' && *term <= '7')
390
                                                oct = 8 * oct + *term++ - '0';
391
                                        result.Cat(oct);
392
                                }
393
                                else
394
                                        result.Cat(*term++);
395
                                break;
396
                        }
397
                }
398
                else {
399
                        if((byte)*term < ' ' && *term != '\t') {
400
                                if(chkend) {
401
                                        ThrowError("Unterminated string");
402
                                        return result;
403
                                }
404
                                if(*term == '\0')
405
                                        return result;
406
                        }
407
                        result.Cat(*term++);
408
                }
409
        }
410
        DoSpaces();
411
        return result;
412
}
413

    
414
// Convenience overload: reads one string literal delimited by double quotes.
String CParser::ReadOneString(bool chkend) throw(Error)
{
	const int quote = '\"';
	return ReadOneString(quote, chkend);
}
418

    
419
// Reads one string literal and, as long as another literal with the same
// delimiter follows immediately, appends it; returns the concatenation.
String CParser::ReadString(int delim, bool chkend) throw(Error) {
	LTIMING("ReadString");
	String joined = ReadOneString(delim, chkend);
	while(IsChar(delim))
		joined.Cat(ReadOneString(delim, chkend));
	return joined;
}
427

    
428
// Convenience overload: reads adjacent string literals delimited by
// double quotes.
String CParser::ReadString(bool chkend) throw(Error)
{
	const int quote = '\"';
	return ReadString(quote, chkend);
}
432

    
433
char CParser::GetChar()
434
{
435
        char c = *term++;
436
        if(c == '\n')
437
                line++;
438
        return c;
439
}
440

    
441
void CParser::SkipTerm()
442
{
443
        LTIMING("SkipTerm");
444
        if(IsId())
445
                while(iscid(*term))
446
                        term++;
447
        else
448
        if(IsNumber())
449
                while(IsDigit(*term))
450
                        term++;
451
        else
452
        if(IsString())
453
                ReadString();
454
        else
455
        if(IsChar('\''))
456
                ReadString('\'', false);
457
        else                
458
        if(*term) {
459
                if(*term == '\n')
460
                        line++;
461
                term++;
462
        }
463
        DoSpaces();
464
}
465

    
466
// Captures the current position (read pointer, wspc, start of line,
// line number and file name) in a Pos value.
CParser::Pos CParser::GetPos() const
{
	Pos snapshot;
	snapshot.ptr = term;
	snapshot.wspc = wspc;
	snapshot.lineptr = lineptr;
	snapshot.line = line;
	snapshot.fn = fn;
	return snapshot;
}
476

    
477
int CParser::Pos::GetColumn(int tabsize) const
478
{
479
        int pos = 1;
480
        for(const char *s = lineptr; s < ptr; s++) {
481
                if(*s == CParser::LINEINFO_ESC) {
482
                        s++;
483
                        while(s < ptr && *s != CParser::LINEINFO_ESC)
484
                                if(*s++ == '\3')
485
                                        pos = atoi(s);
486
                }
487
                else
488
                if(*s == '\t')
489
                        pos = (pos + tabsize - 1) / tabsize * tabsize + 1;
490
                else
491
                        pos++;
492
        }
493
        return pos;
494
}
495

    
496
int CParser::GetColumn(int tabsize) const
497
{
498
        return GetPos().GetColumn(tabsize);
499
}
500

    
501
// Restores a position captured in a Pos value; when skipspaces is set,
// DoSpaces() is called afterwards.
void CParser::SetPos(const CParser::Pos& p)
{
	LLOG("SetPos " << p.fn << ":" << p.line);
	term = p.ptr;
	wspc = p.wspc;
	lineptr = p.lineptr;
	line = p.line;
	fn = p.fn;
	if(!skipspaces)
		return;
	DoSpaces();
}
512

    
513
// Starts parsing at ptr on line 1 with whitespace and comment skipping
// enabled, comment nesting and \u escapes disabled, and skips any leading
// whitespace.
CParser::CParser(const char *ptr)
: term(ptr), wspc(ptr), lineptr(ptr)
{
	uescape = false;
	nestcomments = false;
	skipcomments = true;
	skipspaces = true;
	line = 1;
	Spaces();
}
522

    
523
// Starts parsing at ptr with the given file name and starting line number;
// whitespace and comment skipping are enabled, comment nesting and \u
// escapes disabled, and any leading whitespace is skipped.
CParser::CParser(const char *ptr, const char *fn, int line)
: term(ptr), wspc(ptr), lineptr(ptr), line(line), fn(fn)
{
	uescape = false;
	nestcomments = false;
	skipcomments = true;
	skipspaces = true;
	Spaces();
}
531

    
532
// Creates a parser with no input attached: all position pointers are NULL
// and the line number is 0. Whitespace and comment skipping are enabled,
// comment nesting and \u escapes disabled.
CParser::CParser()
{
	term = NULL;
	lineptr = NULL;
	wspc = NULL;
	line = 0;
	uescape = false;
	nestcomments = false;
	skipcomments = true;
	skipspaces = true;
}
540

    
541
void CParser::Set(const char *_ptr, const char *_fn, int _line)
542
{
543
        term = lineptr = wspc = _ptr;
544
        fn = _fn;
545
        line = _line;
546
        if(skipspaces)
547
                Spaces();
548
        LLOG("Set " << fn << ":" << line);
549
}
550

    
551
void CParser::Set(const char *_ptr)
552
{
553
        Set(_ptr, "", 1);
554
}
555

    
556
// Ends the current literal line in t (closing quote plus CR LF), appends
// linepfx when given and opens a new quote. pl receives the length of t
// after the new opening quote.
inline void NextCStringLine(String& t, const char *linepfx, int& pl)
{
	t << "\"\r\n";
	if(linepfx)
		t << linepfx;
	t << "\"";
	pl = t.GetLength();
}
561

    
562
// Returns the upper-case hexadecimal digit character for the low four
// bits of c.
inline int HexDigit(int c)
{
	const int nibble = c & 15;
	if(nibble < 10)
		return '0' + nibble;
	return 'A' + (nibble - 10);
}
566

    
567
String AsCString(const char *s, const char *lim, int linemax, const char *linepfx, dword flags)
568
{
569
        String t;
570
        t.Cat('\"');
571
        int pl = 0;
572
        bool wasspace = false;
573
        while(s < lim) {
574
                if(t.GetLength() - pl > linemax && (!(flags & ASCSTRING_SMART) || wasspace))
575
                        NextCStringLine(t, linepfx, pl);
576
                wasspace = false;
577
                switch(*s) {
578
                case '\a': t.Cat("\\a"); break;
579
                case '\b': t.Cat("\\b"); break;
580
                case '\f': t.Cat("\\f"); break;
581
                case '\t': t.Cat("\\t"); break;
582
                case '\v': t.Cat("\\v"); break;
583
                case '\r': t.Cat("\\r"); break;
584
                case '\"': t.Cat("\\\""); break;
585
                case '\\': t.Cat("\\\\"); break;
586
                case '\n': t.Cat("\\n"); wasspace = true; break;
587
                default:
588
                        if(byte(*s) < 32 || (byte)*s >= 0x7f && (flags & ASCSTRING_OCTALHI) || (byte)*s == 0xff || (byte)*s == 0x7f) {
589
                                if(flags & ASCSTRING_JSON) {
590
                                        char h[6];
591
                                        int q = (byte)*s;
592
                                        h[0] = '\\';
593
                                        h[1] = 'u';
594
                                        h[2] = '0';
595
                                        h[3] = '0';
596
                                        h[4] = HexDigit(q >> 4);
597
                                        h[5] = HexDigit(q);
598
                                        t.Cat(h, 6);
599
                                }
600
                                else {
601
                                        char h[4];
602
                                        int q = (byte)*s;
603
                                        h[0] = '\\';
604
                                        h[1] = (3 & (q >> 6)) + '0';
605
                                        h[2] = (7 & (q >> 3)) + '0';
606
                                        h[3] = (7 & q) + '0';
607
                                        t.Cat(h, 4);
608
                                }
609
                        }
610
                        else {
611
                                t.Cat(*s);
612
                                if(*s == ' ')
613
                                        wasspace = true;
614
                        }
615
                        break;
616
                }
617
                s++;
618
        }
619
        t.Cat('\"');
620
        return t;
621
}
622

    
623
// Overload for a zero-terminated string: converts everything up to the
// terminator.
String AsCString(const char *s, int linemax, const char *linepfx, dword flags)
{
	const char *end = s + strlen(s);
	return AsCString(s, end, linemax, linepfx, flags);
}
627

    
628
// Overload for a String: converts its whole content, from its start to
// s.End().
String AsCString(const String& s, int linemax, const char *linepfx, dword flags)
{
	const char *begin = s;
	const char *end = s.End();
	return AsCString(begin, end, linemax, linepfx, flags);
}
632

    
633
}