Lexer.cpp 20.4 KB
Newer Older
con's avatar
con committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "Lexer.h"
#include "Control.h"
#include "TranslationUnit.h"
24
#include "Literals.h"
25
26
27

#include "cppassert.h"

con's avatar
con committed
28
29
#include <cctype>

Roberto Raggi's avatar
Roberto Raggi committed
30
using namespace CPlusPlus;
con's avatar
con committed
31
32
33

// Builds a lexer that reads the source range exposed by the given
// translation unit and uses that unit's control object.
//
// _flags is zero-initialised and keyword scanning is then switched on
// before the source range is installed.
Lexer::Lexer(TranslationUnit *unit)
    : _translationUnit(unit)
    , _control(unit->control())
    , _state(State_Default)
    , _flags(0)
    , _currentLine(1)
{
    f._scanKeywords = true;

    const char *const sourceBegin = _translationUnit->firstSourceChar();
    const char *const sourceEnd = _translationUnit->lastSourceChar();
    setSource(sourceBegin, sourceEnd);
}

// Builds a stand-alone lexer over the character range
// [firstChar, lastChar). No translation unit and no control object are
// attached.
//
// _flags is zero-initialised and keyword scanning is then switched on
// before the source range is installed.
Lexer::Lexer(const char *firstChar, const char *lastChar)
    : _translationUnit(0)
    , _control(0)
    , _state(State_Default)
    , _flags(0)
    , _currentLine(1)
{
    f._scanKeywords = true;

    setSource(firstChar, lastChar);
}

// Nothing to release: the destructor body is empty.
Lexer::~Lexer()
{
}

// Returns the translation unit passed at construction, or null when the
// lexer was created from a raw character range.
TranslationUnit *Lexer::translationUnit() const
{
    return _translationUnit;
}

// Installs a new source range and rewinds the scanning position.
//
// The cursor and the token start are both placed one character before
// firstChar, and the look-ahead character is primed with '\n'.
void Lexer::setSource(const char *firstChar, const char *lastChar)
{
    _firstChar = firstChar;
    _lastChar = lastChar;

    _tokenStart = _currentChar = _firstChar - 1;
    _yychar = '\n';
}

// Overwrites the look-ahead character: '\n' when enabled is true,
// a plain space otherwise.
void Lexer::setStartWithNewline(bool enabled)
{
    _yychar = enabled ? '\n' : ' ';
}

// Returns the current lexer state (State_Default or one of the
// multi-line comment states used by scan_helper()).
int Lexer::state() const
{
    return _state;
}

// Replaces the current lexer state with the given value.
void Lexer::setState(int state)
{
    _state = state;
}

bool Lexer::isIncremental() const
85
{ return f._isIncremental; }
con's avatar
con committed
86
87

void Lexer::setIncremental(bool isIncremental)
88
{ f._isIncremental = isIncremental; }
con's avatar
con committed
89
90

bool Lexer::scanCommentTokens() const
91
{ return f._scanCommentTokens; }
con's avatar
con committed
92
93

void Lexer::setScanCommentTokens(bool onoff)
94
{ f._scanCommentTokens = onoff; }
con's avatar
con committed
95
96

bool Lexer::scanKeywords() const
97
{ return f._scanKeywords; }
con's avatar
con committed
98
99

void Lexer::setScanKeywords(bool onoff)
100
{ f._scanKeywords = onoff; }
con's avatar
con committed
101
102

void Lexer::setScanAngleStringLiteralTokens(bool onoff)
103
{ f._scanAngleStringLiteralTokens = onoff; }
con's avatar
con committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

// Advances the line counter and, when a translation unit is attached,
// records the current offset (relative to the first source character)
// with it.
void Lexer::pushLineStartOffset()
{
    _currentLine += 1;

    if (! _translationUnit)
        return;

    _translationUnit->pushLineOffset(_currentChar - _firstChar);
}

// Returns the distance from the first source character to the start of
// the current token.
unsigned Lexer::tokenOffset() const
{
    return _tokenStart - _firstChar;
}

// Returns the number of characters between the start of the current
// token and the current scanning position.
unsigned Lexer::tokenLength() const
{
    return _currentChar - _tokenStart;
}

// Returns a pointer to the first character of the current token.
const char *Lexer::tokenBegin() const
{
    return _tokenStart;
}

// Returns the current scanning position, i.e. the end of the current
// token.
const char *Lexer::tokenEnd() const
{
    return _currentChar;
}

// Returns the line counter maintained by pushLineStartOffset().
unsigned Lexer::currentLine() const
{
    return _currentLine;
}

// Scans the next token into tok.
//
// The token is reset first, scan_helper() fills in kind and offset, and
// the length is then derived from the span the helper consumed.
void Lexer::scan(Token *tok)
{
    tok->reset();

    scan_helper(tok);

    tok->f.length = tokenEnd() - tokenBegin();
}

void Lexer::scan_helper(Token *tok)
{
  _Lagain:
    while (_yychar && std::isspace(_yychar)) {
Erik Verbruggen's avatar
Erik Verbruggen committed
139
140
        if (_yychar == '\n') {
            tok->f.joined = false;
141
            tok->f.newline = true;
Erik Verbruggen's avatar
Erik Verbruggen committed
142
        } else {
143
            tok->f.whitespace = true;
Erik Verbruggen's avatar
Erik Verbruggen committed
144
        }
con's avatar
con committed
145
146
147
148
149
150
        yyinp();
    }

    if (! _translationUnit)
        tok->lineno = _currentLine;

151
152
153
    _tokenStart = _currentChar;
    tok->offset = _currentChar - _firstChar;

154
    if (_state == State_MultiLineComment || _state == State_MultiLineDoxyComment) {
155
156
        const int originalState = _state;

con's avatar
con committed
157
        if (! _yychar) {
158
            tok->f.kind = T_EOF_SYMBOL;
con's avatar
con committed
159
160
161
162
163
164
165
166
167
168
            return;
        }

        while (_yychar) {
            if (_yychar != '*')
                yyinp();
            else {
                yyinp();
                if (_yychar == '/') {
                    yyinp();
169
                    _state = State_Default;
con's avatar
con committed
170
171
172
173
174
                    break;
                }
            }
        }

175
        if (! f._scanCommentTokens)
con's avatar
con committed
176
177
            goto _Lagain;

178
        else if (originalState == State_MultiLineComment)
179
            tok->f.kind = T_COMMENT;
180
        else
181
            tok->f.kind = T_DOXY_COMMENT;
con's avatar
con committed
182
183
184
185
        return; // done
    }

    if (! _yychar) {
186
        tok->f.kind = T_EOF_SYMBOL;
con's avatar
con committed
187
188
189
        return;
    }

190
    unsigned char ch = _yychar;
con's avatar
con committed
191
192
193
194
195
196
    yyinp();

    switch (ch) {
    case '\\':
        while (_yychar != '\n' && std::isspace(_yychar))
            yyinp();
197
        // ### CPP_CHECK(! _yychar || _yychar == '\n');
con's avatar
con committed
198
        if (_yychar == '\n') {
199
200
            tok->f.joined = true;
            tok->f.newline = false;
con's avatar
con committed
201
202
203
204
            yyinp();
        }
        goto _Lagain;

205
206
207
    case '"':
        scanStringLiteral(tok);
        break;
con's avatar
con committed
208

209
210
211
    case '\'':
        scanCharLiteral(tok);
        break;
con's avatar
con committed
212
213

    case '{':
214
        tok->f.kind = T_LBRACE;
con's avatar
con committed
215
216
217
        break;

    case '}':
218
        tok->f.kind = T_RBRACE;
con's avatar
con committed
219
220
221
        break;

    case '[':
222
        tok->f.kind = T_LBRACKET;
con's avatar
con committed
223
224
225
        break;

    case ']':
226
        tok->f.kind = T_RBRACKET;
con's avatar
con committed
227
228
229
230
        break;

    case '#':
        if (_yychar == '#') {
231
            tok->f.kind = T_POUND_POUND;
con's avatar
con committed
232
233
            yyinp();
        } else {
234
            tok->f.kind = T_POUND;
con's avatar
con committed
235
236
237
238
        }
        break;

    case '(':
239
        tok->f.kind = T_LPAREN;
con's avatar
con committed
240
241
242
        break;

    case ')':
243
        tok->f.kind = T_RPAREN;
con's avatar
con committed
244
245
246
        break;

    case ';':
247
        tok->f.kind = T_SEMICOLON;
con's avatar
con committed
248
249
250
251
252
        break;

    case ':':
        if (_yychar == ':') {
            yyinp();
253
            tok->f.kind = T_COLON_COLON;
254
255
256
        } else if (_yychar == '>') {
            yyinp();
            tok->f.kind = T_RBRACKET;
con's avatar
con committed
257
        } else {
258
            tok->f.kind = T_COLON;
con's avatar
con committed
259
260
261
262
263
264
        }
        break;

    case '.':
        if (_yychar == '*') {
            yyinp();
265
            tok->f.kind = T_DOT_STAR;
con's avatar
con committed
266
267
        } else if (_yychar == '.') {
            yyinp();
268
            // ### CPP_CHECK(_yychar);
con's avatar
con committed
269
270
            if (_yychar == '.') {
                yyinp();
271
                tok->f.kind = T_DOT_DOT_DOT;
con's avatar
con committed
272
            } else {
273
                tok->f.kind = T_ERROR;
con's avatar
con committed
274
275
276
277
278
279
280
281
            }
        } else if (std::isdigit(_yychar)) {
            const char *yytext = _currentChar - 2;
            do {
                if (_yychar == 'e' || _yychar == 'E') {
                    yyinp();
                    if (_yychar == '-' || _yychar == '+') {
                        yyinp();
282
                        // ### CPP_CHECK(std::isdigit(_yychar));
con's avatar
con committed
283
284
285
286
287
288
289
290
                    }
                } else if (std::isalnum(_yychar) || _yychar == '.') {
                    yyinp();
                } else {
                    break;
                }
            } while (_yychar);
            int yylen = _currentChar - yytext;
291
            tok->f.kind = T_NUMERIC_LITERAL;
con's avatar
con committed
292
            if (control())
293
                tok->number = control()->numericLiteral(yytext, yylen);
con's avatar
con committed
294
        } else {
295
            tok->f.kind = T_DOT;
con's avatar
con committed
296
297
298
299
        }
        break;

    case '?':
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
        if (_yychar == '?') {
            yyinp();
            if (_yychar == '(') {
                yyinp();
                tok->f.kind = T_LBRACKET;
            } else if (_yychar == ')') {
                yyinp();
                tok->f.kind = T_RBRACKET;
            } else if (_yychar == '<') {
                yyinp();
                tok->f.kind = T_LBRACE;
            } else if (_yychar == '>') {
                yyinp();
                tok->f.kind = T_RBRACE;
            }
        } else {
            tok->f.kind = T_QUESTION;
        }
con's avatar
con committed
318
319
320
321
322
        break;

    case '+':
        if (_yychar == '+') {
            yyinp();
323
            tok->f.kind = T_PLUS_PLUS;
con's avatar
con committed
324
325
        } else if (_yychar == '=') {
            yyinp();
326
            tok->f.kind = T_PLUS_EQUAL;
con's avatar
con committed
327
        } else {
328
            tok->f.kind = T_PLUS;
con's avatar
con committed
329
330
331
332
333
334
        }
        break;

    case '-':
        if (_yychar == '-') {
            yyinp();
335
            tok->f.kind = T_MINUS_MINUS;
con's avatar
con committed
336
337
        } else if (_yychar == '=') {
            yyinp();
338
            tok->f.kind = T_MINUS_EQUAL;
con's avatar
con committed
339
340
341
342
        } else if (_yychar == '>') {
            yyinp();
            if (_yychar == '*') {
                yyinp();
343
                tok->f.kind = T_ARROW_STAR;
con's avatar
con committed
344
            } else {
345
                tok->f.kind = T_ARROW;
con's avatar
con committed
346
347
            }
        } else {
348
            tok->f.kind = T_MINUS;
con's avatar
con committed
349
350
351
352
353
354
        }
        break;

    case '*':
        if (_yychar == '=') {
            yyinp();
355
            tok->f.kind = T_STAR_EQUAL;
con's avatar
con committed
356
        } else {
357
            tok->f.kind = T_STAR;
con's avatar
con committed
358
359
360
361
362
        }
        break;

    case '/':
        if (_yychar == '/') {
363
364
365
366
367
368
            yyinp();

            bool doxy = false;

            if (_yychar == '/' || _yychar == '!') {
                yyinp();
369
                doxy = true;
370
371
372
            }

            while (_yychar && _yychar != '\n')
con's avatar
con committed
373
                yyinp();
374

375
            if (! f._scanCommentTokens)
con's avatar
con committed
376
                goto _Lagain;
377

378
            tok->f.kind = doxy ? T_CPP_DOXY_COMMENT : T_CPP_COMMENT;
379

con's avatar
con committed
380
381
        } else if (_yychar == '*') {
            yyinp();
382

383
384
385
            bool doxy = false;

            if (_yychar == '*' || _yychar == '!') {
Roberto Raggi's avatar
Roberto Raggi committed
386
387
                const char ch = _yychar;

388
389
                yyinp();

Roberto Raggi's avatar
Roberto Raggi committed
390
391
392
                if (ch == '*' && _yychar == '/')
                    goto _Ldone;

393
394
395
                if (_yychar == '<')
                    yyinp();

396
397
398
                if (! _yychar || std::isspace(_yychar))
                    doxy = true;
            }
399

con's avatar
con committed
400
401
402
403
404
405
406
407
408
409
            while (_yychar) {
                if (_yychar != '*') {
                    yyinp();
                } else {
                    yyinp();
                    if (_yychar == '/')
                        break;
                }
            }

Roberto Raggi's avatar
Roberto Raggi committed
410
        _Ldone:
con's avatar
con committed
411
412
413
            if (_yychar)
                yyinp();
            else
414
                _state = doxy ? State_MultiLineDoxyComment : State_MultiLineComment;
con's avatar
con committed
415

416
            if (! f._scanCommentTokens)
con's avatar
con committed
417
                goto _Lagain;
418

419
            tok->f.kind = doxy ? T_DOXY_COMMENT : T_COMMENT;
420

con's avatar
con committed
421
422
        } else if (_yychar == '=') {
            yyinp();
423
            tok->f.kind = T_SLASH_EQUAL;
con's avatar
con committed
424
        } else {
425
            tok->f.kind = T_SLASH;
con's avatar
con committed
426
427
428
429
430
431
        }
        break;

    case '%':
        if (_yychar == '=') {
            yyinp();
432
            tok->f.kind = T_PERCENT_EQUAL;
433
434
435
436
437
438
        } else if (_yychar == '>') {
            yyinp();
            tok->f.kind = T_RBRACE;
        } else if (_yychar == ':') {
            yyinp();
            tok->f.kind = T_POUND;
con's avatar
con committed
439
        } else {
440
            tok->f.kind = T_PERCENT;
con's avatar
con committed
441
442
443
444
445
446
        }
        break;

    case '^':
        if (_yychar == '=') {
            yyinp();
447
            tok->f.kind = T_CARET_EQUAL;
con's avatar
con committed
448
        } else {
449
            tok->f.kind = T_CARET;
con's avatar
con committed
450
451
452
453
454
455
        }
        break;

    case '&':
        if (_yychar == '&') {
            yyinp();
456
            tok->f.kind = T_AMPER_AMPER;
con's avatar
con committed
457
458
        } else if (_yychar == '=') {
            yyinp();
459
            tok->f.kind = T_AMPER_EQUAL;
con's avatar
con committed
460
        } else {
461
            tok->f.kind = T_AMPER;
con's avatar
con committed
462
463
464
465
466
467
        }
        break;

    case '|':
        if (_yychar == '|') {
            yyinp();
468
            tok->f.kind = T_PIPE_PIPE;
con's avatar
con committed
469
470
        } else if (_yychar == '=') {
            yyinp();
471
            tok->f.kind = T_PIPE_EQUAL;
con's avatar
con committed
472
        } else {
473
            tok->f.kind = T_PIPE;
con's avatar
con committed
474
475
476
477
478
479
        }
        break;

    case '~':
        if (_yychar == '=') {
            yyinp();
480
            tok->f.kind = T_TILDE_EQUAL;
con's avatar
con committed
481
        } else {
482
            tok->f.kind = T_TILDE;
con's avatar
con committed
483
484
485
486
487
488
        }
        break;

    case '!':
        if (_yychar == '=') {
            yyinp();
489
            tok->f.kind = T_EXCLAIM_EQUAL;
con's avatar
con committed
490
        } else {
491
            tok->f.kind = T_EXCLAIM;
con's avatar
con committed
492
493
494
495
496
497
        }
        break;

    case '=':
        if (_yychar == '=') {
            yyinp();
498
            tok->f.kind = T_EQUAL_EQUAL;
con's avatar
con committed
499
        } else {
500
            tok->f.kind = T_EQUAL;
con's avatar
con committed
501
502
503
504
        }
        break;

    case '<':
505
        if (f._scanAngleStringLiteralTokens) {
con's avatar
con committed
506
507
508
509
            const char *yytext = _currentChar;
            while (_yychar && _yychar != '>')
                yyinp();
            int yylen = _currentChar - yytext;
510
            // ### CPP_CHECK(_yychar == '>');
con's avatar
con committed
511
512
513
            if (_yychar == '>')
                yyinp();
            if (control())
514
                tok->string = control()->stringLiteral(yytext, yylen);
515
            tok->f.kind = T_ANGLE_STRING_LITERAL;
con's avatar
con committed
516
517
518
519
        } else if (_yychar == '<') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
520
                tok->f.kind = T_LESS_LESS_EQUAL;
con's avatar
con committed
521
            } else
522
                tok->f.kind = T_LESS_LESS;
con's avatar
con committed
523
524
        } else if (_yychar == '=') {
            yyinp();
525
            tok->f.kind = T_LESS_EQUAL;
526
527
528
529
530
531
        } else if (_yychar == ':') {
            yyinp();
            tok->f.kind = T_LBRACKET;
        } else if (_yychar == '%') {
            yyinp();
            tok->f.kind = T_LBRACE;
con's avatar
con committed
532
        } else {
533
            tok->f.kind = T_LESS;
con's avatar
con committed
534
535
536
537
538
539
540
541
        }
        break;

    case '>':
        if (_yychar == '>') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
542
                tok->f.kind = T_GREATER_GREATER_EQUAL;
con's avatar
con committed
543
            } else
544
545
                tok->f.kind = T_LESS_LESS;
            tok->f.kind = T_GREATER_GREATER;
con's avatar
con committed
546
547
        } else if (_yychar == '=') {
            yyinp();
548
            tok->f.kind = T_GREATER_EQUAL;
con's avatar
con committed
549
        } else {
550
            tok->f.kind = T_GREATER;
con's avatar
con committed
551
552
553
554
        }
        break;

    case ',':
555
        tok->f.kind = T_COMMA;
con's avatar
con committed
556
557
        break;

558
    default: {
559
        if (_languageFeatures.objCEnabled) {
560
561
562
563
564
565
566
567
568
569
570
571
572
573
            if (ch == '@' && _yychar >= 'a' && _yychar <= 'z') {
                const char *yytext = _currentChar;

                do {
                    yyinp();
                    if (! (isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
                        break;
                } while (_yychar);

                const int yylen = _currentChar - yytext;
                tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
                break;
            } else if (ch == '@' && _yychar == '"') {
                yyinp();
574
575
576
577
                scanStringLiteral(tok, '"');
                break;
            }
        }
578

579
        if (ch == 'L' || ch == 'u' || ch == 'U' || ch == 'R') {
580
581
582
            // Either a literal or still an identifier.
            if (_yychar == '"') {
                yyinp();
583
584
585
586
                if (ch == 'R')
                    scanRawStringLiteral(tok);
                else
                    scanStringLiteral(tok, ch);
587
588
589
            } else if (_yychar == '\'') {
                yyinp();
                scanCharLiteral(tok, ch);
590
591
592
593
594
595
596
597
            } else if (ch != 'R' && _yychar == 'R') {
                yyinp();
                if (_yychar == '"') {
                    yyinp();
                    scanRawStringLiteral(tok, ch);
                } else {
                    scanIdentifier(tok, 1);
                }
598
            } else if (ch == 'u' && _yychar == '8') {
599
600
                yyinp();
                if (_yychar == '"') {
601
602
                    yyinp();
                    scanStringLiteral(tok, '8');
603
                } else if (_yychar == '\'') {
604
605
                    yyinp();
                    scanCharLiteral(tok, '8');
606
607
608
609
610
611
612
613
                } else if (_yychar == 'R') {
                    yyinp();
                    if (_yychar == '"') {
                        yyinp();
                        scanRawStringLiteral(tok, '8');
                    } else {
                        scanIdentifier(tok, 2);
                    }
614
                } else {
615
                    scanIdentifier(tok, 1);
616
                }
617
618
            } else {
                scanIdentifier(tok);
619
            }
620
621
622
623
624
625
        } else if (std::isalpha(ch) || ch == '_' || ch == '$') {
            scanIdentifier(tok);
        } else if (std::isdigit(ch)) {
            scanNumericLiteral(tok);
        } else {
            tok->f.kind = T_ERROR;
626
        }
627
628
        break;
    } // default
629

630
631
    } // switch
}
con's avatar
con committed
632

633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
// Scans the body of a double-quoted string literal and picks the token
// kind from the prefix hint: 'L' wide, 'U' UTF-32, 'u' UTF-16, '8' UTF-8,
// '@' Objective-C; any other hint yields a plain string literal.
void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
{
    scanUntilQuote(tok, '"');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_UTF8_STRING_LITERAL;
        break;
    case '@':
        tok->f.kind = T_AT_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_STRING_LITERAL;
        break;
    }
}
con's avatar
con committed
650

651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
// Scans a raw string literal starting at the current position and stores
// the result in tok.
//
// The text from the entry position up to the first '(' is treated as the
// delimiter. After a ')' the following characters are compared against
// that delimiter; a '"' reached after exactly delimLength matching
// characters ends the literal.
//
// hint selects the token kind: 'L' wide, 'U' UTF-32, 'u' UTF-16,
// '8' UTF-8; anything else yields T_RAW_STRING_LITERAL.
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{
    const char *yytext = _currentChar;

    // -1 until the opening '(' has been seen; afterwards the number of
    // characters between the entry position and that '('.
    int delimLength = -1;
    // Position just after the most recent ')' while the characters that
    // follow it still match the delimiter; 0 when there is no candidate.
    const char *closingDelimCandidate = 0;
    while (_yychar) {
        if (_yychar == '(' && delimLength == -1) {
            // First '(': everything before it is the delimiter.
            delimLength = _currentChar - yytext;
            yyinp();
        } else if (_yychar == ')') {
            yyinp();
            // A ')' before any '(' ends the scan.
            if (delimLength == -1)
                break;
            closingDelimCandidate = _currentChar;
        } else {
            if (delimLength == -1) {
                // Still inside the delimiter: a backslash or whitespace
                // ends the scan.
                if (_yychar == '\\' || std::isspace(_yychar))
                    break;
                yyinp();
            } else {
                if (!closingDelimCandidate) {
                    // Ordinary literal content.
                    yyinp();
                } else {
                    if (_yychar == '"') {
                        if (delimLength == _currentChar - closingDelimCandidate) {
                            // Got a matching closing delimiter.
                            break;
                        }
                    }

                    // Make sure this continues to be a valid candidate.
                    if (_yychar != *(yytext + (_currentChar - closingDelimCandidate)))
                        closingDelimCandidate = 0;

                    yyinp();
                }
            }
        }
    }

    // Length of everything consumed so far; a closing quote, if present,
    // is consumed below and therefore not part of the stored text.
    int yylen = _currentChar - yytext;

    if (_yychar == '"')
        yyinp();

    // The literal is only stored when a control object is available.
    if (control())
        tok->string = control()->stringLiteral(yytext, yylen);

    if (hint == 'L')
        tok->f.kind = T_RAW_WIDE_STRING_LITERAL;
    else if (hint == 'U')
        tok->f.kind = T_RAW_UTF32_STRING_LITERAL;
    else if (hint == 'u')
        tok->f.kind = T_RAW_UTF16_STRING_LITERAL;
    else if (hint == '8')
        tok->f.kind = T_RAW_UTF8_STRING_LITERAL;
    else
        tok->f.kind = T_RAW_STRING_LITERAL;
}

712
713
714
715
716
717
718
719
720
721
722
723
724
// Scans the body of a single-quoted character literal and picks the token
// kind from the prefix hint: 'L' wide, 'U' UTF-32, 'u' UTF-16; any other
// hint yields a plain character literal.
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
    scanUntilQuote(tok, '\'');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_CHAR_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_CHAR_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_CHAR_LITERAL;
        break;
    default:
        tok->f.kind = T_CHAR_LITERAL;
        break;
    }
}
con's avatar
con committed
725

726
727
// Consumes characters up to the closing quote, a newline or the end of
// input, whichever comes first. A backslash always takes the following
// character with it. The closing quote, when present, is consumed but is
// not part of the text handed to the control object.
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
{
    CPP_CHECK(quote == '"' || quote == '\'');

    const char *const literalBegin = _currentChar;

    for (;;) {
        if (! _yychar || _yychar == quote || _yychar == '\n')
            break;

        const bool startsEscape = (_yychar == '\\');
        yyinp();

        // After a backslash, swallow the escaped character as well.
        if (startsEscape && _yychar)
            yyinp();
    }

    const int literalLength = _currentChar - literalBegin;

    if (_yychar == quote)
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalBegin, literalLength);
}
750

751
752
753
754
755
756
757
758
// Scans the rest of a numeric literal whose first digit has already been
// consumed. Letters, digits and dots are accepted, and an 'e'/'E' may be
// followed by a sign.
void Lexer::scanNumericLiteral(Token *tok)
{
    // The first digit sits one character behind the cursor.
    const char *const numberBegin = _currentChar - 1;

    for (;;) {
        if (! _yychar)
            break;

        if (_yychar == 'e' || _yychar == 'E') {
            yyinp();
            if (_yychar == '-' || _yychar == '+') {
                yyinp();
                // ### CPP_CHECK(std::isdigit(_yychar));
            }
            continue;
        }

        if (! (std::isalnum(_yychar) || _yychar == '.'))
            break;

        yyinp();
    }

    const int numberLength = _currentChar - numberBegin;

    tok->f.kind = T_NUMERIC_LITERAL;

    if (control())
        tok->number = control()->numericLiteral(numberBegin, numberLength);
}

775
// Scans the rest of an identifier. The first character plus
// extraProcessedChars further characters have already been consumed by
// the caller and are included in the identifier text.
//
// With keyword scanning enabled the text is classified first; anything
// that stays T_IDENTIFIER is then passed through classifyOperator() and
// stored via the control object when one is available.
void Lexer::scanIdentifier(Token *tok, unsigned extraProcessedChars)
{
    const char *const identBegin = _currentChar - 1 - extraProcessedChars;

    for (;;) {
        const bool isIdentChar = std::isalnum(_yychar) || _yychar == '_' || _yychar == '$';
        if (! isIdentChar)
            break;
        yyinp();
    }

    const int identLength = _currentChar - identBegin;

    if (f._scanKeywords)
        tok->f.kind = classify(identBegin, identLength, _languageFeatures);
    else
        tok->f.kind = T_IDENTIFIER;

    // Keywords are finished here; only plain identifiers need more work.
    if (tok->f.kind != T_IDENTIFIER)
        return;

    tok->f.kind = classifyOperator(identBegin, identLength);

    if (control())
        tok->identifier = control()->identifier(identBegin, identLength);
}