Lexer.cpp 19.9 KB
Newer Older
con's avatar
con committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "Lexer.h"
#include "Control.h"
#include "TranslationUnit.h"
24
#include "Literals.h"
con's avatar
con committed
25 26 27
#include <cctype>
#include <cassert>

Roberto Raggi's avatar
Roberto Raggi committed
28
using namespace CPlusPlus;
con's avatar
con committed
29 30 31

// Creates a lexer that reads the source range exposed by \a unit
// (firstSourceChar() .. lastSourceChar()). Keyword scanning starts enabled.
Lexer::Lexer(TranslationUnit *unit)
    : _translationUnit(unit), _state(State_Default), _flags(0), _currentLine(1)
{
    f._scanKeywords = true;

    const char *const sourceBegin = _translationUnit->firstSourceChar();
    const char *const sourceEnd = _translationUnit->lastSourceChar();
    setSource(sourceBegin, sourceEnd);
}

// Creates a lexer over the raw character range [\a firstChar, \a lastChar]
// with no translation unit attached; control() returns 0 for such a lexer.
// Keyword scanning starts enabled.
Lexer::Lexer(const char *firstChar, const char *lastChar)
    : _translationUnit(0), _state(State_Default), _flags(0), _currentLine(1)
{
    f._scanKeywords = true;

    setSource(firstChar, lastChar);
}

// The destructor body is empty: nothing is released here.
Lexer::~Lexer()
{
}

// Returns the translation unit this lexer was constructed with, or 0 when it
// was constructed from a raw character range.
TranslationUnit *Lexer::translationUnit() const
{
    return _translationUnit;
}

// Returns the Control of the attached translation unit, or 0 when the lexer
// has no translation unit.
Control *Lexer::control() const
{
    return _translationUnit ? _translationUnit->control() : 0;
}

// Points the lexer at a new character range and rewinds it.
//
// The cursor is parked one position *before* \a firstChar and the current
// character is primed with '\n'.
void Lexer::setSource(const char *firstChar, const char *lastChar)
{
    _yychar = '\n';

    _firstChar = firstChar;
    _lastChar = lastChar;

    _tokenStart = _currentChar = _firstChar - 1;
}

// Chooses the character the lexer is primed with: '\n' when \a enabled is
// true, a plain space otherwise.
void Lexer::setStartWithNewline(bool enabled)
{
    _yychar = enabled ? '\n' : ' ';
}

// Returns the current lexer state (the value stored in _state).
int Lexer::state() const
{
    return _state;
}

// Overwrites the lexer state with \a state.
void Lexer::setState(int state)
{
    _state = state;
}

bool Lexer::qtMocRunEnabled() const
89
{ return f._qtMocRunEnabled; }
con's avatar
con committed
90 91

void Lexer::setQtMocRunEnabled(bool onoff)
92
{ f._qtMocRunEnabled = onoff; }
con's avatar
con committed
93

94 95 96 97 98 99
// Returns the _cxx0xEnabled flag; scanIdentifier() forwards it to classify().
bool Lexer::cxx0xEnabled() const
{
    return f._cxx0xEnabled;
}

// Sets the _cxx0xEnabled flag to \a onoff.
void Lexer::setCxxOxEnabled(bool onoff)
{
    f._cxx0xEnabled = onoff;
}

Roberto Raggi's avatar
Roberto Raggi committed
100
bool Lexer::objCEnabled() const
101
{ return f._objCEnabled; }
Roberto Raggi's avatar
Roberto Raggi committed
102

Roberto Raggi's avatar
Roberto Raggi committed
103
void Lexer::setObjCEnabled(bool onoff)
104
{ f._objCEnabled = onoff; }
Roberto Raggi's avatar
Roberto Raggi committed
105

con's avatar
con committed
106
bool Lexer::isIncremental() const
107
{ return f._isIncremental; }
con's avatar
con committed
108 109

void Lexer::setIncremental(bool isIncremental)
110
{ f._isIncremental = isIncremental; }
con's avatar
con committed
111 112

bool Lexer::scanCommentTokens() const
113
{ return f._scanCommentTokens; }
con's avatar
con committed
114 115

void Lexer::setScanCommentTokens(bool onoff)
116
{ f._scanCommentTokens = onoff; }
con's avatar
con committed
117 118

bool Lexer::scanKeywords() const
119
{ return f._scanKeywords; }
con's avatar
con committed
120 121

void Lexer::setScanKeywords(bool onoff)
122
{ f._scanKeywords = onoff; }
con's avatar
con committed
123 124

void Lexer::setScanAngleStringLiteralTokens(bool onoff)
125
{ f._scanAngleStringLiteralTokens = onoff; }
con's avatar
con committed
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153

// Records the start of a new line: bumps the line counter and, when a
// translation unit is attached, reports the current offset to it.
void Lexer::pushLineStartOffset()
{
    _currentLine += 1;

    if (! _translationUnit)
        return;

    _translationUnit->pushLineOffset(_currentChar - _firstChar);
}

// Returns the offset of the current token start from the first source char.
unsigned Lexer::tokenOffset() const
{
    return _tokenStart - _firstChar;
}

// Returns the distance from the current token start to the cursor.
unsigned Lexer::tokenLength() const
{
    return _currentChar - _tokenStart;
}

// Returns a pointer to the first character of the current token.
const char *Lexer::tokenBegin() const
{
    return _tokenStart;
}

// Returns the cursor position, i.e. the end of the current token.
const char *Lexer::tokenEnd() const
{
    return _currentChar;
}

// Returns the line counter (starts at 1, advanced by pushLineStartOffset()).
unsigned Lexer::currentLine() const
{
    return _currentLine;
}

// Scans the next token into \a tok: the token is reset, filled in by
// scan_helper(), and its length is then set to the distance between the token
// start and the cursor.
void Lexer::scan(Token *tok)
{
    tok->reset();
    scan_helper(tok);

    tok->f.length = tokenEnd() - tokenBegin();
}

void Lexer::scan_helper(Token *tok)
{
  _Lagain:
    while (_yychar && std::isspace(_yychar)) {
Erik Verbruggen's avatar
Erik Verbruggen committed
161 162
        if (_yychar == '\n') {
            tok->f.joined = false;
163
            tok->f.newline = true;
Erik Verbruggen's avatar
Erik Verbruggen committed
164
        } else {
165
            tok->f.whitespace = true;
Erik Verbruggen's avatar
Erik Verbruggen committed
166
        }
con's avatar
con committed
167 168 169 170 171 172
        yyinp();
    }

    if (! _translationUnit)
        tok->lineno = _currentLine;

173 174 175
    _tokenStart = _currentChar;
    tok->offset = _currentChar - _firstChar;

176
    if (_state == State_MultiLineComment || _state == State_MultiLineDoxyComment) {
177 178
        const int originalState = _state;

con's avatar
con committed
179
        if (! _yychar) {
180
            tok->f.kind = T_EOF_SYMBOL;
con's avatar
con committed
181 182 183 184 185 186 187 188 189 190
            return;
        }

        while (_yychar) {
            if (_yychar != '*')
                yyinp();
            else {
                yyinp();
                if (_yychar == '/') {
                    yyinp();
191
                    _state = State_Default;
con's avatar
con committed
192 193 194 195 196
                    break;
                }
            }
        }

197
        if (! f._scanCommentTokens)
con's avatar
con committed
198 199
            goto _Lagain;

200
        else if (originalState == State_MultiLineComment)
201
            tok->f.kind = T_COMMENT;
202
        else
203
            tok->f.kind = T_DOXY_COMMENT;
con's avatar
con committed
204 205 206 207
        return; // done
    }

    if (! _yychar) {
208
        tok->f.kind = T_EOF_SYMBOL;
con's avatar
con committed
209 210 211
        return;
    }

212
    unsigned char ch = _yychar;
con's avatar
con committed
213 214 215 216 217 218 219 220
    yyinp();

    switch (ch) {
    case '\\':
        while (_yychar != '\n' && std::isspace(_yychar))
            yyinp();
        // ### assert(! _yychar || _yychar == '\n');
        if (_yychar == '\n') {
221 222
            tok->f.joined = true;
            tok->f.newline = false;
con's avatar
con committed
223 224 225 226
            yyinp();
        }
        goto _Lagain;

227 228 229
    case '"':
        scanStringLiteral(tok);
        break;
con's avatar
con committed
230

231 232 233
    case '\'':
        scanCharLiteral(tok);
        break;
con's avatar
con committed
234 235

    case '{':
236
        tok->f.kind = T_LBRACE;
con's avatar
con committed
237 238 239
        break;

    case '}':
240
        tok->f.kind = T_RBRACE;
con's avatar
con committed
241 242 243
        break;

    case '[':
244
        tok->f.kind = T_LBRACKET;
con's avatar
con committed
245 246 247
        break;

    case ']':
248
        tok->f.kind = T_RBRACKET;
con's avatar
con committed
249 250 251 252
        break;

    case '#':
        if (_yychar == '#') {
253
            tok->f.kind = T_POUND_POUND;
con's avatar
con committed
254 255
            yyinp();
        } else {
256
            tok->f.kind = T_POUND;
con's avatar
con committed
257 258 259 260
        }
        break;

    case '(':
261
        tok->f.kind = T_LPAREN;
con's avatar
con committed
262 263 264
        break;

    case ')':
265
        tok->f.kind = T_RPAREN;
con's avatar
con committed
266 267 268
        break;

    case ';':
269
        tok->f.kind = T_SEMICOLON;
con's avatar
con committed
270 271 272 273 274
        break;

    case ':':
        if (_yychar == ':') {
            yyinp();
275
            tok->f.kind = T_COLON_COLON;
con's avatar
con committed
276
        } else {
277
            tok->f.kind = T_COLON;
con's avatar
con committed
278 279 280 281 282 283
        }
        break;

    case '.':
        if (_yychar == '*') {
            yyinp();
284
            tok->f.kind = T_DOT_STAR;
con's avatar
con committed
285 286 287 288 289
        } else if (_yychar == '.') {
            yyinp();
            // ### assert(_yychar);
            if (_yychar == '.') {
                yyinp();
290
                tok->f.kind = T_DOT_DOT_DOT;
con's avatar
con committed
291
            } else {
292
                tok->f.kind = T_ERROR;
con's avatar
con committed
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
            }
        } else if (std::isdigit(_yychar)) {
            const char *yytext = _currentChar - 2;
            do {
                if (_yychar == 'e' || _yychar == 'E') {
                    yyinp();
                    if (_yychar == '-' || _yychar == '+') {
                        yyinp();
                        // ### assert(std::isdigit(_yychar));
                    }
                } else if (std::isalnum(_yychar) || _yychar == '.') {
                    yyinp();
                } else {
                    break;
                }
            } while (_yychar);
            int yylen = _currentChar - yytext;
310
            tok->f.kind = T_NUMERIC_LITERAL;
con's avatar
con committed
311
            if (control())
312
                tok->number = control()->numericLiteral(yytext, yylen);
con's avatar
con committed
313
        } else {
314
            tok->f.kind = T_DOT;
con's avatar
con committed
315 316 317 318
        }
        break;

    case '?':
319
        tok->f.kind = T_QUESTION;
con's avatar
con committed
320 321 322 323 324
        break;

    case '+':
        if (_yychar == '+') {
            yyinp();
325
            tok->f.kind = T_PLUS_PLUS;
con's avatar
con committed
326 327
        } else if (_yychar == '=') {
            yyinp();
328
            tok->f.kind = T_PLUS_EQUAL;
con's avatar
con committed
329
        } else {
330
            tok->f.kind = T_PLUS;
con's avatar
con committed
331 332 333 334 335 336
        }
        break;

    case '-':
        if (_yychar == '-') {
            yyinp();
337
            tok->f.kind = T_MINUS_MINUS;
con's avatar
con committed
338 339
        } else if (_yychar == '=') {
            yyinp();
340
            tok->f.kind = T_MINUS_EQUAL;
con's avatar
con committed
341 342 343 344
        } else if (_yychar == '>') {
            yyinp();
            if (_yychar == '*') {
                yyinp();
345
                tok->f.kind = T_ARROW_STAR;
con's avatar
con committed
346
            } else {
347
                tok->f.kind = T_ARROW;
con's avatar
con committed
348 349
            }
        } else {
350
            tok->f.kind = T_MINUS;
con's avatar
con committed
351 352 353 354 355 356
        }
        break;

    case '*':
        if (_yychar == '=') {
            yyinp();
357
            tok->f.kind = T_STAR_EQUAL;
con's avatar
con committed
358
        } else {
359
            tok->f.kind = T_STAR;
con's avatar
con committed
360 361 362 363 364
        }
        break;

    case '/':
        if (_yychar == '/') {
365 366 367 368 369 370
            yyinp();

            bool doxy = false;

            if (_yychar == '/' || _yychar == '!') {
                yyinp();
371
                doxy = true;
372 373 374
            }

            while (_yychar && _yychar != '\n')
con's avatar
con committed
375
                yyinp();
376

377
            if (! f._scanCommentTokens)
con's avatar
con committed
378
                goto _Lagain;
379

380
            tok->f.kind = doxy ? T_CPP_DOXY_COMMENT : T_CPP_COMMENT;
381

con's avatar
con committed
382 383
        } else if (_yychar == '*') {
            yyinp();
384

385 386 387
            bool doxy = false;

            if (_yychar == '*' || _yychar == '!') {
Roberto Raggi's avatar
Roberto Raggi committed
388 389
                const char ch = _yychar;

390 391
                yyinp();

Roberto Raggi's avatar
Roberto Raggi committed
392 393 394
                if (ch == '*' && _yychar == '/')
                    goto _Ldone;

395 396 397
                if (_yychar == '<')
                    yyinp();

398 399 400
                if (! _yychar || std::isspace(_yychar))
                    doxy = true;
            }
401

con's avatar
con committed
402 403 404 405 406 407 408 409 410 411
            while (_yychar) {
                if (_yychar != '*') {
                    yyinp();
                } else {
                    yyinp();
                    if (_yychar == '/')
                        break;
                }
            }

Roberto Raggi's avatar
Roberto Raggi committed
412
        _Ldone:
con's avatar
con committed
413 414 415
            if (_yychar)
                yyinp();
            else
416
                _state = doxy ? State_MultiLineDoxyComment : State_MultiLineComment;
con's avatar
con committed
417

418
            if (! f._scanCommentTokens)
con's avatar
con committed
419
                goto _Lagain;
420

421
            tok->f.kind = doxy ? T_DOXY_COMMENT : T_COMMENT;
422

con's avatar
con committed
423 424
        } else if (_yychar == '=') {
            yyinp();
425
            tok->f.kind = T_SLASH_EQUAL;
con's avatar
con committed
426
        } else {
427
            tok->f.kind = T_SLASH;
con's avatar
con committed
428 429 430 431 432 433
        }
        break;

    case '%':
        if (_yychar == '=') {
            yyinp();
434
            tok->f.kind = T_PERCENT_EQUAL;
con's avatar
con committed
435
        } else {
436
            tok->f.kind = T_PERCENT;
con's avatar
con committed
437 438 439 440 441 442
        }
        break;

    case '^':
        if (_yychar == '=') {
            yyinp();
443
            tok->f.kind = T_CARET_EQUAL;
con's avatar
con committed
444
        } else {
445
            tok->f.kind = T_CARET;
con's avatar
con committed
446 447 448 449 450 451
        }
        break;

    case '&':
        if (_yychar == '&') {
            yyinp();
452
            tok->f.kind = T_AMPER_AMPER;
con's avatar
con committed
453 454
        } else if (_yychar == '=') {
            yyinp();
455
            tok->f.kind = T_AMPER_EQUAL;
con's avatar
con committed
456
        } else {
457
            tok->f.kind = T_AMPER;
con's avatar
con committed
458 459 460 461 462 463
        }
        break;

    case '|':
        if (_yychar == '|') {
            yyinp();
464
            tok->f.kind = T_PIPE_PIPE;
con's avatar
con committed
465 466
        } else if (_yychar == '=') {
            yyinp();
467
            tok->f.kind = T_PIPE_EQUAL;
con's avatar
con committed
468
        } else {
469
            tok->f.kind = T_PIPE;
con's avatar
con committed
470 471 472 473 474 475
        }
        break;

    case '~':
        if (_yychar == '=') {
            yyinp();
476
            tok->f.kind = T_TILDE_EQUAL;
con's avatar
con committed
477
        } else {
478
            tok->f.kind = T_TILDE;
con's avatar
con committed
479 480 481 482 483 484
        }
        break;

    case '!':
        if (_yychar == '=') {
            yyinp();
485
            tok->f.kind = T_EXCLAIM_EQUAL;
con's avatar
con committed
486
        } else {
487
            tok->f.kind = T_EXCLAIM;
con's avatar
con committed
488 489 490 491 492 493
        }
        break;

    case '=':
        if (_yychar == '=') {
            yyinp();
494
            tok->f.kind = T_EQUAL_EQUAL;
con's avatar
con committed
495
        } else {
496
            tok->f.kind = T_EQUAL;
con's avatar
con committed
497 498 499 500
        }
        break;

    case '<':
501
        if (f._scanAngleStringLiteralTokens) {
con's avatar
con committed
502 503 504 505 506 507 508 509
            const char *yytext = _currentChar;
            while (_yychar && _yychar != '>')
                yyinp();
            int yylen = _currentChar - yytext;
            // ### assert(_yychar == '>');
            if (_yychar == '>')
                yyinp();
            if (control())
510
                tok->string = control()->stringLiteral(yytext, yylen);
511
            tok->f.kind = T_ANGLE_STRING_LITERAL;
con's avatar
con committed
512 513 514 515
        } else if (_yychar == '<') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
516
                tok->f.kind = T_LESS_LESS_EQUAL;
con's avatar
con committed
517
            } else
518
                tok->f.kind = T_LESS_LESS;
con's avatar
con committed
519 520
        } else if (_yychar == '=') {
            yyinp();
521
            tok->f.kind = T_LESS_EQUAL;
con's avatar
con committed
522
        } else {
523
            tok->f.kind = T_LESS;
con's avatar
con committed
524 525 526 527 528 529 530 531
        }
        break;

    case '>':
        if (_yychar == '>') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
532
                tok->f.kind = T_GREATER_GREATER_EQUAL;
con's avatar
con committed
533
            } else
534 535
                tok->f.kind = T_LESS_LESS;
            tok->f.kind = T_GREATER_GREATER;
con's avatar
con committed
536 537
        } else if (_yychar == '=') {
            yyinp();
538
            tok->f.kind = T_GREATER_EQUAL;
con's avatar
con committed
539
        } else {
540
            tok->f.kind = T_GREATER;
con's avatar
con committed
541 542 543 544
        }
        break;

    case ',':
545
        tok->f.kind = T_COMMA;
con's avatar
con committed
546 547
        break;

548
    default: {
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
        if (f._objCEnabled) {
            if (ch == '@' && _yychar >= 'a' && _yychar <= 'z') {
                const char *yytext = _currentChar;

                do {
                    yyinp();
                    if (! (isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
                        break;
                } while (_yychar);

                const int yylen = _currentChar - yytext;
                tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
                break;
            } else if (ch == '@' && _yychar == '"') {
                yyinp();
564 565 566 567
                scanStringLiteral(tok, '"');
                break;
            }
        }
568

569
        if (ch == 'L' || ch == 'u' || ch == 'U' || ch == 'R') {
570 571 572
            // Either a literal or still an identifier.
            if (_yychar == '"') {
                yyinp();
573 574 575 576
                if (ch == 'R')
                    scanRawStringLiteral(tok);
                else
                    scanStringLiteral(tok, ch);
577 578 579
            } else if (_yychar == '\'') {
                yyinp();
                scanCharLiteral(tok, ch);
580 581 582 583 584 585 586 587
            } else if (ch != 'R' && _yychar == 'R') {
                yyinp();
                if (_yychar == '"') {
                    yyinp();
                    scanRawStringLiteral(tok, ch);
                } else {
                    scanIdentifier(tok, 1);
                }
588
            } else if (ch == 'u' && _yychar == '8') {
589 590
                yyinp();
                if (_yychar == '"') {
591 592
                    yyinp();
                    scanStringLiteral(tok, '8');
593
                } else if (_yychar == '\'') {
594 595
                    yyinp();
                    scanCharLiteral(tok, '8');
596 597 598 599 600 601 602 603
                } else if (_yychar == 'R') {
                    yyinp();
                    if (_yychar == '"') {
                        yyinp();
                        scanRawStringLiteral(tok, '8');
                    } else {
                        scanIdentifier(tok, 2);
                    }
604
                } else {
605
                    scanIdentifier(tok, 1);
606
                }
607 608
            } else {
                scanIdentifier(tok);
609
            }
610 611 612 613 614 615
        } else if (std::isalpha(ch) || ch == '_' || ch == '$') {
            scanIdentifier(tok);
        } else if (std::isdigit(ch)) {
            scanNumericLiteral(tok);
        } else {
            tok->f.kind = T_ERROR;
616
        }
617 618
        break;
    } // default
619

620 621
    } // switch
}
con's avatar
con committed
622

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
// Scans the remainder of a double-quoted string literal (the opening quote
// has already been consumed) and picks the token kind from the prefix
// \a hint: 'L', 'U', 'u', '8' or '@'. Any other hint yields a plain
// T_STRING_LITERAL.
void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
{
    scanUntilQuote(tok, '"');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_UTF8_STRING_LITERAL;
        break;
    case '@':
        tok->f.kind = T_AT_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_STRING_LITERAL;
        break;
    }
}
con's avatar
con committed
640

641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701
// Scans a raw string literal of the form  delimiter( ... )delimiter"  (the
// opening quote has already been consumed) and picks the token kind from the
// prefix \a hint: 'L', 'U', 'u' or '8'; anything else yields
// T_RAW_STRING_LITERAL.
//
// The scan stops early when the delimiter contains a backslash, whitespace or
// a ')' before any '(' was seen.
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{
    const char *literalText = _currentChar;

    int prefixLength = -1;            // stays -1 until the opening '(' is seen
    const char *closeCandidate = 0;   // position just past the most recent ')'

    while (_yychar) {
        const bool inDelimiter = (prefixLength == -1);

        if (inDelimiter && _yychar == '(') {
            // End of the opening delimiter.
            prefixLength = _currentChar - literalText;
            yyinp();
            continue;
        }

        if (_yychar == ')') {
            yyinp();
            if (inDelimiter)
                break;
            // Every ')' in the body may start the closing delimiter.
            closeCandidate = _currentChar;
            continue;
        }

        if (inDelimiter) {
            if (_yychar == '\\' || std::isspace(_yychar))
                break;
            yyinp();
            continue;
        }

        if (closeCandidate) {
            // Got a matching closing delimiter.
            if (_yychar == '"' && prefixLength == _currentChar - closeCandidate)
                break;

            // Make sure this continues to be a valid candidate.
            if (_yychar != *(literalText + (_currentChar - closeCandidate)))
                closeCandidate = 0;
        }

        yyinp();
    }

    int literalLength = _currentChar - literalText;

    if (_yychar == '"')
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalText, literalLength);

    switch (hint) {
    case 'L':
        tok->f.kind = T_RAW_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_RAW_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_RAW_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_RAW_UTF8_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_RAW_STRING_LITERAL;
        break;
    }
}

702 703 704 705 706 707 708 709 710 711 712 713 714
// Scans the remainder of a single-quoted character literal (the opening
// quote has already been consumed) and picks the token kind from the prefix
// \a hint: 'L', 'U' or 'u'. Any other hint yields a plain T_CHAR_LITERAL.
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
    scanUntilQuote(tok, '\'');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_CHAR_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_CHAR_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_CHAR_LITERAL;
        break;
    default:
        tok->f.kind = T_CHAR_LITERAL;
        break;
    }
}
con's avatar
con committed
715

716 717 718
// Consumes characters up to the closing \a quote, a newline or the end of
// input, whichever comes first. A backslash escapes the character after it.
// The closing quote, when present, is consumed but not part of the stored
// text. The token kind is left for the caller to set.
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
{
    assert(quote == '"' || quote == '\'');

    const char *literalText = _currentChar;

    for (;;) {
        if (! _yychar || _yychar == quote || _yychar == '\n')
            break;

        const bool isEscape = (_yychar == '\\');
        yyinp(); // the character itself, or the `\\'

        // Skip the escaped character too, unless the input ended.
        if (isEscape && _yychar)
            yyinp();
    }

    int literalLength = _currentChar - literalText;

    if (_yychar == quote)
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalText, literalLength);
}
740

741 742 743 744 745 746 747 748 749
// Scans a numeric literal whose first digit has already been consumed.
// Accepts any run of alphanumerics and '.', and lets an 'e'/'E' be followed
// by a single '+' or '-'. The token kind is always T_NUMERIC_LITERAL.
void Lexer::scanNumericLiteral(Token *tok)
{
    // The first digit sits one position behind the cursor.
    const char *numberText = _currentChar - 1;

    while (_yychar) {
        const bool isExponent = (_yychar == 'e' || _yychar == 'E');

        if (! isExponent && ! std::isalnum(_yychar) && _yychar != '.')
            break;

        yyinp();

        if (isExponent && (_yychar == '-' || _yychar == '+')) {
            yyinp();
            // ### assert(std::isdigit(_yychar));
        }
    }

    int numberLength = _currentChar - numberText;

    tok->f.kind = T_NUMERIC_LITERAL;

    if (control())
        tok->number = control()->numericLiteral(numberText, numberLength);
}

765
// Scans an identifier or keyword. The first character has already been
// consumed, plus \a extraProcessedChars further characters that the caller
// read while checking for a literal prefix.
//
// With keyword scanning on, classify() decides the kind. Whatever ends up as
// T_IDENTIFIER is then passed through classifyOperator() and, when a Control
// is available, stored as an identifier on the token.
void Lexer::scanIdentifier(Token *tok, unsigned extraProcessedChars)
{
    const char *nameText = _currentChar - 1 - extraProcessedChars;

    for (;;) {
        if (! (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
            break;
        yyinp();
    }

    int nameLength = _currentChar - nameText;

    tok->f.kind = f._scanKeywords
            ? classify(nameText, nameLength, f._qtMocRunEnabled, f._cxx0xEnabled)
            : T_IDENTIFIER;

    if (tok->f.kind != T_IDENTIFIER)
        return;

    tok->f.kind = classifyOperator(nameText, nameLength);

    if (control())
        tok->identifier = control()->identifier(nameText, nameLength);
}