/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company.  For licensing terms and
** conditions see http://www.qt.io/terms-conditions.  For further information
** use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file.  Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, The Qt Company gives you certain additional
** rights.  These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/

#include "../cplusplus_global.h"

#include <cplusplus/Token.h>
#include <cplusplus/SimpleLexer.h>

#include <QtTest>
#include <QDebug>

// Define to log every compared token pair (see tst_SimpleLexer::run()).
//#define DEBUG_TOKENS

// List of expected token kinds, used as a QTest data column.
typedef QList<unsigned> TokenKindList;
// Shorthand for wrapping string literals into QByteArray in the data tables.
typedef QByteArray _;

// Both types are fetched with QFETCH and therefore declared as metatypes.
Q_DECLARE_METATYPE(TokenKindList)
Q_DECLARE_METATYPE(CPlusPlus::Tokens)

//TESTED_COMPONENT=src/libs/cplusplus
using namespace CPlusPlus;

class tst_SimpleLexer: public QObject
{
    Q_OBJECT

54 55 56
public:
    tst_SimpleLexer() : _state(0) {}

57 58 59 60 61 62 63
    enum TokenCompareFlag {
        CompareKind            = 1 << 1,
        CompareBytes           = 1 << 2,
        CompareBytesBegin      = 1 << 3,
        CompareBytesEnd        = 1 << 4,
        CompareUtf16Chars      = 1 << 5,
        CompareUtf16CharsBegin = 1 << 6,
64 65
        CompareUtf16CharsEnd   = 1 << 7,
        CompareUserDefinedLiteral = 1 << 8
66 67 68
    };
    Q_DECLARE_FLAGS(TokenCompareFlags, TokenCompareFlag)

69
private slots:
70 71
    void basic();
    void basic_data();
72 73
    void incremental();
    void incremental_data();
74 75 76 77
    void literals();
    void literals_data();
    void preprocessor();
    void preprocessor_data();
78

79 80
    void bytes_and_utf16chars();
    void bytes_and_utf16chars_data();
81 82
    void user_defined_literals();
    void user_defined_literals_data();
83 84 85
    void offsets();
    void offsets_data();

86
private:
Orgad Shaneh's avatar
Orgad Shaneh committed
87
    static Tokens toTokens(const TokenKindList &tokenKinds);
88

89
    void run(const QByteArray &source,
Orgad Shaneh's avatar
Orgad Shaneh committed
90
             const Tokens &expectedTokens,
91
             bool preserveState,
92
             TokenCompareFlags compareFlags,
93 94
             bool preprocessorMode = false,
             const LanguageFeatures &extraLanguageFeatures = LanguageFeatures());
95 96

    int _state;
97 98
};

99 100
Q_DECLARE_OPERATORS_FOR_FLAGS(tst_SimpleLexer::TokenCompareFlags)

// Converts a list of token kinds into a token list. Each resulting Token has
// only its kind assigned; all other fields keep their default-constructed
// values.
Tokens tst_SimpleLexer::toTokens(const TokenKindList &tokenKinds)
{
    Tokens tokens;
    foreach (unsigned tokenKind, tokenKinds) {
        Token token;
        token.f.kind = tokenKind;
        tokens << token;
    }
    return tokens;
}

void tst_SimpleLexer::run(const QByteArray &source,
Orgad Shaneh's avatar
Orgad Shaneh committed
113
                          const Tokens &expectedTokens,
114
                          bool preserveState,
115
                          TokenCompareFlags compareFlags,
116 117
                          bool preprocessorMode,
                          const LanguageFeatures &extraLanguageFeatures)
118
{
119 120
    QVERIFY(compareFlags);

121
    SimpleLexer lexer;
122
    lexer.setPreprocessorMode(preprocessorMode);
123 124 125 126 127
    if (extraLanguageFeatures.flags) {
        LanguageFeatures languageFeatures = lexer.languageFeatures();
        languageFeatures.flags |= extraLanguageFeatures.flags;
        lexer.setLanguageFeatures(languageFeatures);
    }
Orgad Shaneh's avatar
Orgad Shaneh committed
128
    const Tokens tokens = lexer(source, preserveState ? _state : 0);
129 130
    if (preserveState)
        _state = lexer.state();
131 132

    int i = 0;
Orgad Shaneh's avatar
Orgad Shaneh committed
133 134
    for (; i < tokens.size(); ++i) {
        QVERIFY2(i < expectedTokens.size(), "More tokens than expected.");
135

Orgad Shaneh's avatar
Orgad Shaneh committed
136 137
        const Token token = tokens.at(i);
        const Token expectedToken = expectedTokens.at(i);
138 139 140
#ifdef DEBUG_TOKENS
        qDebug("Comparing (i=%d): \"%s\" \"%s\"", i,
               Token::name(token.kind()),
141
               Token::name(expectedToken.kind()));
142
#endif
143 144
        if (compareFlags & CompareKind)
            QCOMPARE(token.kind(), expectedToken.kind());
145 146 147 148 149 150 151 152 153 154 155 156 157 158

        if (compareFlags & CompareBytes)
            QCOMPARE(token.bytes(), expectedToken.bytes());
        if (compareFlags & CompareBytesBegin)
            QCOMPARE(token.bytesBegin(), expectedToken.bytesBegin());
        if (compareFlags & CompareBytesEnd)
            QCOMPARE(token.bytesEnd(), expectedToken.bytesEnd());

        if (compareFlags & CompareUtf16Chars)
            QCOMPARE(token.utf16chars(), expectedToken.utf16chars());
        if (compareFlags & CompareUtf16CharsBegin)
            QCOMPARE(token.utf16charsBegin(), expectedToken.utf16charsBegin());
        if (compareFlags & CompareUtf16CharsEnd)
            QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd());
159 160
        if (compareFlags & CompareUserDefinedLiteral)
            QCOMPARE(token.userDefinedLiteral(), expectedToken.userDefinedLiteral());
161
    }
162 163 164 165

    QString msg = QLatin1String("Less tokens than expected: got %1, expected %2.");
    msg = msg.arg(i).arg(expectedTokens.size());
    QVERIFY2(i == expectedTokens.size(), msg.toUtf8().constData());
166 167
}

168 169 170
void tst_SimpleLexer::basic()
{
    QFETCH(QByteArray, source);
171
    QFETCH(TokenKindList, expectedTokenKindList);
172

Orgad Shaneh's avatar
Orgad Shaneh committed
173
    run(source, toTokens(expectedTokenKindList), false, CompareKind);
174 175
}

176
void tst_SimpleLexer::basic_data()
177 178
{
    QTest::addColumn<QByteArray>("source");
179
    QTest::addColumn<TokenKindList>("expectedTokenKindList");
180 181

    QByteArray source;
182
    TokenKindList expectedTokenKindList;
183 184

    source = "// comment";
185
    expectedTokenKindList = TokenKindList() << T_CPP_COMMENT;
186 187 188
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "//// comment";
189
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
190 191 192
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/// comment";
193
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
194 195 196
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "///< comment";
197
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
198 199 200
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "//! comment";
201
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
202 203 204
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "//!< comment";
205
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
206 207 208
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "///\n";
209
    expectedTokenKindList = TokenKindList() << T_CPP_DOXY_COMMENT;
210
    QTest::newRow(source) << source << expectedTokenKindList;
211 212 213

    source = "///\n"
             "int i;";
214
    expectedTokenKindList = TokenKindList()
215 216 217
        << T_CPP_DOXY_COMMENT
        << T_INT << T_IDENTIFIER << T_SEMICOLON;
    QTest::newRow(source) << source << expectedTokenKindList;
218 219

    source = "/* comment */\n";
220
    expectedTokenKindList = TokenKindList() << T_COMMENT;
221 222 223 224 225
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/* comment\n"
             "   comment\n"
             " */\n";
226
    expectedTokenKindList = TokenKindList() << T_COMMENT;
227 228 229
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/** comment */";
230
    expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT;
231 232 233
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/** comment */\n";
234
    expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT;
235 236 237
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/** comment */ int i;\n";
238
    expectedTokenKindList = TokenKindList()
239 240 241 242 243 244
        << T_DOXY_COMMENT << T_INT << T_IDENTIFIER << T_SEMICOLON;
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/**\n"
            "  * comment\n"
             " */\n";
245
    expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT;
246 247 248 249 250
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/*!\n"
            "  * comment\n"
             " */\n";
251
    expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT;
252 253 254 255 256
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "/*!\n"
             "    comment\n"
             "*/\n";
257
    expectedTokenKindList = TokenKindList() << T_DOXY_COMMENT;
258 259 260 261 262 263 264
    QTest::newRow(source) << source << expectedTokenKindList;

    source = "int i; /*!< first counter */\n"
             "int j; /**< second counter */\n"
             "int k; ///< third counter\n"
             "int l; //!< fourth counter\n"
             "       //!< more details...  ";
265
    expectedTokenKindList = TokenKindList()
266 267 268 269 270
        << T_INT << T_IDENTIFIER << T_SEMICOLON << T_DOXY_COMMENT
        << T_INT << T_IDENTIFIER << T_SEMICOLON << T_DOXY_COMMENT
        << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT
        << T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_CPP_DOXY_COMMENT;
    QTest::newRow(source) << source << expectedTokenKindList;
271

272
    source = "?" "?(?" "?)?" "?<?" "?>a?b:c";
273
    expectedTokenKindList = TokenKindList()
274 275 276
        << T_LBRACKET << T_RBRACKET << T_LBRACE << T_RBRACE
        << T_IDENTIFIER << T_QUESTION << T_IDENTIFIER << T_COLON << T_IDENTIFIER;
    QTest::newRow(source) << source << expectedTokenKindList;
277
}
278

279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
// Lexes each row of literals_data() from a fresh lexer state and compares
// only the token kinds.
void tst_SimpleLexer::literals()
{
    QFETCH(QByteArray, source);
    QFETCH(TokenKindList, expectedTokenKindList);

    run(source, toTokens(expectedTokenKindList), false, CompareKind);
}

// Test rows for literals(): floating-point, integer, Microsoft-suffixed and
// raw string literals, including malformed ones that must lex as T_ERROR.
void tst_SimpleLexer::literals_data()
{
    QTest::addColumn<QByteArray>("source");
    QTest::addColumn<TokenKindList>("expectedTokenKindList");

    QByteArray source;
    TokenKindList expectedTokenKindList;

    // NOTE(review): the last two entries are not separated by a newline, so
    // the source ends in "23.45x.45x" - confirm this is intentional.
    source =
            "1.\n"
            "1.1\n"
            "1.23456789\n"
            ".1\n"
            ".3e8\n"
            ".3e8f\n"
            "1e1\n"
            "1E1\n"
            "-1e-1\n" // the first minus sign is a separate token!
            "1e-1\n"
            "1e+1\n"
            "1e1L\n"
            "1e1l\n"
            "1e1f\n"
            "1e1F\n"
            "23.45x"
            ".45x"
            ;
    expectedTokenKindList =
            TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_MINUS
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_ERROR << T_ERROR
                               ;
    QTest::newRow("float-literals") << source << expectedTokenKindList;

    source = // these are all the same
            "42\n"
            "0b101010u\n"
            "052ll\n"
            "0x2aL\n"
            "123FOO\n"
            "0xfOo\n"
            "33_\n"
            ;
    expectedTokenKindList =
            TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR
                               ;
    QTest::newRow("integer-literals") << source << expectedTokenKindList;

    source =
            "42ui64\n"
            "43UI64\n"
            "44Ui64\n"
            "45uI64\n"
            "46i64\n"
            "47I64\n"
            "0xffffui64\n"
            "0xfffeUi64\n"
            "0xfffdi64\n"
            "56ui\n"   // incomplete
            "56ui6\n"
            "57ui67\n" // wrong
            "58i67\n"
            ;
    expectedTokenKindList =
            TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_ERROR << T_ERROR
                               ;
    QTest::newRow("microsoft-suffix") << source << expectedTokenKindList;

    source =
            "R\"(raw text)\"\n"
            "R\"delimiter(raw text)delimiter\"\n"
            "R\"delimiter(\nraw text line1\nraw text line2\n)delimiter\"\n"
            ;
    expectedTokenKindList =
            TokenKindList() << T_RAW_STRING_LITERAL << T_RAW_STRING_LITERAL
                            << T_RAW_STRING_LITERAL
                               ;
    QTest::newRow("raw-string-literals") << source << expectedTokenKindList;
}

// Lexes each row of preprocessor_data() with the lexer's preprocessor mode
// enabled and compares only the token kinds.
void tst_SimpleLexer::preprocessor()
{
    QFETCH(QByteArray, source);
    QFETCH(TokenKindList, expectedTokenKindList);

    run(source, toTokens(expectedTokenKindList), false, CompareKind, true);
}

// Test rows for preprocessor(): every line of the source is expected to lex
// as a single numeric literal token.
void tst_SimpleLexer::preprocessor_data()
{
    QTest::addColumn<QByteArray>("source");
    QTest::addColumn<TokenKindList>("expectedTokenKindList");

    QByteArray source;
    TokenKindList expectedTokenKindList;

    source = // sad but true [2.10]
            "1\n"
            "1x.\n"
            "1.y\n"
            ".1_1.1.\n"
            "1e-\n"
            "01x1b2qWeRtty_Grumble+E-.\n"
            ;
    expectedTokenKindList =
            TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
                            << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL;
    QTest::newRow("pp-number") << source << expectedTokenKindList;
}

void tst_SimpleLexer::bytes_and_utf16chars()
{
    QFETCH(QByteArray, source);
Orgad Shaneh's avatar
Orgad Shaneh committed
408
    QFETCH(Tokens, expectedTokens);
409 410

    const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars;
Orgad Shaneh's avatar
Orgad Shaneh committed
411
    run(source, expectedTokens, false, compareFlags);
412 413
}

414 415
// Returns a one-element token list holding a token with the given kind,
// byte length, UTF-16 length and user-defined-literal flag.
static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars,
                          bool userDefinedLiteral = false)
{
    Token t;
    t.f.kind = kind;
    t.f.bytes = bytes;
    t.f.utf16chars = utf16chars;
    t.f.userDefinedLiteral = userDefinedLiteral;
    return Tokens() << t;
}

void tst_SimpleLexer::bytes_and_utf16chars_data()
{
    QTest::addColumn<QByteArray>("source");
Orgad Shaneh's avatar
Orgad Shaneh committed
428
    QTest::addColumn<Tokens>("expectedTokens");
429 430 431 432 433 434 435 436 437

    typedef QByteArray _;

    // LATIN1 Identifier
    QTest::newRow("latin1 identifier")
        << _("var") << createToken(T_IDENTIFIER, 3, 3);

    // NON-LATIN1 identifier (code point with 2 UTF8 code units)
    QTest::newRow("non-latin1 identifier (2-byte code unit at start)")
438
        << _(UC_U00FC "_var") << createToken(T_IDENTIFIER, 6, 5);
439
    QTest::newRow("non-latin1 identifier (2-byte code unit in center)")
440
        << _("_v" UC_U00FC "r_") << createToken(T_IDENTIFIER, 6, 5);
441
    QTest::newRow("non-latin1 identifier (2-byte code unit at end)")
442
        << _("var_" UC_U00FC) << createToken(T_IDENTIFIER, 6, 5);
443
    QTest::newRow("non-latin1 identifier (2-byte code unit only)")
444
        << _(UC_U00FC) << createToken(T_IDENTIFIER, 2, 1);
445 446 447

    // NON-LATIN1 identifier (code point with 3 UTF8 code units)
    QTest::newRow("non-latin1 identifier (3-byte code unit at start)")
448
        << _(UC_U4E8C "_var") << createToken(T_IDENTIFIER, 7, 5);
449
    QTest::newRow("non-latin1 identifier (3-byte code unit in center)")
450
        << _("_v" UC_U4E8C "r_") << createToken(T_IDENTIFIER, 7, 5);
451
    QTest::newRow("non-latin1 identifier (3-byte code unit at end)")
452
        << _("var_" UC_U4E8C) << createToken(T_IDENTIFIER, 7, 5);
453
    QTest::newRow("non-latin1 identifier (3-byte code unit only)")
454
        << _(UC_U4E8C) << createToken(T_IDENTIFIER, 3, 1);
455 456 457

    // NON-LATIN1 identifier (code point with 4 UTF8 code units)
    QTest::newRow("non-latin1 identifier (4-byte code unit at start)")
458
        << _(UC_U10302 "_var") << createToken(T_IDENTIFIER, 8, 6);
459
    QTest::newRow("non-latin1 identifier (4-byte code unit in center)")
460
        << _("_v" UC_U10302 "r_") << createToken(T_IDENTIFIER, 8, 6);
461
    QTest::newRow("non-latin1 identifier (4-byte code unit at end)")
462
        << _("var_" UC_U10302) << createToken(T_IDENTIFIER, 8, 6);
463
    QTest::newRow("non-latin1 identifier (4-byte code unit only)")
464
        << _(UC_U10302) << createToken(T_IDENTIFIER, 4, 2);
465 466 467

    // NON-LATIN1 identifier (code points with several multi-byte UTF8 code units)
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units at start)")
468
        << _(UC_U00FC UC_U4E8C UC_U10302 "_var") << createToken(T_IDENTIFIER, 13, 8);
469
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units in center)")
470
        << _("_v" UC_U00FC UC_U4E8C UC_U10302 "r_") << createToken(T_IDENTIFIER, 13, 8);
471
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units at end)")
472
        << _("var_" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 13, 8);
473
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units only)")
474
        << _(UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 9, 4);
475 476 477 478 479 480 481

    // Comments
    QTest::newRow("ascii comment /* ... */")
        << _("/* hello world */") << createToken(T_COMMENT, 17, 17);
    QTest::newRow("latin1 comment //")
        << _("// hello world") << createToken(T_CPP_COMMENT, 14, 14);
    QTest::newRow("non-latin1 comment /* ... */ (1)")
482
        << _("/* " UC_U00FC UC_U4E8C UC_U10302 " */") << createToken(T_COMMENT, 15, 10);
483
    QTest::newRow("non-latin1 comment /* ... */ (2)")
484
        << _("/*" UC_U00FC UC_U4E8C UC_U10302 "*/") << createToken(T_COMMENT, 13, 8);
485
    QTest::newRow("non-latin1 comment // (1)")
486
        << _("// " UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 12, 7);
487
    QTest::newRow("non-latin1 comment // (2)")
488
        << _("//" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 11, 6);
489 490 491 492 493

    // String Literals
    QTest::newRow("latin1 string literal")
        << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7);
    QTest::newRow("non-latin1 string literal")
494
        << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6);
495 496
}

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
// Lexes each row of user_defined_literals_data() with the cxx11Enabled
// language feature switched on, comparing kind, byte length, UTF-16 length
// and the user-defined-literal flag of every token.
void tst_SimpleLexer::user_defined_literals()
{
    QFETCH(QByteArray, source);
    QFETCH(Tokens, expectedTokens);

    const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars | CompareUserDefinedLiteral;
    LanguageFeatures languageFeatures;
    languageFeatures.cxx11Enabled = true;
    run(source, expectedTokens, false, compareFlags, false, languageFeatures);
}

// Test rows for user_defined_literals(). The last createToken() argument is
// the expected value of the token's user-defined-literal flag.
void tst_SimpleLexer::user_defined_literals_data()
{
    QTest::addColumn<QByteArray>("source");
    QTest::addColumn<Tokens>("expectedTokens");

    typedef QByteArray _;

    // String User-defined Literals
    QTest::newRow("latin1 string non-user-defined literal")
        << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7, false);
    QTest::newRow("latin1 string user-defined literal")
        << _("\"hello\"_udl") << createToken(T_STRING_LITERAL, 11, 11, true);

    // Numeric User-defined Literals
    QTest::newRow("numeric non user-defined literal with integer suffix")
        << _("11LL") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
    QTest::newRow("numeric non user-defined literal with decimal part")
        << _("11.1") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
    QTest::newRow("numeric non user-defined literal with float suffix")
        << _("11.1f") << createToken(T_NUMERIC_LITERAL, 5, 5, false);
    QTest::newRow("numeric user-defined literal without decimal part")
        << _("11_udl") << createToken(T_NUMERIC_LITERAL, 6, 6, true);
    QTest::newRow("numeric user-defined literal with decimal part")
        << _("11.1_udl") << createToken(T_NUMERIC_LITERAL, 8, 8, true);
}

// Returns a single token with the given kind, byte offset and length, and
// UTF-16 offset and length. Used by offsets_data(), where several tokens are
// appended to one expected list.
static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes,
                         unsigned utf16charsOffset, unsigned utf16chars)
{
    Token t;
    t.f.kind = kind;
    t.byteOffset = byteOffset;
    t.f.bytes = bytes;
    t.utf16charOffset = utf16charsOffset;
    t.f.utf16chars = utf16chars;
    return t;
}

void tst_SimpleLexer::offsets()
{
    QFETCH(QByteArray, source);
Orgad Shaneh's avatar
Orgad Shaneh committed
549
    QFETCH(Tokens, expectedTokens);
550 551 552 553 554 555 556

    const TokenCompareFlags compareFlags = CompareKind
            | CompareBytesBegin
            | CompareBytesEnd
            | CompareUtf16CharsBegin
            | CompareUtf16CharsEnd
            ;
Orgad Shaneh's avatar
Orgad Shaneh committed
557
    run(source, expectedTokens, false, compareFlags);
558 559 560 561 562
}

void tst_SimpleLexer::offsets_data()
{
    QTest::addColumn<QByteArray>("source");
Orgad Shaneh's avatar
Orgad Shaneh committed
563
    QTest::addColumn<Tokens>("expectedTokens");
564 565 566 567 568

    typedef QByteArray _;

    // LATIN1 Identifier
    QTest::newRow("latin1 identifiers")
Orgad Shaneh's avatar
Orgad Shaneh committed
569
        << _("var var") << (Tokens()
570 571 572 573 574 575
            << createToken(T_IDENTIFIER, 0, 3, 0, 3)
            << createToken(T_IDENTIFIER, 4, 3, 4, 3)
        );

    // NON-LATIN1 identifier
    QTest::newRow("non-latin1 identifiers 1")
Orgad Shaneh's avatar
Orgad Shaneh committed
576
        << _("var_" UC_U00FC " var_" UC_U00FC) << (Tokens()
577 578 579 580
            << createToken(T_IDENTIFIER, 0, 6, 0, 5)
            << createToken(T_IDENTIFIER, 7, 6, 6, 5)
        );
    QTest::newRow("non-latin1 identifiers 2")
Orgad Shaneh's avatar
Orgad Shaneh committed
581
        << _(UC_U00FC UC_U4E8C UC_U10302 " " UC_U00FC UC_U4E8C UC_U10302) << (Tokens()
582 583 584 585 586
            << createToken(T_IDENTIFIER, 0, 9, 0, 4)
            << createToken(T_IDENTIFIER, 10, 9, 5, 4)
        );

    QTest::newRow("non-latin1 identifiers 3")   // first code unit on line: <bytes> / <utf16char>
587
        << _("class v" UC_U00FC UC_U4E8C UC_U10302 "\n"  //  0 / 0
588 589
             "{\n"                              // 17 / 12
             "public:\n"                        // 19 / 14
590
             "    v" UC_U00FC UC_U4E8C UC_U10302 "();\n" // 27 / 22
Orgad Shaneh's avatar
Orgad Shaneh committed
591
             "};\n") << (Tokens()         // 45 / 35
592 593 594 595 596 597 598 599 600 601 602 603
            << createToken(T_CLASS, 0, 5, 0, 5)         // class
            << createToken(T_IDENTIFIER, 6, 10, 6, 5)   // non-latin1 id
            << createToken(T_LBRACE, 17, 1, 12, 1)      // {
            << createToken(T_PUBLIC, 19, 6, 14, 6)      // public
            << createToken(T_COLON, 25, 1, 20, 1)       // :
            << createToken(T_IDENTIFIER, 31, 10, 26, 5) // id
            << createToken(T_LPAREN, 41, 1, 31, 1)      // (
            << createToken(T_RPAREN, 42, 1, 32, 1)      // )
            << createToken(T_SEMICOLON, 43, 1, 33, 1)   // ;
            << createToken(T_RBRACE, 45, 1, 35, 1)      // }
            << createToken(T_SEMICOLON, 46, 1, 36, 1)   // ;
        );
604 605
}

606 607 608
void tst_SimpleLexer::incremental()
{
    QFETCH(QByteArray, source);
609
    QFETCH(TokenKindList, expectedTokenKindList);
610

Orgad Shaneh's avatar
Orgad Shaneh committed
611
    run(source, toTokens(expectedTokenKindList), true, CompareKind);
612 613 614 615 616
}

void tst_SimpleLexer::incremental_data()
{
    QTest::addColumn<QByteArray>("source");
617
    QTest::addColumn<TokenKindList>("expectedTokenKindList");
618 619 620

    QTest::newRow("simple_string_literal")
            << _("\"foo\"")
621
            << (TokenKindList() << T_STRING_LITERAL);
622 623 624

    QTest::newRow("unterminated_string_literal")
            << _("\"foo")
625
            << (TokenKindList() << T_STRING_LITERAL);
626 627 628

    QTest::newRow("escaped_string_literal_1")
            << _("\"foo \\")
629
            << (TokenKindList() << T_STRING_LITERAL);
630 631 632

    QTest::newRow("escaped_string_literal_2")
            << _("bar\"")
633
            << (TokenKindList() << T_STRING_LITERAL);
634 635 636

    QTest::newRow("escaped_string_literal_with_spaces_1")
            << _("\"foo \\    ")
637
            << (TokenKindList() << T_STRING_LITERAL);
638 639 640

    QTest::newRow("escaped_string_literal_with_spaces_2")
            << _("bar\"")
641
            << (TokenKindList() << T_STRING_LITERAL);
642 643 644

    QTest::newRow("double_escaped_string_literal_1")
            << _("\"foo \\")
645
            << (TokenKindList() << T_STRING_LITERAL);
646 647 648

    QTest::newRow("double_escaped_string_literal_2")
            << _("bar \\")
649
            << (TokenKindList() << T_STRING_LITERAL);
650 651 652

    QTest::newRow("double_escaped_string_literal_3")
            << _("baz\"")
653
            << (TokenKindList() << T_STRING_LITERAL);
654 655 656

    QTest::newRow("unterminated_escaped_string_literal")
            << _("\"foo \\\n\nbar\"")
657
            << (TokenKindList() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL);
658

659 660
    QTest::newRow("escaped_string_literal_with_newline_1")
            << _("\"foo \\")
661
            << (TokenKindList() << T_STRING_LITERAL);
662 663 664

    QTest::newRow("escaped_string_literal_with_newline_2")
            << _("")
665
            << TokenKindList();
666 667 668

    QTest::newRow("escaped_string_literal_with_newline_3")
            << _("bar")
669
            << (TokenKindList() << T_IDENTIFIER);
670

671 672
    QTest::newRow("escaped_string_literal_with_space_and_newline_single")
            << _("\"foo \\   \n   bar\"")
673
            << (TokenKindList() << T_STRING_LITERAL);
674 675 676

    QTest::newRow("escaped_string_literal_with_space_and_newline_1")
            << _("\"foo \\   \n   ")
677
            << (TokenKindList() << T_STRING_LITERAL);
678 679

    QTest::newRow("escaped_string_literal_with_space_and_newline_2")
680
            << _("bar")
681
            << (TokenKindList() << T_IDENTIFIER);
682 683 684

    QTest::newRow("token_after_escaped_string_literal_1")
            << _("\"foo \\")
685
            << (TokenKindList() << T_STRING_LITERAL);
686 687 688

    QTest::newRow("token_after_escaped_string_literal_2")
            << _("bar\";")
689
            << (TokenKindList() << T_STRING_LITERAL << T_SEMICOLON);
690 691 692

    QTest::newRow("simple_cpp_comment")
            << _("//foo")
693
            << (TokenKindList() << T_CPP_COMMENT);
694 695 696

    QTest::newRow("escaped_cpp_comment_1")
            << _("//foo \\")
697
            << (TokenKindList() << T_CPP_COMMENT);
698 699 700

    QTest::newRow("escaped_cpp_comment_2")
            << _("bar")
701
            << (TokenKindList() << T_CPP_COMMENT);
702 703 704

    QTest::newRow("escaped_cpp_comment_with_spaces_1")
            << _("//foo \\    ")
705
            << (TokenKindList() << T_CPP_COMMENT);
706 707 708

    QTest::newRow("escaped_cpp_comment_with_spaces_2")
            << _("bar")
709
            << (TokenKindList() << T_CPP_COMMENT);
710 711 712

    QTest::newRow("double_escaped_cpp_comment_1")
            << _("//foo \\")
713
            << (TokenKindList() << T_CPP_COMMENT);
714 715 716

    QTest::newRow("double_escaped_cpp_comment_2")
            << _("bar \\")
717
            << (TokenKindList() << T_CPP_COMMENT);
718 719 720

    QTest::newRow("double_escaped_cpp_comment_3")
            << _("baz")
721
            << (TokenKindList() << T_CPP_COMMENT);
722 723 724

    QTest::newRow("escaped_cpp_comment_with_newline")
            << _("//foo \\\n\nbar")
725
            << (TokenKindList() << T_CPP_COMMENT << T_IDENTIFIER);
726

727 728
    QTest::newRow("escaped_cpp_comment_with_newline_1")
            << _("//foo \\")
729
            << (TokenKindList() << T_CPP_COMMENT);
730 731 732

    QTest::newRow("escaped_cpp_comment_with_newline_2")
            << _("")
733
            << TokenKindList();
734 735 736

    QTest::newRow("escaped_cpp_comment_with_newline_3")
            << _("bar")
737
            << (TokenKindList() << T_IDENTIFIER);
738

739 740
    QTest::newRow("escaped_cpp_comment_with_space_and_newline_single")
            << _("//foo \\   \n   bar")
741
            << (TokenKindList() << T_CPP_COMMENT);
742 743 744

    QTest::newRow("escaped_cpp_comment_with_space_and_newline_1")
            << _("//foo \\   \n   ")
745
            << (TokenKindList() << T_CPP_COMMENT);
746 747 748

    QTest::newRow("escaped_cpp_comment_with_space_and_newline_2")
            << _("bar")
749
            << (TokenKindList() << T_IDENTIFIER);
750 751
}

752 753
QTEST_APPLESS_MAIN(tst_SimpleLexer)
#include "tst_lexer.moc"