/****************************************************************************
**
** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia.  For licensing terms and
** conditions see http://qt.digia.com/licensing.  For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file.  Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights.  These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/

/*
  Copyright 2005 Roberto Raggi <roberto@kdevelop.org>

  Permission to use, copy, modify, distribute, and sell this software and its
  documentation for any purpose is hereby granted without fee, provided that
  the above copyright notice appear in all copies and that both that
  copyright notice and this permission notice appear in supporting
  documentation.

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  KDEVELOP TEAM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "pp.h"
50
#include "pp-cctype.h"
hjk's avatar
hjk committed
51

52
53
54
55
#include <cplusplus/Control.h>
#include <cplusplus/Lexer.h>
#include <cplusplus/Token.h>
#include <cplusplus/Literals.h>
56

hjk's avatar
hjk committed
57
#include <QDebug>
58
#include <QList>
59
60
#include <QDate>
#include <QTime>
61
62
#include <QPair>

63
#include <cctype>
64
65
#include <list>
#include <algorithm>
66
67
68
69
70
71
72

#define NO_DEBUG

#ifndef NO_DEBUG
#  include <iostream>
#endif // NO_DEBUG

73
74
#include <deque>

75
76
namespace {
/// Hard limits used by the preprocessor engine.
enum {
    /// Limit on token expansions (its use is outside this part of the file).
    MAX_TOKEN_EXPANSION_COUNT = 5000,
    /// For when macros are using some kind of right-folding, this is the
    /// list of "delayed" buffers waiting to be expanded after the current one.
    /// Preprocessor::State::pushTokenBuffer() stops pushing beyond this depth.
    MAX_TOKEN_BUFFER_DEPTH = 16000
};
}

namespace {
83
/// RAII object to save a value, and restore it when the scope is left.
///
/// The constructor stores \a newValue into the referenced variable and keeps
/// the variable's previous content; the destructor swaps that content back.
template<typename T>
class ScopedSwap
{
    T oldValue; // holds the saved content while the guard is alive
    T &ref;     // the variable being temporarily overridden

    // Not copyable: a copy would swap the saved value back a second time.
    ScopedSwap(const ScopedSwap &other);
    void operator = (const ScopedSwap &other);

public:
    ScopedSwap(T &var, T newValue)
        : oldValue(newValue)
        , ref(var)
    {
        // After the swap, ref holds newValue and oldValue holds the original.
        std::swap(ref, oldValue);
    }

    ~ScopedSwap()
    {
        std::swap(ref, oldValue);
    }
};
typedef ScopedSwap<bool> ScopedBoolSwap;
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160

/// Returns true when the first \a size bytes of \a a and \a b compare equal.
static bool same(const char *a, const char *b, int size)
{
    return strncmp(a, b, size) == 0;
}

/// Returns true when the \a size bytes starting at \a name spell one of the
/// Qt-specific words recognised here: Q_D, Q_Q, the Q_-prefixed macros listed
/// in the switch below, SIGNAL, SLOT, signals, slots, foreach and emit.
///
/// \a name does not need to be NUL-terminated; at most \a size bytes are
/// compared against each candidate.
static bool isQtReservedWord(const char *name, int size)
{
    // The shortest recognised words (Q_D, Q_Q) are three characters long.
    if (size < 3)
        return false;

    const char c = name[0];
    if (c == 'Q') {
        if (name[1] == '_') {
            // Skip the "Q_" prefix; from here on name/size describe the rest.
            name += 2;
            size -= 2;
            switch (size) {
            case 1:
                return name[0] == 'D' || name[0] == 'Q';
            case 4:
                return same(name, "SLOT", size) || same(name, "EMIT", size);
            case 5:
                return same(name, "SLOTS", size) || same(name, "ENUMS", size)
                        || same(name, "FLAGS", size);
            case 6:
                return same(name, "SIGNAL", size);
            case 7:
                return same(name, "SIGNALS", size) || same(name, "FOREACH", size);
            case 8:
                return same(name, "PROPERTY", size);
            case 9:
                return same(name, "INVOKABLE", size);
            case 10:
                return same(name, "INTERFACES", size);
            case 16:
                return same(name, "PRIVATE_PROPERTY", size);
            }
        }
        return false;
    }

    if (c == 'S')
        return (size == 6 && same(name, "SIGNAL", size)) || (size == 4 && same(name, "SLOT", size));

    if (c == 's')
        return (size == 7 && same(name, "signals", size)) || (size == 5 && same(name, "slots", size));

    if (c == 'f')
        return size == 7 && same(name, "foreach", size);

    if (c == 'e')
        return size == 4 && same(name, "emit", size);

    return false;
}


161
} // anonymous namespace
con's avatar
con committed
162

Roberto Raggi's avatar
Roberto Raggi committed
163
164
namespace CPlusPlus {

165
namespace Internal {
166
167
168
169
170
171
172
173
/// Buffers tokens for the Preprocessor::lex() to read next. Do not use  this
/// class directly, but use Preprocessor::State::pushTokenBuffer .
///
/// New tokens are added when undoing look-ahead, or after expanding a macro.
/// When macro expansion happened, the macro is passed in, and blocked until
/// all tokens generated by it (and by subsequent expansion of those generated
/// tokens) are read from the buffer. See Preprocessor::lex() for details on
/// exactly when the buffer (and subsequently a blocking macro) is removed.
174
175
struct TokenBuffer
{
176
    std::deque<PPToken> tokens;
177
178
179
    const Macro *macro;
    TokenBuffer *next;

180
181
    TokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro, TokenBuffer *next)
        : tokens(start, end), macro(macro), next(next)
182
183
    {}

184
185
186
187
    bool isBlocked(const Macro *macro) const {
        if (!macro)
            return false;

188
        for (const TokenBuffer *it = this; it; it = it->next)
189
190
191
            if (it->macro)
                if (it->macro == macro || (it->macro->name() == macro->name()))
                    return true;
192
193
194
195
        return false;
    }
};

Roberto Raggi's avatar
Roberto Raggi committed
196
197
198
199
/// Integer value used while evaluating preprocessor expressions.
///
/// A value is either a signed or an unsigned long; \c kind records which
/// member of the union was set last.
struct Value
{
    enum Kind {
        Kind_Long,
        Kind_ULong
    };

    Kind kind;

    union {
        long l;
        unsigned long ul;
    };


    /// Creates a signed zero.
    Value()
        : kind(Kind_Long), l(0)
    { }

    inline bool is_ulong () const
    { return kind == Kind_ULong; }

    inline void set_ulong (unsigned long v)
    {
        ul = v;
        kind = Kind_ULong;
    }

    inline void set_long (long v)
    {
        l = v;
        kind = Kind_Long;
    }

    /// Tests the signed member regardless of \c kind.
    inline bool is_zero () const
    { return l == 0; }

// Defines a binary operator on Value. If either operand is unsigned, the
// operation is carried out on the unsigned members of both operands and the
// result is marked unsigned; otherwise it is carried out on the signed
// members and the result is signed. The first macro argument is only a
// descriptive label and is not used in the expansion.
#define PP_DEFINE_BIN_OP(name, op) \
    inline Value operator op(const Value &other) const \
    { \
        Value v = *this; \
        if (v.is_ulong () || other.is_ulong ()) \
            v.set_ulong (v.ul op other.ul); \
        else \
            v.set_long (v.l op other.l); \
        return v; \
    }

    PP_DEFINE_BIN_OP(op_add, +)
    PP_DEFINE_BIN_OP(op_sub, -)
    PP_DEFINE_BIN_OP(op_mult, *)
    PP_DEFINE_BIN_OP(op_div, /)
    PP_DEFINE_BIN_OP(op_mod, %)
    PP_DEFINE_BIN_OP(op_lhs, <<)
    PP_DEFINE_BIN_OP(op_rhs, >>)
    PP_DEFINE_BIN_OP(op_lt, <)
    PP_DEFINE_BIN_OP(op_gt, >)
    PP_DEFINE_BIN_OP(op_le, <=)
    PP_DEFINE_BIN_OP(op_ge, >=)
    PP_DEFINE_BIN_OP(op_eq, ==)
    PP_DEFINE_BIN_OP(op_ne, !=)
    PP_DEFINE_BIN_OP(op_bit_and, &)
    PP_DEFINE_BIN_OP(op_bit_or, |)
    PP_DEFINE_BIN_OP(op_bit_xor, ^)
    PP_DEFINE_BIN_OP(op_and, &&)
    PP_DEFINE_BIN_OP(op_or, ||)

#undef PP_DEFINE_BIN_OP
};

266
} // namespace Internal
267
} // namespace CPlusPlus
Roberto Raggi's avatar
Roberto Raggi committed
268

con's avatar
con committed
269
using namespace CPlusPlus;
270
using namespace CPlusPlus::Internal;
Roberto Raggi's avatar
Roberto Raggi committed
271

con's avatar
con committed
272
273
namespace {

274
inline bool isContinuationToken(const PPToken &tk)
275
276
277
278
{
    return tk.isNot(T_EOF_SYMBOL) && (! tk.newline() || tk.joined());
}

279
/// Looks up \a name in \a env and reports the outcome to \a client.
///
/// When \a client is non-null it is told whether the check passed (with the
/// offsets, \a line and the macro found) or failed (with the offsets and
/// \a name).
///
/// @return the macro resolved by \a env, or 0 when there is none.
Macro *macroDefinition(const ByteArrayRef &name,
                       unsigned bytesOffset,
                       unsigned utf16charsOffset,
                       unsigned line,
                       Environment *env,
                       Client *client)
{
    Macro *m = env->resolve(name);
    if (client) {
        if (m)
            client->passedMacroDefinitionCheck(bytesOffset, utf16charsOffset, line, *m);
        else
            client->failedMacroDefinitionCheck(bytesOffset, utf16charsOffset, name);
    }
    return m;
}

con's avatar
con committed
296
297
298
299
300
301
302
303
304
305
306
class RangeLexer
{
    const Token *first;
    const Token *last;
    Token trivial;

public:
    inline RangeLexer(const Token *first, const Token *last)
        : first(first), last(last)
    {
        // WARN: `last' must be a valid iterator.
Nikolai Kosjar's avatar
Nikolai Kosjar committed
307
        trivial.byteOffset = last->byteOffset;
308
        trivial.utf16charOffset = last->utf16charOffset;
con's avatar
con committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
    }

    inline operator bool() const
    { return first != last; }

    inline bool isValid() const
    { return first != last; }

    inline int size() const
    { return std::distance(first, last); }

    inline const Token *dot() const
    { return first; }

    inline const Token &operator*() const
    {
        if (first != last)
            return *first;

        return trivial;
    }

    inline const Token *operator->() const
    {
        if (first != last)
            return first;

        return &trivial;
    }

    inline RangeLexer &operator++()
    {
        ++first;
        return *this;
    }
};

class ExpressionEvaluator
{
    ExpressionEvaluator(const ExpressionEvaluator &other);
    void operator = (const ExpressionEvaluator &other);

public:
Christian Kamm's avatar
Christian Kamm committed
352
353
    ExpressionEvaluator(Client *client, Environment *env)
        : client(client), env(env), _lex(0)
con's avatar
con committed
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
    { }

    Value operator()(const Token *firstToken, const Token *lastToken,
                     const QByteArray &source)
    {
        this->source = source;
        const Value previousValue = switchValue(Value());
        RangeLexer tmp(firstToken, lastToken);
        RangeLexer *previousLex = _lex;
        _lex = &tmp;
        process_expression();
        _lex = previousLex;
        return switchValue(previousValue);
    }

protected:
    Value switchValue(const Value &value)
    {
        Value previousValue = _value;
        _value = value;
        return previousValue;
    }

    bool isTokenDefined() const
    {
        if ((*_lex)->isNot(T_IDENTIFIER))
            return false;
381
        const ByteArrayRef spell = tokenSpell();
con's avatar
con committed
382
383
384
385
386
        if (spell.size() != 7)
            return false;
        return spell == "defined";
    }

387
388
    const char *tokenPosition() const
    {
Nikolai Kosjar's avatar
Nikolai Kosjar committed
389
        return source.constData() + (*_lex)->byteOffset;
390
391
392
393
    }

    int tokenLength() const
    {
Nikolai Kosjar's avatar
Nikolai Kosjar committed
394
        return (*_lex)->f.bytes;
395
396
    }

397
    ByteArrayRef tokenSpell() const
con's avatar
con committed
398
    {
399
        return ByteArrayRef(tokenPosition(), tokenLength());
con's avatar
con committed
400
401
    }

402
403
    inline void process_expression()
    { process_constant_expression(); }
con's avatar
con committed
404

405
    void process_primary()
con's avatar
con committed
406
    {
407
        if ((*_lex)->is(T_NUMERIC_LITERAL)) {
408
409
410
411
            const char *spell = tokenPosition();
            int len = tokenLength();
            while (len) {
                const char ch = spell[len - 1];
412

413
                if (! (ch == 'u' || ch == 'U' || ch == 'l' || ch == 'L'))
414
                    break;
415
                --len;
416
417
            }

418
419
420
            const char *end = spell + len;
            char *vend = const_cast<char *>(end);
            _value.set_long(strtol(spell, &vend, 0));
con's avatar
con committed
421
422
423
424
            ++(*_lex);
        } else if (isTokenDefined()) {
            ++(*_lex);
            if ((*_lex)->is(T_IDENTIFIER)) {
425
                _value.set_long(macroDefinition(tokenSpell(),
Nikolai Kosjar's avatar
Nikolai Kosjar committed
426
                                                (*_lex)->byteOffset,
427
                                                (*_lex)->utf16charOffset,
428
429
                                                (*_lex)->lineno, env, client)
                                != 0);
con's avatar
con committed
430
431
432
433
                ++(*_lex);
            } else if ((*_lex)->is(T_LPAREN)) {
                ++(*_lex);
                if ((*_lex)->is(T_IDENTIFIER)) {
434
                    _value.set_long(macroDefinition(tokenSpell(),
Nikolai Kosjar's avatar
Nikolai Kosjar committed
435
                                                    (*_lex)->byteOffset,
436
                                                    (*_lex)->utf16charOffset,
437
438
439
                                                    (*_lex)->lineno,
                                                    env, client)
                                    != 0);
con's avatar
con committed
440
                    ++(*_lex);
441
                    if ((*_lex)->is(T_RPAREN))
con's avatar
con committed
442
443
444
445
446
447
448
449
450
451
452
453
454
                        ++(*_lex);
                }
            }
        } else if ((*_lex)->is(T_IDENTIFIER)) {
            _value.set_long(0);
            ++(*_lex);
        } else if ((*_lex)->is(T_MINUS)) {
            ++(*_lex);
            process_primary();
            _value.set_long(- _value.l);
        } else if ((*_lex)->is(T_PLUS)) {
            ++(*_lex);
            process_primary();
Roberto Raggi's avatar
Roberto Raggi committed
455
456
457
458
        } else if ((*_lex)->is(T_TILDE)) {
            ++(*_lex);
            process_primary();
            _value.set_long(~ _value.l);
con's avatar
con committed
459
460
461
462
463
464
465
466
467
468
469
470
        } else if ((*_lex)->is(T_EXCLAIM)) {
            ++(*_lex);
            process_primary();
            _value.set_long(_value.is_zero());
        } else if ((*_lex)->is(T_LPAREN)) {
            ++(*_lex);
            process_expression();
            if ((*_lex)->is(T_RPAREN))
                ++(*_lex);
        }
    }

471
    Value process_expression_with_operator_precedence(const Value &lhs, int minPrecedence)
con's avatar
con committed
472
    {
473
        Value result = lhs;
con's avatar
con committed
474

475
476
477
        while (precedence((*_lex)->kind()) >= minPrecedence) {
            const int oper = (*_lex)->kind();
            const int operPrecedence = precedence(oper);
con's avatar
con committed
478
479
            ++(*_lex);
            process_primary();
480
            Value rhs = _value;
con's avatar
con committed
481

482
            for (int LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind);
483
                    LA_precedence > operPrecedence && isBinaryOperator(LA_token_kind);
484
485
                    LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind)) {
                rhs = process_expression_with_operator_precedence(rhs, LA_precedence);
con's avatar
con committed
486
487
            }

488
            result = evaluate_expression(oper, result, rhs);
con's avatar
con committed
489
490
        }

491
        return result;
con's avatar
con committed
492
493
    }

494
    void process_constant_expression()
con's avatar
con committed
495
    {
496
497
        process_primary();
        _value = process_expression_with_operator_precedence(_value, precedence(T_PIPE_PIPE));
con's avatar
con committed
498

499
500
        if ((*_lex)->is(T_QUESTION)) {
            const Value cond = _value;
con's avatar
con committed
501
            ++(*_lex);
502
503
504
505
506
507
508
509
            process_constant_expression();
            Value left = _value, right;
            if ((*_lex)->is(T_COLON)) {
                ++(*_lex);
                process_constant_expression();
                right = _value;
            }
            _value = ! cond.is_zero() ? left : right;
con's avatar
con committed
510
511
512
        }
    }

513
514
private:
    inline int precedence(int tokenKind) const
con's avatar
con committed
515
    {
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
        switch (tokenKind) {
        case T_PIPE_PIPE:       return 0;
        case T_AMPER_AMPER:     return 1;
        case T_PIPE:            return 2;
        case T_CARET:           return 3;
        case T_AMPER:           return 4;
        case T_EQUAL_EQUAL:
        case T_EXCLAIM_EQUAL:   return 5;
        case T_GREATER:
        case T_LESS:
        case T_LESS_EQUAL:
        case T_GREATER_EQUAL:   return 6;
        case T_LESS_LESS:
        case T_GREATER_GREATER: return 7;
        case T_PLUS:
        case T_MINUS:           return 8;
        case T_STAR:
        case T_SLASH:
        case T_PERCENT:         return 9;

        default:
            return -1;
con's avatar
con committed
538
539
540
        }
    }

541
    static inline bool isBinaryOperator(int tokenKind)
con's avatar
con committed
542
    {
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
        switch (tokenKind) {
        case T_PIPE_PIPE:
        case T_AMPER_AMPER:
        case T_PIPE:
        case T_CARET:
        case T_AMPER:
        case T_EQUAL_EQUAL:
        case T_EXCLAIM_EQUAL:
        case T_GREATER:
        case T_LESS:
        case T_LESS_EQUAL:
        case T_GREATER_EQUAL:
        case T_LESS_LESS:
        case T_GREATER_GREATER:
        case T_PLUS:
        case T_MINUS:
        case T_STAR:
        case T_SLASH:
        case T_PERCENT:
            return true;
con's avatar
con committed
563

564
565
        default:
            return false;
con's avatar
con committed
566
567
568
        }
    }

569
    static inline Value evaluate_expression(int tokenKind, const Value &lhs, const Value &rhs)
con's avatar
con committed
570
    {
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
        switch (tokenKind) {
        case T_PIPE_PIPE:       return lhs || rhs;
        case T_AMPER_AMPER:     return lhs && rhs;
        case T_PIPE:            return lhs | rhs;
        case T_CARET:           return lhs ^ rhs;
        case T_AMPER:           return lhs & rhs;
        case T_EQUAL_EQUAL:     return lhs == rhs;
        case T_EXCLAIM_EQUAL:   return lhs != rhs;
        case T_GREATER:         return lhs > rhs;
        case T_LESS:            return lhs < rhs;
        case T_LESS_EQUAL:      return lhs <= rhs;
        case T_GREATER_EQUAL:   return lhs >= rhs;
        case T_LESS_LESS:       return lhs << rhs;
        case T_GREATER_GREATER: return lhs >> rhs;
        case T_PLUS:            return lhs + rhs;
        case T_MINUS:           return lhs - rhs;
        case T_STAR:            return lhs * rhs;
        case T_SLASH:           return rhs.is_zero() ? Value() : lhs / rhs;
        case T_PERCENT:         return rhs.is_zero() ? Value() : lhs % rhs;

        default:
            return Value();
con's avatar
con committed
593
594
595
596
        }
    }

private:
Christian Kamm's avatar
Christian Kamm committed
597
    Client *client;
con's avatar
con committed
598
599
600
601
602
603
604
605
    Environment *env;
    QByteArray source;
    RangeLexer *_lex;
    Value _value;
};

} // end of anonymous namespace

606
607
608
609
610
/// Sets up a fresh preprocessing state: no lexer, no token buffers, not
/// skipping, and include-guard tracking positioned before any #ifndef.
Preprocessor::State::State()
    : m_lexer(0)
    , m_skipping(MAX_LEVEL)
    , m_trueTest(MAX_LEVEL)
    , m_ifLevel(0)
    , m_tokenBufferDepth(0)
    , m_tokenBuffer(0)
    , m_inPreprocessorDirective(false)
    , m_markExpandedTokens(true)
    , m_noLines(false)
    , m_inCondition(false)
    , m_bytesOffsetRef(0)
    , m_utf16charsOffsetRef(0)
    , m_result(0)
    , m_lineRef(1)
    , m_currentExpansion(0)
    , m_includeGuardState(IncludeGuardState_BeforeIfndef)
{
    // Level 0 (outside any conditional) is neither skipped nor a taken branch.
    m_skipping[m_ifLevel] = false;
    m_trueTest[m_ifLevel] = false;

    m_expansionResult.reserve(256);
    setExpansionStatus(NotExpanding);
}

631
#define COMPRESS_TOKEN_BUFFER
632
633
634
635
636
/// Queues the tokens in [\a start, \a end) so that they are read before
/// anything already pending. When \a macro is non-null it is recorded on a
/// new buffer as the macro blocked by these tokens.
///
/// Once the buffer depth exceeds MAX_TOKEN_BUFFER_DEPTH the call does
/// nothing and the tokens are dropped.
void Preprocessor::State::pushTokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro)
{
    if (m_tokenBufferDepth <= MAX_TOKEN_BUFFER_DEPTH) {
#ifdef COMPRESS_TOKEN_BUFFER
        if (macro || !m_tokenBuffer) {
            // If there is a new blocking macro (or no token buffer yet), create
            // one.
            m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
            ++m_tokenBufferDepth;
        } else {
            // No new blocking macro is passed in, so tokens can be prepended to
            // the existing buffer.
            m_tokenBuffer->tokens.insert(m_tokenBuffer->tokens.begin(), start, end);
        }
#else
        m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
        ++m_tokenBufferDepth;
#endif
    }
}

/// Removes and deletes the topmost token buffer and decrements the recorded
/// buffer depth. Does nothing when no buffer is present.
void Preprocessor::State::popTokenBuffer()
{
    TokenBuffer *r = m_tokenBuffer;
    if (!r)
        return; // nothing to pop; avoids dereferencing a null buffer

    m_tokenBuffer = r->next;
    delete r;

    if (m_tokenBufferDepth)
        --m_tokenBufferDepth;
}
662

663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
#ifdef DEBUG_INCLUDE_GUARD_TRACKING
/// Debug helper: maps an include-guard state to its name, or "UNKNOWN" for
/// values that are not one of the known states.
QString Preprocessor::State::guardStateToString(int guardState)
{
    const char *label = "UNKNOWN";

    switch (guardState) {
    case IncludeGuardState_NoGuard:
        label = "NoGuard";
        break;
    case IncludeGuardState_BeforeIfndef:
        label = "BeforeIfndef";
        break;
    case IncludeGuardState_AfterIfndef:
        label = "AfterIfndef";
        break;
    case IncludeGuardState_AfterDefine:
        label = "AfterDefine";
        break;
    case IncludeGuardState_AfterEndif:
        label = "AfterEndif";
        break;
    default:
        break;
    }

    return QLatin1String(label);
}
#endif // DEBUG_INCLUDE_GUARD_TRACKING

/**
 * @brief Update the include-guard tracking state.
 *
 * Include guards are the #ifndef/#define/#endif sequence typically found in
 * header files to prevent repeated definition of the contents of that header
 * file. So, for a file to have an include guard, it must look like this:
 * \code
 * #ifndef SOME_ID
 * ... all declarations/definitions/etc. go here ...
 * #endif
 * \endcode
 *
 * SOME_ID is an identifier, and is also the include guard. The only tokens
 * allowed before the #ifndef and after the #endif are comments (in any form)
 * or #line directives. The only other requirement is that a #define SOME_ID
 * occurs inside the #ifndef block, but not nested inside other
 * #if/#ifdef/#ifndef blocks.
 *
 * This function tracks the state, and is called from \c updateIncludeGuardState
 * which handles the most common no-op cases.
 *
 * @param hint indicates what kind of token is encountered in the input
 * @param idToken the identifier token that ought to be in the input
 *        after a #ifndef or a #define .
 */
void Preprocessor::State::updateIncludeGuardState_helper(IncludeGuardStateHint hint, PPToken *idToken)
{
#ifdef DEBUG_INCLUDE_GUARD_TRACKING
    int stateBefore = m_includeGuardState;
    QByteArray guardNameBefore = m_includeGuardMacroName;
#endif // DEBUG_INCLUDE_GUARD_TRACKING

    if (m_includeGuardState == IncludeGuardState_BeforeIfndef) {
        // Only an "#ifndef <identifier>" can open a guard; anything else
        // means the file has no include guard.
        const bool opensGuard = hint == IncludeGuardStateHint_Ifndef
                && idToken && idToken->is(T_IDENTIFIER);
        if (opensGuard) {
            m_includeGuardMacroName = idToken->asByteArrayRef().toByteArray();
            m_includeGuardState = IncludeGuardState_AfterIfndef;
        } else {
            m_includeGuardState = IncludeGuardState_NoGuard;
        }
    } else if (m_includeGuardState == IncludeGuardState_AfterIfndef) {
        // Advance only on a #define of the very name seen in the #ifndef.
        const bool definesGuard = hint == IncludeGuardStateHint_Define
                && idToken && idToken->is(T_IDENTIFIER)
                && idToken->asByteArrayRef() == m_includeGuardMacroName;
        if (definesGuard)
            m_includeGuardState = IncludeGuardState_AfterDefine;
    } else if (m_includeGuardState == IncludeGuardState_AfterDefine) {
        if (hint == IncludeGuardStateHint_Endif)
            m_includeGuardState = IncludeGuardState_AfterEndif;
    } else if (m_includeGuardState == IncludeGuardState_AfterEndif) {
        // Reaching this helper after the closing #endif discards the guard.
        m_includeGuardState = IncludeGuardState_NoGuard;
        m_includeGuardMacroName.clear();
    }
    // IncludeGuardState_NoGuard: nothing to update.

#ifdef DEBUG_INCLUDE_GUARD_TRACKING
    qDebug() << "***" << guardStateToString(stateBefore)
             << "->" << guardStateToString(m_includeGuardState)
             << "hint:" << hint
             << "guard:" << guardNameBefore << "->" << m_includeGuardMacroName;
#endif // DEBUG_INCLUDE_GUARD_TRACKING
}

745
746
const QString Preprocessor::configurationFileName = QLatin1String("<configuration>");

747
748
749
/// Creates a preprocessor that reports to \a client and resolves macros in
/// \a env. Function-like macro expansion starts enabled; keeping comments
/// starts disabled.
Preprocessor::Preprocessor(Client *client, Environment *env)
    : m_client(client)
    , m_env(env)
    , m_expandFunctionlikeMacros(true)
    , m_keepComments(false)
{
}

755
/// Convenience overload: converts \a source to UTF-8 and forwards to the
/// QByteArray overload of run().
QByteArray Preprocessor::run(const QString &fileName, const QString &source)
{
    return run(fileName, source.toUtf8());
}

760
761
762
763
/// Preprocesses \a source (the contents of \a fileName) and returns the
/// resulting text. \a noLines and \a markGeneratedTokens are forwarded to
/// preprocess(). If an include guard was detected and a client is set, the
/// client is told the guard's macro name.
QByteArray Preprocessor::run(const QString &fileName,
                             const QByteArray &source,
                             bool noLines,
                             bool markGeneratedTokens)
{
    m_scratchBuffer.clear();

    QByteArray preprocessed, includeGuardMacroName;
    preprocessed.reserve(source.size() * 2); // multiply by 2 because we insert #gen lines.
    preprocess(fileName, source, &preprocessed, &includeGuardMacroName, noLines,
               markGeneratedTokens, false);
    // The client is optional (macroDefinition() also tolerates a null one).
    if (m_client && !includeGuardMacroName.isEmpty())
        m_client->markAsIncludeGuard(includeGuardMacroName);
    return preprocessed;
}

776
bool Preprocessor::expandFunctionlikeMacros() const
777
{
778
    return m_expandFunctionlikeMacros;
779
780
}

781
void Preprocessor::setExpandFunctionlikeMacros(bool expandMacros)
782
{
783
    m_expandFunctionlikeMacros = expandMacros;
784
785
}

786
787
bool Preprocessor::keepComments() const
{
788
    return m_keepComments;
789
790
791
792
}

void Preprocessor::setKeepComments(bool keepComments)
{
793
    m_keepComments = keepComments;
794
795
}

796
void Preprocessor::generateOutputLineMarker(unsigned lineno)
797
{
798
    maybeStartOutputLine();
799
    QByteArray &marker = currentOutputBuffer();
800
801
802
803
804
    marker.append("# ");
    marker.append(QByteArray::number(lineno));
    marker.append(" \"");
    marker.append(m_env->currentFileUtf8);
    marker.append("\"\n");
805
806
}

807
/// Handles a `defined NAME` / `defined(NAME)` test whose `defined` token is
/// in \a tk.
///
/// On success \a tk is replaced by a generated numeric literal "1" or "0",
/// depending on whether the name resolves to a macro, and the token that
/// followed the test is pushed back for the next lex(). The name may be
/// built from several identifiers joined with `##`. If no identifier
/// follows, the function returns early and \a tk holds the offending token.
void Preprocessor::handleDefined(PPToken *tk)
{
    ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
    unsigned lineno = tk->lineno;
    lex(tk); // consume "defined" token
    bool lparenSeen = tk->is(T_LPAREN);
    if (lparenSeen)
        lex(tk); // consume "(" token
    if (tk->isNot(T_IDENTIFIER))
        //### TODO: generate error message
        return;
    PPToken idToken = *tk;
    // Fold any "## identifier" sequences into the name being tested.
    do {
        lex(tk);
        if (tk->isNot(T_POUND_POUND))
            break;
        lex(tk);
        if (tk->is(T_IDENTIFIER))
            idToken = generateConcatenated(idToken, *tk);
        else
            break;
    } while (isContinuationToken(*tk));


    if (lparenSeen && tk->is(T_RPAREN))
        lex(tk);

    // tk now holds the first token after the test; hand it back.
    pushToken(tk);

    QByteArray result(1, '0');
    const ByteArrayRef macroName = idToken.asByteArrayRef();
    if (macroDefinition(macroName,
                        idToken.byteOffset + m_state.m_bytesOffsetRef,
                        idToken.utf16charOffset + m_state.m_utf16charsOffsetRef,
                        idToken.lineno, m_env, m_client)) {
        result[0] = '1';
    }
    *tk = generateToken(T_NUMERIC_LITERAL, result.constData(), result.size(), lineno, false);
}

847
/// Pushes a copy of \a tk back so that it is the next token read by lex().
/// No blocking macro is associated with it.
void Preprocessor::pushToken(Preprocessor::PPToken *tk)
{
    const PPToken currentTokenBuffer[] = { *tk };
    m_state.pushTokenBuffer(currentTokenBuffer, currentTokenBuffer + 1, 0);
}
852

853
854
855
856
// Reads the next token into *tk.
//
// The token comes from the current token buffer if there is one (pushed-back
// tokens and macro expansions), otherwise from the lexer. Its line number is
// then adjusted by the line reference of the current state.
//
// Unless a preprocessor directive is being processed, the token is classified:
//  - a '#' at the start of a line is dispatched to handlePreprocessorDirective(),
//    and the token it leaves in *tk is classified again;
//  - while skipping, tokens starting at a new line are consumed for as long as
//    isContinuationToken() holds, and the token that ended the run is
//    classified again;
//  - an identifier that is not a Qt reserved word is either handled as the
//    "defined" operator (only inside a condition) or passed to
//    handleIdentifier(); when that returns true, the next token is read from
//    the start again;
//  - any other token, except comments and EOF, only updates the include guard
//    state.
void Preprocessor::lex(PPToken *tk)
{
_Lagain:
    if (m_state.m_tokenBuffer) {
        // There is a token buffer, so read from there.
        if (m_state.m_tokenBuffer->tokens.empty()) {
            // The token buffer is empty, so pop it, and start over.
            m_state.popTokenBuffer();
            goto _Lagain;
        }
        *tk = m_state.m_tokenBuffer->tokens.front();
        m_state.m_tokenBuffer->tokens.pop_front();
        // The token buffer might now be empty. We leave it in, because the
        // token we just read might expand into new tokens, or might be a call
        // to the macro that generated this token. In either case, the macro
        // that generated the token still needs to be blocked (!), which is
        // recorded in the token buffer. Removing the blocked macro and the
        // empty token buffer happens the next time that this function is called.
    } else {
        // No token buffer, so have the lexer scan the next token.
        tk->setSource(m_state.m_source);
        m_state.m_lexer->scan(tk);
    }

    // Adjust token's line number in order to take into account the environment reference.
    tk->lineno += m_state.m_lineRef - 1;

_Lclassify:
    if (! m_state.m_inPreprocessorDirective) {
        if (tk->newline() && tk->is(T_POUND)) {
            handlePreprocessorDirective(tk);
            goto _Lclassify;
        } else if (tk->newline() && skipping()) {
            // Read the skipped tokens in "directive mode" so that the nested
            // lex() calls do not classify them.
            ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
            do {
                lex(tk);
            } while (isContinuationToken(*tk));
            goto _Lclassify;
        } else if (tk->is(T_IDENTIFIER) && !isQtReservedWord(tk->tokenStart(), tk->bytes())) {
            m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
            if (m_state.m_inCondition && tk->asByteArrayRef() == "defined") {
                handleDefined(tk);
            } else {
                synchronizeOutputLines(*tk);
                // true means a token buffer was pushed in place of the
                // identifier, so fetch the next token from it.
                if (handleIdentifier(tk))
                    goto _Lagain;
            }
        } else if (tk->isNot(T_COMMENT) && tk->isNot(T_EOF_SYMBOL)) {
            m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
        }
    }
}

906
// Consumes tokens, starting with *tk, for as long as isContinuationToken()
// holds. Comment tokens met on the way are copied to the current output buffer
// (after synchronizing output lines and enforcing spacing); all other tokens
// are dropped.
//
// On return, *tk is the first token for which isContinuationToken() is false.
void Preprocessor::skipPreprocesorDirective(PPToken *tk)
{
    // Keep lex() from classifying the tokens that are being skipped.
    ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);

    while (isContinuationToken(*tk)) {
        if (tk->isComment()) {
            synchronizeOutputLines(*tk);
            enforceSpacing(*tk, true);
            currentOutputBuffer().append(tk->tokenStart(), tk->bytes());
        }
        lex(tk);
    }
}

920
// Tries to expand the identifier in *tk.
//
// Returns true if the identifier names a macro whose (possibly substituted)
// body was pushed as a new token buffer; the caller is then expected to read
// the next token from that buffer.
//
// Returns false if *tk is to be used as it is. This is the case when
//  - the identifier is __LINE__, __FILE__, __DATE__ or __TIME__: *tk is
//    replaced with a generated token that inherits the newline and whitespace
//    flags of the identifier;
//  - the identifier does not resolve to a macro, or the token is an expanded
//    one and the macro is blocked by the current token buffer;
//  - the macro is function-like, but function-like macros are not to be
//    expanded, the argument list is missing or does not match the formal
//    parameters, or handleFunctionLikeMacro() fails.
//
// The client, if there is one, is told about the expansion through
// startExpandingMacro()/stopExpandingMacro(), or through notifyMacroReference()
// when a function-like macro is deliberately left unexpanded.
bool Preprocessor::handleIdentifier(PPToken *tk)
{
    ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);

    static const QByteArray ppLine("__LINE__");
    static const QByteArray ppFile("__FILE__");
    static const QByteArray ppDate("__DATE__");
    static const QByteArray ppTime("__TIME__");

    ByteArrayRef macroNameRef = tk->asByteArrayRef();

    // Cheap pre-check: all four built-in names are 8 bytes long and start
    // with two underscores.
    if (macroNameRef.size() == 8
            && macroNameRef[0] == '_'
            && macroNameRef[1] == '_') {
        PPToken newTk;
        if (macroNameRef == ppLine) {
            QByteArray txt = QByteArray::number(tk->lineno);
            // NOTE(review): the text is an unquoted number, yet the token kind
            // is T_STRING_LITERAL - confirm whether T_NUMERIC_LITERAL is meant.
            newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
        } else if (macroNameRef == ppFile) {
            QByteArray txt;
            txt.append('"');
            txt.append(m_env->currentFileUtf8);
            txt.append('"');
            newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
        } else if (macroNameRef == ppDate) {
            QByteArray txt;
            txt.append('"');
            txt.append(QDate::currentDate().toString().toUtf8());
            txt.append('"');
            newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
        } else if (macroNameRef == ppTime) {
            QByteArray txt;
            txt.append('"');
            txt.append(QTime::currentTime().toString().toUtf8());
            txt.append('"');
            newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
        }

        // newTk only has a source if one of the branches above generated it.
        if (newTk.hasSource()) {
            newTk.f.newline = tk->newline();
            newTk.f.whitespace = tk->whitespace();
            *tk = newTk;
            return false;
        }
    }

    Macro *macro = m_env->resolve(macroNameRef);
    if (!macro
            || (tk->expanded()
                && m_state.m_tokenBuffer
                && m_state.m_tokenBuffer->isBlocked(macro))) {
        return false;
    }
//    qDebug() << "expanding" << macro->name() << "on line" << tk->lineno;

    // Keep track of the macro identifier token.
    PPToken idTk = *tk;

    // Expanded tokens which are not generated ones preserve the original line number from
    // their corresponding argument in macro substitution. For expanded tokens which are
    // generated, this information must be taken from somewhere else. What we do is to keep
    // a "reference" line initially set to the line where expansion happens.
    unsigned baseLine = idTk.lineno - m_state.m_lineRef + 1;

    QVector<PPToken> body = macro->definitionTokens();

    // Within nested expansion we might reach a previously added marker token. In this case,
    // we need to move it from its current position to outside the nesting.
    PPToken oldMarkerTk;

    if (macro->isFunctionLike()) {
        if (!expandFunctionlikeMacros()
                // Still expand if this originally started with an object-like macro.
                && m_state.m_expansionStatus != Expanding) {
            // The client is optional; every other notification in this
            // function is guarded the same way.
            if (m_client) {
                m_client->notifyMacroReference(m_state.m_bytesOffsetRef + idTk.byteOffset,
                                               m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
                                               idTk.lineno,
                                               *macro);
            }
            return false;
        }

        // Collect individual tokens that form the macro arguments.
        QVector<QVector<PPToken> > allArgTks;
        bool hasArgs = collectActualArguments(tk, &allArgTks);

        // Check whether collecting arguments failed due to a previously added marker
        // that got nested in a sequence of expansions. If so, store it and try again.
        if (!hasArgs
                && !tk->hasSource()
                && m_state.m_markExpandedTokens
                && (m_state.m_expansionStatus == Expanding
                    || m_state.m_expansionStatus == ReadyForExpansion)) {
            oldMarkerTk = *tk;
            hasArgs = collectActualArguments(tk, &allArgTks);
        }

        // Check for matching parameter/argument count.
        bool hasMatchingArgs = false;
        if (hasArgs) {
            const int expectedArgCount = macro->formals().size();
            // A variadic macro may be invoked without any variadic argument;
            // add an empty one so that the counts line up.
            if (macro->isVariadic() && allArgTks.size() == expectedArgCount - 1)
                allArgTks.push_back(QVector<PPToken>());
            const int actualArgCount = allArgTks.size();
            if (expectedArgCount == actualArgCount
                    || (macro->isVariadic() && actualArgCount > expectedArgCount - 1)
                    // Handle '#define foo()' when invoked as 'foo()'
                    || (expectedArgCount == 0
                        && actualArgCount == 1
                        && allArgTks.at(0).isEmpty())) {
                hasMatchingArgs = true;
            }
        }

        if (!hasArgs || !hasMatchingArgs) {
            //### TODO: error message
            // Put the token that was read last back, and hand the untouched
            // identifier to the caller.
            pushToken(tk);
            // If a previous marker was found, make sure to put it back.
            if (oldMarkerTk.bytes())
                pushToken(&oldMarkerTk);
            *tk = idTk;
            return false;
        }

        if (m_client && !idTk.generated()) {
            // Bundle each token sequence into a macro argument "reference" for notification.
            // Even empty ones, which are not necessarily important on their own, but for the
            // matter of counting their number - such as in foo(,)
            QVector<MacroArgumentReference> argRefs;
            for (int i = 0; i < allArgTks.size(); ++i) {
                const QVector<PPToken> &argTks = allArgTks.at(i);
                if (argTks.isEmpty()) {
                    argRefs.push_back(MacroArgumentReference());
                } else {
                    // The reference starts at the first token of the argument
                    // and spans up to the end of its last token.
                    argRefs.push_back(MacroArgumentReference(
                                  m_state.m_utf16charsOffsetRef + argTks.first().utf16charsBegin(),
                                  argTks.last().utf16charsBegin() + argTks.last().utf16chars()
                                    - argTks.first().utf16charsBegin()));
                }
            }

            m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
                                          m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
                                          idTk.lineno,
                                          *macro,
                                          argRefs);
        }

        if (!handleFunctionLikeMacro(macro, body, allArgTks, baseLine)) {
            // Use the same condition as for startExpandingMacro() above, so
            // that every start notification is paired with exactly one stop.
            if (m_client && !idTk.generated())
                m_client->stopExpandingMacro(idTk.byteOffset, *macro);
            return false;
        }
    } else if (m_client && !idTk.generated()) {
        m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
                                      m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
                                      idTk.lineno, *macro);
    }

    if (body.isEmpty()) {
        if (m_state.m_markExpandedTokens
                && (m_state.m_expansionStatus == NotExpanding
                    || m_state.m_expansionStatus == JustFinishedExpansion)) {
            // This is not the most beautiful approach but it's quite reasonable. What we do here
            // is to create a fake identifier token which is only composed by whitespaces. It's
            // also not marked as expanded so it can be treated as a regular token.
            const QByteArray content(int(idTk.bytes() + computeDistance(idTk)), ' ');
            PPToken fakeIdentifier = generateToken(T_IDENTIFIER,
                                                   content.constData(), content.length(),
                                                   idTk.lineno, false, false);
            fakeIdentifier.f.whitespace = true;
            fakeIdentifier.f.expanded = false;
            fakeIdentifier.f.generated = false;
            body.push_back(fakeIdentifier);
        }
    } else {
        // The first body token replaces the macro invocation so its whitespace and
        // newline info is replicated.
        PPToken &bodyTk = body[0];
        bodyTk.f.whitespace = idTk.whitespace();
        bodyTk.f.newline = idTk.newline();

        // Expansions are tracked from a "top-level" basis. This means that each expansion
        // section in the output corresponds to a direct use of a macro (either object-like
        // or function-like) in the source code and all its recurring expansions - they are
        // surrounded by two marker tokens, one at the begin and the other at the end.
        // For instance, the following code will generate 3 expansions in total, but the
        // output will aggregate the tokens in only 2 expansion sections.
        //  - The first corresponds to BAR expanding to FOO and then FOO expanding to T o;
        //  - The second corresponds to FOO expanding to T o;
        //
        // #define FOO(T, o) T o;
        // #define BAR(T, o) FOO(T, o)
        // BAR(Test, x) FOO(Test, y)
        if (m_state.m_markExpandedTokens) {
            if (m_state.m_expansionStatus == NotExpanding
                    || m_state.m_expansionStatus == JustFinishedExpansion) {
                // Top-level expansion: wrap the body in a pair of markers that
                // carry the position of the macro identifier.
                PPToken marker;
                marker.f.expanded = true;
                marker.f.bytes = idTk.bytes();
                marker.byteOffset = idTk.byteOffset;
                marker.lineno = idTk.lineno;
                body.prepend(marker);
                body.append(marker);
                m_state.setExpansionStatus(ReadyForExpansion);
            } else if (oldMarkerTk.bytes()
                       && (m_state.m_expansionStatus == ReadyForExpansion
                           || m_state.m_expansionStatus == Expanding)) {
                // Nested expansion: move the marker that was swallowed while
                // collecting the arguments to the end of this body.
                body.append(oldMarkerTk);
            }
        }
    }

    // The buffer records the macro, which is what isBlocked() is asked about
    // at the top of this function.
    m_state.pushTokenBuffer(body.begin(), body.end(), macro);

    if (m_client && !idTk.generated())
        m_client->stopExpandingMacro(idTk.byteOffset, *macro);

    return true;
}

1141
bool Preprocessor::handleFunctionLikeMacro(const Macro *macro,
1142
                                           QVector<PPToken> &body,
1143
1144
                                           const QVector<QVector<PPToken> > &actuals,
                                           unsigned baseLine)
con's avatar
con committed
1145
{
1146
    QVector<PPToken> expanded;
1147
    expanded.reserve(MAX_TOKEN_EXPANSION_COUNT);
1148
1149
1150
1151

    const size_t bodySize = body.size();
    for (size_t i = 0; i < bodySize && expanded.size() < MAX_TOKEN_EXPANSION_COUNT;
            ++i) {
1152
        int expandedSize = expanded.size();
1153
        PPToken bodyTk = body.at(int(i));
1154