Commit 41b23296 authored by Francois Ferrand
Browse files

C++: fix trigraph parsing in macros.



Trigraphs must only be parsed before/during preprocessing. The preprocessor
will now replace trigraphs with their standard form, and re-lexing in
TranslationUnit will not try to parse any trigraph.

Also added a few missing trigraphs: ??=, ??', ??! and ??-.

Task-number: QTCREATORBUG-13253
Change-Id: I1723ed53b00090b878c22b83b7e963b647b65f72
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
parent 90571432
......@@ -336,20 +336,62 @@ void Lexer::scan_helper(Token *tok)
break;
case '?':
if (_yychar == '?') {
if (_yychar == '?' && f._ppMode) {
yyinp();
if (_yychar == '(') {
yyinp();
tok->f.kind = T_LBRACKET;
tok->f.trigraph = true;
} else if (_yychar == ')') {
yyinp();
tok->f.kind = T_RBRACKET;
tok->f.trigraph = true;
} else if (_yychar == '<') {
yyinp();
tok->f.kind = T_LBRACE;
tok->f.trigraph = true;
} else if (_yychar == '>') {
yyinp();
tok->f.kind = T_RBRACE;
tok->f.trigraph = true;
} else if (_yychar == '=') {
yyinp();
tok->f.trigraph = true;
if (_yychar == '?' && *(_currentChar + 1) == '?' && *(_currentChar + 2) == '=') {
yyinp();
yyinp();
yyinp();
tok->f.kind = T_POUND_POUND;
} else {
tok->f.kind = T_POUND;
}
} else if (_yychar == '\'') {
yyinp();
if (_yychar == '=') {
yyinp();
tok->f.kind = T_CARET_EQUAL;
} else {
tok->f.kind = T_CARET;
}
tok->f.trigraph = true;
} else if (_yychar == '!') {
yyinp();
if (_yychar == '=') {
yyinp();
tok->f.kind = T_PIPE_EQUAL;
} else {
tok->f.kind = T_PIPE;
}
tok->f.trigraph = true;
} else if (_yychar == '-') {
yyinp();
if (_yychar == '=') {
yyinp();
tok->f.kind = T_TILDE_EQUAL;
} else {
tok->f.kind = T_TILDE;
}
tok->f.trigraph = true;
}
} else {
tok->f.kind = T_QUESTION;
......
......@@ -125,6 +125,7 @@ private:
unsigned _scanKeywords: 1;
unsigned _scanAngleStringLiteralTokens: 1;
unsigned _ppMode: 1;
unsigned _ignoreTrigraph : 1;
};
struct State {
......
......@@ -371,8 +371,10 @@ public:
// The token is C++11 user-defined literal such as:
// 12_km, 0.5_Pa, 'c'_X, "abd"_L, u16"xyz"_M
unsigned userDefinedLiteral : 1;
// Indicates the token is a trigraph
unsigned trigraph : 1;
// Unused...
unsigned pad : 2;
unsigned pad : 1;
// The token length in bytes and UTF16 chars.
unsigned bytes : 16;
unsigned utf16chars : 16;
......
......@@ -50,6 +50,7 @@
#include <cplusplus/Lexer.h>
#include <cplusplus/Token.h>
#include <cplusplus/Literals.h>
#include <cplusplus/cppassert.h>
#include <utils/scopedswap.h>
......@@ -1439,7 +1440,25 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
enforceSpacing(tk, macroExpanded);
// Finally output the token.
currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
if (!tk.f.trigraph) {
currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
} else {
switch (tk.kind()) {
case T_LBRACKET: currentOutputBuffer().append("["); break;
case T_RBRACKET: currentOutputBuffer().append("]"); break;
case T_LBRACE: currentOutputBuffer().append("{"); break;
case T_RBRACE: currentOutputBuffer().append("}"); break;
case T_POUND: currentOutputBuffer().append("#"); break;
case T_POUND_POUND: currentOutputBuffer().append("##"); break;
case T_CARET: currentOutputBuffer().append("^"); break;
case T_CARET_EQUAL: currentOutputBuffer().append("^="); break;
case T_PIPE: currentOutputBuffer().append("|"); break;
case T_PIPE_EQUAL: currentOutputBuffer().append("|="); break;
case T_TILDE: currentOutputBuffer().append("~"); break;
case T_TILDE_EQUAL: currentOutputBuffer().append("~="); break;
default: CPP_ASSERT(0, qDebug() << tk.spell()); break;
}
}
} while (tk.isNot(T_EOF_SYMBOL));
......
......@@ -70,6 +70,8 @@ private slots:
void literals_data();
void preprocessor();
void preprocessor_data();
void trigraph();
void trigraph_data();
void bytes_and_utf16chars();
void bytes_and_utf16chars_data();
......@@ -263,12 +265,6 @@ void tst_SimpleLexer::basic_data()
<< T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT
<< T_INT << T_IDENTIFIER << T_SEMICOLON << T_CPP_DOXY_COMMENT << T_CPP_DOXY_COMMENT;
QTest::newRow(source) << source << expectedTokenKindList;
source = "?" "?(?" "?)?" "?<?" "?>a?b:c";
expectedTokenKindList = TokenKindList()
<< T_LBRACKET << T_RBRACKET << T_LBRACE << T_RBRACE
<< T_IDENTIFIER << T_QUESTION << T_IDENTIFIER << T_COLON << T_IDENTIFIER;
QTest::newRow(source) << source << expectedTokenKindList;
}
void tst_SimpleLexer::literals()
......@@ -744,5 +740,43 @@ void tst_SimpleLexer::incremental_data()
<< (TokenKindList() << T_IDENTIFIER);
}
// Data-driven test: for each row supplied by trigraph_data(), lexes `source`
// and checks the produced tokens against `expectedTokenKindList`.
void tst_SimpleLexer::trigraph()
{
    QFETCH(QByteArray, source);
    QFETCH(TokenKindList, expectedTokenKindList);

    // NOTE(review): run() is declared outside this excerpt, so the names given
    // to its two boolean arguments below are assumptions - confirm them against
    // its declaration.
    const bool preserveState = false;    // assumed meaning of the third argument
    const bool preprocessorMode = true;  // presumably makes the lexer recognise trigraphs

    run(source,
        toTokens(expectedTokenKindList),
        preserveState,
        CompareKind,
        preprocessorMode);
}
// Supplies the rows for trigraph(): one source snippet per row together with
// the single token kind the lexer is expected to produce for it.
void tst_SimpleLexer::trigraph_data()
{
    QTest::addColumn<QByteArray>("source");
    QTest::addColumn<TokenKindList>("expectedTokenKindList");

    // Each snippet is split into adjacent string literals so that no single
    // literal contains two consecutive question marks - presumably so that the
    // compiler building this test does not replace the sequence itself.
    static const struct {
        const char *tag;   // row name passed to QTest::newRow()
        const char *text;  // source handed to the lexer
        unsigned kind;     // expected token kind
    } rows[] = {
        { "pound_trigraph",         "?" "?=",         T_POUND },
        { "caret_trigraph",         "?" "?'",         T_CARET },
        { "left_bracket_trigraph",  "?" "?(",         T_LBRACKET },
        { "right_bracket_trigraph", "?" "?)",         T_RBRACKET },
        { "pipe_trigraph",          "?" "?!",         T_PIPE },
        { "left_brace_trigraph",    "?" "?<",         T_LBRACE },
        { "right_brace_trigraph",   "?" "?>",         T_RBRACE },
        { "tilde_trigraph",         "?" "?-",         T_TILDE },
        { "pound_pound_trigraph",   "?" "?=" "?" "?=", T_POUND_POUND },
        { "caret_equal_trigraph",   "?" "?'=",        T_CARET_EQUAL },
        { "pipe_equal_trigraph",    "?" "?!=",        T_PIPE_EQUAL },
        { "tilde_equal_trigraph",   "?" "?-=",        T_TILDE_EQUAL }
    };

    const int rowCount = static_cast<int>(sizeof(rows) / sizeof(rows[0]));
    for (int i = 0; i < rowCount; ++i) {
        QTest::newRow(rows[i].tag) << _(rows[i].text)
                                   << (TokenKindList() << rows[i].kind);
    }
}
QTEST_APPLESS_MAIN(tst_SimpleLexer)
#include "tst_lexer.moc"
......@@ -405,6 +405,7 @@ private slots:
void concat();
void excessive_nesting();
void multi_byte_code_point_in_expansion();
void trigraph();
};
// Remove all #... lines, and 'simplify' string, to allow easily comparing the result
......@@ -2092,6 +2093,29 @@ void tst_Preprocessor::compare_input_output(bool keepComments)
QVERIFY(compare(prep, output));
}
void tst_Preprocessor::trigraph()
{
Environment env;
Preprocessor preprocess(0, &env);
// We cannot use actual trigraphs in strings, they would be replaced by the preprocessor when
// compiling the test, so we use strings with 'j' character instead of '?', and perform a
// replacement at runtime.
// Trigraphs in source code are replaced
QByteArray prep = preprocess.run(QLatin1String("<stdin>"),
QByteArray("jj( jj) jj< jj> jj= jj=jj= jj' jj'= jj! jj!= jj- jj-=").replace('j', '?'),
true, false);
QCOMPARE(prep.constData(), "[ ] { } # ## ^ ^= | |= ~ ~=");
// Trigraphs that appear after macro expansion are not replaced
prep = preprocess.run(QLatin1String("<stdin>"),
"#define TRIGRAPH(x...) ? ## x ## ? ## x ## =\n"
"TRIGRAPH()",
true, false);
QCOMPARE(prep.constData(), QByteArray("\njj=").replace('j', '?').data());
}
QTEST_APPLESS_MAIN(tst_Preprocessor)
#include "tst_preprocessor.moc"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment