Commit 0f4e3c35, authored and committed by Orgad Shaneh
Browse files

C++: Support multiline strings and comments



Task-number: QTCREATORBUG-662
Change-Id: I0997fe2afaba71998d5da549b7141df0c023ff12
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
parent c06e68c4
......@@ -145,16 +145,18 @@ void Lexer::scan_helper(Token *tok)
_tokenStart = _currentChar;
tok->offset = _currentChar - _firstChar;
if (_state != T_EOF_SYMBOL && !_yychar) {
tok->f.kind = T_EOF_SYMBOL;
return;
}
switch (_state) {
case T_EOF_SYMBOL:
break;
case T_COMMENT:
case T_DOXY_COMMENT: {
const int originalState = _state;
if (! _yychar) {
tok->f.kind = T_EOF_SYMBOL;
return;
}
while (_yychar) {
if (_yychar != '*')
yyinp();
......@@ -174,6 +176,17 @@ void Lexer::scan_helper(Token *tok)
tok->f.kind = originalState;
return; // done
}
case T_CPP_COMMENT:
case T_CPP_DOXY_COMMENT:
tok->f.kind = _state;
_state = T_EOF_SYMBOL;
scanCppComment((Kind)tok->f.kind);
return;
default: // Strings
tok->f.kind = _state;
_state = T_EOF_SYMBOL;
scanUntilQuote(tok, '"');
return;
}
if (! _yychar) {
......@@ -356,20 +369,19 @@ void Lexer::scan_helper(Token *tok)
if (_yychar == '/') {
yyinp();
bool doxy = false;
Kind commentType = T_CPP_COMMENT;
if (_yychar == '/' || _yychar == '!') {
yyinp();
doxy = true;
commentType = T_CPP_DOXY_COMMENT;
}
while (_yychar && _yychar != '\n')
yyinp();
scanCppComment(commentType);
if (! f._scanCommentTokens)
goto _Lagain;
tok->f.kind = doxy ? T_CPP_DOXY_COMMENT : T_CPP_COMMENT;
tok->f.kind = commentType;
} else if (_yychar == '*') {
yyinp();
......@@ -626,8 +638,6 @@ void Lexer::scan_helper(Token *tok)
void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
{
scanUntilQuote(tok, '"');
if (hint == 'L')
tok->f.kind = T_WIDE_STRING_LITERAL;
else if (hint == 'U')
......@@ -640,6 +650,8 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_AT_STRING_LITERAL;
else
tok->f.kind = T_STRING_LITERAL;
scanUntilQuote(tok, '"');
}
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
......@@ -705,8 +717,6 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
scanUntilQuote(tok, '\'');
if (hint == 'L')
tok->f.kind = T_WIDE_CHAR_LITERAL;
else if (hint == 'U')
......@@ -715,6 +725,8 @@ void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_UTF16_CHAR_LITERAL;
else
tok->f.kind = T_CHAR_LITERAL;
scanUntilQuote(tok, '\'');
}
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
......@@ -725,13 +737,10 @@ void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
while (_yychar
&& _yychar != quote
&& _yychar != '\n') {
if (_yychar != '\\')
if (_yychar == '\\')
scanBackslash((Kind)tok->f.kind);
else
yyinp();
else {
yyinp(); // skip `\\'
if (_yychar)
yyinp();
}
}
int yylen = _currentChar - yytext;
......@@ -784,3 +793,35 @@ void Lexer::scanIdentifier(Token *tok, unsigned extraProcessedChars)
tok->identifier = control()->identifier(yytext, yylen);
}
}
// Consumes a backslash sequence that starts at the current character.
//
// When the backslash is directly followed by a non-whitespace character,
// that single character is consumed as well and nothing else happens.
// Otherwise the whitespace after the backslash, one optional newline and the
// whitespace that follows that newline are skipped. If the input ends during
// that skipping, `type` is stored in _state; scan_helper() switches on
// _state when the next scan begins.
void Lexer::scanBackslash(Kind type)
{
    yyinp(); // step over the backslash itself

    const bool plainEscape = _yychar && !std::isspace(_yychar);
    if (plainEscape) {
        yyinp(); // step over the escaped character
        return;
    }

    // Whitespace between the backslash and the end of the line.
    while (std::isspace(_yychar) && _yychar != '\n')
        yyinp();

    if (_yychar == '\n') {
        yyinp(); // step over the newline
        // Whitespace at the start of the following line; a second newline
        // is deliberately left unconsumed.
        while (std::isspace(_yychar) && _yychar != '\n')
            yyinp();
    }

    // Ran out of input while handling the continuation: remember the kind.
    if (!_yychar)
        _state = type;
}
// Consumes characters until the current character is a newline or the input
// is exhausted. Each backslash met on the way is handed to scanBackslash()
// together with `type`, which may consume a newline following the backslash
// and may store `type` in _state.
void Lexer::scanCppComment(Kind type)
{
    for (;;) {
        if (!_yychar || _yychar == '\n')
            break;

        if (_yychar == '\\')
            scanBackslash(type);
        else
            yyinp(); // _yychar is known to be non-zero at this point
    }
}
......@@ -80,6 +80,8 @@ private:
void scanUntilQuote(Token *tok, unsigned char quote);
void scanNumericLiteral(Token *tok);
void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
void scanBackslash(Kind type);
void scanCppComment(Kind type);
inline void yyinp()
{
......
......@@ -223,7 +223,7 @@ void CppHighlighter::highlightBlock(const QString &text)
if (text.length() > lastTokenEnd)
highlightLine(text, lastTokenEnd, text.length() - lastTokenEnd, formatForCategory(CppVisualWhitespace));
if (!initialState && state && !tokens.isEmpty()) {
if (!initialState && state && !tokens.isEmpty() && tokens.last().isComment()) {
parentheses.append(Parenthesis(Parenthesis::Opened, QLatin1Char('+'),
tokens.last().begin()));
++braceDepth;
......
......@@ -36,6 +36,7 @@
//#define DEBUG_TOKENS
typedef QList<unsigned> List;
typedef QByteArray _;
Q_DECLARE_METATYPE(List)
......@@ -46,18 +47,31 @@ class tst_SimpleLexer: public QObject
{
Q_OBJECT
public:
tst_SimpleLexer() : _state(0) {}
private slots:
void basic();
void basic_data();
void incremental();
void incremental_data();
private:
void run(const QByteArray &source,
const List &expectedTokenKindList,
bool preserveState);
int _state;
};
void tst_SimpleLexer::basic()
void tst_SimpleLexer::run(const QByteArray &source,
const List &expectedTokenKindList,
bool preserveState)
{
QFETCH(QByteArray, source);
QFETCH(QList<unsigned>, expectedTokenKindList);
SimpleLexer lexer;
const QList<Token> tokenList = lexer(source);
const QList<Token> tokenList = lexer(source, preserveState ? _state : 0);
if (preserveState)
_state = lexer.state();
int i = 0;
for (; i < tokenList.size(); ++i) {
......@@ -75,6 +89,14 @@ void tst_SimpleLexer::basic()
QVERIFY2(i == expectedTokenKindList.size(), "Less tokens than expected.");
}
// Data-driven test slot: fetches the current row's columns and checks them
// through run() with state preservation switched off.
void tst_SimpleLexer::basic()
{
    QFETCH(QByteArray, source);
    QFETCH(List, expectedTokenKindList);

    const bool preserveState = false;
    run(source, expectedTokenKindList, preserveState);
}
void tst_SimpleLexer::basic_data()
{
QTest::addColumn<QByteArray>("source");
......@@ -179,5 +201,127 @@ void tst_SimpleLexer::basic_data()
}
// Data-driven test slot: fetches the current row's columns and checks them
// through run() with state preservation switched on, so run() starts the
// lexer from the stored _state and stores the lexer's state back afterwards.
void tst_SimpleLexer::incremental()
{
    QFETCH(QByteArray, source);
    QFETCH(List, expectedTokenKindList);

    const bool preserveState = true;
    run(source, expectedTokenKindList, preserveState);
}
void tst_SimpleLexer::incremental_data()
{
QTest::addColumn<QByteArray>("source");
QTest::addColumn<List>("expectedTokenKindList");
QTest::newRow("simple_string_literal")
<< _("\"foo\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("unterminated_string_literal")
<< _("\"foo")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_1")
<< _("\"foo \\")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_2")
<< _("bar\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_spaces_1")
<< _("\"foo \\ ")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_spaces_2")
<< _("bar\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("double_escaped_string_literal_1")
<< _("\"foo \\")
<< (List() << T_STRING_LITERAL);
QTest::newRow("double_escaped_string_literal_2")
<< _("bar \\")
<< (List() << T_STRING_LITERAL);
QTest::newRow("double_escaped_string_literal_3")
<< _("baz\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("unterminated_escaped_string_literal")
<< _("\"foo \\\n\nbar\"")
<< (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_space_and_newline_single")
<< _("\"foo \\ \n bar\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_space_and_newline_1")
<< _("\"foo \\ \n ")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_space_and_newline_2")
<< _("bar\"")
<< (List() << T_STRING_LITERAL);
QTest::newRow("token_after_escaped_string_literal_1")
<< _("\"foo \\")
<< (List() << T_STRING_LITERAL);
QTest::newRow("token_after_escaped_string_literal_2")
<< _("bar\";")
<< (List() << T_STRING_LITERAL << T_SEMICOLON);
QTest::newRow("simple_cpp_comment")
<< _("//foo")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_1")
<< _("//foo \\")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_2")
<< _("bar")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_spaces_1")
<< _("//foo \\ ")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_spaces_2")
<< _("bar")
<< (List() << T_CPP_COMMENT);
QTest::newRow("double_escaped_cpp_comment_1")
<< _("//foo \\")
<< (List() << T_CPP_COMMENT);
QTest::newRow("double_escaped_cpp_comment_2")
<< _("bar \\")
<< (List() << T_CPP_COMMENT);
QTest::newRow("double_escaped_cpp_comment_3")
<< _("baz")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_newline")
<< _("//foo \\\n\nbar")
<< (List() << T_CPP_COMMENT << T_IDENTIFIER);
QTest::newRow("escaped_cpp_comment_with_space_and_newline_single")
<< _("//foo \\ \n bar")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_space_and_newline_1")
<< _("//foo \\ \n ")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_space_and_newline_2")
<< _("bar")
<< (List() << T_CPP_COMMENT);
}
QTEST_APPLESS_MAIN(tst_SimpleLexer)
#include "tst_lexer.moc"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment