Commit e148d030 authored by Leandro Melo's avatar Leandro Melo

C++: Introduce C++11 raw string literals

Although raw string literals are now supported by the
lexer and parser, it is worth noting that we still
need to address an issue concerning the highlighting
of multiline literals (which, with the advent of
the new raw strings, will become more common).

Task-number: QTCREATORBUG-6722
Change-Id: I137337a9ac0152a1f8b9faded0b960c6fe3dd38a
Reviewed-by: Roberto Raggi <roberto.raggi@nokia.com>
parent 15a67c41
......@@ -571,14 +571,25 @@ void Lexer::scan_helper(Token *tok)
}
}
if (ch == 'L' || ch == 'u' || ch == 'U') {
if (ch == 'L' || ch == 'u' || ch == 'U' || ch == 'R') {
// Either a literal or still an identifier.
if (_yychar == '"') {
yyinp();
scanStringLiteral(tok, ch);
if (ch == 'R')
scanRawStringLiteral(tok);
else
scanStringLiteral(tok, ch);
} else if (_yychar == '\'') {
yyinp();
scanCharLiteral(tok, ch);
} else if (ch != 'R' && _yychar == 'R') {
yyinp();
if (_yychar == '"') {
yyinp();
scanRawStringLiteral(tok, ch);
} else {
scanIdentifier(tok, 1);
}
} else if (ch == 'u' && _yychar == '8') {
yyinp();
if (_yychar == '"') {
......@@ -587,6 +598,14 @@ void Lexer::scan_helper(Token *tok)
} else if (_yychar == '\'') {
yyinp();
scanCharLiteral(tok, '8');
} else if (_yychar == 'R') {
yyinp();
if (_yychar == '"') {
yyinp();
scanRawStringLiteral(tok, '8');
} else {
scanIdentifier(tok, 2);
}
} else {
scanIdentifier(tok, 1);
}
......@@ -624,6 +643,67 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_STRING_LITERAL;
}
// Scans the remainder of a raw string literal of the form
//     R"delimiter( contents )delimiter"
// beginning at the current input position (the visible callers consume the
// opening quote before calling this function).
//
// tok  - token that receives the literal text and the token kind.
// hint - encoding prefix marker: 'L', 'U', 'u' or '8' select the matching
//        raw wide / UTF-32 / UTF-16 / UTF-8 kind; any other value yields
//        T_RAW_STRING_LITERAL.
//
// The text handed to control()->stringLiteral() runs from the start position
// up to, but not including, the terminating quote. Scanning also stops at the
// end of input, or early when the delimiter is malformed (a ')', a backslash
// or whitespace shows up before the opening '(').
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{
    const char *const literalStart = _currentChar;

    // Length of the user-chosen delimiter; stays -1 until the opening '(' is seen.
    int prefixLength = -1;
    // Position just past the most recent ')' while it may still begin the closing
    // delimiter; null when no such candidate is alive.
    const char *closerStart = 0;

    while (_yychar) {
        const bool inPrefix = (prefixLength == -1);

        if (_yychar == ')') {
            yyinp();
            // A ')' is not allowed inside the delimiter itself: give up.
            if (inPrefix)
                break;
            // Every ')' inside the contents starts a fresh closing candidate.
            closerStart = _currentChar;
            continue;
        }

        if (inPrefix) {
            if (_yychar == '(') {
                // The delimiter is everything between the start and this '('.
                prefixLength = _currentChar - literalStart;
            } else if (_yychar == '\\' || std::isspace(_yychar)) {
                // Characters that may not appear in a delimiter.
                break;
            }
            yyinp();
            continue;
        }

        if (closerStart) {
            // The literal ends at a quote that follows exactly the delimiter.
            if (_yychar == '"' && prefixLength == _currentChar - closerStart)
                break;

            // Drop the candidate as soon as it diverges from the opening text.
            // A candidate that grows past the delimiter length can never pass
            // the equality test above, so it is harmless until it is replaced.
            if (_yychar != literalStart[_currentChar - closerStart])
                closerStart = 0;
        }

        yyinp();
    }

    const int literalLength = _currentChar - literalStart;

    // Consume the terminating quote, if that is where scanning stopped.
    if (_yychar == '"')
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalStart, literalLength);

    switch (hint) {
    case 'L':
        tok->f.kind = T_RAW_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_RAW_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_RAW_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_RAW_UTF8_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_RAW_STRING_LITERAL;
        break;
    }
}
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
scanUntilQuote(tok, '\'');
......
......@@ -91,6 +91,7 @@ private:
static int classifyOperator(const char *string, int length);
void scanStringLiteral(Token *tok, unsigned char hint = 0);
void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
void scanCharLiteral(Token *tok, unsigned char hint = 0);
void scanUntilQuote(Token *tok, unsigned char quote);
void scanNumericLiteral(Token *tok);
......
......@@ -2815,7 +2815,12 @@ bool Parser::parseStringLiteral(ExpressionAST *&node)
|| LA() == T_WIDE_STRING_LITERAL
|| LA() == T_UTF8_STRING_LITERAL
|| LA() == T_UTF16_STRING_LITERAL
|| LA() == T_UTF32_STRING_LITERAL)) {
|| LA() == T_UTF32_STRING_LITERAL
|| LA() == T_RAW_STRING_LITERAL
|| LA() == T_RAW_WIDE_STRING_LITERAL
|| LA() == T_RAW_UTF8_STRING_LITERAL
|| LA() == T_RAW_UTF16_STRING_LITERAL
|| LA() == T_RAW_UTF32_STRING_LITERAL)) {
return false;
}
......@@ -2825,7 +2830,12 @@ bool Parser::parseStringLiteral(ExpressionAST *&node)
|| LA() == T_WIDE_STRING_LITERAL
|| LA() == T_UTF8_STRING_LITERAL
|| LA() == T_UTF16_STRING_LITERAL
|| LA() == T_UTF32_STRING_LITERAL) {
|| LA() == T_UTF32_STRING_LITERAL
|| LA() == T_RAW_STRING_LITERAL
|| LA() == T_RAW_WIDE_STRING_LITERAL
|| LA() == T_RAW_UTF8_STRING_LITERAL
|| LA() == T_RAW_UTF16_STRING_LITERAL
|| LA() == T_RAW_UTF32_STRING_LITERAL) {
*ast = new (_pool) StringLiteralAST;
(*ast)->literal_token = consumeToken();
ast = &(*ast)->next;
......@@ -4054,6 +4064,11 @@ bool Parser::parsePrimaryExpression(ExpressionAST *&node)
case T_UTF8_STRING_LITERAL:
case T_UTF16_STRING_LITERAL:
case T_UTF32_STRING_LITERAL:
case T_RAW_STRING_LITERAL:
case T_RAW_WIDE_STRING_LITERAL:
case T_RAW_UTF8_STRING_LITERAL:
case T_RAW_UTF16_STRING_LITERAL:
case T_RAW_UTF32_STRING_LITERAL:
return parseStringLiteral(node);
case T_NULLPTR:
......
......@@ -35,6 +35,8 @@ static const char *token_names[] = {
("<char literal>"), ("<wide char literal>"), ("<utf16 char literal>"), ("<utf32 char literal>"),
("<string literal>"), ("<wide string literal>"), ("<utf8 string literal>"),
("<utf16 string literal>"), ("<utf32 string literal>"),
("<raw string literal>"), ("<raw wide string literal>"), ("<raw utf8 string literal>"),
("<raw utf16 string literal>"), ("<raw utf32 string literal>"),
("<@string literal>"), ("<angle string literal>"),
("&"), ("&&"), ("&="), ("->"), ("->*"), ("^"), ("^="), (":"), ("::"),
......@@ -105,6 +107,11 @@ const char *Token::spell() const
case T_UTF8_STRING_LITERAL:
case T_UTF16_STRING_LITERAL:
case T_UTF32_STRING_LITERAL:
case T_RAW_STRING_LITERAL:
case T_RAW_WIDE_STRING_LITERAL:
case T_RAW_UTF8_STRING_LITERAL:
case T_RAW_UTF16_STRING_LITERAL:
case T_RAW_UTF32_STRING_LITERAL:
case T_AT_STRING_LITERAL:
case T_ANGLE_STRING_LITERAL:
return literal->chars();
......
......@@ -49,6 +49,11 @@ enum Kind {
T_UTF8_STRING_LITERAL,
T_UTF16_STRING_LITERAL,
T_UTF32_STRING_LITERAL,
T_RAW_STRING_LITERAL,
T_RAW_WIDE_STRING_LITERAL,
T_RAW_UTF8_STRING_LITERAL,
T_RAW_UTF16_STRING_LITERAL,
T_RAW_UTF32_STRING_LITERAL,
T_AT_STRING_LITERAL,
T_ANGLE_STRING_LITERAL,
T_LAST_STRING_LITERAL = T_ANGLE_STRING_LITERAL,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment