Commit fa925ccd authored by Roberto Raggi
Browse files

Say hello to the new incremental scanner for QML/JS.

parent 7a379001
......@@ -31,6 +31,7 @@
#include <QtCore/QSet>
#include <QtCore/QtAlgorithms>
#include <QtCore/QDebug>
using namespace QmlJS;
......@@ -58,11 +59,10 @@ bool QScriptHighlighter::isDuiEnabled() const
void QScriptHighlighter::highlightBlock(const QString &text)
{
m_scanner(text, onBlockStart());
const QList<Token> tokens = m_scanner(text, onBlockStart());
QTextCharFormat emptyFormat;
int lastEnd = 0;
const QList<Token> tokens = m_scanner.tokens();
for (int i = 0; i < tokens.size(); ++i) {
const Token token = tokens.at(i);
......@@ -111,7 +111,7 @@ void QScriptHighlighter::highlightBlock(const QString &text)
break;
case Token::Identifier:
if (m_duiEnabled && (i + 1 != tokens.size()) && tokens.at(i + 1).kind == Token::Colon) {
if (m_duiEnabled && (i + 1) < tokens.size() && tokens.at(i + 1).is(Token::Colon)) {
int j = i;
for (; j != -1; --j) {
const Token &tok = tokens.at(j);
......@@ -138,8 +138,7 @@ void QScriptHighlighter::highlightBlock(const QString &text)
setFormat(token.offset, token.length, emptyFormat);
break;
case Token::Operator:
case Token::Dot:
case Token::Delimiter:
setFormat(token.offset, token.length, emptyFormat);
break;
......@@ -150,13 +149,21 @@ void QScriptHighlighter::highlightBlock(const QString &text)
lastEnd = token.end();
}
const int firstNonSpace = m_scanner.firstNonSpace();
int firstNonSpace = 0;
if (! tokens.isEmpty()) {
const Token &tk = tokens.first();
firstNonSpace = tk.offset;
}
if (firstNonSpace > lastEnd)
setFormat(lastEnd, firstNonSpace - lastEnd, m_formats[VisualWhitespace]);
else if (text.length() > lastEnd)
setFormat(lastEnd, text.length() - lastEnd, m_formats[VisualWhitespace]);
onBlockEnd(m_scanner.endState(), firstNonSpace);
setCurrentBlockState(m_scanner.endState());
}
void QScriptHighlighter::setFormats(const QVector<QTextCharFormat> &s)
......@@ -237,15 +244,20 @@ QSet<QString> QScriptHighlighter::keywords()
// Supplies the start state that highlightBlock() hands to the scanner for the
// block being highlighted.
// NOTE(review): this listing looks like a diff with removed and added lines
// merged. As written, `return state;` makes the final
// `return currentBlockState();` unreachable -- confirm which body is intended.
int QScriptHighlighter::onBlockStart()
{
int state = 0;
// A previous block state of -1 is not propagated; 0 is used instead.
int previousState = previousBlockState();
if (previousState != -1)
state = previousState;
return state;
return currentBlockState();
}
// Default end-of-block hook: both int arguments (declared as `state` and
// `firstNonSpace` in the class) are ignored and nothing is done.
void QScriptHighlighter::onBlockEnd(int, int)
{
}
// Default hook for an opening parenthesis: the character and its position
// are ignored and nothing is done.
void QScriptHighlighter::onOpeningParenthesis(QChar, int)
{
}
// Default hook for a closing parenthesis: the character and its position
// are ignored and nothing is done.
void QScriptHighlighter::onClosingParenthesis(QChar, int)
{
}
// Default hook for an opening parenthesis: ignores both arguments, does nothing.
void QScriptHighlighter::onOpeningParenthesis(QChar, int) {}
// Default hook for a closing parenthesis: ignores both arguments, does nothing.
void QScriptHighlighter::onClosingParenthesis(QChar, int) {}
// Default end-of-block hook: forwards `state` to setCurrentBlockState(); the
// second argument is ignored.
void QScriptHighlighter::onBlockEnd(int state, int) { return setCurrentBlockState(state); }
void QScriptHighlighter::highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat)
{
......
......@@ -61,13 +61,13 @@ public:
QSet<QString> keywords();
protected:
virtual int onBlockStart();
virtual void onBlockEnd(int state, int firstNonSpace);
// The functions are notified whenever parentheses are encountered.
// Custom behaviour can be added, for example storing info for indenting.
virtual int onBlockStart(); // returns the block's initial state
virtual void onOpeningParenthesis(QChar parenthesis, int pos);
virtual void onClosingParenthesis(QChar parenthesis, int pos);
// sets the enriched user state, or simply calls setCurrentBlockState(state);
virtual void onBlockEnd(int state, int firstNonSpace);
virtual void highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat);
......
......@@ -256,7 +256,7 @@ QString QmlJSIndenter::trimmedCodeLine(const QString &t)
case Token::LeftParenthesis:
case Token::LeftBrace:
case Token::Semicolon:
case Token::Operator:
case Token::Delimiter:
break;
case Token::RightParenthesis:
......
......@@ -34,330 +34,209 @@
using namespace QmlJS;
// Constructs a scanner with m_state initialised to 0 and then calls reset().
// NOTE(review): this listing looks like a diff with removed and added lines
// merged; the initializer list and the reset() call may belong to different
// revisions -- confirm against the real file.
QmlJSScanner::QmlJSScanner()
: m_state(0)
{
reset();
}
QmlJSScanner::~QmlJSScanner()
{}
{
}
void QmlJSScanner::reset()
static bool isIdentifierChar(QChar ch)
{
m_endState = -1;
m_firstNonSpace = -1;
m_tokens.clear();
switch (ch.unicode()) {
case '$': case '_':
return true;
default:
return ch.isLetterOrNumber();
}
}
QList<Token> QmlJSScanner::operator()(const QString &text, int startState)
static bool isNumberChar(QChar ch)
{
reset();
// tokens
enum TokenKind {
InputAlpha,
InputNumber,
InputAsterix,
InputSlash,
InputSpace,
InputQuotation,
InputApostrophe,
InputSep,
NumInputs
};
switch (ch.unicode()) {
case '.':
case 'e':
case 'E': // ### more...
return true;
default:
return ch.isLetterOrNumber();
}
}
// states
QList<Token> QmlJSScanner::operator()(const QString &text, int startState)
{
enum {
StateStandard,
StateCommentStart1, // '/'
StateCCommentStart2, // '*' after a '/'
StateCppCommentStart2, // '/' after a '/'
StateCComment, // after a "/*"
StateCppComment, // after a "//"
StateCCommentEnd1, // '*' in a CppComment
StateCCommentEnd2, // '/' after a '*' in a CppComment
StateStringStart,
StateString,
StateStringEnd,
StateString2Start,
StateString2,
StateString2End,
StateNumber,
NumStates
Normal = 0,
MultiLineComment = 1
};
static const uchar table[NumStates][NumInputs] = {
// InputAlpha InputNumber InputAsterix InputSlash InputSpace InputQuotation InputApostrophe InputSep
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStandard
{ StateStandard, StateNumber, StateCCommentStart2, StateCppCommentStart2, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCommentStart1
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentStart2
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppCommentStart2
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCComment
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppComment
{ StateCComment, StateCComment, StateCCommentEnd1, StateCCommentEnd2, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentEnd1
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCCommentEnd2
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateStringStart
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateString
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStringEnd
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2Start
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateString2End
{ StateNumber, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard } // StateNumber
};
m_state = startState;
QList<Token> tokens;
// ### handle multi line comment state.
int index = 0;
if (m_state == MultiLineComment) {
const int start = index;
while (index < text.length()) {
const QChar ch = text.at(index);
QChar la;
if (index + 1 < text.length())
la = text.at(index + 1);
if (ch == QLatin1Char('*') && la == QLatin1Char('/')) {
m_state = Normal;
index += 2;
break;
} else {
++index;
}
}
int state = startState;
if (text.isEmpty()) {
blockEnd(state, 0);
return m_tokens;
tokens.append(Token(start, index - start, Token::Comment));
}
int input = -1;
int i = 0;
bool lastWasBackSlash = false;
bool makeLastStandard = false;
static const QString alphabeth = QLatin1String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
static const QString mathChars = QString::fromLatin1("xXeE");
static const QString numbers = QString::fromLatin1("0123456789");
QChar lastChar;
int firstNonSpace = -1;
int lastNonSpace = -1;
forever {
const QChar qc = text.at(i);
const char c = qc.toLatin1();
if (lastWasBackSlash) {
input = InputSep;
} else {
switch (c) {
case '*':
input = InputAsterix;
break;
case '/':
input = InputSlash;
break;
case '"':
input = InputQuotation;
break;
case '\'':
input = InputApostrophe;
break;
case ' ':
input = InputSpace;
break;
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '0':
if (alphabeth.contains(lastChar) && (!mathChars.contains(lastChar) || !numbers.contains(text.at(i - 1)))) {
input = InputAlpha;
while (index < text.length()) {
const QChar ch = text.at(index);
QChar la; // lookahead char
if (index + 1 < text.length())
la = text.at(index + 1);
switch (ch.unicode()) {
case '/':
if (la == QLatin1Char('/')) {
tokens.append(Token(index, text.length() - index, Token::Comment));
index = text.length();
} else if (la == QLatin1Char('*')) {
const int start = index;
index += 2;
m_state = MultiLineComment;
while (index < text.length()) {
const QChar ch = text.at(index);
QChar la;
if (index + 1 < text.length())
la = text.at(index + 1);
if (ch == QLatin1Char('*') && la == QLatin1Char('/')) {
m_state = Normal;
index += 2;
break;
} else {
if (input == InputAlpha && numbers.contains(lastChar))
input = InputAlpha;
else
input = InputNumber;
++index;
}
break;
case '.':
if (state == StateNumber)
input = InputNumber;
else
input = InputSep;
break;
default: {
if (qc.isLetter() || c == '_')
input = InputAlpha;
else
input = InputSep;
break;
}
tokens.append(Token(start, index - start, Token::Comment));
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
}
}
if (input != InputSpace) {
if (firstNonSpace < 0)
firstNonSpace = i;
lastNonSpace = i;
}
break;
lastWasBackSlash = !lastWasBackSlash && c == '\\';
case '\'':
case '"': {
const QChar quote = ch;
const int start = index;
++index;
while (index < text.length()) {
const QChar ch = text.at(index);
state = table[state][input];
if (ch == quote)
break;
else if (index + 1 < text.length() && ch == QLatin1Char('\\'))
index += 2;
else
++index;
}
switch (state) {
case StateStandard: {
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
if (index < text.length()) {
++index;
// good one
} else {
// unfinished
}
if (input == InputAlpha ) {
insertIdentifier(i);
} else if (input == InputSep || input == InputAsterix) {
insertCharToken(i, c);
}
tokens.append(Token(start, index - start, Token::String));
} break;
case '.':
if (la.isDigit()) {
const int start = index;
do {
++index;
} while (index < text.length() && isNumberChar(text.at(index)));
tokens.append(Token(start, index - start, Token::Number));
break;
}
tokens.append(Token(index++, 1, Token::Dot));
break;
case StateCommentStart1:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = true;
break;
case StateCCommentStart2:
makeLastStandard = false;
insertComment(i - 1, 2);
break;
case StateCppCommentStart2:
insertComment(i - 1, 2);
makeLastStandard = false;
break;
case StateCComment:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCppComment:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCCommentEnd1:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCCommentEnd2:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateStringStart:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateStringEnd:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2Start:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2End:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateNumber:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertNumber(i);
break;
}
case '(':
tokens.append(Token(index++, 1, Token::LeftParenthesis));
break;
lastChar = qc;
i++;
if (i >= text.length())
case ')':
tokens.append(Token(index++, 1, Token::RightParenthesis));
break;
}
scanForKeywords(text);
case '[':
tokens.append(Token(index++, 1, Token::LeftBracket));
break;
if (state == StateCComment
|| state == StateCCommentEnd1
|| state == StateCCommentStart2
) {
state = StateCComment;
} else {
state = StateStandard;
}
case ']':
tokens.append(Token(index++, 1, Token::RightBracket));
break;
blockEnd(state, firstNonSpace);
case '{':
tokens.append(Token(index++, 1, Token::LeftBrace));
break;
return m_tokens;
}
case '}':
tokens.append(Token(index++, 1, Token::RightBrace));
break;
void QmlJSScanner::insertToken(int start, int length, Token::Kind kind, bool forceNewToken)
{
if (m_tokens.isEmpty() || forceNewToken) {
m_tokens.append(Token(start, length, kind));
} else {
Token &lastToken(m_tokens.last());
if (lastToken.kind == kind && lastToken.end() == start) {
lastToken.length += 1;
} else {
m_tokens.append(Token(start, length, kind));
}
}
}
case ';':
tokens.append(Token(index++, 1, Token::Semicolon));
break;
void QmlJSScanner::insertCharToken(int start, const char c)
{
Token::Kind kind;
switch (c) {
case '!':
case '<':
case '>':
case '+':
case '-':
case '*':
case '/':
case '%': kind = Token::Operator; break;
case ';': kind = Token::Semicolon; break;
case ':': kind = Token::Colon; break;
case ',': kind = Token::Comma; break;
case '.': kind = Token::Dot; break;
case '(': kind = Token::LeftParenthesis; break;
case ')': kind = Token::RightParenthesis; break;
case '{': kind = Token::LeftBrace; break;
case '}': kind = Token::RightBrace; break;
case '[': kind = Token::LeftBracket; break;
case ']': kind = Token::RightBracket; break;
default: kind = Token::Identifier; break;
case ':':
tokens.append(Token(index++, 1, Token::Colon));
break;
case ',':
tokens.append(Token(index++, 1, Token::Comma));
break;
default:
if (ch.isNumber()) {
const int start = index;
do {
++index;
} while (index < text.length() && isNumberChar(text.at(index)));
tokens.append(Token(start, index - start, Token::Number));
} else if (ch.isLetter() || ch == QLatin1Char('_') || ch == QLatin1Char('$')) {
const int start = index;
do {
++index;
} while (index < text.length() && isIdentifierChar(text.at(index)));
if (isKeyword(text.mid(start, index - start)))
tokens.append(Token(start, index - start, Token::Keyword)); // ### fixme
else
tokens.append(Token(start, index - start, Token::Identifier));
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
}
} // end of switch
}
insertToken(start, 1, kind, true);
return tokens;
}
void QmlJSScanner::scanForKeywords(const QString &text)