Commit 57ff9929 authored by Nikolai Kosjar

C++/CppTools: Pass UTF-8 encoded source to Lexer

The Lexer can handle it now.

Task-number: QTCREATORBUG-7356
Change-Id: I8c4b03a247656e013d44c3cedca4835e133d4036
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
parent 587eb49c
......@@ -41,18 +41,6 @@ FastPreprocessor::FastPreprocessor(const Snapshot &snapshot)
, _preproc(this, &_env)
{ }
// Temporary workaround for non-ASCII input. The bytes of 'contents' are scanned up to the first
// NUL; if any of them has its high bit set, the whole array is decoded as UTF-8 and re-encoded
// as Latin-1. Otherwise 'contents' is returned unchanged. This can go away once the lexer can
// handle multi-byte characters.
static QByteArray convertToLatin1(const QByteArray &contents)
{
    for (const char *cursor = contents.constData(); *cursor != '\0'; ++cursor) {
        const bool hasHighBit = (*cursor & 0x80) != 0;
        if (hasHighBit)
            return QString::fromUtf8(contents).toLatin1();
    }
    return contents;
}
QByteArray FastPreprocessor::run(Document::Ptr newDoc, const QByteArray &source)
{
std::swap(newDoc, _currentDoc);
......@@ -72,9 +60,7 @@ QByteArray FastPreprocessor::run(Document::Ptr newDoc, const QByteArray &source)
mergeEnvironment(i.resolvedFileName());
}
QByteArray src = convertToLatin1(source);
const QByteArray preprocessed = _preproc.run(fileName, src);
const QByteArray preprocessed = _preproc.run(fileName, source);
// qDebug("FastPreprocessor::run for %s produced [[%s]]", fileName.toUtf8().constData(), preprocessed.constData());
std::swap(newDoc, _currentDoc);
return preprocessed;
......
......@@ -61,11 +61,11 @@ bool SimpleLexer::endedJoined() const
return _endedJoined;
}
QList<Token> SimpleLexer::operator()(const QString &text, int state, bool convertToUtf8)
QList<Token> SimpleLexer::operator()(const QString &text, int state)
{
QList<Token> tokens;
const QByteArray bytes = convertToUtf8 ? text.toUtf8() : text.toLatin1();
const QByteArray bytes = text.toUtf8();
const char *firstChar = bytes.constData();
const char *lastChar = firstChar + bytes.size();
......
......@@ -54,7 +54,7 @@ public:
bool endedJoined() const;
QList<Token> operator()(const QString &text, int state = 0, bool convertToUtf8 = false);
QList<Token> operator()(const QString &text, int state = 0);
int state() const
{ return _lastState; }
......
......@@ -749,7 +749,7 @@ Preprocessor::Preprocessor(Client *client, Environment *env)
QByteArray Preprocessor::run(const QString &fileName, const QString &source)
{
return run(fileName, source.toLatin1());
return run(fileName, source.toUtf8());
}
QByteArray Preprocessor::run(const QString &fileName,
......
......@@ -378,18 +378,6 @@ void CppPreprocessor::stopSkippingBlocks(unsigned offset)
m_currentDoc->stopSkippingBlocks(offset);
}
// Stop-gap for sources containing non-ASCII characters; removable once the lexer copes with
// multi-byte characters. Pure-ASCII data (judged by the bytes preceding the first NUL) is
// handed back untouched; anything else is interpreted as UTF-8 and converted to Latin-1.
static QByteArray convertToLatin1(const QByteArray &contents)
{
    const char *pos = contents.constData();
    for (;;) {
        const char current = *pos;
        ++pos;
        if (!current)
            break; // reached the terminating NUL without seeing a high-bit byte
        if (current & 0x80)
            return QString::fromUtf8(contents).toLatin1();
    }
    return contents;
}
void CppPreprocessor::sourceNeeded(unsigned line, const QString &fileName, IncludeType type)
{
typedef Document::DiagnosticMessage Message;
......@@ -424,7 +412,6 @@ void CppPreprocessor::sourceNeeded(unsigned line, const QString &fileName, Inclu
unsigned editorRevision = 0;
QByteArray contents;
const bool gotFileContents = getFileContents(absoluteFileName, &contents, &editorRevision);
contents = convertToLatin1(contents);
if (m_currentDoc && !gotFileContents) {
const QString text = QCoreApplication::translate(
"CppPreprocessor", "%1: Could not get file contents").arg(fileName);
......
......@@ -114,8 +114,7 @@ void tst_SimpleLexer::run(const QByteArray &source,
QVERIFY(compareFlags);
SimpleLexer lexer;
const QList<Token> tokenList = lexer(source, preserveState ? _state : 0,
/*convertToUtf8=*/ true);
const QList<Token> tokenList = lexer(source, preserveState ? _state : 0);
if (preserveState)
_state = lexer.state();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment