Commit e2f8a988 authored by Marco Bubke's avatar Marco Bubke Committed by Nikolai Kosjar

Introduce codemodelbackend process and library

This is a partial result of wip/clang-oop. More will follow.

This allows us to run the completion outside of the Qt Creator process
and thus protects us against libclang crashes.

At this point only the completion use case is supported.

Some notes on the individual components:

 src/libs/codemodelbackendipc
  * library encapsulating the inter process communication handling
  * used by the backend application and in a follow-up change by the
    creator integration

 src/libs/3rdparty/sqlite
  * version 3.8.10.2
  * dependency of codemodelbackendipc, will be used to store indexing
    data, among other things

 src/tools/codemodelbackend
  * the backend application

 tests/unit:
  * unit tests

Change-Id: I91a48e27467581a22fb760a18d8eb926008fea60
Reviewed-by: default avatarAlessandro Portale <alessandro.portale@theqtcompany.com>
Reviewed-by: default avatarNikolai Kosjar <nikolai.kosjar@theqtcompany.com>
Reviewed-by: default avatarMarco Bubke <marco.bubke@theqtcompany.com>
Reviewed-by: default avatarOswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
parent 51fec1a3
......@@ -117,6 +117,7 @@ tmp/
*.dll
*.exe
/bin/buildoutputparser
/bin/codemodelbackend
/bin/cpaster
/bin/cplusplus-ast2png
/bin/cplusplus-frontend
......
/**
 * Locates an llvm-config executable.
 *
 * Lookup order:
 *   1. "<LLVM_INSTALL_DIR>/bin/" for every known llvm-config name, if the
 *      LLVM_INSTALL_DIR environment variable is set.
 *   2. Every directory listed in PATH, trying the known names in order
 *      (name-major: the first name is tried in all directories before the
 *      second name is considered).
 *
 * On Windows hosts each name is probed with an ".exe" suffix first and then
 * without it, because File.exists() needs the real file name.
 *
 * @param qbs  object providing getEnv(name) and hostOS.contains(os)
 * @returns the path of the first existing candidate, or undefined if none exists
 */
function llvmConfig(qbs)
{
    var llvmInstallDirFromEnv = qbs.getEnv("LLVM_INSTALL_DIR");
    var llvmConfigVariants = [
        "llvm-config", "llvm-config-3.2", "llvm-config-3.3", "llvm-config-3.4",
        "llvm-config-3.5", "llvm-config-3.6", "llvm-config-4.0", "llvm-config-4.1"
    ];
    var onWindows = qbs.hostOS.contains("windows");
    // The suffix-less name is still probed on Windows to stay compatible
    // with whatever the previous lookup was able to find.
    var suffixes = onWindows ? [".exe", ""] : [""];
    var i, j, found;

    // Returns the first existing "<prefix><name><suffix>" or undefined.
    function firstExisting(prefix, name)
    {
        for (var s = 0; s < suffixes.length; ++s) {
            var candidate = prefix + name + suffixes[s];
            if (File.exists(candidate))
                return candidate;
        }
        return undefined;
    }

    // Prefer llvm-config* from LLVM_INSTALL_DIR
    if (llvmInstallDirFromEnv) {
        for (i = 0; i < llvmConfigVariants.length; ++i) {
            found = firstExisting(llvmInstallDirFromEnv + "/bin/", llvmConfigVariants[i]);
            if (found)
                return found;
        }
    }

    // Find llvm-config* in PATH; an unset PATH is treated as empty
    var pathListString = qbs.getEnv("PATH") || "";
    var separator = onWindows ? ";" : ":";
    var pathList = pathListString.split(separator);
    for (i = 0; i < llvmConfigVariants.length; ++i) {
        for (j = 0; j < pathList.length; ++j) {
            found = firstExisting(pathList[j] + "/", llvmConfigVariants[i]);
            if (found)
                return found;
        }
    }

    return undefined;
}
/**
 * Queries the given llvm-config executable for its include directory.
 *
 * @param llvmConfig           path of the llvm-config executable to run
 * @param processOutputReader  object whose readOutput(executable, args) runs the query
 * @returns whatever readOutput yields for the "--includedir" query
 */
function includeDir(llvmConfig, processOutputReader)
{
    var queryArguments = ["--includedir"];
    return processOutputReader.readOutput(llvmConfig, queryArguments);
}
/**
 * Queries the given llvm-config executable for its library directory.
 *
 * @param llvmConfig           path of the llvm-config executable to run
 * @param processOutputReader  object whose readOutput(executable, args) runs the query
 * @returns whatever readOutput yields for the "--libdir" query
 */
function libDir(llvmConfig, processOutputReader)
{
    var queryArguments = ["--libdir"];
    return processOutputReader.readOutput(llvmConfig, queryArguments);
}
/**
 * Queries the given llvm-config executable for its version.
 *
 * The first "major.minor.patch" triple found in the output is kept and any
 * text following it on the same line (e.g. an "svn" tag) is dropped. Output
 * without such a triple is returned unchanged.
 *
 * @param llvmConfig           path of the llvm-config executable to run
 * @param processOutputReader  object whose readOutput(executable, args) runs the query
 * @returns the version text with the same-line remainder after the triple removed
 */
function version(llvmConfig, processOutputReader)
{
    var rawVersion = processOutputReader.readOutput(llvmConfig, ["--version"]);
    var tripleWithTrailer = /(\d+\.\d+\.\d+).*/;
    return rawVersion.replace(tripleWithTrailer, "$1");
}
/**
 * Lists the libraries to link against for libclang.
 *
 * Always contains "clang"; for Windows targets "advapi32" and "shell32" are
 * appended.
 *
 * @param targetOS  object providing contains(os), e.g. the qbs target OS list
 * @returns an array of library names
 */
function libraries(targetOS)
{
    var windowsLibraries = targetOS.contains("windows") ? ["advapi32", "shell32"] : [];
    // Array.concat is required here: "+" on two arrays converts both to
    // strings and glues them together ("clangadvapi32,shell32").
    return ["clang"].concat(windowsLibraries);
}
#include <math.h>
#include <assert.h>
#include "sqlite3.h"
/*
 * SQL scalar function computing an Okapi BM25 score for a single column.
 *
 * SQL arguments:
 *   apVal[0]  matchinfo blob; the offsets below expect the values in the
 *             order p, c, x, n, a, l
 *   apVal[1]  index of the column to score
 *   apVal[2]  optional K1 parameter (default 1.2)
 *   apVal[3]  optional B parameter (default 0.75)
 *
 * The result is reported through sqlite3_result_double(); invalid arguments
 * are reported through sqlite3_result_error().
 */
static void okapi_bm25(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) {
    assert(sizeof(int) == 4);

    /* apVal[1] is read unconditionally below, so both arguments must exist. */
    if (nVal < 2) {
        sqlite3_result_error(pCtx, "wrong number of arguments to function okapi_bm25(), expected matchinfo and column index", -1);
        return;
    }

    const unsigned int *matchinfo = (const unsigned int *)sqlite3_value_blob(apVal[0]);
    if (!matchinfo) {
        sqlite3_result_error(pCtx, "okapi_bm25(): matchinfo blob is empty", -1);
        return;
    }

    const int searchTextCol = sqlite3_value_int(apVal[1]);
    const double K1 = ((nVal >= 3) ? sqlite3_value_double(apVal[2]) : 1.2);
    const double B = ((nVal >= 4) ? sqlite3_value_double(apVal[3]) : 0.75);

    const int P_OFFSET = 0;
    const int C_OFFSET = 1;
    const int X_OFFSET = 2;

    const int termCount = matchinfo[P_OFFSET];
    const int colCount = matchinfo[C_OFFSET];

    /* An out-of-range column would index outside the 'a' and 'l' sections. */
    if (searchTextCol < 0 || searchTextCol >= colCount) {
        sqlite3_result_error(pCtx, "okapi_bm25(): column index out of range", -1);
        return;
    }

    const int N_OFFSET = X_OFFSET + 3*termCount*colCount;
    const int A_OFFSET = N_OFFSET + 1;
    const int L_OFFSET = (A_OFFSET + colCount);

    const double totalDocs = matchinfo[N_OFFSET];
    const double avgLength = matchinfo[A_OFFSET + searchTextCol];
    const double docLength = matchinfo[L_OFFSET + searchTextCol];

    double sum = 0.0;

    for (int i = 0; i < termCount; i++) {
        /*
         * The 'x' section holds three integers per (phrase, column) pair,
         * stored phrase-major: the triple for phrase i and column c starts
         * at 3 * (c + i * colCount).
         */
        const int currentX = X_OFFSET + 3 * (searchTextCol + i * colCount);
        const double termFrequency = matchinfo[currentX];    /* hits in this row */
        const double docsWithTerm = matchinfo[currentX + 2]; /* rows with a hit  */

        const double idf = log(
            (totalDocs - docsWithTerm + 0.5) /
            (docsWithTerm + 0.5)
        );

        const double rightSide = (
            (termFrequency * (K1 + 1)) /
            (termFrequency + (K1 * (1 - B + (B * (docLength / avgLength)))))
        );

        sum += (idf * rightSide);
    }

    sqlite3_result_double(pCtx, sum);
}
//
// Created by Joshua Wilson on 27/05/14.
// Copyright (c) 2014 Joshua Wilson. All rights reserved.
// https://github.com/neozenith/sqlite-okapi-bm25
//
// This is an extension to the work of "Radford 'rads' Smith"
// found at: https://github.com/rads/sqlite-okapi-bm25
// which is covered by the MIT License
// http://opensource.org/licenses/MIT
// the following code shall also be covered by the same MIT License
/*
 * SQL scalar function computing a column-weighted BM25 score over all columns.
 *
 * SQL arguments:
 *   apVal[0]       matchinfo blob; the offsets below expect the values in
 *                  the order p, c, x, n, a, l
 *                  (see http://www.sqlite.org/fts3.html#matchinfo)
 *   apVal[1 + c]   optional weight of column c (default 1.0)
 *
 * K1 and B are fixed to 1.2 and 0.75 so that the extra arguments can be the
 * column weights. For an explanation of the maths and how to choose these
 * values see http://stackoverflow.com/a/23161886/622276
 *
 * The result is reported through sqlite3_result_double(); invalid arguments
 * are reported through sqlite3_result_error().
 */
static void okapi_bm25f(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) {
    assert(sizeof(int) == 4);

    if (nVal < 1) {
        sqlite3_result_error(pCtx, "wrong number of arguments to function okapi_bm25f(), expected matchinfo", -1);
        return;
    }

    const unsigned int *matchinfo = (const unsigned int *)sqlite3_value_blob(apVal[0]);
    if (!matchinfo) {
        sqlite3_result_error(pCtx, "okapi_bm25f(): matchinfo blob is empty", -1);
        return;
    }

    const double K1 = 1.2;
    const double B = 0.75;

    const int P_OFFSET = 0;
    const int C_OFFSET = 1;
    const int X_OFFSET = 2;

    const int termCount = matchinfo[P_OFFSET];
    const int colCount = matchinfo[C_OFFSET];

    const int N_OFFSET = X_OFFSET + 3*termCount*colCount;
    const int A_OFFSET = N_OFFSET + 1;
    const int L_OFFSET = (A_OFFSET + colCount);
    /* An 's' section at L_OFFSET + colCount could serve as a pseudo
       proximity weighting per column; it is not used here. */

    const double totalDocs = matchinfo[N_OFFSET];

    /* Lengths are accumulated over all columns of the row. */
    double avgLength = 0.0;
    double docLength = 0.0;
    for (int col = 0; col < colCount; col++) {
        avgLength += matchinfo[A_OFFSET + col];
        docLength += matchinfo[L_OFFSET + col];
    }

    /* Replacement for negative idf values: 1 / number of tokens. */
    const double epsilon = 1.0 / (totalDocs*avgLength);
    double sum = 0.0;

    for (int t = 0; t < termCount; t++) {
        for (int col = 0; col < colCount; col++) {
            /*
             * The 'x' section holds three integers per (phrase, column)
             * pair, stored phrase-major: the triple for phrase t and column
             * col starts at 3 * (col + t * colCount).
             */
            const int currentX = X_OFFSET + 3 * (col + t * colCount);
            const double termFrequency = matchinfo[currentX];    /* hits in this row */
            const double docsWithTerm = matchinfo[currentX + 2]; /* rows with a hit  */

            double idf = log(
                (totalDocs - docsWithTerm + 0.5) /
                (docsWithTerm + 0.5)
            );
            /* "...terms appearing in more than half of the corpus will
               provide negative contributions to the final document score."
               http://en.wikipedia.org/wiki/Okapi_BM25
               Such common terms get a very small positive effect instead. */
            idf = (idf < 0) ? epsilon : idf;

            double rightSide = (
                (termFrequency * (K1 + 1)) /
                (termFrequency + (K1 * (1 - B + (B * (docLength / avgLength)))))
            );

            /* BM25+ lower bound: avoids scoring long matching documents like
               short documents that do not contain as many terms.
               Yuanhua Lv and ChengXiang Zhai. 'Lower-bounding term frequency
               normalization.' In Proceedings of CIKM'2011, pages 7-16.
               http://sifaka.cs.uiuc.edu/~ylv2/pub/cikm11-lowerbound.pdf */
            rightSide += 1.0;

            const double weight = ((nVal > col+1) ? sqlite3_value_double(apVal[col+1]) : 1.0);

            sum += (idf * rightSide) * weight;
        }
    }

    sqlite3_result_double(pCtx, sum);
}
/*
 * SQL scalar function computing a column-weighted BM25 score over all
 * columns, with caller-supplied K1 and B parameters.
 *
 * SQL arguments:
 *   apVal[0]       matchinfo blob; the offsets below expect the values in
 *                  the order p, c, x, n, a, l
 *                  (see http://www.sqlite.org/fts3.html#matchinfo)
 *   apVal[1]       K1 parameter (required)
 *   apVal[2]       B parameter (required)
 *   apVal[3 + c]   optional weight of column c (default 1.0)
 *
 * For an explanation of the maths and how to choose K1 and B see
 * http://stackoverflow.com/a/23161886/622276
 *
 * The result is reported through sqlite3_result_double(); invalid arguments
 * are reported through sqlite3_result_error().
 */
static void okapi_bm25f_kb(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) {
    assert(sizeof(int) == 4);

    /* Bail out after reporting: continuing would read apVal[] out of bounds. */
    if (nVal < 2) {
        sqlite3_result_error(pCtx, "wrong number of arguments to function okapi_bm25_kb(), expected k1 parameter", -1);
        return;
    }
    if (nVal < 3) {
        sqlite3_result_error(pCtx, "wrong number of arguments to function okapi_bm25_kb(), expected b parameter", -1);
        return;
    }

    const unsigned int *matchinfo = (const unsigned int *)sqlite3_value_blob(apVal[0]);
    if (!matchinfo) {
        sqlite3_result_error(pCtx, "okapi_bm25_kb(): matchinfo blob is empty", -1);
        return;
    }

    const double K1 = sqlite3_value_double(apVal[1]);
    const double B = sqlite3_value_double(apVal[2]);

    const int P_OFFSET = 0;
    const int C_OFFSET = 1;
    const int X_OFFSET = 2;

    const int termCount = matchinfo[P_OFFSET];
    const int colCount = matchinfo[C_OFFSET];

    const int N_OFFSET = X_OFFSET + 3*termCount*colCount;
    const int A_OFFSET = N_OFFSET + 1;
    const int L_OFFSET = (A_OFFSET + colCount);
    /* An 's' section at L_OFFSET + colCount could serve as a pseudo
       proximity weighting per column; it is not used here. */

    const double totalDocs = matchinfo[N_OFFSET];

    /* Lengths are accumulated over all columns of the row. */
    double avgLength = 0.0;
    double docLength = 0.0;
    for (int col = 0; col < colCount; col++) {
        avgLength += matchinfo[A_OFFSET + col];
        docLength += matchinfo[L_OFFSET + col];
    }

    /* Replacement for negative idf values: 1 / number of tokens. */
    const double epsilon = 1.0 / (totalDocs*avgLength);
    double sum = 0.0;

    for (int t = 0; t < termCount; t++) {
        for (int col = 0; col < colCount; col++) {
            /*
             * The 'x' section holds three integers per (phrase, column)
             * pair, stored phrase-major: the triple for phrase t and column
             * col starts at 3 * (col + t * colCount).
             */
            const int currentX = X_OFFSET + 3 * (col + t * colCount);
            const double termFrequency = matchinfo[currentX];    /* hits in this row */
            const double docsWithTerm = matchinfo[currentX + 2]; /* rows with a hit  */

            double idf = log(
                (totalDocs - docsWithTerm + 0.5) /
                (docsWithTerm + 0.5)
            );
            /* "...terms appearing in more than half of the corpus will
               provide negative contributions to the final document score."
               http://en.wikipedia.org/wiki/Okapi_BM25
               Such common terms get a very small positive effect instead. */
            idf = (idf < 0) ? epsilon : idf;

            double rightSide = (
                (termFrequency * (K1 + 1)) /
                (termFrequency + (K1 * (1 - B + (B * (docLength / avgLength)))))
            );

            /* BM25+ lower bound: avoids scoring long matching documents like
               short documents that do not contain as many terms.
               Yuanhua Lv and ChengXiang Zhai. 'Lower-bounding term frequency
               normalization.' In Proceedings of CIKM'2011, pages 7-16.
               http://sifaka.cs.uiuc.edu/~ylv2/pub/cikm11-lowerbound.pdf */
            rightSide += 1.0;

            const double weight = ((nVal > col+3) ? sqlite3_value_double(apVal[col+3]) : 1.0);

            sum += (idf * rightSide) * weight;
        }
    }

    sqlite3_result_double(pCtx, sum);
}
# qmake include file for the bundled SQLite sources.
# $$PWD is the directory of this file; "*=" appends it only if it is not
# already listed, so the headers and sources below are found by name.
INCLUDEPATH *= $$PWD
VPATH *= $$PWD
# Headers and the single source file added to the including project.
HEADERS += okapi_bm25.h \
sqlite3.h \
sqlite3ext.h
SOURCES += sqlite3.c
# SQLITE_API is defined so that the SQLite API symbols are exported:
# __declspec(dllexport) on Windows, default symbol visibility on Unix.
win32:DEFINES += SQLITE_API=__declspec(dllexport)
unix:DEFINES += SQLITE_API=\"__attribute__((visibility(\\\"default\\\")))\"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://www.qt.io/licensing. For further information
** use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/
#include "cmbalivecommand.h"
#include <QtDebug>
#include <QDataStream>
namespace CodeModelBackEnd {

// Writes an AliveCommand to the stream. The parameter is unused, so nothing
// is written and the stream is returned untouched.
QDataStream &operator<<(QDataStream &out, const AliveCommand &/*command*/)
{
    return out;
}

// Reads an AliveCommand from the stream. The parameter is unused, so nothing
// is consumed and the stream is returned untouched.
QDataStream &operator>>(QDataStream &in, AliveCommand &/*command*/)
{
    return in;
}

// The operands are not inspected: any two AliveCommands compare equal.
bool operator == (const AliveCommand &/*first*/, const AliveCommand &/*second*/)
{
    return true;
}

// Since any two AliveCommands compare equal, none is ordered before another.
// Returning true here would break irreflexivity (a < a) and therefore the
// strict weak ordering that sorting and ordered containers rely on.
bool operator < (const AliveCommand &/*first*/, const AliveCommand &/*second*/)
{
    return false;
}

// Debug output: prints the fixed text "AliveCommand()" without extra spacing.
QDebug operator <<(QDebug debug, const AliveCommand &/*command*/)
{
    return debug.nospace() << "AliveCommand()";
}

} // namespace CodeModelBackEnd
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://www.qt.io/licensing. For further information
** use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/
#ifndef CMBALIVECOMMAND_H
#define CMBALIVECOMMAND_H
#include "codemodelbackendipc_global.h"
#include <QMetaType>
namespace CodeModelBackEnd {
// Command type without any members; it carries no data of its own.
// NOTE(review): presumably used to signal that a peer is still alive - confirm against its users.
class CMBIPC_EXPORT AliveCommand
{
};
// Stream operators so the command can be written to / read from a QDataStream.
CMBIPC_EXPORT QDataStream &operator<<(QDataStream &out, const AliveCommand &command);
CMBIPC_EXPORT QDataStream &operator>>(QDataStream &in, AliveCommand &command);
// Equality and ordering comparison between two commands.
CMBIPC_EXPORT bool operator == (const AliveCommand &first, const AliveCommand &second);
CMBIPC_EXPORT bool operator < (const AliveCommand &first, const AliveCommand &second);
// Debug output of a command via QDebug.
CMBIPC_EXPORT QDebug operator <<(QDebug debug, const AliveCommand &command);
}
Q_DECLARE_METATYPE(CodeModelBackEnd::AliveCommand)
#endif // CMBALIVECOMMAND_H
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://www.qt.io/licensing. For further information
** use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/
#include "cmbcodecompletedcommand.h"
#include <QDebug>
#include <QDataStream>
#include <ostream>
namespace CodeModelBackEnd {
CodeCompletedCommand::CodeCompletedCommand(const QVector<CodeCompletion> &codeCompletions, quint64 ticketNumber)
: codeCompletions_(codeCompletions),
ticketNumber_(ticketNumber)
{
}
const QVector<CodeCompletion> &CodeCompletedCommand::codeCompletions() const
{
return codeCompletions_;
}
quint64 CodeCompletedCommand::ticketNumber() const
{
return ticketNumber_;
}
QDataStream &operator<<(QDataStream &out, const CodeCompletedCommand &command)
{
out << command.codeCompletions_;
out << command.ticketNumber_;
return out;
}
QDataStream &operator>>(QDataStream &in, CodeCompletedCommand &command)
{
in >> command.codeCompletions_;
in >> command.ticketNumber_;
return in;
}
bool operator == (const CodeCompletedCommand &first, const CodeCompletedCommand &second)
{
return first.ticketNumber_ == second.ticketNumber_
&& first.codeCompletions_ == second.codeCompletions_;
}
bool operator < (const CodeCompletedCommand &first, const CodeCompletedCommand &second)
{
return first.ticketNumber_ < second.ticketNumber_;
}
QDebug operator <<(QDebug debug, const CodeCompletedCommand &command)
{
debug.nospace() << "CodeCompletedCommand(";
debug.nospace() << command.codeCompletions_ << ", " << command.ticketNumber_;
debug.nospace() << ")";
return debug;
}
void PrintTo(const CodeCompletedCommand &command, ::std::ostream* os)
{
QString output;
QDebug debug(&output);
debug << command;
*os << output.toUtf8().constData();
}
} // namespace CodeModelBackEnd
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://www.qt.io/licensing. For further information
** use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/
#ifndef CODEMODELBACKEND_CODECOMPLETEDCOMMAND_H
#define CODEMODELBACKEND_CODECOMPLETEDCOMMAND_H
#include <QMetaType>
#include <QVector>
#include "codecompletion.h"
namespace CodeModelBackEnd {
class CMBIPC_EXPORT CodeCompletedCommand
{
friend CMBIPC_EXPORT QDataStream &operator<<(QDataStream &out, const CodeCompletedCommand &command);
friend CMBIPC_EXPORT QDataStream &operator>>(QDataStream &in, CodeCompletedCommand &command);
friend CMBIPC_EXPORT bool operator == (const CodeCompletedCommand &first, const CodeCompletedCommand &second);
friend CMBIPC_EXPORT bool operator < (const CodeCompletedCommand &first, const CodeCompletedCommand &second);