Commit a3c0f0f6 authored by Eike Ziller

MapReduce refactoring and improvements

- QThread instead of std::thread for better integration with Qt
- Use thread pool for recycling threads
- Map and reduce functions are handled like any function passed
  to runAsync, so they either report results through the
  QFutureInterface, or through the return value.
- Automatically deduce the reduce result type

Change-Id: I7a31370c21f8c27b378cd87c3d5974b162449ce1
Reviewed-by: Tobias Hunger <tobias.hunger@theqtcompany.com>
parent 36a61a94
...@@ -83,8 +83,8 @@ class FileSearch ...@@ -83,8 +83,8 @@ class FileSearch
public: public:
FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags, FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags,
QMap<QString, QString> fileToContentsMap); QMap<QString, QString> fileToContentsMap);
FileSearchResultList operator()(QFutureInterface<FileSearchResultList> futureInterface, void operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const; const FileIterator::Item &item) const;
private: private:
QMap<QString, QString> fileToContentsMap; QMap<QString, QString> fileToContentsMap;
...@@ -104,8 +104,8 @@ public: ...@@ -104,8 +104,8 @@ public:
FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags, FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags,
QMap<QString, QString> fileToContentsMap); QMap<QString, QString> fileToContentsMap);
FileSearchRegExp(const FileSearchRegExp &other); FileSearchRegExp(const FileSearchRegExp &other);
FileSearchResultList operator()(QFutureInterface<FileSearchResultList> futureInterface, void operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const; const FileIterator::Item &item) const;
private: private:
QRegularExpressionMatch doGuardedMatch(const QString &line, int offset) const; QRegularExpressionMatch doGuardedMatch(const QString &line, int offset) const;
...@@ -129,17 +129,21 @@ FileSearch::FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags ...@@ -129,17 +129,21 @@ FileSearch::FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags
termDataUpper = searchTermUpper.constData(); termDataUpper = searchTermUpper.constData();
} }
FileSearchResultList FileSearch::operator()(QFutureInterface<FileSearchResultList> futureInterface, void FileSearch::operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const const FileIterator::Item &item) const
{ {
FileSearchResultList results;
if (futureInterface.isCanceled()) if (futureInterface.isCanceled())
return results; return;
futureInterface.setProgressRange(0, 1);
futureInterface.setProgressValue(0);
FileSearchResultList results;
QFile file; QFile file;
QTextStream stream; QTextStream stream;
QString tempString; QString tempString;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) {
return results; futureInterface.cancel(); // failure
return;
}
int lineNr = 0; int lineNr = 0;
while (!stream.atEnd()) { while (!stream.atEnd()) {
...@@ -211,7 +215,10 @@ FileSearchResultList FileSearch::operator()(QFutureInterface<FileSearchResultLis ...@@ -211,7 +215,10 @@ FileSearchResultList FileSearch::operator()(QFutureInterface<FileSearchResultLis
} }
if (file.isOpen()) if (file.isOpen())
file.close(); file.close();
return results; if (!futureInterface.isCanceled()) {
futureInterface.reportResult(results);
futureInterface.setProgressValue(1);
}
} }
FileSearchRegExp::FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags, FileSearchRegExp::FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags,
...@@ -238,17 +245,21 @@ QRegularExpressionMatch FileSearchRegExp::doGuardedMatch(const QString &line, in ...@@ -238,17 +245,21 @@ QRegularExpressionMatch FileSearchRegExp::doGuardedMatch(const QString &line, in
return expression.match(line, offset); return expression.match(line, offset);
} }
FileSearchResultList FileSearchRegExp::operator()(QFutureInterface<FileSearchResultList> futureInterface, void FileSearchRegExp::operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const const FileIterator::Item &item) const
{ {
FileSearchResultList results;
if (futureInterface.isCanceled()) if (futureInterface.isCanceled())
return results; return;
futureInterface.setProgressRange(0, 1);
futureInterface.setProgressValue(0);
FileSearchResultList results;
QFile file; QFile file;
QTextStream stream; QTextStream stream;
QString tempString; QString tempString;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) {
return results; futureInterface.cancel(); // failure
return;
}
int lineNr = 0; int lineNr = 0;
QString line; QString line;
...@@ -277,7 +288,10 @@ FileSearchResultList FileSearchRegExp::operator()(QFutureInterface<FileSearchRes ...@@ -277,7 +288,10 @@ FileSearchResultList FileSearchRegExp::operator()(QFutureInterface<FileSearchRes
} }
if (file.isOpen()) if (file.isOpen())
file.close(); file.close();
return results; if (!futureInterface.isCanceled()) {
futureInterface.reportResult(results);
futureInterface.setProgressValue(1);
}
} }
struct SearchState struct SearchState
...@@ -345,7 +359,7 @@ void cleanUpFileSearch(QFutureInterface<FileSearchResultList> &futureInterface, ...@@ -345,7 +359,7 @@ void cleanUpFileSearch(QFutureInterface<FileSearchResultList> &futureInterface,
QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, FileIterator *files, QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, FileIterator *files,
QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap) QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap)
{ {
return mapReduce<FileSearchResultList>(std::cref(*files), return mapReduce(std::cref(*files),
[searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) { [searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) {
return initFileSearch(futureInterface, searchTerm, files); return initFileSearch(futureInterface, searchTerm, files);
}, },
...@@ -357,7 +371,7 @@ QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, File ...@@ -357,7 +371,7 @@ QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, File
QFuture<FileSearchResultList> Utils::findInFilesRegExp(const QString &searchTerm, FileIterator *files, QFuture<FileSearchResultList> Utils::findInFilesRegExp(const QString &searchTerm, FileIterator *files,
QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap) QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap)
{ {
return mapReduce<FileSearchResultList>(std::cref(*files), return mapReduce(std::cref(*files),
[searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) { [searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) {
return initFileSearch(futureInterface, searchTerm, files); return initFileSearch(futureInterface, searchTerm, files);
}, },
......
...@@ -25,124 +25,167 @@ ...@@ -25,124 +25,167 @@
#pragma once #pragma once
#include "qtcassert.h" #include "runextensions.h"
#include <QFuture> #include <QFutureWatcher>
#include <QFutureInterface>
#include <chrono>
#include <future>
#include <thread>
#include <vector>
namespace Utils { namespace Utils {
template<typename T>
typename std::vector<std::future<T>>::iterator
waitForAny(std::vector<std::future<T>> &futures)
{
// Wait for any future to have a result ready.
// Unfortunately we have to do that in a busy loop because future doesn't have a feature to
// wait for any of a set of futures (yet? possibly when_any in C++17).
auto end = futures.end();
QTC_ASSERT(!futures.empty(), return end);
auto futureIterator = futures.begin();
forever {
if (futureIterator->wait_for(std::chrono::duration<quint64>::zero()) == std::future_status::ready)
return futureIterator;
++futureIterator;
if (futureIterator == end)
futureIterator = futures.begin();
}
}
namespace Internal { namespace Internal {
template<typename T> // TODO: try to use this for replacing MultiTask
void swapErase(std::vector<T> &vec, typename std::vector<T>::iterator it)
{
// efficient erasing by swapping with back element
*it = std::move(vec.back());
vec.pop_back();
}
template <typename MapResult, typename State, typename ReduceResult, typename ReduceFunction> class MapReduceBase : public QObject
void reduceOne(QFutureInterface<ReduceResult> &futureInterface,
std::vector<std::future<MapResult>> &futures,
State &state, const ReduceFunction &reduce)
{ {
auto futureIterator = waitForAny(futures); Q_OBJECT
if (futureIterator != futures.end()) { };
reduce(futureInterface, state, futureIterator->get());
swapErase(futures, futureIterator);
}
}
// This together with reduceOne can be replaced by std::transformReduce (parallelism TS)
// when that becomes widely available in C++ implementations
template <typename Container, typename MapFunction, typename State, typename ReduceResult, typename ReduceFunction> template <typename Container, typename MapFunction, typename State, typename ReduceResult, typename ReduceFunction>
void mapReduceLoop(QFutureInterface<ReduceResult> &futureInterface, const Container &container, class MapReduce : public MapReduceBase
const MapFunction &map, State &state, const ReduceFunction &reduce)
{ {
const unsigned MAX_THREADS = std::thread::hardware_concurrency(); using MapResult = typename Internal::resultType<MapFunction>::type;
using MapResult = typename std::result_of<MapFunction(QFutureInterface<ReduceResult>,typename Container::value_type)>::type; using Iterator = typename Container::const_iterator;
std::vector<std::future<MapResult>> futures;
futures.reserve(MAX_THREADS); public:
auto fileIterator = container.begin(); MapReduce(QFutureInterface<ReduceResult> futureInterface, const Container &container,
auto end = container.end(); const MapFunction &map, State &state, const ReduceFunction &reduce)
while (!futureInterface.isCanceled() && (fileIterator != end || futures.size() != 0)) { : m_futureInterface(futureInterface),
if (futures.size() >= MAX_THREADS || fileIterator == end) { m_container(container),
// We don't want to start a new thread (yet), so try to find a future that is ready and m_iterator(m_container.begin()),
// handle its result. m_map(map),
reduceOne(futureInterface, futures, state, reduce); m_state(state),
} else { // start a new thread m_reduce(reduce)
futures.push_back(std::async(std::launch::async, {
map, futureInterface, *fileIterator)); connect(&m_selfWatcher, &QFutureWatcher<void>::canceled,
++fileIterator; this, &MapReduce::cancelAll);
m_selfWatcher.setFuture(futureInterface.future());
}
void exec()
{
if (schedule()) // do not enter event loop for empty containers
m_loop.exec();
}
private:
bool schedule()
{
bool didSchedule = false;
while (m_iterator != m_container.end() && m_mapWatcher.size() < QThread::idealThreadCount()) {
didSchedule = true;
auto watcher = new QFutureWatcher<MapResult>();
connect(watcher, &QFutureWatcher<MapResult>::finished, this, [this, watcher]() {
mapFinished(watcher);
});
m_mapWatcher.append(watcher);
watcher->setFuture(runAsync(&m_threadPool, m_map, *m_iterator));
++m_iterator;
} }
return didSchedule;
} }
}
void mapFinished(QFutureWatcher<MapResult> *watcher)
{
m_mapWatcher.removeAll(watcher); // remove so we can schedule next one
bool didSchedule = false;
if (!m_futureInterface.isCanceled()) {
// first schedule the next map...
didSchedule = schedule();
// ...then reduce
const int resultCount = watcher->future().resultCount();
for (int i = 0; i < resultCount; ++i) {
Internal::runAsyncImpl(m_futureInterface, m_reduce, m_state, watcher->future().resultAt(i));
}
}
delete watcher;
if (!didSchedule && m_mapWatcher.isEmpty())
m_loop.quit();
}
void cancelAll()
{
foreach (QFutureWatcher<MapResult> *watcher, m_mapWatcher)
watcher->cancel();
}
QFutureWatcher<void> m_selfWatcher;
QFutureInterface<ReduceResult> m_futureInterface;
const Container &m_container;
Iterator m_iterator;
const MapFunction &m_map;
State &m_state;
const ReduceFunction &m_reduce;
QEventLoop m_loop;
QThreadPool m_threadPool; // for reusing threads
QList<QFutureWatcher<MapResult> *> m_mapWatcher;
};
template <typename Container, typename InitFunction, typename MapFunction, typename ReduceResult, template <typename Container, typename InitFunction, typename MapFunction, typename ReduceResult,
typename ReduceFunction, typename CleanUpFunction> typename ReduceFunction, typename CleanUpFunction>
void blockingMapReduce(QFutureInterface<ReduceResult> futureInterface, const Container &container, void blockingMapReduce(QFutureInterface<ReduceResult> &futureInterface, const Container &container,
const InitFunction &init, const MapFunction &map, const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup) const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{ {
auto state = init(futureInterface); auto state = init(futureInterface);
mapReduceLoop(futureInterface, container, map, state, reduce); MapReduce<Container, MapFunction, decltype(state), ReduceResult, ReduceFunction> mr(futureInterface, container, map, state, reduce);
mr.exec();
cleanup(futureInterface, state); cleanup(futureInterface, state);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
} }
} // Internal } // Internal
template <typename ReduceResult, typename Container, typename InitFunction, typename MapFunction, template <typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction> typename ReduceFunction, typename CleanUpFunction,
QFuture<ReduceResult> mapReduce(std::reference_wrapper<Container> containerWrapper, typename ReduceResult = typename Internal::resultType<ReduceFunction>::type>
QFuture<ReduceResult>
mapReduce(std::reference_wrapper<Container> containerWrapper,
const InitFunction &init, const MapFunction &map, const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup) const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{ {
auto fi = QFutureInterface<ReduceResult>(); return runAsync(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
QFuture<ReduceResult> future = fi.future(); containerWrapper, init, map, reduce, cleanup);
fi.reportStarted();
std::thread(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
fi, containerWrapper, init, map, reduce, cleanup).detach();
return future;
} }
template <typename ReduceResult, typename Container, typename InitFunction, typename MapFunction, /*!
typename ReduceFunction, typename CleanUpFunction> Calls the map function on all items in \a container in parallel through Utils::runAsync.
QFuture<ReduceResult> mapReduce(const Container &container, const InitFunction &init, const MapFunction &map,
The reduce function is called in the mapReduce thread with each of the reported results from
the map function, in arbitrary order, but never in parallel.
It gets passed a reference to a user defined state object, and a result from the map function.
If it takes a QFutureInterface reference as its first argument, it can report results
for the mapReduce operation through that. Otherwise, any values returned by the reduce function
are reported as results of the mapReduce operation.
The init function is called in the mapReduce thread before the actual mapping starts,
and must return the initial state object for the reduce function. It gets the QFutureInterface
of the mapReduce operation passed as an argument.
The cleanup function is called in the mapReduce thread after all map and reduce calls have
finished, with the QFutureInterface of the mapReduce operation and the final state object
as arguments, and can be used to clean up any resources, or report a final result of the
mapReduce.
Container<ItemType>
StateType InitFunction(QFutureInterface<ReduceResultType>&)
void MapFunction(QFutureInterface<MapResultType>&, const ItemType&)
or
MapResultType MapFunction(const ItemType&)
void ReduceFunction(QFutureInterface<ReduceResultType>&, StateType&, const MapResultType&)
or
ReduceResultType ReduceFunction(StateType&, const MapResultType&)
void CleanUpFunction(QFutureInterface<ReduceResultType>&, StateType&)
*/
template <typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction,
typename ReduceResult = typename Internal::resultType<ReduceFunction>::type>
QFuture<ReduceResult>
mapReduce(const Container &container, const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup) const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{ {
auto fi = QFutureInterface<ReduceResult>(); return runAsync(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
QFuture<ReduceResult> future = fi.future(); container, init, map, reduce, cleanup);
std::thread(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
fi, container, init, map, reduce, cleanup).detach();
return future;
} }
} // Utils } // Utils
...@@ -222,12 +222,9 @@ private: ...@@ -222,12 +222,9 @@ private:
// void function that does not take QFutureInterface // void function that does not take QFutureInterface
template <typename ResultType, typename Function, typename... Args> template <typename ResultType, typename Function, typename... Args>
void runAsyncReturnVoidDispatch(std::true_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args) void runAsyncReturnVoidDispatch(std::true_type, QFutureInterface<ResultType>, Function &&function, Args&&... args)
{ {
function(std::forward<Args>(args)...); function(std::forward<Args>(args)...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
} }
// non-void function that does not take QFutureInterface // non-void function that does not take QFutureInterface
...@@ -235,9 +232,6 @@ template <typename ResultType, typename Function, typename... Args> ...@@ -235,9 +232,6 @@ template <typename ResultType, typename Function, typename... Args>
void runAsyncReturnVoidDispatch(std::false_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args) void runAsyncReturnVoidDispatch(std::false_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args)
{ {
futureInterface.reportResult(function(std::forward<Args>(args)...)); futureInterface.reportResult(function(std::forward<Args>(args)...));
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
} }
// function that takes QFutureInterface // function that takes QFutureInterface
...@@ -245,9 +239,6 @@ template <typename ResultType, typename Function, typename... Args> ...@@ -245,9 +239,6 @@ template <typename ResultType, typename Function, typename... Args>
void runAsyncQFutureInterfaceDispatch(std::true_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args) void runAsyncQFutureInterfaceDispatch(std::true_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args)
{ {
function(futureInterface, std::forward<Args>(args)...); function(futureInterface, std::forward<Args>(args)...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
} }
// function that does not take QFutureInterface // function that does not take QFutureInterface
...@@ -374,6 +365,9 @@ private: ...@@ -374,6 +365,9 @@ private:
{ {
// invalidates data, which is moved into the call // invalidates data, which is moved into the call
runAsyncImpl(futureInterface, std::move(std::get<index>(data))...); runAsyncImpl(futureInterface, std::move(std::get<index>(data))...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
} }
Data data; Data data;
......
...@@ -17,6 +17,7 @@ SUBDIRS += \ ...@@ -17,6 +17,7 @@ SUBDIRS += \
json \ json \
utils \ utils \
filesearch \ filesearch \
mapreduce \
runextensions \ runextensions \
sdktool \ sdktool \
valgrind valgrind
......
# qmake project for the mapReduce autotest (tst_mapreduce.cpp).
# NOTE(review): QTC_LIB_DEPENDS is presumably consumed by the included .pri to
# link the test against the utils library - confirm.
QTC_LIB_DEPENDS += utils
include(../qttest.pri)
# Input
SOURCES += tst_mapreduce.cpp
# NOTE(review): the header is presumably listed so that moc processes the
# Q_OBJECT class declared in mapreduce.h - confirm.
HEADERS += $$IDE_SOURCE_TREE/src/libs/utils/mapreduce.h
import qbs
// Qbs product for the mapReduce autotest: builds tst_mapreduce.cpp and
// declares a dependency on the Utils product.
QtcAutotest {
name: "Map reduce autotest"
Depends { name: "Utils" }
files: [
"tst_mapreduce.cpp",
]
}
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3 as published by the Free Software
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
**
****************************************************************************/
#include <utils/algorithm.h>
#include <utils/mapreduce.h>
#include <QtTest>
// Autotest class for Utils::mapReduce (utils/mapreduce.h).
// Declares a single private slot, which Qt Test invokes as a test function.
class tst_MapReduce : public QObject
{
Q_OBJECT
private slots:
// Test function for mapReduce; its definition follows later in this file.
void mapReduce();
};
// Map helper that hands its result back via the return value:
// yields the square of \a x.
static int returnxx(int x)
{
    const int squared = x * x;
    return squared;
}
// Map helper that hands its result back through the future interface
// instead of a return value: reports the square of \a x on \a fi.
static void returnxxThroughFutureInterface(QFutureInterface<int> &fi, int x)
{
    const int squared = x * x;
    fi.reportResult(squared);
}
void tst_MapReduce::mapReduce()
{
const auto dummyInit = [](QFutureInterface<double>& fi) -> double {
fi.reportResult(0.);
return 0.;
};
const auto reduceWithFutureInterface = [](QFutureInterface<double>& fi, double &state, int value) {
state += value;
fi.reportResult(value);
};
const auto reduceWithReturn = [](double &state, int value) -> double {
state += value;
return value;
};
const auto cleanupHalfState = [](QFutureInterface<double> &fi, double &state) {
state /= 2.;
fi.reportResult(state);
};
// TODO: cannot use function returnxx without pointer here because of decayCopy of arguments in runAsync