Commit a3c0f0f6 authored by Eike Ziller

MapReduce refactoring and improvements

- QThread instead of std::thread for better integration with Qt
- Use thread pool for recycling threads
- Map and reduce functions are handled like any function passed
  to runAsync, so they either report results through the
  QFutureInterface, or through the return value.
- Automatically deduce the reduce result type

Change-Id: I7a31370c21f8c27b378cd87c3d5974b162449ce1
Reviewed-by: Tobias Hunger <tobias.hunger@theqtcompany.com>
parent 36a61a94
......@@ -83,7 +83,7 @@ class FileSearch
public:
FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags,
QMap<QString, QString> fileToContentsMap);
FileSearchResultList operator()(QFutureInterface<FileSearchResultList> futureInterface,
void operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const;
private:
......@@ -104,7 +104,7 @@ public:
FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags,
QMap<QString, QString> fileToContentsMap);
FileSearchRegExp(const FileSearchRegExp &other);
FileSearchResultList operator()(QFutureInterface<FileSearchResultList> futureInterface,
void operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const;
private:
......@@ -129,17 +129,21 @@ FileSearch::FileSearch(const QString &searchTerm, QTextDocument::FindFlags flags
termDataUpper = searchTermUpper.constData();
}
FileSearchResultList FileSearch::operator()(QFutureInterface<FileSearchResultList> futureInterface,
void FileSearch::operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const
{
FileSearchResultList results;
if (futureInterface.isCanceled())
return results;
return;
futureInterface.setProgressRange(0, 1);
futureInterface.setProgressValue(0);
FileSearchResultList results;
QFile file;
QTextStream stream;
QString tempString;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap))
return results;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) {
futureInterface.cancel(); // failure
return;
}
int lineNr = 0;
while (!stream.atEnd()) {
......@@ -211,7 +215,10 @@ FileSearchResultList FileSearch::operator()(QFutureInterface<FileSearchResultLis
}
if (file.isOpen())
file.close();
return results;
if (!futureInterface.isCanceled()) {
futureInterface.reportResult(results);
futureInterface.setProgressValue(1);
}
}
FileSearchRegExp::FileSearchRegExp(const QString &searchTerm, QTextDocument::FindFlags flags,
......@@ -238,17 +245,21 @@ QRegularExpressionMatch FileSearchRegExp::doGuardedMatch(const QString &line, in
return expression.match(line, offset);
}
FileSearchResultList FileSearchRegExp::operator()(QFutureInterface<FileSearchResultList> futureInterface,
void FileSearchRegExp::operator()(QFutureInterface<FileSearchResultList> &futureInterface,
const FileIterator::Item &item) const
{
FileSearchResultList results;
if (futureInterface.isCanceled())
return results;
return;
futureInterface.setProgressRange(0, 1);
futureInterface.setProgressValue(0);
FileSearchResultList results;
QFile file;
QTextStream stream;
QString tempString;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap))
return results;
if (!openStream(item.filePath, item.encoding, &stream, &file, &tempString, fileToContentsMap)) {
futureInterface.cancel(); // failure
return;
}
int lineNr = 0;
QString line;
......@@ -277,7 +288,10 @@ FileSearchResultList FileSearchRegExp::operator()(QFutureInterface<FileSearchRes
}
if (file.isOpen())
file.close();
return results;
if (!futureInterface.isCanceled()) {
futureInterface.reportResult(results);
futureInterface.setProgressValue(1);
}
}
struct SearchState
......@@ -345,7 +359,7 @@ void cleanUpFileSearch(QFutureInterface<FileSearchResultList> &futureInterface,
QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, FileIterator *files,
QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap)
{
return mapReduce<FileSearchResultList>(std::cref(*files),
return mapReduce(std::cref(*files),
[searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) {
return initFileSearch(futureInterface, searchTerm, files);
},
......@@ -357,7 +371,7 @@ QFuture<FileSearchResultList> Utils::findInFiles(const QString &searchTerm, File
QFuture<FileSearchResultList> Utils::findInFilesRegExp(const QString &searchTerm, FileIterator *files,
QTextDocument::FindFlags flags, QMap<QString, QString> fileToContentsMap)
{
return mapReduce<FileSearchResultList>(std::cref(*files),
return mapReduce(std::cref(*files),
[searchTerm, files](QFutureInterface<FileSearchResultList> &futureInterface) {
return initFileSearch(futureInterface, searchTerm, files);
},
......
......@@ -25,124 +25,167 @@
#pragma once
#include "qtcassert.h"
#include "runextensions.h"
#include <QFuture>
#include <QFutureInterface>
#include <chrono>
#include <future>
#include <thread>
#include <vector>
#include <QFutureWatcher>
namespace Utils {
/*!
    Waits until at least one of the \a futures has its result ready and returns an
    iterator to that future.

    If \a futures is empty, QTC_ASSERT fails and futures.end() is returned.

    std::future has no facility for waiting on a whole set of futures, so the
    futures are polled round-robin with a zero timeout. After every complete sweep
    that found nothing ready the calling thread yields, so that the polling does not
    compete at full speed with the very threads that are producing the results.

    Every element of \a futures must be valid, as required by std::future::wait_for.
*/
template<typename T>
typename std::vector<std::future<T>>::iterator
waitForAny(std::vector<std::future<T>> &futures)
{
    const auto end = futures.end();
    QTC_ASSERT(!futures.empty(), return end);
    auto futureIterator = futures.begin();
    forever {
        // Zero timeout: only query the state, never block on a single future.
        if (futureIterator->wait_for(std::chrono::duration<quint64>::zero()) == std::future_status::ready)
            return futureIterator;
        ++futureIterator;
        if (futureIterator == end) {
            // Nothing was ready in this sweep: let other threads run before polling again.
            std::this_thread::yield();
            futureIterator = futures.begin();
        }
    }
}
namespace Internal {
/*!
    Removes the element at \a it from \a vec in constant time by moving the last
    element into its place and then dropping the last slot.

    The order of the remaining elements is not preserved: the former last element
    ends up at the erased position. \a it must be a dereferenceable iterator into
    \a vec, which implies that \a vec is not empty.
*/
template<typename T>
void swapErase(std::vector<T> &vec, typename std::vector<T>::iterator it)
{
    // If the element to erase already is the last one there is nothing to move.
    // Skipping the assignment avoids a self-move-assignment, which standard library
    // types are not required to support.
    const auto last = vec.end() - 1;
    if (it != last)
        *it = std::move(*last);
    vec.pop_back();
}
// TODO: try to use this for replacing MultiTask
template <typename MapResult, typename State, typename ReduceResult, typename ReduceFunction>
void reduceOne(QFutureInterface<ReduceResult> &futureInterface,
std::vector<std::future<MapResult>> &futures,
State &state, const ReduceFunction &reduce)
class MapReduceBase : public QObject
{
auto futureIterator = waitForAny(futures);
if (futureIterator != futures.end()) {
reduce(futureInterface, state, futureIterator->get());
swapErase(futures, futureIterator);
}
}
Q_OBJECT
};
// This together with reduceOne can be replaced by std::transformReduce (parallelism TS)
// when that becomes widely available in C++ implementations
template <typename Container, typename MapFunction, typename State, typename ReduceResult, typename ReduceFunction>
void mapReduceLoop(QFutureInterface<ReduceResult> &futureInterface, const Container &container,
const MapFunction &map, State &state, const ReduceFunction &reduce)
class MapReduce : public MapReduceBase
{
const unsigned MAX_THREADS = std::thread::hardware_concurrency();
using MapResult = typename std::result_of<MapFunction(QFutureInterface<ReduceResult>,typename Container::value_type)>::type;
std::vector<std::future<MapResult>> futures;
futures.reserve(MAX_THREADS);
auto fileIterator = container.begin();
auto end = container.end();
while (!futureInterface.isCanceled() && (fileIterator != end || futures.size() != 0)) {
if (futures.size() >= MAX_THREADS || fileIterator == end) {
// We don't want to start a new thread (yet), so try to find a future that is ready and
// handle its result.
reduceOne(futureInterface, futures, state, reduce);
} else { // start a new thread
futures.push_back(std::async(std::launch::async,
map, futureInterface, *fileIterator));
++fileIterator;
using MapResult = typename Internal::resultType<MapFunction>::type;
using Iterator = typename Container::const_iterator;
public:
MapReduce(QFutureInterface<ReduceResult> futureInterface, const Container &container,
const MapFunction &map, State &state, const ReduceFunction &reduce)
: m_futureInterface(futureInterface),
m_container(container),
m_iterator(m_container.begin()),
m_map(map),
m_state(state),
m_reduce(reduce)
{
connect(&m_selfWatcher, &QFutureWatcher<void>::canceled,
this, &MapReduce::cancelAll);
m_selfWatcher.setFuture(futureInterface.future());
}
void exec()
{
if (schedule()) // do not enter event loop for empty containers
m_loop.exec();
}
private:
bool schedule()
{
bool didSchedule = false;
while (m_iterator != m_container.end() && m_mapWatcher.size() < QThread::idealThreadCount()) {
didSchedule = true;
auto watcher = new QFutureWatcher<MapResult>();
connect(watcher, &QFutureWatcher<MapResult>::finished, this, [this, watcher]() {
mapFinished(watcher);
});
m_mapWatcher.append(watcher);
watcher->setFuture(runAsync(&m_threadPool, m_map, *m_iterator));
++m_iterator;
}
return didSchedule;
}
void mapFinished(QFutureWatcher<MapResult> *watcher)
{
m_mapWatcher.removeAll(watcher); // remove so we can schedule next one
bool didSchedule = false;
if (!m_futureInterface.isCanceled()) {
// first schedule the next map...
didSchedule = schedule();
// ...then reduce
const int resultCount = watcher->future().resultCount();
for (int i = 0; i < resultCount; ++i) {
Internal::runAsyncImpl(m_futureInterface, m_reduce, m_state, watcher->future().resultAt(i));
}
}
delete watcher;
if (!didSchedule && m_mapWatcher.isEmpty())
m_loop.quit();
}
}
void cancelAll()
{
foreach (QFutureWatcher<MapResult> *watcher, m_mapWatcher)
watcher->cancel();
}
QFutureWatcher<void> m_selfWatcher;
QFutureInterface<ReduceResult> m_futureInterface;
const Container &m_container;
Iterator m_iterator;
const MapFunction &m_map;
State &m_state;
const ReduceFunction &m_reduce;
QEventLoop m_loop;
QThreadPool m_threadPool; // for reusing threads
QList<QFutureWatcher<MapResult> *> m_mapWatcher;
};
template <typename Container, typename InitFunction, typename MapFunction, typename ReduceResult,
typename ReduceFunction, typename CleanUpFunction>
void blockingMapReduce(QFutureInterface<ReduceResult> futureInterface, const Container &container,
void blockingMapReduce(QFutureInterface<ReduceResult> &futureInterface, const Container &container,
const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{
auto state = init(futureInterface);
mapReduceLoop(futureInterface, container, map, state, reduce);
MapReduce<Container, MapFunction, decltype(state), ReduceResult, ReduceFunction> mr(futureInterface, container, map, state, reduce);
mr.exec();
cleanup(futureInterface, state);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
}
} // Internal
template <typename ReduceResult, typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction>
QFuture<ReduceResult> mapReduce(std::reference_wrapper<Container> containerWrapper,
template <typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction,
typename ReduceResult = typename Internal::resultType<ReduceFunction>::type>
QFuture<ReduceResult>
mapReduce(std::reference_wrapper<Container> containerWrapper,
const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{
auto fi = QFutureInterface<ReduceResult>();
QFuture<ReduceResult> future = fi.future();
fi.reportStarted();
std::thread(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
fi, containerWrapper, init, map, reduce, cleanup).detach();
return future;
return runAsync(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
containerWrapper, init, map, reduce, cleanup);
}
template <typename ReduceResult, typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction>
QFuture<ReduceResult> mapReduce(const Container &container, const InitFunction &init, const MapFunction &map,
/*!
Calls the map function on all items in \a container in parallel through Utils::runAsync.
The reduce function is called in the mapReduce thread with each of the reported results from
the map function, in arbitrary order, but never in parallel.
It gets passed a reference to a user defined state object, and a result from the map function.
If it takes a QFutureInterface reference as its first argument, it can report results
for the mapReduce operation through that. Otherwise, any values returned by the reduce function
are reported as results of the mapReduce operation.
The init function is called in the mapReduce thread before the actual mapping starts,
and must return the initial state object for the reduce function. It gets the QFutureInterface
of the mapReduce operation passed as an argument.
The cleanup function is called in the mapReduce thread after all map and reduce calls have
finished, with the QFutureInterface of the mapReduce operation and the final state object
as arguments, and can be used to clean up any resources, or report a final result of the
mapReduce.
Container<ItemType>
StateType InitFunction(QFutureInterface<ReduceResultType>&)
void MapFunction(QFutureInterface<MapResultType>&, const ItemType&)
or
MapResultType MapFunction(const ItemType&)
void ReduceFunction(QFutureInterface<ReduceResultType>&, StateType&, const MapResultType&)
or
ReduceResultType ReduceFunction(StateType&, const MapResultType&)
void CleanUpFunction(QFutureInterface<ReduceResultType>&, StateType&)
*/
template <typename Container, typename InitFunction, typename MapFunction,
typename ReduceFunction, typename CleanUpFunction,
typename ReduceResult = typename Internal::resultType<ReduceFunction>::type>
QFuture<ReduceResult>
mapReduce(const Container &container, const InitFunction &init, const MapFunction &map,
const ReduceFunction &reduce, const CleanUpFunction &cleanup)
{
auto fi = QFutureInterface<ReduceResult>();
QFuture<ReduceResult> future = fi.future();
std::thread(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
fi, container, init, map, reduce, cleanup).detach();
return future;
return runAsync(Internal::blockingMapReduce<Container, InitFunction, MapFunction, ReduceResult, ReduceFunction, CleanUpFunction>,
container, init, map, reduce, cleanup);
}
} // Utils
......@@ -222,12 +222,9 @@ private:
// void function that does not take QFutureInterface
template <typename ResultType, typename Function, typename... Args>
void runAsyncReturnVoidDispatch(std::true_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args)
void runAsyncReturnVoidDispatch(std::true_type, QFutureInterface<ResultType>, Function &&function, Args&&... args)
{
function(std::forward<Args>(args)...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
}
// non-void function that does not take QFutureInterface
......@@ -235,9 +232,6 @@ template <typename ResultType, typename Function, typename... Args>
void runAsyncReturnVoidDispatch(std::false_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args)
{
futureInterface.reportResult(function(std::forward<Args>(args)...));
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
}
// function that takes QFutureInterface
......@@ -245,9 +239,6 @@ template <typename ResultType, typename Function, typename... Args>
void runAsyncQFutureInterfaceDispatch(std::true_type, QFutureInterface<ResultType> futureInterface, Function &&function, Args&&... args)
{
function(futureInterface, std::forward<Args>(args)...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
}
// function that does not take QFutureInterface
......@@ -374,6 +365,9 @@ private:
{
// invalidates data, which is moved into the call
runAsyncImpl(futureInterface, std::move(std::get<index>(data))...);
if (futureInterface.isPaused())
futureInterface.waitForResume();
futureInterface.reportFinished();
}
Data data;
......
......@@ -17,6 +17,7 @@ SUBDIRS += \
json \
utils \
filesearch \
mapreduce \
runextensions \
sdktool \
valgrind
......
QTC_LIB_DEPENDS += utils
include(../qttest.pri)
# Input
SOURCES += tst_mapreduce.cpp
HEADERS += $$IDE_SOURCE_TREE/src/libs/utils/mapreduce.h
import qbs
QtcAutotest {
name: "Map reduce autotest"
Depends { name: "Utils" }
files: [
"tst_mapreduce.cpp",
]
}
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3 as published by the Free Software
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
**
****************************************************************************/
#include <utils/algorithm.h>
#include <utils/mapreduce.h>
#include <QtTest>
// Test fixture for Utils::mapReduce; it is run through QTEST_MAIN at the end of this file.
class tst_MapReduce : public QObject
{
Q_OBJECT
private slots:
// Runs Utils::mapReduce with different combinations of map and reduce functions
// and compares the reported results.
void mapReduce();
};
// Map function for the test that hands back the square of x as its return value.
static int returnxx(int x)
{
    const int squared = x * x;
    return squared;
}
// Map function for the test that reports the square of x through fi instead of returning it.
static void returnxxThroughFutureInterface(QFutureInterface<int> &fi, int x)
{
    const int squared = x * x;
    fi.reportResult(squared);
}
void tst_MapReduce::mapReduce()
{
const auto dummyInit = [](QFutureInterface<double>& fi) -> double {
fi.reportResult(0.);
return 0.;
};
const auto reduceWithFutureInterface = [](QFutureInterface<double>& fi, double &state, int value) {
state += value;
fi.reportResult(value);
};
const auto reduceWithReturn = [](double &state, int value) -> double {
state += value;
return value;
};
const auto cleanupHalfState = [](QFutureInterface<double> &fi, double &state) {
state /= 2.;
fi.reportResult(state);
};
// TODO: cannot use function returnxx without pointer here because of decayCopy of arguments in runAsync
{
QList<double> results = Utils::mapReduce(QList<int>({1, 2, 3, 4, 5}),
dummyInit, &returnxx,
reduceWithFutureInterface, cleanupHalfState)
.results();
Utils::sort(results); // mapping order is undefined
QCOMPARE(results, QList<double>({0., 1., 4., 9., 16., 25., 27.5}));
}
{
QList<double> results = Utils::mapReduce(QList<int>({1, 2, 3, 4, 5}),
dummyInit, &returnxxThroughFutureInterface,
reduceWithFutureInterface, cleanupHalfState)
.results();
Utils::sort(results); // mapping order is undefined
QCOMPARE(results, QList<double>({0., 1., 4., 9., 16., 25., 27.5}));
}
{
QList<double> results = Utils::mapReduce(QList<int>({1, 2, 3, 4, 5}),
dummyInit, &returnxx,
reduceWithReturn, cleanupHalfState)
.results();
Utils::sort(results); // mapping order is undefined
QCOMPARE(results, QList<double>({0., 1., 4., 9., 16., 25., 27.5}));
}
}
QTEST_MAIN(tst_MapReduce)
#include "tst_mapreduce.moc"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment