From 1dadf27df6c5dd73cdf61505273e7598b13818fa Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Tue, 8 Apr 2025 17:58:20 +0300 Subject: [PATCH 01/14] Changes from origin + text2image support + fix for paths --- README.md | 3 + Screen01.ui.qml | 122 ++++++++++++------ aimodel/diffuser_server/diffuser_server.py | 82 ++++++++++++ aimodel/plugins/CMakeLists.txt | 1 + aimodel/plugins/asr/CMakeLists.txt | 2 +- aimodel/plugins/diffuser/CMakeLists.txt | 16 +++ aimodel/plugins/diffuser/plugin.json | 3 + .../plugins/diffuser/qdiffuseraimodel_p.cpp | 43 ++++++ aimodel/plugins/diffuser/qdiffuseraimodel_p.h | 33 +++++ aimodel/plugins/ollama/CMakeLists.txt | 2 +- aimodel/plugins/piper-tts/CMakeLists.txt | 2 +- aimodel/plugins/triton/CMakeLists.txt | 2 +- aimodel/plugins/tts/CMakeLists.txt | 2 +- aimodel/plugins/yolo/CMakeLists.txt | 2 +- aimodel/qaimodel.cpp | 5 +- 15 files changed, 271 insertions(+), 49 deletions(-) create mode 100755 aimodel/diffuser_server/diffuser_server.py create mode 100644 aimodel/plugins/diffuser/CMakeLists.txt create mode 100644 aimodel/plugins/diffuser/plugin.json create mode 100644 aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp create mode 100644 aimodel/plugins/diffuser/qdiffuseraimodel_p.h diff --git a/README.md b/README.md index 5b8e2e9..8820be2 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ This projects contains the proof-of-concept for a new Qt AI Inference API. The purpose of the API is to let you easily use different types of AI models for inference from your Qt code, either from C++ or directly from QML! The API abstracts the details of the underlying model and framework implementations, allowing you to just tell what type of input and output you would like to use, and Qt will set things up for you! You can also chain different models together for pipelines. +**Disclaimer** +This API is in proof-of-concept stage and under active development, and not yet a part of the Qt framework. 
Hence, Qt's compatibility promise does not apply; the API can still change in breaking ways. But, it is also a great time to impact the direction it will take! For suggestions feel free to create a ticket in the Qt project's [JIRA](https://bugreports.qt.io), please use the label "QtAiApi" so we can easily find them and collect them together. + ## How it works When you declare a model in your code, Qt will infer from the given input and output type what backend it will set up for the model. The backends are implemented as QPlugins. Currently, the backends are: diff --git a/Screen01.ui.qml b/Screen01.ui.qml index ebd7d30..b014ba0 100644 --- a/Screen01.ui.qml +++ b/Screen01.ui.qml @@ -20,58 +20,87 @@ Rectangle { ColumnLayout { RowLayout { - id: buttonRow - Button { - text: "Record audio" - onClicked: { - recorder.record() + ColumnLayout { + RowLayout { + id: buttonRow + Button { + text: "Record audio" + onClicked: { + recorder.record() + } + } + Button { + text: "Stop audio recording" + onClicked: { + recorder.stop() + if (recorder.actualLocation != "") { + speechToText.pushData(recorder.actualLocation) + } + if (imageFile != "") { + imageToText.pushData(imageFile) + } + } + } } - } - Button { - text: "Stop audio recording" - onClicked: { - recorder.stop() - if (recorder.actualLocation != "") { - speechToText.pushData(recorder.actualLocation) + + RowLayout { + Button { + text: qsTr("Open image") + onClicked: fileDialog.open() } - if (imageFile != "") { - imageToText.pushData(imageFile) + Text { + id: result + text: rectangle.imageFile } } - } - } - RowLayout { - Button { - text: qsTr("Open image") - onClicked: fileDialog.open() + TextField { + placeholderText: "Text2Image" + implicitWidth: 300 + onEditingFinished: diffuser.pushData(text) + } + + TextField { + text: llamaPrompt + placeholderText: "Llama prompt" + implicitWidth: 300 + onEditingFinished: llamaModel.prompt = text + } + TextField { + placeholderText: "Text2Text" + implicitWidth: 300 + onEditingFinished: 
llamaModel.pushData(text) + } + + TextArea { + placeholderText: "Enter context" + background: Rectangle { + color: "lightgreen" + } + + implicitWidth: 300 + implicitHeight: 200 + onEditingFinished: llamaModel.rag = [text] + } } - Text { - id: result - text: rectangle.imageFile + + Rectangle { + width: 300 + height: 300 + color: "red" + Image { + anchors.fill: parent + anchors.margins: 2 + id: imageLocation + source: imageFile + fillMode: Image.PreserveAspectFit + } } } - TextField { - text: llamaPrompt - implicitWidth: 300 - onEditingFinished: llamaModel.prompt = text - } - TextArea { - placeholderText: "Enter context" - background: Rectangle { - color: "lightgreen" - } - implicitWidth: 300 - implicitHeight: 200 - onEditingFinished: llamaModel.rag = [text] - } - Image { - source: imageFile - } } FileDialog { @@ -122,4 +151,17 @@ Rectangle { type: (MultiModal.InputText | MultiModal.OutputAudio) inputs: [ llamaModel ] } + + MultiModal { + id: diffuser + type: (MultiModal.InputText | MultiModal.OutputImage) + model: 'IDKiro/sdxs-512-dreamshaper' + } + + Connections { + target: diffuser + function onGotResult(result) { + imageLocation.source = result + } + } } diff --git a/aimodel/diffuser_server/diffuser_server.py b/aimodel/diffuser_server/diffuser_server.py new file mode 100755 index 0000000..e5ab83f --- /dev/null +++ b/aimodel/diffuser_server/diffuser_server.py @@ -0,0 +1,82 @@ +#!/usr/bin/python3 + +# Copyright (C) 2025 The Qt Company Ltd. 
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only + +from http.server import BaseHTTPRequestHandler,HTTPServer +from os import curdir, sep +import torch +from diffusers import AutoPipelineForText2Image, LCMScheduler +import simplejson +import base64 +from io import BytesIO +from PIL import Image + + + +PORT_NUMBER = 8005 + +#This class will handles any incoming request from +#the browser +class myHandler(BaseHTTPRequestHandler): + model = '' + + + #Handler for the POST requests + def do_POST(self): + print("do_POST"); + if self.path=="/send": + self.data_string = self.rfile.read(int(self.headers['Content-Length'])) + print("data_string: " + self.data_string.decode()) + json_data = simplejson.loads(self.data_string) + #print("json_data: " + simplejson.dumps(json_data)) + print("MODEL: " + json_data["model"]) + if self.model != json_data["model"]: + self.pipeline = AutoPipelineForText2Image.from_pretrained( + json_data["model"], + #'black-forest-labs/FLUX.1-dev', + #'IDKiro/sdxs-512-dreamshaper', + #"stabilityai/stable-diffusion-2-1-base", + #'black-forest-labs/FLUX.1-schnell', + torch_dtype=torch.float32, + #variant="fp16", + use_safetensor=True).to('cpu') + self.model = json_data["model"] + + image = self.pipeline(json_data["prompt"], + height=512, + width=512, + guidance_scale=0.0, + target_size=(1024, 1024), + original_size=(4096, 4096), + num_inference_steps=1 + #max_sequence_length=256 + ).images[0] + + buffered = BytesIO() + image.save(buffered, format="PNG") + b64image = base64.b64encode(buffered.getvalue()) + print("Sending response") + self.send_response(200) + self.end_headers() + json_response = {} + json_response["image"] = b64image + self.wfile.write(simplejson.dumps(json_response).encode("utf-8")) + return + + +try: + #Create a web server and define the handler to manage the + #incoming request + server = HTTPServer(('', PORT_NUMBER), myHandler) + print('Started httpserver on port ' , PORT_NUMBER) + + + #Wait forever for incoming 
HTTP requests + server.serve_forever() + +except KeyboardInterrupt: + print('^C received, shutting down the web server') + server.socket.close() + + diff --git a/aimodel/plugins/CMakeLists.txt b/aimodel/plugins/CMakeLists.txt index 0cf7b15..095e554 100644 --- a/aimodel/plugins/CMakeLists.txt +++ b/aimodel/plugins/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(ollama) add_subdirectory(triton) add_subdirectory(yolo) add_subdirectory(piper-tts) +add_subdirectory(diffuser) diff --git a/aimodel/plugins/asr/CMakeLists.txt b/aimodel/plugins/asr/CMakeLists.txt index 9c19a88..4ecc6d9 100644 --- a/aimodel/plugins/asr/CMakeLists.txt +++ b/aimodel/plugins/asr/CMakeLists.txt @@ -5,7 +5,7 @@ qt_add_plugin(QtAsrModel qasraimodel_p.h qasraimodel_p.cpp ) set_target_properties(QtAsrModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) target_link_libraries(QtAsrModel PRIVATE diff --git a/aimodel/plugins/diffuser/CMakeLists.txt b/aimodel/plugins/diffuser/CMakeLists.txt new file mode 100644 index 0000000..2ee0f90 --- /dev/null +++ b/aimodel/plugins/diffuser/CMakeLists.txt @@ -0,0 +1,16 @@ +find_package(Qt6 REQUIRED COMPONENTS Core Network Quick) + +qt_add_plugin(QtDiffuserModel + CLASS_NAME QAiModelPluginFactory + qdiffuseraimodel_p.h qdiffuseraimodel_p.cpp + ) +set_target_properties(QtDiffuserModel PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" +) +target_link_libraries(QtDiffuserModel + PRIVATE + Qt6::Core + Qt6::Network + Qt6::Quick + QtAiModelPluginInterface) +include_directories(../..) 
diff --git a/aimodel/plugins/diffuser/plugin.json b/aimodel/plugins/diffuser/plugin.json new file mode 100644 index 0000000..e9a40a3 --- /dev/null +++ b/aimodel/plugins/diffuser/plugin.json @@ -0,0 +1,3 @@ +{ "name": "diffuserplugin", + "supportedTypes": ["InputText", "OutputImage"] +} diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp new file mode 100644 index 0000000..550fa67 --- /dev/null +++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2025 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only + +#include "qaimodel.h" +#include "qdiffuseraimodel_p.h" +#include <QJsonDocument> +#include <QJsonObject> +#include <QNetworkReply> +#include <QRestReply> +#include <QImage> + +QDiffuserAiModel::QDiffuserAiModel() + : AiModelPrivateInterface(), + m_manager(this) + , m_restApi(&m_manager) +{ +} + +void QDiffuserAiModel::pushData(QVariantList data) +{ + qDebug() << "QDiffuserAiModel::pushData(): data:" << data; + + if (data.isEmpty() || data.first().toString().isEmpty()) { + emit dataReceived(data.value(0).toString()); // value(0), not first(): this branch is also taken when data is empty + return; + } + + QNetworkRequest request(QUrl("http://localhost:8005/send")); + request.setRawHeader("Content-Type", "application/json"); + QJsonDocument doc; + QJsonObject obj = doc.object(); + obj["model"] = m_owner->model(); + obj["prompt"] = data.first().toString(); + doc.setObject(obj); + qDebug() << doc.toJson(); + + m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) { + if (auto json = reply.readJson()) { + emit dataReceived(QUrl( + QString("data:image/png;base64,") + json->object()["image"].toString().toUtf8())); + } + }); +} diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h new file mode 100644 index 0000000..d0adfc8 --- /dev/null +++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h @@ -0,0 +1,33 @@ +// Copyright (C) 2025 The Qt Company 
Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only + +#ifndef QDIFFUSERAIMODEL_P_H +#define QDIFFUSERAIMODEL_P_H + +#include <QObject> +#include <QRestAccessManager> +#include "qaimodelinterface_p.h" + +class QDiffuserAiModel : public AiModelPrivateInterface +{ + Q_OBJECT +public: + QDiffuserAiModel(); + void pushData(QVariantList data) override; + +private: + QNetworkAccessManager m_manager; + QRestAccessManager m_restApi; +}; + +class QDiffuserAiModelPlugin : public QAiModelPluginFactory +{ + Q_OBJECT + Q_PLUGIN_METADATA(IID "org.qt-project.Qt.QAiModelPluginFactory/1.0" FILE "plugin.json") + Q_INTERFACES(QAiModelPluginFactory) +public: + QDiffuserAiModelPlugin() {} + AiModelPrivateInterface* createInterface() { return new QDiffuserAiModel(); } +}; + +#endif // QDIFFUSERAIMODEL_P_H diff --git a/aimodel/plugins/ollama/CMakeLists.txt b/aimodel/plugins/ollama/CMakeLists.txt index 28a83e6..35122a7 100644 --- a/aimodel/plugins/ollama/CMakeLists.txt +++ b/aimodel/plugins/ollama/CMakeLists.txt @@ -6,7 +6,7 @@ qt_add_plugin(QtOllamaModel ) set_target_properties(QtOllamaModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) target_link_libraries(QtOllamaModel PRIVATE diff --git a/aimodel/plugins/piper-tts/CMakeLists.txt b/aimodel/plugins/piper-tts/CMakeLists.txt index 651ef5b..b00f4f3 100644 --- a/aimodel/plugins/piper-tts/CMakeLists.txt +++ b/aimodel/plugins/piper-tts/CMakeLists.txt @@ -6,7 +6,7 @@ qt_add_plugin(QtPiperModel ) set_target_properties(QtPiperModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) target_link_libraries(QtPiperModel PRIVATE diff --git a/aimodel/plugins/triton/CMakeLists.txt b/aimodel/plugins/triton/CMakeLists.txt index fb1ca94..69ecd1b 100644 --- a/aimodel/plugins/triton/CMakeLists.txt +++ b/aimodel/plugins/triton/CMakeLists.txt @@ -7,7 +7,7 @@ 
qt_add_plugin(QtTritonModel ) set_target_properties(QtTritonModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) include_directories(../..) target_link_libraries(QtTritonModel diff --git a/aimodel/plugins/tts/CMakeLists.txt b/aimodel/plugins/tts/CMakeLists.txt index 92dcf3b..208fa72 100644 --- a/aimodel/plugins/tts/CMakeLists.txt +++ b/aimodel/plugins/tts/CMakeLists.txt @@ -6,7 +6,7 @@ qt_add_plugin(QtTtsModel ) set_target_properties(QtTtsModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) target_link_libraries(QtTtsModel PRIVATE diff --git a/aimodel/plugins/yolo/CMakeLists.txt b/aimodel/plugins/yolo/CMakeLists.txt index 039fe5f..8720988 100644 --- a/aimodel/plugins/yolo/CMakeLists.txt +++ b/aimodel/plugins/yolo/CMakeLists.txt @@ -6,7 +6,7 @@ qt_add_plugin(QtYoloModel ) set_target_properties(QtYoloModel PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel" ) target_link_libraries(QtYoloModel PRIVATE diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp index 411691d..a47559c 100644 --- a/aimodel/qaimodel.cpp +++ b/aimodel/qaimodel.cpp @@ -44,8 +44,7 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType) m_interface.clear(); - QDir pluginsDir(QDir::currentPath() + "/qt-ai-inference-api/aimodel/plugins"); - //QDir pluginsDir(QDir::currentPath() + "/aimodel/plugins"); + QDir pluginsDir(QDir::currentPath() + "/plugins/aimodel"); qDebug() << "Plugins dir: " << pluginsDir.absolutePath(); const auto entryList = pluginsDir.entryList(QDir::Files); for (const QString &fileName : entryList) { @@ -165,7 +164,7 @@ void QAiModel::processCombinedData(QVariant data) void QAiModel::dataReceived(QVariant data) { - qDebug() << interface() << ":" << __func__ << "(): data:" << data; + qDebug() << 
interface() << ":" << __func__ << "(): data:" << data.typeName(); m_buffer = data; m_processing = false; -- GitLab From 39f348b6f60cbab753ef319dd3f42890e47438ab Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Wed, 9 Apr 2025 10:12:38 +0300 Subject: [PATCH 02/14] Improve test app --- Screen01.ui.qml | 179 ++++++++++++++++++++++++++++++------------------ 1 file changed, 114 insertions(+), 65 deletions(-) diff --git a/Screen01.ui.qml b/Screen01.ui.qml index b014ba0..a812ab4 100644 --- a/Screen01.ui.qml +++ b/Screen01.ui.qml @@ -22,14 +22,21 @@ Rectangle { RowLayout { ColumnLayout { RowLayout { - id: buttonRow + Rectangle { + width: 20 + height: 20 + color: speechToText.processing ? "red" : "green" + } + Button { + visible: recorder.recorderState !== MediaRecorder.RecordingState text: "Record audio" onClicked: { recorder.record() } } Button { + visible: recorder.recorderState === MediaRecorder.RecordingState text: "Stop audio recording" onClicked: { recorder.stop() @@ -41,35 +48,102 @@ Rectangle { } } } + CaptureSession { + audioInput: AudioInput {} + recorder: MediaRecorder { + id: recorder + mediaFormat { + fileFormat: MediaFormat.Wave + } + } + } + + MultiModal { + id: speechToText + type: (MultiModal.InputAudio | MultiModal.OutputText) + model: "turbo" + } } RowLayout { + Rectangle { + width: 20 + height: 20 + color: imageToText.processing ? "red" : "green" + } + Button { text: qsTr("Open image") onClicked: fileDialog.open() + + + FileDialog { + id: fileDialog + folder: StandardPaths.standardLocations(StandardPaths.PicturesLocation)[0] + nameFilters: ["*.*"] + onAccepted: { + imageFile = fileDialog.file + } + onRejected: {} + } } Text { id: result text: rectangle.imageFile } + MultiModal { + id: imageToText + type: (MultiModal.InputImage | MultiModal.OutputText) + model: "llava-phi3" // TODO: replace with Janus model from DeepSeek + prompt: "What is in the picture?" 
+ optional: true + buffered: true + } } - TextField { - placeholderText: "Text2Image" - implicitWidth: 300 - onEditingFinished: diffuser.pushData(text) + RowLayout { + Rectangle { + width: 20 + height: 20 + color: diffuser.processing ? "red" : "green" + } + TextField { + placeholderText: "Text2Image" + implicitWidth: 300 + onEditingFinished: diffuser.pushData(text) + } + MultiModal { + id: diffuser + type: (MultiModal.InputText | MultiModal.OutputImage) + model: 'IDKiro/sdxs-512-dreamshaper' + } + + Connections { + target: diffuser + function onGotResult(result) { + imageLocation.source = result + } + } } + TextField { text: llamaPrompt placeholderText: "Llama prompt" implicitWidth: 300 onEditingFinished: llamaModel.prompt = text } - TextField { - placeholderText: "Text2Text" - implicitWidth: 300 - onEditingFinished: llamaModel.pushData(text) + RowLayout { + Rectangle { + width: 20 + height: 20 + color: llamaModel.processing ? "red" : "green" + } + TextField { + placeholderText: "Text2Text" + implicitWidth: 300 + onEditingFinished: llamaModel.pushData(text) + } } TextArea { @@ -79,9 +153,39 @@ Rectangle { } implicitWidth: 300 - implicitHeight: 200 + implicitHeight: 100 onEditingFinished: llamaModel.rag = [text] } + MultiModal { + id: llamaModel + type: (MultiModal.InputText | MultiModal.OutputText) + model: "gemma3:4b" + prompt: llamaPrompt + inputs: [ imageToText, speechToText ] + } + + RowLayout { + Rectangle { + width: 20 + height: 20 + color: text2speech.processing ? 
"red" : "green" + } + TextArea { + implicitWidth: 300 + implicitHeight: 100 + background: Rectangle { + color: "lightblue" + } + + } + + MultiModal { + id: text2speech + type: (MultiModal.InputText | MultiModal.OutputAudio) + inputs: [ llamaModel ] + } + } + } Rectangle { @@ -103,65 +207,10 @@ Rectangle { } - FileDialog { - id: fileDialog - folder: StandardPaths.standardLocations(StandardPaths.PicturesLocation)[0] - nameFilters: ["*.*"] - onAccepted: { - imageFile = fileDialog.file - } - onRejected: {} - } - CaptureSession { - audioInput: AudioInput {} - recorder: MediaRecorder { - id: recorder - mediaFormat { - fileFormat: MediaFormat.Wave - } - } - } - MultiModal { - id: imageToText - type: (MultiModal.InputImage | MultiModal.OutputText) - model: "llava-phi3" // TODO: replace with Janus model from DeepSeek - prompt: "What is in the picture?" - optional: true - buffered: true - } - MultiModal { - id: speechToText - type: (MultiModal.InputAudio | MultiModal.OutputText) - model: "turbo" - } - MultiModal { - id: llamaModel - type: (MultiModal.InputText | MultiModal.OutputText) - model: "gemma3:4b" - prompt: llamaPrompt - inputs: [ imageToText, speechToText ] - } - - MultiModal { - id: text2speech - type: (MultiModal.InputText | MultiModal.OutputAudio) - inputs: [ llamaModel ] - } - MultiModal { - id: diffuser - type: (MultiModal.InputText | MultiModal.OutputImage) - model: 'IDKiro/sdxs-512-dreamshaper' - } - Connections { - target: diffuser - function onGotResult(result) { - imageLocation.source = result - } - } } -- GitLab From 57b4c302b86fc5093544edc066b00ffb71022477 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Thu, 10 Apr 2025 15:21:46 +0300 Subject: [PATCH 03/14] Move test app into tests directory --- CMakeLists.txt | 46 +---------------- tests/CMakeLists.txt | 2 + tests/app/App.qml | 25 ++++++++++ tests/app/CMakeLists copy.txt | 48 ++++++++++++++++++ tests/app/CMakeLists.txt | 49 +++++++++++++++++++ 
.../app/QtAiInferenceApi.qmlproject | 0 Screen01.ui.qml => tests/app/Screen01.ui.qml | 0 main.cpp => tests/app/main.cpp | 5 +- .../app/qtquickcontrols2.conf | 0 9 files changed, 130 insertions(+), 45 deletions(-) create mode 100644 tests/CMakeLists.txt create mode 100644 tests/app/App.qml create mode 100644 tests/app/CMakeLists copy.txt create mode 100644 tests/app/CMakeLists.txt rename QtAiInferenceApi.qmlproject => tests/app/QtAiInferenceApi.qmlproject (100%) rename Screen01.ui.qml => tests/app/Screen01.ui.qml (100%) rename main.cpp => tests/app/main.cpp (84%) rename qtquickcontrols2.conf => tests/app/qtquickcontrols2.conf (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6ba292..4a08cf1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,48 +1,6 @@ - cmake_minimum_required(VERSION 3.21.1) - -add_subdirectory(aimodel) - project(QtAiInferenceApi LANGUAGES CXX) -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake") - -set(CMAKE_AUTOMOC ON) -set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml) -set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY} - CACHE STRING "Import paths for Qt Creator's code model" - FORCE -) - -find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia) -qt_standard_project_setup(REQUIRES 6.8) -qt_add_executable(${CMAKE_PROJECT_NAME} - main.cpp -) - -qt_add_qml_module(${CMAKE_PROJECT_NAME} - URI qtaiinferenceapi - VERSION 1.0 - RESOURCES - qtquickcontrols2.conf - QML_FILES - App.qml - Screen01.ui.qml - ) - -target_link_libraries(${CMAKE_PROJECT_NAME} - PRIVATE - Qt6::Quick - Qt6::Multimedia - QtAiModelApi -) - - -include(GNUInstallDirs) -install(TARGETS ${CMAKE_PROJECT_NAME} - BUNDLE DESTINATION . 
- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) +add_subdirectory(aimodel) +add_subdirectory(tests) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..3f46259 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,2 @@ + +add_subdirectory(app) diff --git a/tests/app/App.qml b/tests/app/App.qml new file mode 100644 index 0000000..ab4337c --- /dev/null +++ b/tests/app/App.qml @@ -0,0 +1,25 @@ +// Copyright (C) 2025 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only + +import QtQuick 2.15 +import QtQuick.Controls 2.15 +import QtQuick.Window 2.15 + +ApplicationWindow { + id: mainWindow + visible: true + width: Screen.width / 3 + height: Screen.height / 3 + //width: Screen.width / 2 + //height: Screen.height / 2 + //flags: Qt.FramelessWindowHint | Qt.Window + //visibility: Window.FullScreen + color: "black" // Optional background color for the main window + + // @disable-check M300 + Screen01 { + anchors.fill: parent + } + +} + diff --git a/tests/app/CMakeLists copy.txt b/tests/app/CMakeLists copy.txt new file mode 100644 index 0000000..a6ba292 --- /dev/null +++ b/tests/app/CMakeLists copy.txt @@ -0,0 +1,48 @@ + +cmake_minimum_required(VERSION 3.21.1) + +add_subdirectory(aimodel) + +project(QtAiInferenceApi LANGUAGES CXX) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +set(CMAKE_AUTOMOC ON) +set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml) +set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY} + CACHE STRING "Import paths for Qt Creator's code model" + FORCE +) + +find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia) +qt_standard_project_setup(REQUIRES 6.8) + +qt_add_executable(${CMAKE_PROJECT_NAME} + main.cpp +) + +qt_add_qml_module(${CMAKE_PROJECT_NAME} + URI qtaiinferenceapi + VERSION 1.0 + RESOURCES + qtquickcontrols2.conf + QML_FILES + 
App.qml + Screen01.ui.qml + ) + +target_link_libraries(${CMAKE_PROJECT_NAME} + PRIVATE + Qt6::Quick + Qt6::Multimedia + QtAiModelApi +) + + +include(GNUInstallDirs) +install(TARGETS ${CMAKE_PROJECT_NAME} + BUNDLE DESTINATION . + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/tests/app/CMakeLists.txt b/tests/app/CMakeLists.txt new file mode 100644 index 0000000..0f51923 --- /dev/null +++ b/tests/app/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.21.1) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +set(CMAKE_AUTOMOC ON) +set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml) +set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY} + CACHE STRING "Import paths for Qt Creator's code model" + FORCE +) + + +find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia) +qt_standard_project_setup(REQUIRES 6.8) + +qt_add_executable(QtAiTestApp + main.cpp +) + +qt_add_qml_module(QtAiTestApp + URI qtaiinferenceapi + VERSION 1.0 + RESOURCES + qtquickcontrols2.conf + QML_FILES + App.qml + Screen01.ui.qml + ) + +target_link_libraries(QtAiTestApp + PRIVATE + Qt6::Quick + Qt6::Multimedia + QtAiModelApi +) + +set_target_properties(QtAiTestApp + PROPERTIES + QT_QML_ROOT_PATH ${QT_QML_OUTPUT_DIRECTORY} + QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY} + QT_RESOURCE_PREFIX ${QT_QML_OUTPUT_DIRECTORY}) + +include(GNUInstallDirs) +install(TARGETS QtAiTestApp + BUNDLE DESTINATION . 
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/QtAiInferenceApi.qmlproject b/tests/app/QtAiInferenceApi.qmlproject similarity index 100% rename from QtAiInferenceApi.qmlproject rename to tests/app/QtAiInferenceApi.qmlproject diff --git a/Screen01.ui.qml b/tests/app/Screen01.ui.qml similarity index 100% rename from Screen01.ui.qml rename to tests/app/Screen01.ui.qml diff --git a/main.cpp b/tests/app/main.cpp similarity index 84% rename from main.cpp rename to tests/app/main.cpp index 52e40fa..14d5c21 100644 --- a/main.cpp +++ b/tests/app/main.cpp @@ -20,9 +20,12 @@ int main(int argc, char *argv[]) }, Qt::QueuedConnection); engine.addImportPath("qml"); + engine.addImportPath("../../qml"); + engine.loadFromModule("qtaiinferenceapi", "App"); - qDebug() << "Standard path for pictures: " << QStandardPaths::standardLocations(QStandardPaths::PicturesLocation); + qDebug() << "Standard path for pictures: " << QStandardPaths::standardLocations(QStandardPaths::PicturesLocation) + << "QML import path" << engine.importPathList(); if (engine.rootObjects().isEmpty()) return -1; diff --git a/qtquickcontrols2.conf b/tests/app/qtquickcontrols2.conf similarity index 100% rename from qtquickcontrols2.conf rename to tests/app/qtquickcontrols2.conf -- GitLab From 1863cb7fe100f5a597a6376ab44d836386783978 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Thu, 10 Apr 2025 15:41:00 +0300 Subject: [PATCH 04/14] Fix backend plugin loading --- aimodel/qaimodel.cpp | 59 +++++++++++++++++++++++--------------------- tests/app/main.cpp | 3 ++- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp index a47559c..1a57681 100644 --- a/aimodel/qaimodel.cpp +++ b/aimodel/qaimodel.cpp @@ -8,6 +8,7 @@ #include <QDir> #include <QJsonArray> #include <QList> +#include <QCoreApplication> QAiModel::QAiModel() { @@ -44,35 +45,37 @@ void QAiModel::setType(const 
AiModelPrivateInterface::AiModelTypes &newType) m_interface.clear(); - QDir pluginsDir(QDir::currentPath() + "/plugins/aimodel"); - qDebug() << "Plugins dir: " << pluginsDir.absolutePath(); - const auto entryList = pluginsDir.entryList(QDir::Files); - for (const QString &fileName : entryList) { - qDebug() << "Loading " << fileName << "..."; - QPluginLoader loader(pluginsDir.absoluteFilePath(fileName)); - QJsonObject object{ loader.metaData().value("MetaData").toObject() }; - qDebug() << "Metadata for " << fileName << ": " << object; - if (!object.value("supportedTypes").isArray()) { - qDebug() << "Incorrect json format in" << loader.metaData() - << "for plugin:" << fileName; - continue; - } - auto flagArray = object.value("supportedTypes").toArray().toVariantList(); - auto pluginFlags = constructAiModelTypeFlags(flagArray); - - qDebug() << pluginFlags; - if (pluginFlags.testFlags(newType)) { - auto *instance = loader.instance(); - QAiModelPluginFactory *plugin = qobject_cast<QAiModelPluginFactory*>(instance); - if (plugin) { - qDebug() << plugin << "created"; - m_interface.reset(plugin->createInterface()); - m_interface->init(this); - break; - } else { - qDebug() << "Could not convert" << instance << "to AiModelPrivateInterface*"; + for (auto &&pluginDir : QCoreApplication::libraryPaths()) { + QDir aiModelPluginsDir(pluginDir + "/aimodel"); + qDebug() << "Plugins dir: " << aiModelPluginsDir.absolutePath(); + const auto entryList = aiModelPluginsDir.entryList(QDir::Files); + for (const QString &fileName : entryList) { + qDebug() << "Loading " << fileName << "..."; + QPluginLoader loader(aiModelPluginsDir.absoluteFilePath(fileName)); + QJsonObject object{ loader.metaData().value("MetaData").toObject() }; + qDebug() << "Metadata for " << fileName << ": " << object; + if (!object.value("supportedTypes").isArray()) { + qDebug() << "Incorrect json format in" << loader.metaData() + << "for plugin:" << fileName; + continue; } - } + auto flagArray = 
object.value("supportedTypes").toArray().toVariantList(); + auto pluginFlags = constructAiModelTypeFlags(flagArray); + + qDebug() << pluginFlags; + if (pluginFlags.testFlags(newType)) { + auto *instance = loader.instance(); + QAiModelPluginFactory *plugin = qobject_cast<QAiModelPluginFactory*>(instance); + if (plugin) { + qDebug() << plugin << "created"; + m_interface.reset(plugin->createInterface()); + m_interface->init(this); + break; + } else { + qDebug() << "Could not convert" << instance << "to AiModelPrivateInterface*"; + } + } + } } diff --git a/tests/app/main.cpp b/tests/app/main.cpp index 14d5c21..6edfc01 100644 --- a/tests/app/main.cpp +++ b/tests/app/main.cpp @@ -20,7 +20,8 @@ int main(int argc, char *argv[]) }, Qt::QueuedConnection); engine.addImportPath("qml"); - engine.addImportPath("../../qml"); + engine.addImportPath("../../qml"); // For QML plugins + QCoreApplication::addLibraryPath("../../plugins"); // For backend plugins engine.loadFromModule("qtaiinferenceapi", "App"); -- GitLab From c25e4b47bf0c2789ed8b863cfe44e6ca79a44d2b Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Fri, 11 Apr 2025 16:37:09 +0300 Subject: [PATCH 05/14] Add seed support and change rag to documents --- aimodel/chromadb.cpp | 4 +- aimodel/plugins/asr/qasraimodel_p.cpp | 2 +- aimodel/plugins/asr/qasraimodel_p.h | 2 +- .../plugins/diffuser/qdiffuseraimodel_p.cpp | 2 +- aimodel/plugins/diffuser/qdiffuseraimodel_p.h | 2 +- aimodel/plugins/ollama/qllmaimodel_p.cpp | 28 ++++++++++---- aimodel/plugins/ollama/qllmaimodel_p.h | 2 +- aimodel/plugins/piper-tts/qpiperaimodel_p.cpp | 2 +- aimodel/plugins/piper-tts/qpiperaimodel_p.h | 2 +- aimodel/plugins/triton/qtritonmodel_p.cpp | 2 +- aimodel/plugins/triton/qtritonmodel_p.h | 2 +- aimodel/plugins/yolo/qyoloaimodel.cpp | 2 +- aimodel/plugins/yolo/qyoloaimodel.h | 2 +- aimodel/qaimodel.cpp | 37 ++++++++++++------- aimodel/qaimodel.h | 18 ++++++--- aimodel/qaimodelinterface_p.h | 2 +- tests/app/Screen01.ui.qml 
| 2 +- 17 files changed, 72 insertions(+), 41 deletions(-) diff --git a/aimodel/chromadb.cpp b/aimodel/chromadb.cpp index 22ee94f..294e383 100644 --- a/aimodel/chromadb.cpp +++ b/aimodel/chromadb.cpp @@ -25,7 +25,7 @@ void ChromaDb::sendRequest( if (reply.isHttpStatusSuccess()) { lambda(json ? json.value() : QJsonDocument(), reply.httpStatus()); } else { - qDebug() << "JSON decode error:" << request.url() << "HTTP status:" << reply.httpStatus(); + qDebug() << request.url() << "responded with error:" << reply.errorString(); setError(true); } }); @@ -74,7 +74,7 @@ void ChromaDb::reset() connect(true); } } else { - qDebug() << url << "deleted"; + qDebug() << url << "responded with error:" << reply.errorString(); } }); diff --git a/aimodel/plugins/asr/qasraimodel_p.cpp b/aimodel/plugins/asr/qasraimodel_p.cpp index a5f835c..d2d5a24 100644 --- a/aimodel/plugins/asr/qasraimodel_p.cpp +++ b/aimodel/plugins/asr/qasraimodel_p.cpp @@ -15,7 +15,7 @@ QAsrAiModel::QAsrAiModel() { } -void QAsrAiModel::pushData(QVariantList data) +void QAsrAiModel::pushData(QVariantList data, int seed) { qDebug() << "QAsrAiModel::pushData(): data:" << data; diff --git a/aimodel/plugins/asr/qasraimodel_p.h b/aimodel/plugins/asr/qasraimodel_p.h index 44a5708..dd53072 100644 --- a/aimodel/plugins/asr/qasraimodel_p.h +++ b/aimodel/plugins/asr/qasraimodel_p.h @@ -13,7 +13,7 @@ class QAsrAiModel : public AiModelPrivateInterface Q_OBJECT public: QAsrAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: QNetworkAccessManager m_manager; diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp index 550fa67..49120f0 100644 --- a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp +++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp @@ -16,7 +16,7 @@ QDiffuserAiModel::QDiffuserAiModel() { } -void QDiffuserAiModel::pushData(QVariantList data) +void QDiffuserAiModel::pushData(QVariantList data, 
int seed) { qDebug() << "QDiffuserAiModel::pushData(): data:" << data; diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h index d0adfc8..4187de4 100644 --- a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h +++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h @@ -13,7 +13,7 @@ class QDiffuserAiModel : public AiModelPrivateInterface Q_OBJECT public: QDiffuserAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: QNetworkAccessManager m_manager; diff --git a/aimodel/plugins/ollama/qllmaimodel_p.cpp b/aimodel/plugins/ollama/qllmaimodel_p.cpp index f1a5726..b6caad4 100644 --- a/aimodel/plugins/ollama/qllmaimodel_p.cpp +++ b/aimodel/plugins/ollama/qllmaimodel_p.cpp @@ -6,6 +6,7 @@ #include <QJsonArray> #include <QJsonDocument> #include <QJsonObject> +#include <QJsonValue> #include <QNetworkReply> #include <QNetworkRequest> #include <QRestReply> @@ -35,7 +36,9 @@ static inline void sendRequest( QNetworkRequest request(url); request.setRawHeader("Content-Type", "application/json"); restApi->post(request, QJsonDocument(object).toJson(), owner, [=](QRestReply &reply) { - if (std::optional<QJsonDocument> json = reply.readJson()) { + if (!reply.isHttpStatusSuccess()) { + qDebug() << request.url() << "responded with error" << reply.errorString() << " and status:" << reply.httpStatus(); + } else if (std::optional<QJsonDocument> json = reply.readJson()) { lambda(json.value()); } else { qDebug() << "Error. 
No data received from" << request.url() << reply; @@ -43,7 +46,7 @@ static inline void sendRequest( }); } -void QLlmAiModel::pushData(QVariantList data) +void QLlmAiModel::pushData(QVariantList data, int seed) { QString query = m_owner->prompt(); QJsonArray images; @@ -63,13 +66,16 @@ void QLlmAiModel::pushData(QVariantList data) } qDebug() << this << "[\"prompt\"]: " << query << "[images]" << images.count(); - - - auto promptResponseReceived = [=](auto json) { emit dataReceived(json.object()["response"].toString().toUtf8()); }; + QPair<QString, QJsonValue> options; + if (seed != 0) { + options = {"options", QJsonObject({{"seed", {seed}}})}; + } + + if (m_chromadb.connected()) { connect(&m_chromadb, &ChromaDb::embeddingsFound, this, [=](auto embeddings) { QString documents; @@ -83,14 +89,19 @@ void QLlmAiModel::pushData(QVariantList data) qDebug() << q; sendRequest(&m_restApi, m_ollama_url_base + "generate", - QJsonObject({{"model", m_owner->model()}, {"prompt", q}, {"stream", false}}), + QJsonObject({{"model", m_owner->model()}, + {"prompt", q}, + {"stream", false}, + options}), this, promptResponseReceived); }, Qt::SingleShotConnection); sendRequest(&m_restApi, m_ollama_url_base + "embed", - QJsonObject({{"model", m_owner->model()}, {"input", query}}), + QJsonObject({{"model", m_owner->model()}, + {"input", query}, + options}), this, [this](auto json) { m_chromadb.fetchEmbeddings(json.object()["embeddings"].toArray().toVariantList()); @@ -101,7 +112,8 @@ void QLlmAiModel::pushData(QVariantList data) QJsonObject({{"model", m_owner->model()}, {"prompt", query}, {"stream", false}, - {"images", images}}), + {"images", images}, + options}), this, promptResponseReceived); } diff --git a/aimodel/plugins/ollama/qllmaimodel_p.h b/aimodel/plugins/ollama/qllmaimodel_p.h index e70180f..fc62d74 100644 --- a/aimodel/plugins/ollama/qllmaimodel_p.h +++ b/aimodel/plugins/ollama/qllmaimodel_p.h @@ -14,7 +14,7 @@ class QLlmAiModel : public AiModelPrivateInterface Q_OBJECT public: 
QLlmAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; void setRag(QVariantList data) override; private: diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp index 107f800..63d43f3 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp @@ -17,7 +17,7 @@ QPiperAiModel::QPiperAiModel() m_player.setAudioOutput(&m_audioOutput); } -void QPiperAiModel::pushData(QVariantList data) +void QPiperAiModel::pushData(QVariantList data, int seed) { qDebug() << "QPiperAiModel::pushData(): data:" << data; diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h index 5515abb..a220ad2 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h @@ -15,7 +15,7 @@ class QPiperAiModel : public AiModelPrivateInterface Q_OBJECT public: QPiperAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: QNetworkAccessManager m_manager; diff --git a/aimodel/plugins/triton/qtritonmodel_p.cpp b/aimodel/plugins/triton/qtritonmodel_p.cpp index bd620bb..741e58c 100644 --- a/aimodel/plugins/triton/qtritonmodel_p.cpp +++ b/aimodel/plugins/triton/qtritonmodel_p.cpp @@ -151,7 +151,7 @@ Preprocess( // KServe (Open Inference Protocol API) -void QTritonModel::pushData(QVariantList data) +void QTritonModel::pushData(QVariantList data, int seed) { // Load the specified image. 
std::ifstream file(data.first().toByteArray().toStdString()); diff --git a/aimodel/plugins/triton/qtritonmodel_p.h b/aimodel/plugins/triton/qtritonmodel_p.h index 04f0127..ae5c16c 100644 --- a/aimodel/plugins/triton/qtritonmodel_p.h +++ b/aimodel/plugins/triton/qtritonmodel_p.h @@ -13,7 +13,7 @@ class QTritonModel : public AiModelPrivateInterface Q_OBJECT public: QTritonModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: diff --git a/aimodel/plugins/yolo/qyoloaimodel.cpp b/aimodel/plugins/yolo/qyoloaimodel.cpp index 59b7224..b367b98 100644 --- a/aimodel/plugins/yolo/qyoloaimodel.cpp +++ b/aimodel/plugins/yolo/qyoloaimodel.cpp @@ -19,7 +19,7 @@ QYoloAiModel::QYoloAiModel() } -void QYoloAiModel::pushData(QVariantList data) +void QYoloAiModel::pushData(QVariantList data, int seed) { QImage image; if (data.first().canConvert<QImage>()) { diff --git a/aimodel/plugins/yolo/qyoloaimodel.h b/aimodel/plugins/yolo/qyoloaimodel.h index e32c3f8..92f63fe 100644 --- a/aimodel/plugins/yolo/qyoloaimodel.h +++ b/aimodel/plugins/yolo/qyoloaimodel.h @@ -13,7 +13,7 @@ class QYoloAiModel : public AiModelPrivateInterface Q_OBJECT public: QYoloAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: QNetworkAccessManager m_manager; diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp index 1a57681..89b86cc 100644 --- a/aimodel/qaimodel.cpp +++ b/aimodel/qaimodel.cpp @@ -12,7 +12,6 @@ QAiModel::QAiModel() { - qRegisterMetaType<AiModelPrivateInterface::AiModelTypes>(); } AiModelPrivateInterface::AiModelTypes QAiModel::type() const @@ -43,7 +42,6 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType) for (auto *plugin : staticPlugins) qDebug() << "Static plugin: " << plugin; - m_interface.clear(); for (auto &&pluginDir : QCoreApplication::libraryPaths()) { QDir aiModelPluginsDir(pluginDir + "/aimodel"); @@ -165,6 +163,19 @@ void 
QAiModel::processCombinedData(QVariant data) } } +int QAiModel::seed() const +{ + return m_seed; +} + +void QAiModel::setSeed(int newSeed) +{ + if (m_seed == newSeed) + return; + m_seed = newSeed; + emit seedChanged(); +} + void QAiModel::dataReceived(QVariant data) { qDebug() << interface() << ":" << __func__ << "(): data:" << data.typeName(); @@ -189,30 +200,30 @@ void QAiModel::pushData(QVariant data) m_buffer.clear(); m_processing = true; emit processingChanged(); + qDebug() << data.typeName() << data; if (QByteArrayView(data.typeName()) == "QVariantList") - interface()->pushData(data.value<QVariantList>()); + interface()->pushData(data.value<QVariantList>(), m_seed); else - interface()->pushData({data}); + interface()->pushData({data}, m_seed); } } - -QVariantList QAiModel::rag() const +QVariantList QAiModel::documents() const { - return m_rag; + return m_documents; } -void QAiModel::setRag(const QVariantList &newRag) +void QAiModel::setDocuments(const QVariantList &newDocuments) { - if (m_rag == newRag) + if (m_documents == newDocuments) return; - m_rag = newRag; - qDebug() << newRag << m_output; + m_documents = newDocuments; + qDebug() << newDocuments << m_output; if (!m_interface.isNull()) { - m_interface->setRag(newRag); + m_interface->setRag(newDocuments); } - emit ragChanged(); + emit documentsChanged(); } diff --git a/aimodel/qaimodel.h b/aimodel/qaimodel.h index 620bbc7..4df08b2 100644 --- a/aimodel/qaimodel.h +++ b/aimodel/qaimodel.h @@ -23,7 +23,7 @@ class QAiModel : public QObject Q_PROPERTY( QString model READ model WRITE setModel NOTIFY modelChanged FINAL) Q_PROPERTY( - QVariantList rag READ rag WRITE setRag NOTIFY ragChanged FINAL) + QVariantList documents READ documents WRITE setDocuments NOTIFY documentsChanged FINAL) Q_PROPERTY( QVector<QAiModel*> inputs READ inputs WRITE setInputs NOTIFY inputsChanged FINAL) Q_PROPERTY( @@ -32,6 +32,8 @@ class QAiModel : public QObject bool buffered READ buffered WRITE setBuffered NOTIFY bufferedChanged 
FINAL) Q_PROPERTY( bool optional READ optional WRITE setOptional NOTIFY optionalChanged FINAL) + Q_PROPERTY( + int seed READ seed WRITE setSeed NOTIFY seedChanged FINAL) public: @@ -51,8 +53,8 @@ public: QVector<QAiModel*> inputs() const; void setInputs(QVector<QAiModel*>newInputs); - QVariantList rag() const; - void setRag(const QVariantList &newRag); + QVariantList documents() const; + void setDocuments(const QVariantList &newDocuments); Q_INVOKABLE void pushData(QVariant data); Q_INVOKABLE void clearBuffer(); @@ -66,7 +68,7 @@ signals: void inputsChanged(); - void ragChanged(); + void documentsChanged(); void processingChanged(); @@ -76,6 +78,8 @@ signals: void optionalChanged(); + void seedChanged(); + private Q_SLOTS: void dataReceived(QVariant data); @@ -88,6 +92,9 @@ public: bool optional() const; void setOptional(bool newOptional); + int seed() const; + void setSeed(int newSeed); + private: void processCombinedData(QVariant data); @@ -98,9 +105,10 @@ private: QString m_inputModel{}; QAiModel* m_output{nullptr}; QSharedPointer<AiModelPrivateInterface> m_interface; - QVariantList m_rag; + QVariantList m_documents; AiModelPrivateInterface::AiModelTypes m_type; QVariant m_buffer; // TODO: replace with QVariant + int m_seed{0}; bool m_processing {false}; bool m_buffered {false}; bool m_optional {false}; diff --git a/aimodel/qaimodelinterface_p.h b/aimodel/qaimodelinterface_p.h index 177cc11..d8144bb 100644 --- a/aimodel/qaimodelinterface_p.h +++ b/aimodel/qaimodelinterface_p.h @@ -33,7 +33,7 @@ public: void init(QAiModel *owner) { m_owner = owner;} virtual ~AiModelPrivateInterface() {} - virtual void pushData(QVariantList data) = 0; + virtual void pushData(QVariantList data, int seed) = 0; virtual void setRag(QVariantList data) {} diff --git a/tests/app/Screen01.ui.qml b/tests/app/Screen01.ui.qml index a812ab4..511cdde 100644 --- a/tests/app/Screen01.ui.qml +++ b/tests/app/Screen01.ui.qml @@ -154,7 +154,7 @@ Rectangle { implicitWidth: 300 implicitHeight: 100 - 
onEditingFinished: llamaModel.rag = [text] + onEditingFinished: llamaModel.documents = [text] } MultiModal { id: llamaModel -- GitLab From 6ddbe96e465bcb49ac8a40ce844e3ce62af9eb37 Mon Sep 17 00:00:00 2001 From: Alex Bu <alex.bu@qt.io> Date: Tue, 15 Apr 2025 14:59:16 +0300 Subject: [PATCH 06/14] piper tts: use raw data rather than saving to temp wav file add MIT license to piper server. add qt tts model back to readme. --- README.md | 1 + aimodel/plugins/piper-tts/qpiperaimodel_p.cpp | 31 ++++++++------- aimodel/plugins/piper-tts/qpiperaimodel_p.h | 16 ++++---- aimodel/tts_server/piper_server.py | 38 +++++++++++++------ 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 8820be2..a54fd99 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ When you declare a model in your code, Qt will infer from the given input and ou | Speech | Text | QtAsrModel | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text | | Image | Json | QtTritonModel | Uses Triton to load a model for object detection from images | | Image | Json | QtYoloModel | Uses a YOLO model for object detection from images | +| Text | Speech | QtTtsModel | Uses QtTextToSpeech (QtSpeech) to convert text into speech | | Text | Speech | QtPiperModel | Uses Piper TTS model to convert text into speech | Note, the Qt backends expect the underlying backend implementation (ollama, Whisper...) to be running, and will not take care of starting them up for you. You need to start them yourself, e.g. in the case of QtOllamaModel, loading the intended model to ollama's memory by running: diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp index 63d43f3..b773803 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp @@ -1,23 +1,25 @@ -// Copyright (C) 2025 The Qt Company Ltd. 
-// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only - #include "qpiperaimodel_p.h" #include "qaimodel.h" #include <QJsonDocument> #include <QJsonObject> #include <QNetworkReply> #include <QRestReply> -#include <QFile> +#include <QMediaDevices> QPiperAiModel::QPiperAiModel() : AiModelPrivateInterface() , m_manager(this) , m_restApi(&m_manager) { - m_player.setAudioOutput(&m_audioOutput); + m_audioFormat.setSampleRate(22050); + m_audioFormat.setChannelCount(1); + m_audioFormat.setSampleFormat(QAudioFormat::Int16); + + m_audioSink.reset(new QAudioSink(QMediaDevices::defaultAudioOutput(),m_audioFormat)); + m_audioBuffer.reset(new QBuffer); } -void QPiperAiModel::pushData(QVariantList data, int seed) +void QPiperAiModel::pushData(QVariantList data) { qDebug() << "QPiperAiModel::pushData(): data:" << data; @@ -34,17 +36,14 @@ void QPiperAiModel::pushData(QVariantList data, int seed) doc.setObject(obj); m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) { if (auto json = reply.readJson()) { - //qDebug() << "[\"response\"]=" << json->object()["response"].toString(); - //emit dataReceived(json->object()["response"].toString().toUtf8()); - QFile file("test.wav"); - file.open(QIODevice::WriteOnly); - file.write(QByteArray::fromBase64(json->object()["response"].toString().toUtf8())); - file.close(); + m_audioSink->reset(); + m_audioBuffer->close(); + m_audioBuffer->setData(QByteArray::fromBase64(json->object()["response"].toString().toUtf8())); + m_audioBuffer->open(QIODevice::ReadOnly); - m_player.stop(); - m_player.setSource(QUrl::fromLocalFile("test.wav")); - m_player.play(); + // Start playback + m_audioSink->start(m_audioBuffer.data()); } }); - + emit dataReceived(data.first().toByteArray()); } diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h index a220ad2..8431333 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h @@ -1,13 
+1,12 @@ -// Copyright (C) 2025 The Qt Company Ltd. -// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only - #ifndef QTEXT2SPEEHCAIMODEL_P_H #define QTEXT2SPEEHCAIMODEL_P_H #include <QObject> #include <QRestAccessManager> -#include <QMediaPlayer> -#include <QAudioOutput> +#include <QAudioSink> +#include <QAudioFormat> +#include <QScopedPointer> +#include <QBuffer> #include "qaimodelinterface_p.h" class QPiperAiModel : public AiModelPrivateInterface @@ -15,13 +14,14 @@ class QPiperAiModel : public AiModelPrivateInterface Q_OBJECT public: QPiperAiModel(); - void pushData(QVariantList data, int seed) override; + void pushData(QVariantList data) override; private: QNetworkAccessManager m_manager; QRestAccessManager m_restApi; - QMediaPlayer m_player; - QAudioOutput m_audioOutput; + QScopedPointer<QBuffer> m_audioBuffer; + QScopedPointer<QAudioSink> m_audioSink; + QAudioFormat m_audioFormat; }; diff --git a/aimodel/tts_server/piper_server.py b/aimodel/tts_server/piper_server.py index 97328a6..53d9597 100644 --- a/aimodel/tts_server/piper_server.py +++ b/aimodel/tts_server/piper_server.py @@ -1,7 +1,25 @@ #!/usr/bin/env python3 - -# Copyright (C) 2025 The Qt Company Ltd. -# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only +# +# MIT License +# +# Copyright (c) 2022 Michael Hansen +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in the +# Software without restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import argparse import io @@ -113,15 +131,11 @@ def main() -> None: _LOGGER.debug("get text: %s", text["text"]) json_response = {} - with io.BytesIO() as wav_io: - with wave.open(wav_io, "wb") as wav_file: - voice.synthesize(text["text"], wav_file, **synthesize_args) - - json_response["response"] = base64.b64encode(wav_io.getvalue()).decode("utf-8") - #audio_str = ""; - #for audio_bytes in voice.synthesize_stream_raw(text["text"], **synthesize_args): - # audio_str += (base64.b64encode(audio_bytes).decode("utf-8")) - #json_response["response"] = audio_str + audio_str = b''; + for audio_bytes in voice.synthesize_stream_raw(text["text"], **synthesize_args): + _LOGGER.debug( len(audio_bytes) ) + audio_str = audio_str + audio_bytes + json_response["response"] = base64.b64encode(audio_str).decode("utf-8") return jsonify(json_response) app.run(host=args.host, port=args.port) -- GitLab From 673a474c5004a638c4d3882e08558e02aaf146bf Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Tue, 15 Apr 2025 17:57:27 +0300 Subject: [PATCH 07/14] Fix application startup on windows --- aimodel/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aimodel/CMakeLists.txt b/aimodel/CMakeLists.txt index 605edc9..be119e7 100644 --- a/aimodel/CMakeLists.txt +++ b/aimodel/CMakeLists.txt @@ -19,7 +19,7 @@ add_subdirectory(plugins) find_package(Qt6 6.8 REQUIRED COMPONENTS Core Qml Quick Network) qt_standard_project_setup(REQUIRES 6.8) 
-qt_add_library(QtAiModelPluginInterface +qt_add_library(QtAiModelPluginInterface SHARED qaimodelinterface_p.h chromadb.h chromadb.cpp ) @@ -29,15 +29,13 @@ target_link_libraries(QtAiModelPluginInterface Qt6::Network ) -qt_add_qml_module(QtAiModelApi +qt_add_qml_module(QtAiModelApi STATIC URI qtaimodel VERSION 1.0 - SHARED SOURCES qaimodel.h qaimodel.cpp ) -qt_import_qml_plugins(QtAiModelApi) target_link_libraries(QtAiModelApi PRIVATE -- GitLab From f7c6bd40c1c9baf78838db5b511ff39221d4b2fe Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Tue, 15 Apr 2025 17:59:03 +0300 Subject: [PATCH 08/14] Fix piper plugin compilation --- aimodel/plugins/piper-tts/qpiperaimodel_p.cpp | 3 ++- aimodel/plugins/piper-tts/qpiperaimodel_p.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp index b773803..1970554 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp @@ -19,7 +19,8 @@ QPiperAiModel::QPiperAiModel() m_audioBuffer.reset(new QBuffer); } -void QPiperAiModel::pushData(QVariantList data) +void QPiperAiModel::pushData( + QVariantList data, int seed) { qDebug() << "QPiperAiModel::pushData(): data:" << data; diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h index 8431333..c1b266f 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h @@ -14,7 +14,7 @@ class QPiperAiModel : public AiModelPrivateInterface Q_OBJECT public: QPiperAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; private: QNetworkAccessManager m_manager; -- GitLab From c527a3ae478124e1ae8b9b26a4ef37ef3dd80e0c Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Wed, 16 Apr 2025 12:02:07 +0300 Subject: [PATCH 09/14] Fix buggy debug print 
--- aimodel/plugins/piper-tts/qpiperaimodel_p.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h index c1b266f..bd60c6a 100644 --- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h +++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h @@ -24,8 +24,7 @@ private: QAudioFormat m_audioFormat; }; - -class QLlmAiModelPlugin : public QAiModelPluginFactory +class QPiperTtsAiModelPlugin : public QAiModelPluginFactory { Q_OBJECT Q_PLUGIN_METADATA(IID "org.qt-project.Qt.QAiModelPluginFactory/1.0" FILE "plugin.json") -- GitLab From c45182695ce16e9740f063504a91ca166cec8bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tinja=20Paavosepp=C3=A4?= <tinja.paavoseppa@qt.io> Date: Thu, 24 Apr 2025 08:33:41 +0300 Subject: [PATCH 10/14] Add the Qt export macro to the C++ library --- aimodel/CMakeLists.txt | 5 ++++- aimodel/chromadb.h | 3 ++- aimodel/qaimodelinterface_p.h | 5 +++-- aimodel/qtaiapiexports_p.h | 7 +++++++ 4 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 aimodel/qtaiapiexports_p.h diff --git a/aimodel/CMakeLists.txt b/aimodel/CMakeLists.txt index be119e7..5733bb4 100644 --- a/aimodel/CMakeLists.txt +++ b/aimodel/CMakeLists.txt @@ -20,9 +20,12 @@ find_package(Qt6 6.8 REQUIRED COMPONENTS Core Qml Quick Network) qt_standard_project_setup(REQUIRES 6.8) qt_add_library(QtAiModelPluginInterface SHARED - qaimodelinterface_p.h + qaimodelinterface_p.h qtaiapiexports_p.h chromadb.h chromadb.cpp ) + +target_compile_definitions(QtAiModelPluginInterface PRIVATE QTAIAPI_LIBRARY) + target_link_libraries(QtAiModelPluginInterface PRIVATE Qt6::Core diff --git a/aimodel/chromadb.h b/aimodel/chromadb.h index 11b2906..efee875 100644 --- a/aimodel/chromadb.h +++ b/aimodel/chromadb.h @@ -6,8 +6,9 @@ #include <QObject> #include <QRestAccessManager> +#include "qtaiapiexports_p.h" -class ChromaDb : public QObject +class QTAIAPI_EXPORT ChromaDb : public QObject { Q_OBJECT Q_PROPERTY(bool 
connected READ connected WRITE connect NOTIFY connectedChanged FINAL) diff --git a/aimodel/qaimodelinterface_p.h b/aimodel/qaimodelinterface_p.h index d8144bb..4d7742e 100644 --- a/aimodel/qaimodelinterface_p.h +++ b/aimodel/qaimodelinterface_p.h @@ -6,10 +6,11 @@ #include <QObject> #include <QVariant> +#include "qtaiapiexports_p.h" class QAiModel; -class AiModelPrivateInterface : public QObject +class QTAIAPI_EXPORT AiModelPrivateInterface : public QObject { Q_OBJECT public: @@ -47,7 +48,7 @@ public: }; Q_DECLARE_OPERATORS_FOR_FLAGS(AiModelPrivateInterface::AiModelTypes) -class QAiModelPluginFactory : public QObject +class QTAIAPI_EXPORT QAiModelPluginFactory : public QObject { Q_OBJECT public: diff --git a/aimodel/qtaiapiexports_p.h b/aimodel/qtaiapiexports_p.h new file mode 100644 index 0000000..7b94d78 --- /dev/null +++ b/aimodel/qtaiapiexports_p.h @@ -0,0 +1,7 @@ +#include <QtCore/QtGlobal> + +#if defined(QTAIAPI_LIBRARY) +# define QTAIAPI_EXPORT Q_DECL_EXPORT +#else +# define QTAIAPI_EXPORT Q_DECL_IMPORT +#endif -- GitLab From 2aff06320d00df058b2fd3205bbe94110ba91eb2 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Fri, 2 May 2025 06:56:08 +0000 Subject: [PATCH 11/14] Documentation up-to-date --- README.md | 58 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index a54fd99..a3bbef8 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,15 @@ This API is in proof-of-concept stage and under active development, and not yet When you declare a model in your code, Qt will infer from the given input and output type what backend it will set up for the model. The backends are implemented as QPlugins. 
Currently, the backends are: -| Input type | Output type | Qt backend | Description | -|------------|-------------|---------------|-------------------------------------------------------------------------------| -| Text\|Image| Text | QtOllamaModel | Uses ollama to load LLM models and communicate to them with ollama's REST API | -| Speech | Text | QtAsrModel | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text | -| Image | Json | QtTritonModel | Uses Triton to load a model for object detection from images | -| Image | Json | QtYoloModel | Uses a YOLO model for object detection from images | -| Text | Speech | QtTtsModel | Uses QtTextToSpeech (QtSpeech) to convert text into speech | -| Text | Speech | QtPiperModel | Uses Piper TTS model to convert text into speech | +| Input type | Output type | Qt backend | Description | +|------------|-------------|-----------------|-------------------------------------------------------------------------------| +| Text\|Image| Text | QtOllamaModel | Uses ollama to load LLM models and communicate to them with ollama's REST API | +| Speech | Text | QtAsrModel | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text | +| Image | Json | QtTritonModel | Uses Triton to load a model for object detection from images | +| Image | Json | QtYoloModel | Uses a YOLO model for object detection from images | +| Text | Speech | QtTtsModel | Uses QtTextToSpeech (QtSpeech) to convert text into speech | +| Text | Speech | QtPiperModel | Uses Piper TTS model to convert text into speech | +| Text | Image | QtDiffuserModel | Uses Diffusers to convert text into images | Note, the Qt backends expect the underlying backend implementation (ollama, Whisper...) to be running, and will not take care of starting them up for you. You need to start them yourself, e.g. 
in the case of QtOllamaModel, loading the intended model to ollama's memory by running: ``` @@ -55,10 +56,10 @@ A combination of AiModelType flags to tell what type of model to instantiate. Po | InputImage | 0x00008 | The model takes image as input | | InputJson | 0x00010 | The model takes JSON as input | | OutputText | 0x00100 | The model outputs text | -| OutputAudio | 0x00200 |The model outputs speech | -| OutputVideo | 0x00400 |The model outputs video | -| OutputImage | 0x00800 |The model outputs image | -| OutputJson | 0x01000 |The model outputs JSON | +| OutputAudio | 0x00200 | The model outputs speech | +| OutputVideo | 0x00400 | The model outputs video | +| OutputImage | 0x00800 | The model outputs image | +| OutputJson | 0x01000 | The model outputs JSON | For supported input-output combinations, see the table under "How it works" section. @@ -125,7 +126,7 @@ MultiModal { | Read method: | QString model() | | Notifier signal: | void modelChanged() | -**QVariantList rag** +**QVariantList documents** Retrieval-Augmented Generation data to use for the model, if it supports it. RAG supports currently only chromadb, which should be running on background. @@ -138,7 +139,7 @@ import qtaimodel type: (MultiModal.InputText | MultiModal.OutputText) model: "llama3.2" prompt: "Which item has best armor bonus?" 
- rag: ["Cloth of Authority | Armour Class +1", + documents: ["Cloth of Authority | Armour Class +1", "Drunken Cloth | Constitution +2 (up to 20)", "Icebite Robe | Resistance to Damage Types: Cold damage.", "Obsidian Laced Robe | Grants Resistance to Damage Types: Fire damage.", @@ -149,9 +150,32 @@ import qtaimodel | | | |------------------|-----------------------------------------------------| -| Write method: | void setRag(QByteArray) | -| Read method: | QByteArray rag() | -| Notifier signal: | void ragChanged() | +| Write method: | void setDocuments(QByteArray) | +| Read method: | QByteArray documents() | +| Notifier signal: | void documentsChanged() | + +**int seed** + +Seed to use with model prompts. Seed reduces randomness in model answers. + +Example: +``` +import qtaimodel + + MultiModal { + id: llamaModel + type: (MultiModal.InputText | MultiModal.OutputText) + model: "gemma3" + prompt: "Say hello?" + seed: 3453654 + } +``` + +| | | +|------------------|-----------------------------------------------------| +| Write method: | void setDocuments(QByteArray) | +| Read method: | QByteArray documents() | +| Notifier signal: | void documentsChanged() | **QVector<QAiModel*> inputs** -- GitLab From 17aa780935ce82bccec2e22f35c1684e89d5ed19 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Fri, 2 May 2025 06:58:58 +0000 Subject: [PATCH 12/14] Fixes to documentation --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a3bbef8..380ce8e 100644 --- a/README.md +++ b/README.md @@ -150,8 +150,8 @@ import qtaimodel | | | |------------------|-----------------------------------------------------| -| Write method: | void setDocuments(QByteArray) | -| Read method: | QByteArray documents() | +| Write method: | void setDocuments(QVariantList) | +| Read method: | QVariantList documents() | | Notifier signal: | void documentsChanged() | **int seed** @@ -177,7 +177,7 @@ import qtaimodel | Read 
method: | QByteArray documents() | | Notifier signal: | void documentsChanged() | -**QVector<QAiModel*> inputs** +**QVector<QAiModel\*> inputs** A list of models this model will use as its inputs. This allows for chaining models together to create pipelines. You can use the Optional flag with the model's type to tell whether it's an optional or mandatory input. For mandatory inputs, this model will not process any other inputs before the mandatory one has something to offer. For optional ones, other inputs will be processed regardless if that input has data available or not. -- GitLab From 08ef1bf34fbed237fe771d867afa7ce2d2dfd154 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Fri, 2 May 2025 12:56:27 +0000 Subject: [PATCH 13/14] Remove unneeded file --- tests/app/CMakeLists copy.txt | 48 ----------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 tests/app/CMakeLists copy.txt diff --git a/tests/app/CMakeLists copy.txt b/tests/app/CMakeLists copy.txt deleted file mode 100644 index a6ba292..0000000 --- a/tests/app/CMakeLists copy.txt +++ /dev/null @@ -1,48 +0,0 @@ - -cmake_minimum_required(VERSION 3.21.1) - -add_subdirectory(aimodel) - -project(QtAiInferenceApi LANGUAGES CXX) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake") - -set(CMAKE_AUTOMOC ON) -set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml) -set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY} - CACHE STRING "Import paths for Qt Creator's code model" - FORCE -) - -find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia) -qt_standard_project_setup(REQUIRES 6.8) - -qt_add_executable(${CMAKE_PROJECT_NAME} - main.cpp -) - -qt_add_qml_module(${CMAKE_PROJECT_NAME} - URI qtaiinferenceapi - VERSION 1.0 - RESOURCES - qtquickcontrols2.conf - QML_FILES - App.qml - Screen01.ui.qml - ) - -target_link_libraries(${CMAKE_PROJECT_NAME} - PRIVATE - Qt6::Quick - 
Qt6::Multimedia - QtAiModelApi -) - - -include(GNUInstallDirs) -install(TARGETS ${CMAKE_PROJECT_NAME} - BUNDLE DESTINATION . - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) -- GitLab From 3e61cfaeb6b708e2e9b1c309736f9cb114f391c5 Mon Sep 17 00:00:00 2001 From: Jari Helaakoski <jari.helaakoski@qt.io> Date: Sat, 3 May 2025 23:02:46 +0300 Subject: [PATCH 14/14] Fixes for windows --- aimodel/plugins/asr/qasraimodel_p.cpp | 5 +++-- aimodel/plugins/ollama/qllmaimodel_p.cpp | 2 +- aimodel/plugins/piper-tts/plugin.json | 2 +- aimodel/plugins/tts/qtext2speechaimodel_p.cpp | 2 +- aimodel/plugins/tts/qtext2speechaimodel_p.h | 2 +- aimodel/qaimodel.cpp | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/aimodel/plugins/asr/qasraimodel_p.cpp b/aimodel/plugins/asr/qasraimodel_p.cpp index d2d5a24..ae1a331 100644 --- a/aimodel/plugins/asr/qasraimodel_p.cpp +++ b/aimodel/plugins/asr/qasraimodel_p.cpp @@ -3,6 +3,7 @@ #include "qaimodel.h" #include "qasraimodel_p.h" +#include <QDir> #include <QJsonDocument> #include <QJsonObject> #include <QNetworkReply> @@ -20,7 +21,7 @@ void QAsrAiModel::pushData(QVariantList data, int seed) qDebug() << "QAsrAiModel::pushData(): data:" << data; if (data.isEmpty() || data.first().toUrl().isEmpty()) { - emit dataReceived(data.first().toUrl()); + emit dataReceived(data.first().toUrl().toLocalFile()); return; } @@ -29,7 +30,7 @@ void QAsrAiModel::pushData(QVariantList data, int seed) QJsonDocument doc; QJsonObject obj = doc.object(); obj["model"] = m_owner->model(); - obj["file"] = data.first().toUrl().path(); + obj["file"] = data.first().toUrl().toLocalFile(); //obj["stream"] = false; doc.setObject(obj); m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) { diff --git a/aimodel/plugins/ollama/qllmaimodel_p.cpp b/aimodel/plugins/ollama/qllmaimodel_p.cpp index b6caad4..50eb4a2 100644 --- a/aimodel/plugins/ollama/qllmaimodel_p.cpp +++ 
b/aimodel/plugins/ollama/qllmaimodel_p.cpp @@ -55,7 +55,7 @@ void QLlmAiModel::pushData(QVariantList data, int seed) query.append(QString::fromLatin1(i.toByteArray())); if (i.canConvert<QUrl>()) { - QFile file(QUrl(i.toUrl()).path()); + QFile file(QUrl(i.toUrl()).toLocalFile()); if (file.open(QIODevice::ReadOnly) != 0) { QByteArray ba = file.readAll(); QByteArray ba2 = ba.toBase64(); diff --git a/aimodel/plugins/piper-tts/plugin.json b/aimodel/plugins/piper-tts/plugin.json index bb84b6f..a949a0e 100644 --- a/aimodel/plugins/piper-tts/plugin.json +++ b/aimodel/plugins/piper-tts/plugin.json @@ -1,3 +1,3 @@ -{ "name": "ttsplugin", +{ "name": "ttspiperplugin", "supportedTypes": ["InputText", "OutputAudio"] } diff --git a/aimodel/plugins/tts/qtext2speechaimodel_p.cpp b/aimodel/plugins/tts/qtext2speechaimodel_p.cpp index da18908..505e52d 100644 --- a/aimodel/plugins/tts/qtext2speechaimodel_p.cpp +++ b/aimodel/plugins/tts/qtext2speechaimodel_p.cpp @@ -18,7 +18,7 @@ QText2SpeechAiModel::QText2SpeechAiModel() }); } -void QText2SpeechAiModel::pushData(QVariantList data) +void QText2SpeechAiModel::pushData(QVariantList data, int seed) { m_speech->stop(); diff --git a/aimodel/plugins/tts/qtext2speechaimodel_p.h b/aimodel/plugins/tts/qtext2speechaimodel_p.h index 2666fa8..0e7b438 100644 --- a/aimodel/plugins/tts/qtext2speechaimodel_p.h +++ b/aimodel/plugins/tts/qtext2speechaimodel_p.h @@ -15,7 +15,7 @@ class QText2SpeechAiModel : public AiModelPrivateInterface Q_OBJECT public: QText2SpeechAiModel(); - void pushData(QVariantList data) override; + void pushData(QVariantList data, int seed) override; QSharedPointer<QTextToSpeech> m_speech; }; diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp index 89b86cc..8f81cdb 100644 --- a/aimodel/qaimodel.cpp +++ b/aimodel/qaimodel.cpp @@ -70,7 +70,7 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType) m_interface->init(this); break; } else { - qDebug() << "Could not convert" << instance << "to 
AiModelPrivateInterface*"; + qDebug() << "Could not convert" << instance << "to QAiModelPluginFactory*"; } } } -- GitLab