From 1dadf27df6c5dd73cdf61505273e7598b13818fa Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Tue, 8 Apr 2025 17:58:20 +0300
Subject: [PATCH 01/14] Changes from origin + text2image support + fix for
 paths

---
 README.md                                     |   3 +
 Screen01.ui.qml                               | 122 ++++++++++++------
 aimodel/diffuser_server/diffuser_server.py    |  82 ++++++++++++
 aimodel/plugins/CMakeLists.txt                |   1 +
 aimodel/plugins/asr/CMakeLists.txt            |   2 +-
 aimodel/plugins/diffuser/CMakeLists.txt       |  16 +++
 aimodel/plugins/diffuser/plugin.json          |   3 +
 .../plugins/diffuser/qdiffuseraimodel_p.cpp   |  43 ++++++
 aimodel/plugins/diffuser/qdiffuseraimodel_p.h |  33 +++++
 aimodel/plugins/ollama/CMakeLists.txt         |   2 +-
 aimodel/plugins/piper-tts/CMakeLists.txt      |   2 +-
 aimodel/plugins/triton/CMakeLists.txt         |   2 +-
 aimodel/plugins/tts/CMakeLists.txt            |   2 +-
 aimodel/plugins/yolo/CMakeLists.txt           |   2 +-
 aimodel/qaimodel.cpp                          |   5 +-
 15 files changed, 271 insertions(+), 49 deletions(-)
 create mode 100755 aimodel/diffuser_server/diffuser_server.py
 create mode 100644 aimodel/plugins/diffuser/CMakeLists.txt
 create mode 100644 aimodel/plugins/diffuser/plugin.json
 create mode 100644 aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
 create mode 100644 aimodel/plugins/diffuser/qdiffuseraimodel_p.h

diff --git a/README.md b/README.md
index 5b8e2e9..8820be2 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,9 @@
 
 This projects contains the proof-of-concept for a new Qt AI Inference API. The purpose of the API is to let you easily use different types of AI models for inference from your Qt code, either from C++ or directly from QML! The API abstracts the details of the underlying model and framework implementations, allowing you to just tell what type of input and output you would like to use, and Qt will set things up for you! You can also chain different models together for pipelines.
 
+**Disclaimer**
+This API is at the proof-of-concept stage, under active development, and not yet part of the Qt framework. Hence, Qt's compatibility promise does not apply; the API can still change in breaking ways. However, this is also a great time to influence the direction it will take! For suggestions, feel free to create a ticket in the Qt project's [JIRA](https://bugreports.qt.io); please use the label "QtAiApi" so we can easily find and collect them.
+
 ## How it works
 
 When you declare a model in your code, Qt will infer from the given input and output type what backend it will set up for the model. The backends are implemented as QPlugins. Currently, the backends are:
diff --git a/Screen01.ui.qml b/Screen01.ui.qml
index ebd7d30..b014ba0 100644
--- a/Screen01.ui.qml
+++ b/Screen01.ui.qml
@@ -20,58 +20,87 @@ Rectangle {
 
     ColumnLayout {
         RowLayout {
-            id: buttonRow
-            Button {
-                text: "Record audio"
-                onClicked: {
-                    recorder.record()
+            ColumnLayout {
+                RowLayout {
+                    id: buttonRow
+                    Button {
+                        text: "Record audio"
+                        onClicked: {
+                            recorder.record()
+                        }
+                    }
+                    Button {
+                        text: "Stop audio recording"
+                        onClicked: {
+                            recorder.stop()
+                            if (recorder.actualLocation != "") {
+                                speechToText.pushData(recorder.actualLocation)
+                            }
+                            if (imageFile != "") {
+                                imageToText.pushData(imageFile)
+                            }
+                        }
+                    }
                 }
-            }
-            Button {
-                text: "Stop audio recording"
-                onClicked: {
-                    recorder.stop()
-                    if (recorder.actualLocation != "") {
-                        speechToText.pushData(recorder.actualLocation)
+
+                RowLayout {
+                    Button {
+                        text: qsTr("Open image")
+                        onClicked: fileDialog.open()
                     }
-                    if (imageFile != "") {
-                        imageToText.pushData(imageFile)
+                    Text {
+                        id: result
+                        text: rectangle.imageFile
                     }
                 }
-            }
-        }
 
-        RowLayout {
-            Button {
-                text: qsTr("Open image")
-                onClicked: fileDialog.open()
+                TextField {
+                    placeholderText: "Text2Image"
+                    implicitWidth: 300
+                    onEditingFinished: diffuser.pushData(text)
+                }
+
+                TextField {
+                    text: llamaPrompt
+                    placeholderText: "Llama prompt"
+                    implicitWidth: 300
+                    onEditingFinished: llamaModel.prompt = text
+                }
+                TextField {
+                    placeholderText: "Text2Text"
+                    implicitWidth: 300
+                    onEditingFinished: llamaModel.pushData(text)
+                }
+
+                TextArea {
+                    placeholderText: "Enter context"
+                    background: Rectangle {
+                        color: "lightgreen"
+                    }
+
+                    implicitWidth: 300
+                    implicitHeight: 200
+                    onEditingFinished: llamaModel.rag = [text]
+                }
             }
-            Text {
-                id: result
-                text: rectangle.imageFile
+
+            Rectangle {
+                width: 300
+                height: 300
+                color: "red"
+                Image {
+                    anchors.fill: parent
+                    anchors.margins: 2
+                    id: imageLocation
+                    source: imageFile
+                    fillMode: Image.PreserveAspectFit
+                }
             }
         }
 
-        TextField {
-            text: llamaPrompt
-            implicitWidth: 300
-            onEditingFinished: llamaModel.prompt = text
-        }
 
-        TextArea {
-            placeholderText: "Enter context"
-            background: Rectangle {
-                color: "lightgreen"
-            }
 
-            implicitWidth: 300
-            implicitHeight: 200
-            onEditingFinished: llamaModel.rag = [text]
-        }
 
-        Image {
-            source: imageFile
-        }
     }
 
     FileDialog {
@@ -122,4 +151,17 @@ Rectangle {
         type: (MultiModal.InputText | MultiModal.OutputAudio)
         inputs: [ llamaModel ]
     }
+
+    MultiModal {
+        id: diffuser
+        type: (MultiModal.InputText | MultiModal.OutputImage)
+        model: 'IDKiro/sdxs-512-dreamshaper'
+    }
+
+    Connections {
+        target: diffuser
+        function onGotResult(result) {
+            imageLocation.source = result
+        }
+    }
 }
diff --git a/aimodel/diffuser_server/diffuser_server.py b/aimodel/diffuser_server/diffuser_server.py
new file mode 100755
index 0000000..e5ab83f
--- /dev/null
+++ b/aimodel/diffuser_server/diffuser_server.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2025 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
+
+from http.server import BaseHTTPRequestHandler,HTTPServer
+from os import curdir, sep
+import torch
+from diffusers import AutoPipelineForText2Image, LCMScheduler
+import simplejson
+import base64
+from io import BytesIO
+from PIL import Image
+
+
+
+PORT_NUMBER = 8005
+
+# This class handles any incoming request from
+# the browser
+class myHandler(BaseHTTPRequestHandler):
+    # Cached on the class: a handler instance is created per connection,
+    # so state kept on self would reload the model for every request.
+    model = ''
+    pipeline = None
+
+    def do_POST(self):
+        """Handle POST /send: JSON {"model", "prompt"} in, {"image": base64 PNG} out."""
+        print("do_POST")
+        if self.path=="/send":
+            self.data_string = self.rfile.read(int(self.headers['Content-Length']))
+            print("data_string: " + self.data_string.decode())
+            json_data = simplejson.loads(self.data_string)
+            print("MODEL: " + json_data["model"])
+            if myHandler.model != json_data["model"]:
+                myHandler.pipeline = AutoPipelineForText2Image.from_pretrained(
+                    json_data["model"],
+                    #'black-forest-labs/FLUX.1-dev',
+                    #'IDKiro/sdxs-512-dreamshaper',
+                    #"stabilityai/stable-diffusion-2-1-base",
+                    #'black-forest-labs/FLUX.1-schnell',
+                    torch_dtype=torch.float32,
+                    #variant="fp16",
+                    use_safetensors=True).to('cpu')
+                myHandler.model = json_data["model"]
+
+            image = myHandler.pipeline(json_data["prompt"],
+                height=512,
+                width=512,
+                guidance_scale=0.0,
+                target_size=(1024, 1024),
+                original_size=(4096, 4096),
+                num_inference_steps=1
+                #max_sequence_length=256
+                ).images[0]
+
+            buffered = BytesIO()
+            image.save(buffered, format="PNG")
+            b64image = base64.b64encode(buffered.getvalue()).decode("ascii")
+            print("Sending response")
+            self.send_response(200)
+            self.end_headers()
+            json_response = {"image": b64image}
+            self.wfile.write(simplejson.dumps(json_response).encode("utf-8"))
+            return
+
+
+try:
+    #Create a web server and define the handler to manage the
+    #incoming request
+    server = HTTPServer(('', PORT_NUMBER), myHandler)
+    print('Started httpserver on port ' , PORT_NUMBER)
+
+
+    #Wait forever for incoming http requests
+    server.serve_forever()
+
+except KeyboardInterrupt:
+    print('^C received, shutting down the web server')
+    server.socket.close()
+
+
diff --git a/aimodel/plugins/CMakeLists.txt b/aimodel/plugins/CMakeLists.txt
index 0cf7b15..095e554 100644
--- a/aimodel/plugins/CMakeLists.txt
+++ b/aimodel/plugins/CMakeLists.txt
@@ -4,3 +4,4 @@ add_subdirectory(ollama)
 add_subdirectory(triton)
 add_subdirectory(yolo)
 add_subdirectory(piper-tts)
+add_subdirectory(diffuser)
diff --git a/aimodel/plugins/asr/CMakeLists.txt b/aimodel/plugins/asr/CMakeLists.txt
index 9c19a88..4ecc6d9 100644
--- a/aimodel/plugins/asr/CMakeLists.txt
+++ b/aimodel/plugins/asr/CMakeLists.txt
@@ -5,7 +5,7 @@ qt_add_plugin(QtAsrModel
     qasraimodel_p.h qasraimodel_p.cpp
     )
 set_target_properties(QtAsrModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 target_link_libraries(QtAsrModel
     PRIVATE
diff --git a/aimodel/plugins/diffuser/CMakeLists.txt b/aimodel/plugins/diffuser/CMakeLists.txt
new file mode 100644
index 0000000..2ee0f90
--- /dev/null
+++ b/aimodel/plugins/diffuser/CMakeLists.txt
@@ -0,0 +1,16 @@
+find_package(Qt6 REQUIRED COMPONENTS Core Network Quick)
+
+qt_add_plugin(QtDiffuserModel
+    CLASS_NAME QAiModelPluginFactory
+    qdiffuseraimodel_p.h qdiffuseraimodel_p.cpp
+    )
+set_target_properties(QtDiffuserModel PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
+)
+target_link_libraries(QtDiffuserModel
+    PRIVATE
+        Qt6::Core
+        Qt6::Network
+        Qt6::Quick
+        QtAiModelPluginInterface)
+include_directories(../..)
diff --git a/aimodel/plugins/diffuser/plugin.json b/aimodel/plugins/diffuser/plugin.json
new file mode 100644
index 0000000..e9a40a3
--- /dev/null
+++ b/aimodel/plugins/diffuser/plugin.json
@@ -0,0 +1,3 @@
+{ "name": "diffuserplugin",
+  "supportedTypes": ["InputText", "OutputImage"]
+}
diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
new file mode 100644
index 0000000..550fa67
--- /dev/null
+++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2025 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
+
+#include "qaimodel.h"
+#include "qdiffuseraimodel_p.h"
+#include <QJsonDocument>
+#include <QJsonObject>
+#include <QNetworkReply>
+#include <QRestReply>
+#include <QImage>
+
+QDiffuserAiModel::QDiffuserAiModel()
+    : AiModelPrivateInterface(),
+      m_manager(this)
+    , m_restApi(&m_manager)
+{
+}
+
+void QDiffuserAiModel::pushData(QVariantList data)
+{
+    qDebug() << "QDiffuserAiModel::pushData(): data:" << data;
+    // Guard first(): calling it on an empty list is undefined behavior.
+    const QString prompt = data.isEmpty() ? QString() : data.first().toString();
+    if (prompt.isEmpty()) {
+        emit dataReceived(prompt);
+        return;
+    }
+
+    QNetworkRequest request(QUrl("http://localhost:8005/send"));
+    request.setRawHeader("Content-Type", "application/json");
+    QJsonObject obj;
+    obj["model"] = m_owner->model();
+    obj["prompt"] = prompt;
+    const QJsonDocument doc(obj);
+    qDebug() << doc.toJson();
+    // The reply's "image" field is used as base64 PNG data and emitted as a data: URL.
+    m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) {
+        if (auto json = reply.readJson()) {
+            emit dataReceived(QUrl(
+                QString("data:image/png;base64,") + json->object()["image"].toString()));
+        }
+    });
+}
diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h
new file mode 100644
index 0000000..d0adfc8
--- /dev/null
+++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h
@@ -0,0 +1,33 @@
+// Copyright (C) 2025 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
+
+#ifndef QDIFFUSERAIMODEL_P_H
+#define QDIFFUSERAIMODEL_P_H
+
+#include <QObject>
+#include <QRestAccessManager>
+#include "qaimodelinterface_p.h"
+
+class QDiffuserAiModel : public AiModelPrivateInterface
+{
+    Q_OBJECT
+public:
+    QDiffuserAiModel();
+    void pushData(QVariantList data) override;
+
+private:
+    QNetworkAccessManager m_manager;
+    QRestAccessManager m_restApi;
+};
+
+class QDiffuserAiModelPlugin : public QAiModelPluginFactory
+{
+    Q_OBJECT
+    Q_PLUGIN_METADATA(IID "org.qt-project.Qt.QAiModelPluginFactory/1.0" FILE "plugin.json")
+    Q_INTERFACES(QAiModelPluginFactory)
+public:
+    QDiffuserAiModelPlugin() {}
+    AiModelPrivateInterface* createInterface() { return new QDiffuserAiModel(); }
+};
+
+#endif // QDIFFUSERAIMODEL_P_H
diff --git a/aimodel/plugins/ollama/CMakeLists.txt b/aimodel/plugins/ollama/CMakeLists.txt
index 28a83e6..35122a7 100644
--- a/aimodel/plugins/ollama/CMakeLists.txt
+++ b/aimodel/plugins/ollama/CMakeLists.txt
@@ -6,7 +6,7 @@ qt_add_plugin(QtOllamaModel
     )
 
 set_target_properties(QtOllamaModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 target_link_libraries(QtOllamaModel
     PRIVATE
diff --git a/aimodel/plugins/piper-tts/CMakeLists.txt b/aimodel/plugins/piper-tts/CMakeLists.txt
index 651ef5b..b00f4f3 100644
--- a/aimodel/plugins/piper-tts/CMakeLists.txt
+++ b/aimodel/plugins/piper-tts/CMakeLists.txt
@@ -6,7 +6,7 @@ qt_add_plugin(QtPiperModel
     )
 
 set_target_properties(QtPiperModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 target_link_libraries(QtPiperModel
     PRIVATE
diff --git a/aimodel/plugins/triton/CMakeLists.txt b/aimodel/plugins/triton/CMakeLists.txt
index fb1ca94..69ecd1b 100644
--- a/aimodel/plugins/triton/CMakeLists.txt
+++ b/aimodel/plugins/triton/CMakeLists.txt
@@ -7,7 +7,7 @@ qt_add_plugin(QtTritonModel
     )
 
 set_target_properties(QtTritonModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 include_directories(../..)
 target_link_libraries(QtTritonModel
diff --git a/aimodel/plugins/tts/CMakeLists.txt b/aimodel/plugins/tts/CMakeLists.txt
index 92dcf3b..208fa72 100644
--- a/aimodel/plugins/tts/CMakeLists.txt
+++ b/aimodel/plugins/tts/CMakeLists.txt
@@ -6,7 +6,7 @@ qt_add_plugin(QtTtsModel
     )
 
 set_target_properties(QtTtsModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 target_link_libraries(QtTtsModel
     PRIVATE
diff --git a/aimodel/plugins/yolo/CMakeLists.txt b/aimodel/plugins/yolo/CMakeLists.txt
index 039fe5f..8720988 100644
--- a/aimodel/plugins/yolo/CMakeLists.txt
+++ b/aimodel/plugins/yolo/CMakeLists.txt
@@ -6,7 +6,7 @@ qt_add_plugin(QtYoloModel
     )
 
 set_target_properties(QtYoloModel PROPERTIES
-    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/plugins"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/plugins/aimodel"
 )
 target_link_libraries(QtYoloModel
     PRIVATE
diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp
index 411691d..a47559c 100644
--- a/aimodel/qaimodel.cpp
+++ b/aimodel/qaimodel.cpp
@@ -44,8 +44,7 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType)
 
 
     m_interface.clear();
-    QDir pluginsDir(QDir::currentPath() + "/qt-ai-inference-api/aimodel/plugins");
-    //QDir pluginsDir(QDir::currentPath() + "/aimodel/plugins");
+    QDir pluginsDir(QDir::currentPath() + "/plugins/aimodel");
     qDebug() << "Plugins dir: " << pluginsDir.absolutePath();
     const auto entryList = pluginsDir.entryList(QDir::Files);
     for (const QString &fileName : entryList) {
@@ -165,7 +164,7 @@ void QAiModel::processCombinedData(QVariant data)
 
 void QAiModel::dataReceived(QVariant data)
 {
-    qDebug() << interface() << ":" << __func__ << "(): data:" << data;
+    qDebug() << interface() << ":" << __func__ << "(): data:" << data.typeName();
 
     m_buffer = data;
     m_processing = false;
-- 
GitLab


From 39f348b6f60cbab753ef319dd3f42890e47438ab Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Wed, 9 Apr 2025 10:12:38 +0300
Subject: [PATCH 02/14] Improve test app

---
 Screen01.ui.qml | 179 ++++++++++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 65 deletions(-)

diff --git a/Screen01.ui.qml b/Screen01.ui.qml
index b014ba0..a812ab4 100644
--- a/Screen01.ui.qml
+++ b/Screen01.ui.qml
@@ -22,14 +22,21 @@ Rectangle {
         RowLayout {
             ColumnLayout {
                 RowLayout {
-                    id: buttonRow
+                    Rectangle {
+                        width: 20
+                        height: 20
+                        color: speechToText.processing ? "red" : "green"
+                    }
+
                     Button {
+                        visible: recorder.recorderState !== MediaRecorder.RecordingState
                         text: "Record audio"
                         onClicked: {
                             recorder.record()
                         }
                     }
                     Button {
+                        visible: recorder.recorderState === MediaRecorder.RecordingState
                         text: "Stop audio recording"
                         onClicked: {
                             recorder.stop()
@@ -41,35 +48,102 @@ Rectangle {
                             }
                         }
                     }
+                    CaptureSession {
+                        audioInput: AudioInput {}
+                        recorder: MediaRecorder {
+                            id: recorder
+                            mediaFormat {
+                                fileFormat: MediaFormat.Wave
+                            }
+                        }
+                    }
+
+                    MultiModal {
+                        id: speechToText
+                        type: (MultiModal.InputAudio | MultiModal.OutputText)
+                        model: "turbo"
+                    }
                 }
 
                 RowLayout {
+                    Rectangle {
+                        width: 20
+                        height: 20
+                        color: imageToText.processing ? "red" : "green"
+                    }
+
                     Button {
                         text: qsTr("Open image")
                         onClicked: fileDialog.open()
+
+
+                        FileDialog {
+                            id: fileDialog
+                            folder: StandardPaths.standardLocations(StandardPaths.PicturesLocation)[0]
+                            nameFilters: ["*.*"]
+                            onAccepted: {
+                                imageFile = fileDialog.file
+                            }
+                            onRejected: {}
+                        }
                     }
                     Text {
                         id: result
                         text: rectangle.imageFile
                     }
+                    MultiModal {
+                        id: imageToText
+                        type: (MultiModal.InputImage | MultiModal.OutputText)
+                        model: "llava-phi3"  // TODO: replace with Janus model from DeepSeek
+                        prompt: "What is in the picture?"
+                        optional: true
+                        buffered: true
+                    }
                 }
 
-                TextField {
-                    placeholderText: "Text2Image"
-                    implicitWidth: 300
-                    onEditingFinished: diffuser.pushData(text)
+                RowLayout {
+                    Rectangle {
+                        width: 20
+                        height: 20
+                        color: diffuser.processing ? "red" : "green"
+                    }
+                    TextField {
+                        placeholderText: "Text2Image"
+                        implicitWidth: 300
+                        onEditingFinished: diffuser.pushData(text)
+                    }
+                    MultiModal {
+                        id: diffuser
+                        type: (MultiModal.InputText | MultiModal.OutputImage)
+                        model: 'IDKiro/sdxs-512-dreamshaper'
+                    }
+
+                    Connections {
+                        target: diffuser
+                        function onGotResult(result) {
+                            imageLocation.source = result
+                        }
+                    }
                 }
 
+
                 TextField {
                     text: llamaPrompt
                     placeholderText: "Llama prompt"
                     implicitWidth: 300
                     onEditingFinished: llamaModel.prompt = text
                 }
-                TextField {
-                    placeholderText: "Text2Text"
-                    implicitWidth: 300
-                    onEditingFinished: llamaModel.pushData(text)
+                RowLayout {
+                    Rectangle {
+                        width: 20
+                        height: 20
+                        color: llamaModel.processing ? "red" : "green"
+                    }
+                    TextField {
+                        placeholderText: "Text2Text"
+                        implicitWidth: 300
+                        onEditingFinished: llamaModel.pushData(text)
+                    }
                 }
 
                 TextArea {
@@ -79,9 +153,39 @@ Rectangle {
                     }
 
                     implicitWidth: 300
-                    implicitHeight: 200
+                    implicitHeight: 100
                     onEditingFinished: llamaModel.rag = [text]
                 }
+                MultiModal {
+                    id: llamaModel
+                    type: (MultiModal.InputText | MultiModal.OutputText)
+                    model: "gemma3:4b"
+                    prompt: llamaPrompt
+                    inputs: [ imageToText, speechToText ]
+                }
+
+                RowLayout {
+                    Rectangle {
+                        width: 20
+                        height: 20
+                        color: text2speech.processing ? "red" : "green"
+                    }
+                    TextArea {
+                        implicitWidth: 300
+                        implicitHeight: 100
+                        background: Rectangle {
+                            color: "lightblue"
+                        }
+
+                    }
+
+                    MultiModal {
+                        id: text2speech
+                        type: (MultiModal.InputText | MultiModal.OutputAudio)
+                        inputs: [ llamaModel ]
+                    }
+                }
+
             }
 
             Rectangle {
@@ -103,65 +207,10 @@ Rectangle {
 
     }
 
-    FileDialog {
-        id: fileDialog
-        folder: StandardPaths.standardLocations(StandardPaths.PicturesLocation)[0]
-        nameFilters: ["*.*"]
-        onAccepted: {
-            imageFile = fileDialog.file
-        }
-        onRejected: {}
-    }
 
-    CaptureSession {
-        audioInput: AudioInput {}
-        recorder: MediaRecorder {
-            id: recorder
-            mediaFormat {
-                fileFormat: MediaFormat.Wave
-            }
-        }
-    }
 
-    MultiModal {
-        id: imageToText
-        type: (MultiModal.InputImage | MultiModal.OutputText)
-        model: "llava-phi3"  // TODO: replace with Janus model from DeepSeek
-        prompt: "What is in the picture?"
-        optional: true
-        buffered: true
-    }
 
-    MultiModal {
-        id: speechToText
-        type: (MultiModal.InputAudio | MultiModal.OutputText)
-        model: "turbo"
-    }
 
-    MultiModal {
-        id: llamaModel
-        type: (MultiModal.InputText | MultiModal.OutputText)
-        model: "gemma3:4b"
-        prompt: llamaPrompt
-        inputs: [ imageToText, speechToText ]
-    }
-
-    MultiModal {
-        id: text2speech
-        type: (MultiModal.InputText | MultiModal.OutputAudio)
-        inputs: [ llamaModel ]
-    }
 
-    MultiModal {
-        id: diffuser
-        type: (MultiModal.InputText | MultiModal.OutputImage)
-        model: 'IDKiro/sdxs-512-dreamshaper'
-    }
 
-    Connections {
-        target: diffuser
-        function onGotResult(result) {
-            imageLocation.source = result
-        }
-    }
 }
-- 
GitLab


From 57b4c302b86fc5093544edc066b00ffb71022477 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Thu, 10 Apr 2025 15:21:46 +0300
Subject: [PATCH 03/14] Move test app into tests directory

---
 CMakeLists.txt                                | 46 +----------------
 tests/CMakeLists.txt                          |  2 +
 tests/app/App.qml                             | 25 ++++++++++
 tests/app/CMakeLists copy.txt                 | 48 ++++++++++++++++++
 tests/app/CMakeLists.txt                      | 49 +++++++++++++++++++
 .../app/QtAiInferenceApi.qmlproject           |  0
 Screen01.ui.qml => tests/app/Screen01.ui.qml  |  0
 main.cpp => tests/app/main.cpp                |  5 +-
 .../app/qtquickcontrols2.conf                 |  0
 9 files changed, 130 insertions(+), 45 deletions(-)
 create mode 100644 tests/CMakeLists.txt
 create mode 100644 tests/app/App.qml
 create mode 100644 tests/app/CMakeLists copy.txt
 create mode 100644 tests/app/CMakeLists.txt
 rename QtAiInferenceApi.qmlproject => tests/app/QtAiInferenceApi.qmlproject (100%)
 rename Screen01.ui.qml => tests/app/Screen01.ui.qml (100%)
 rename main.cpp => tests/app/main.cpp (84%)
 rename qtquickcontrols2.conf => tests/app/qtquickcontrols2.conf (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6ba292..4a08cf1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,48 +1,6 @@
-
 cmake_minimum_required(VERSION 3.21.1)
-
-add_subdirectory(aimodel)
-
 project(QtAiInferenceApi LANGUAGES CXX)
 
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
-
-set(CMAKE_AUTOMOC ON)
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
-set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml)
-set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY}
-    CACHE STRING "Import paths for Qt Creator's code model"
-    FORCE
-)
-
-find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia)
-qt_standard_project_setup(REQUIRES 6.8)
 
-qt_add_executable(${CMAKE_PROJECT_NAME}
-    main.cpp
-)
-
-qt_add_qml_module(${CMAKE_PROJECT_NAME}
-    URI qtaiinferenceapi
-    VERSION 1.0
-    RESOURCES
-        qtquickcontrols2.conf
-    QML_FILES
-        App.qml
-        Screen01.ui.qml
-    )
-
-target_link_libraries(${CMAKE_PROJECT_NAME}
-    PRIVATE
-        Qt6::Quick
-        Qt6::Multimedia
-        QtAiModelApi
-)
-
-
-include(GNUInstallDirs)
-install(TARGETS ${CMAKE_PROJECT_NAME}
-  BUNDLE DESTINATION .
-  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
-)
+add_subdirectory(aimodel)
+add_subdirectory(tests)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..3f46259
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,2 @@
+
+add_subdirectory(app)
diff --git a/tests/app/App.qml b/tests/app/App.qml
new file mode 100644
index 0000000..ab4337c
--- /dev/null
+++ b/tests/app/App.qml
@@ -0,0 +1,25 @@
+// Copyright (C) 2025 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
+
+import QtQuick 2.15
+import QtQuick.Controls 2.15
+import QtQuick.Window 2.15
+
+ApplicationWindow {
+    id: mainWindow
+    visible: true
+    width: Screen.width / 3
+    height: Screen.height / 3
+    //width: Screen.width / 2
+    //height: Screen.height / 2
+    //flags: Qt.FramelessWindowHint | Qt.Window
+    //visibility: Window.FullScreen
+    color: "black" // Optional background color for the main window
+
+    // @disable-check M300
+    Screen01 {
+        anchors.fill: parent
+    }
+
+}
+
diff --git a/tests/app/CMakeLists copy.txt b/tests/app/CMakeLists copy.txt
new file mode 100644
index 0000000..a6ba292
--- /dev/null
+++ b/tests/app/CMakeLists copy.txt	
@@ -0,0 +1,48 @@
+
+cmake_minimum_required(VERSION 3.21.1)
+
+add_subdirectory(aimodel)
+
+project(QtAiInferenceApi LANGUAGES CXX)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml)
+set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY}
+    CACHE STRING "Import paths for Qt Creator's code model"
+    FORCE
+)
+
+find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia)
+qt_standard_project_setup(REQUIRES 6.8)
+
+qt_add_executable(${CMAKE_PROJECT_NAME}
+    main.cpp
+)
+
+qt_add_qml_module(${CMAKE_PROJECT_NAME}
+    URI qtaiinferenceapi
+    VERSION 1.0
+    RESOURCES
+        qtquickcontrols2.conf
+    QML_FILES
+        App.qml
+        Screen01.ui.qml
+    )
+
+target_link_libraries(${CMAKE_PROJECT_NAME}
+    PRIVATE
+        Qt6::Quick
+        Qt6::Multimedia
+        QtAiModelApi
+)
+
+
+include(GNUInstallDirs)
+install(TARGETS ${CMAKE_PROJECT_NAME}
+  BUNDLE DESTINATION .
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
diff --git a/tests/app/CMakeLists.txt b/tests/app/CMakeLists.txt
new file mode 100644
index 0000000..0f51923
--- /dev/null
+++ b/tests/app/CMakeLists.txt
@@ -0,0 +1,49 @@
+cmake_minimum_required(VERSION 3.21.1)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+set(CMAKE_AUTOMOC ON)
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml)
+set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY}
+    CACHE STRING "Import paths for Qt Creator's code model"
+    FORCE
+)
+
+
+find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia)
+qt_standard_project_setup(REQUIRES 6.8)
+
+qt_add_executable(QtAiTestApp
+    main.cpp
+)
+
+qt_add_qml_module(QtAiTestApp
+    URI qtaiinferenceapi
+    VERSION 1.0
+    RESOURCES
+        qtquickcontrols2.conf
+    QML_FILES
+        App.qml
+        Screen01.ui.qml
+    )
+
+target_link_libraries(QtAiTestApp
+    PRIVATE
+        Qt6::Quick
+        Qt6::Multimedia
+        QtAiModelApi
+)
+
+set_target_properties(QtAiTestApp
+    PROPERTIES
+        QT_QML_ROOT_PATH ${QT_QML_OUTPUT_DIRECTORY}
+        QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY}
+        QT_RESOURCE_PREFIX ${QT_QML_OUTPUT_DIRECTORY})
+
+include(GNUInstallDirs)
+install(TARGETS QtAiTestApp
+  BUNDLE DESTINATION .
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
diff --git a/QtAiInferenceApi.qmlproject b/tests/app/QtAiInferenceApi.qmlproject
similarity index 100%
rename from QtAiInferenceApi.qmlproject
rename to tests/app/QtAiInferenceApi.qmlproject
diff --git a/Screen01.ui.qml b/tests/app/Screen01.ui.qml
similarity index 100%
rename from Screen01.ui.qml
rename to tests/app/Screen01.ui.qml
diff --git a/main.cpp b/tests/app/main.cpp
similarity index 84%
rename from main.cpp
rename to tests/app/main.cpp
index 52e40fa..14d5c21 100644
--- a/main.cpp
+++ b/tests/app/main.cpp
@@ -20,9 +20,12 @@ int main(int argc, char *argv[])
     }, Qt::QueuedConnection);
 
     engine.addImportPath("qml");
+    engine.addImportPath("../../qml");
+
     engine.loadFromModule("qtaiinferenceapi", "App");
 
-    qDebug() << "Standard path for pictures: " << QStandardPaths::standardLocations(QStandardPaths::PicturesLocation);
+    qDebug() << "Standard path for pictures: " << QStandardPaths::standardLocations(QStandardPaths::PicturesLocation)
+             << "QML import path" << engine.importPathList();
 
     if (engine.rootObjects().isEmpty())
         return -1;
diff --git a/qtquickcontrols2.conf b/tests/app/qtquickcontrols2.conf
similarity index 100%
rename from qtquickcontrols2.conf
rename to tests/app/qtquickcontrols2.conf
-- 
GitLab


From 1863cb7fe100f5a597a6376ab44d836386783978 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Thu, 10 Apr 2025 15:41:00 +0300
Subject: [PATCH 04/14] Fix backend plugin loading

---
 aimodel/qaimodel.cpp | 59 +++++++++++++++++++++++---------------------
 tests/app/main.cpp   |  3 ++-
 2 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp
index a47559c..1a57681 100644
--- a/aimodel/qaimodel.cpp
+++ b/aimodel/qaimodel.cpp
@@ -8,6 +8,7 @@
 #include <QDir>
 #include <QJsonArray>
 #include <QList>
+#include <QCoreApplication>
 
 QAiModel::QAiModel()
 {
@@ -44,35 +45,37 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType)
 
 
     m_interface.clear();
-    QDir pluginsDir(QDir::currentPath() + "/plugins/aimodel");
-    qDebug() << "Plugins dir: " << pluginsDir.absolutePath();
-    const auto entryList = pluginsDir.entryList(QDir::Files);
-    for (const QString &fileName : entryList) {
-        qDebug() << "Loading " << fileName << "...";
-        QPluginLoader loader(pluginsDir.absoluteFilePath(fileName));
-        QJsonObject object{ loader.metaData().value("MetaData").toObject() };
-        qDebug() << "Metadata for " << fileName << ": " << object;
-        if (!object.value("supportedTypes").isArray()) {
-            qDebug() << "Incorrect json format in" << loader.metaData()
-                     << "for plugin:" << fileName;
-            continue;
-        }
-        auto flagArray = object.value("supportedTypes").toArray().toVariantList();
-        auto pluginFlags = constructAiModelTypeFlags(flagArray);
-
-        qDebug() << pluginFlags;
-        if (pluginFlags.testFlags(newType)) {
-            auto *instance = loader.instance();
-            QAiModelPluginFactory *plugin = qobject_cast<QAiModelPluginFactory*>(instance);
-            if (plugin) {
-                qDebug() << plugin << "created";
-                m_interface.reset(plugin->createInterface());
-                m_interface->init(this);
-                break;
-            } else {
-                qDebug() << "Could not convert" << instance << "to AiModelPrivateInterface*";
+    for (auto &&pluginDir : QCoreApplication::libraryPaths()) {
+        QDir aiModelPluginsDir(pluginDir + "/aimodel");
+        qDebug() << "Plugins dir: " << aiModelPluginsDir.absolutePath();
+        const auto entryList = aiModelPluginsDir.entryList(QDir::Files);
+        for (const QString &fileName : entryList) {
+            qDebug() << "Loading " << fileName << "...";
+            QPluginLoader loader(aiModelPluginsDir.absoluteFilePath(fileName));
+            QJsonObject object{ loader.metaData().value("MetaData").toObject() };
+            qDebug() << "Metadata for " << fileName << ": " << object;
+            if (!object.value("supportedTypes").isArray()) {
+                qDebug() << "Incorrect json format in" << loader.metaData()
+                         << "for plugin:" << fileName;
+                continue;
             }
-        }
+            auto flagArray = object.value("supportedTypes").toArray().toVariantList();
+            auto pluginFlags = constructAiModelTypeFlags(flagArray);
+            qDebug() << pluginFlags;
+            if (pluginFlags.testFlags(newType)) {
+                auto *instance = loader.instance();
+                QAiModelPluginFactory *plugin = qobject_cast<QAiModelPluginFactory*>(instance);
+                if (plugin) {
+                    qDebug() << plugin << "created";
+                    m_interface.reset(plugin->createInterface());
+                    m_interface->init(this);
+                    break; // first matching backend in this directory wins
+                } else {
+                    qDebug() << "Could not convert" << instance << "to QAiModelPluginFactory*";
+                }
+            }
+        }
+        if (!m_interface.isNull()) break; // a backend is loaded: do not scan (and override it from) further library paths
 
     }
 
diff --git a/tests/app/main.cpp b/tests/app/main.cpp
index 14d5c21..6edfc01 100644
--- a/tests/app/main.cpp
+++ b/tests/app/main.cpp
@@ -20,7 +20,8 @@ int main(int argc, char *argv[])
     }, Qt::QueuedConnection);
 
     engine.addImportPath("qml");
-    engine.addImportPath("../../qml");
+    engine.addImportPath("../../qml"); // For QML plugins
+    QCoreApplication::addLibraryPath("../../plugins"); // For backend plugins
 
     engine.loadFromModule("qtaiinferenceapi", "App");
 
-- 
GitLab


From c25e4b47bf0c2789ed8b863cfe44e6ca79a44d2b Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Fri, 11 Apr 2025 16:37:09 +0300
Subject: [PATCH 05/14] Add seed support and change rag to documents

---
 aimodel/chromadb.cpp                          |  4 +-
 aimodel/plugins/asr/qasraimodel_p.cpp         |  2 +-
 aimodel/plugins/asr/qasraimodel_p.h           |  2 +-
 .../plugins/diffuser/qdiffuseraimodel_p.cpp   |  2 +-
 aimodel/plugins/diffuser/qdiffuseraimodel_p.h |  2 +-
 aimodel/plugins/ollama/qllmaimodel_p.cpp      | 28 ++++++++++----
 aimodel/plugins/ollama/qllmaimodel_p.h        |  2 +-
 aimodel/plugins/piper-tts/qpiperaimodel_p.cpp |  2 +-
 aimodel/plugins/piper-tts/qpiperaimodel_p.h   |  2 +-
 aimodel/plugins/triton/qtritonmodel_p.cpp     |  2 +-
 aimodel/plugins/triton/qtritonmodel_p.h       |  2 +-
 aimodel/plugins/yolo/qyoloaimodel.cpp         |  2 +-
 aimodel/plugins/yolo/qyoloaimodel.h           |  2 +-
 aimodel/qaimodel.cpp                          | 37 ++++++++++++-------
 aimodel/qaimodel.h                            | 18 ++++++---
 aimodel/qaimodelinterface_p.h                 |  2 +-
 tests/app/Screen01.ui.qml                     |  2 +-
 17 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/aimodel/chromadb.cpp b/aimodel/chromadb.cpp
index 22ee94f..294e383 100644
--- a/aimodel/chromadb.cpp
+++ b/aimodel/chromadb.cpp
@@ -25,7 +25,7 @@ void ChromaDb::sendRequest(
         if (reply.isHttpStatusSuccess()) {
             lambda(json ? json.value() : QJsonDocument(), reply.httpStatus());
         } else {
-            qDebug() << "JSON decode error:" << request.url() << "HTTP status:" << reply.httpStatus();
+            qDebug() << request.url() << "responded with error:" << reply.errorString();
             setError(true);
         }
     });
@@ -74,7 +74,7 @@ void ChromaDb::reset()
                 connect(true);
             }
         } else {
-            qDebug() << url << "deleted";
+            qDebug() << url << "responded with error:" << reply.errorString();
         }
 
     });
diff --git a/aimodel/plugins/asr/qasraimodel_p.cpp b/aimodel/plugins/asr/qasraimodel_p.cpp
index a5f835c..d2d5a24 100644
--- a/aimodel/plugins/asr/qasraimodel_p.cpp
+++ b/aimodel/plugins/asr/qasraimodel_p.cpp
@@ -15,7 +15,7 @@ QAsrAiModel::QAsrAiModel()
 {
 }
 
-void QAsrAiModel::pushData(QVariantList data)
+void QAsrAiModel::pushData(QVariantList data, int seed)
 {
     qDebug() << "QAsrAiModel::pushData(): data:" << data;
 
diff --git a/aimodel/plugins/asr/qasraimodel_p.h b/aimodel/plugins/asr/qasraimodel_p.h
index 44a5708..dd53072 100644
--- a/aimodel/plugins/asr/qasraimodel_p.h
+++ b/aimodel/plugins/asr/qasraimodel_p.h
@@ -13,7 +13,7 @@ class QAsrAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QAsrAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
     QNetworkAccessManager m_manager;
diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
index 550fa67..49120f0 100644
--- a/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
+++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.cpp
@@ -16,7 +16,7 @@ QDiffuserAiModel::QDiffuserAiModel()
 {
 }
 
-void QDiffuserAiModel::pushData(QVariantList data)
+void QDiffuserAiModel::pushData(QVariantList data, int seed)
 {
     qDebug() << "QDiffuserAiModel::pushData(): data:" << data;
 
diff --git a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h
index d0adfc8..4187de4 100644
--- a/aimodel/plugins/diffuser/qdiffuseraimodel_p.h
+++ b/aimodel/plugins/diffuser/qdiffuseraimodel_p.h
@@ -13,7 +13,7 @@ class QDiffuserAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QDiffuserAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
     QNetworkAccessManager m_manager;
diff --git a/aimodel/plugins/ollama/qllmaimodel_p.cpp b/aimodel/plugins/ollama/qllmaimodel_p.cpp
index f1a5726..b6caad4 100644
--- a/aimodel/plugins/ollama/qllmaimodel_p.cpp
+++ b/aimodel/plugins/ollama/qllmaimodel_p.cpp
@@ -6,6 +6,7 @@
 #include <QJsonArray>
 #include <QJsonDocument>
 #include <QJsonObject>
+#include <QJsonValue>
 #include <QNetworkReply>
 #include <QNetworkRequest>
 #include <QRestReply>
@@ -35,7 +36,9 @@ static inline void sendRequest(
     QNetworkRequest request(url);
     request.setRawHeader("Content-Type", "application/json");
     restApi->post(request, QJsonDocument(object).toJson(), owner, [=](QRestReply &reply) {
-        if (std::optional<QJsonDocument> json = reply.readJson()) {
+        if (!reply.isHttpStatusSuccess()) {
+            qDebug() << request.url() << "responded with error" << reply.errorString() << " and status:" << reply.httpStatus();
+        } else if (std::optional<QJsonDocument> json = reply.readJson()) {
             lambda(json.value());
         } else {
             qDebug() << "Error. No data received from" << request.url() << reply;
@@ -43,7 +46,7 @@ static inline void sendRequest(
     });
 }
 
-void QLlmAiModel::pushData(QVariantList data)
+void QLlmAiModel::pushData(QVariantList data, int seed)
 {
     QString query = m_owner->prompt();
     QJsonArray images;
@@ -63,13 +66,16 @@ void QLlmAiModel::pushData(QVariantList data)
     }
     qDebug() << this << "[\"prompt\"]: " << query << "[images]" << images.count();
 
-
-
-
     auto promptResponseReceived = [=](auto json) {
         emit dataReceived(json.object()["response"].toString().toUtf8());
     };
 
+    // Undefined by default: QJsonObject drops Undefined values, so a zero seed adds no ("": null) member.
+    QPair<QString, QJsonValue> options{QString(), QJsonValue(QJsonValue::Undefined)};
+    if (seed != 0) {
+        options = {"options", QJsonObject({{"seed", {seed}}})};
+    }
+
     if (m_chromadb.connected()) {
         connect(&m_chromadb, &ChromaDb::embeddingsFound, this, [=](auto embeddings) {
             QString documents;
@@ -83,14 +89,19 @@ void QLlmAiModel::pushData(QVariantList data)
             qDebug() << q;
             sendRequest(&m_restApi,
                         m_ollama_url_base + "generate",
-                        QJsonObject({{"model", m_owner->model()}, {"prompt", q}, {"stream", false}}),
+                        QJsonObject({{"model", m_owner->model()},
+                                     {"prompt", q},
+                                     {"stream", false},
+                                     options}),
                         this,
                         promptResponseReceived);
         }, Qt::SingleShotConnection);
 
         sendRequest(&m_restApi,
                     m_ollama_url_base + "embed",
-                    QJsonObject({{"model", m_owner->model()}, {"input", query}}),
+                    QJsonObject({{"model", m_owner->model()},
+                                 {"input", query},
+                                options}),
                     this,
                     [this](auto json) {
                         m_chromadb.fetchEmbeddings(json.object()["embeddings"].toArray().toVariantList());
@@ -101,7 +112,8 @@ void QLlmAiModel::pushData(QVariantList data)
                     QJsonObject({{"model", m_owner->model()},
                                  {"prompt", query},
                                  {"stream", false},
-                                 {"images", images}}),
+                                 {"images", images},
+                                 options}),
                     this,
                     promptResponseReceived);
     }
diff --git a/aimodel/plugins/ollama/qllmaimodel_p.h b/aimodel/plugins/ollama/qllmaimodel_p.h
index e70180f..fc62d74 100644
--- a/aimodel/plugins/ollama/qllmaimodel_p.h
+++ b/aimodel/plugins/ollama/qllmaimodel_p.h
@@ -14,7 +14,7 @@ class QLlmAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QLlmAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
     void setRag(QVariantList data) override;
 
 private:
diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
index 107f800..63d43f3 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
@@ -17,7 +17,7 @@ QPiperAiModel::QPiperAiModel()
     m_player.setAudioOutput(&m_audioOutput);
 }
 
-void QPiperAiModel::pushData(QVariantList data)
+void QPiperAiModel::pushData(QVariantList data, int seed)
 {
     qDebug() << "QPiperAiModel::pushData(): data:" << data;
 
diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
index 5515abb..a220ad2 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
@@ -15,7 +15,7 @@ class QPiperAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QPiperAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
     QNetworkAccessManager m_manager;
diff --git a/aimodel/plugins/triton/qtritonmodel_p.cpp b/aimodel/plugins/triton/qtritonmodel_p.cpp
index bd620bb..741e58c 100644
--- a/aimodel/plugins/triton/qtritonmodel_p.cpp
+++ b/aimodel/plugins/triton/qtritonmodel_p.cpp
@@ -151,7 +151,7 @@ Preprocess(
 
 
 // KServe (Open Inference Protocol API)
-void QTritonModel::pushData(QVariantList data)
+void QTritonModel::pushData(QVariantList data, int seed)
 {
     // Load the specified image.
     std::ifstream file(data.first().toByteArray().toStdString());
diff --git a/aimodel/plugins/triton/qtritonmodel_p.h b/aimodel/plugins/triton/qtritonmodel_p.h
index 04f0127..ae5c16c 100644
--- a/aimodel/plugins/triton/qtritonmodel_p.h
+++ b/aimodel/plugins/triton/qtritonmodel_p.h
@@ -13,7 +13,7 @@ class QTritonModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QTritonModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
 
diff --git a/aimodel/plugins/yolo/qyoloaimodel.cpp b/aimodel/plugins/yolo/qyoloaimodel.cpp
index 59b7224..b367b98 100644
--- a/aimodel/plugins/yolo/qyoloaimodel.cpp
+++ b/aimodel/plugins/yolo/qyoloaimodel.cpp
@@ -19,7 +19,7 @@ QYoloAiModel::QYoloAiModel()
 
 }
 
-void QYoloAiModel::pushData(QVariantList data)
+void QYoloAiModel::pushData(QVariantList data, int seed)
 {
     QImage image;
     if (data.first().canConvert<QImage>()) {
diff --git a/aimodel/plugins/yolo/qyoloaimodel.h b/aimodel/plugins/yolo/qyoloaimodel.h
index e32c3f8..92f63fe 100644
--- a/aimodel/plugins/yolo/qyoloaimodel.h
+++ b/aimodel/plugins/yolo/qyoloaimodel.h
@@ -13,7 +13,7 @@ class QYoloAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QYoloAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
     QNetworkAccessManager m_manager;
diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp
index 1a57681..89b86cc 100644
--- a/aimodel/qaimodel.cpp
+++ b/aimodel/qaimodel.cpp
@@ -12,7 +12,6 @@
 
 QAiModel::QAiModel()
 {
-    qRegisterMetaType<AiModelPrivateInterface::AiModelTypes>();
 }
 
 AiModelPrivateInterface::AiModelTypes QAiModel::type() const
@@ -43,7 +42,6 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType)
     for (auto *plugin : staticPlugins)
         qDebug() << "Static plugin: " << plugin;
 
-
     m_interface.clear();
     for (auto &&pluginDir : QCoreApplication::libraryPaths()) {
         QDir aiModelPluginsDir(pluginDir + "/aimodel");
@@ -165,6 +163,19 @@ void QAiModel::processCombinedData(QVariant data)
     }
 }
 
+int QAiModel::seed() const
+{
+    return m_seed;
+}
+
+void QAiModel::setSeed(int newSeed)
+{
+    if (m_seed == newSeed)
+        return;
+    m_seed = newSeed;
+    emit seedChanged();
+}
+
 void QAiModel::dataReceived(QVariant data)
 {
     qDebug() << interface() << ":" << __func__ << "(): data:" << data.typeName();
@@ -189,30 +200,30 @@ void QAiModel::pushData(QVariant data)
         m_buffer.clear();
         m_processing = true;
         emit processingChanged();
+        qDebug() << data.typeName() << data;
         if (QByteArrayView(data.typeName()) == "QVariantList")
-            interface()->pushData(data.value<QVariantList>());
+            interface()->pushData(data.value<QVariantList>(), m_seed);
         else
-            interface()->pushData({data});
+            interface()->pushData({data}, m_seed);
     }
 }
 
-
-QVariantList QAiModel::rag() const
+QVariantList QAiModel::documents() const
 {
-    return m_rag;
+    return m_documents;
 }
 
-void QAiModel::setRag(const QVariantList &newRag)
+void QAiModel::setDocuments(const QVariantList &newDocuments)
 {
-    if (m_rag == newRag)
+    if (m_documents == newDocuments)
         return;
-    m_rag = newRag;
-    qDebug() << newRag << m_output;
+    m_documents = newDocuments;
+    qDebug() << newDocuments << m_output;
     if (!m_interface.isNull()) {
-        m_interface->setRag(newRag);
+        m_interface->setRag(newDocuments);
     }
 
-    emit ragChanged();
+    emit documentsChanged();
 }
 
 
diff --git a/aimodel/qaimodel.h b/aimodel/qaimodel.h
index 620bbc7..4df08b2 100644
--- a/aimodel/qaimodel.h
+++ b/aimodel/qaimodel.h
@@ -23,7 +23,7 @@ class QAiModel : public QObject
     Q_PROPERTY(
         QString model READ model WRITE setModel NOTIFY modelChanged FINAL)
     Q_PROPERTY(
-        QVariantList rag READ rag WRITE setRag NOTIFY ragChanged FINAL)
+        QVariantList documents READ documents WRITE setDocuments NOTIFY documentsChanged FINAL)
     Q_PROPERTY(
         QVector<QAiModel*> inputs READ inputs WRITE setInputs NOTIFY inputsChanged FINAL)
     Q_PROPERTY(
@@ -32,6 +32,8 @@ class QAiModel : public QObject
         bool buffered READ buffered WRITE setBuffered NOTIFY bufferedChanged FINAL)
     Q_PROPERTY(
         bool optional READ optional WRITE setOptional NOTIFY optionalChanged FINAL)
+    Q_PROPERTY(
+        int seed READ seed WRITE setSeed NOTIFY seedChanged FINAL)
 
 public:
 
@@ -51,8 +53,8 @@ public:
     QVector<QAiModel*> inputs() const;
     void setInputs(QVector<QAiModel*>newInputs);
 
-    QVariantList rag() const;
-    void setRag(const QVariantList &newRag);
+    QVariantList documents() const;
+    void setDocuments(const QVariantList &newDocuments);
 
     Q_INVOKABLE void pushData(QVariant data);
     Q_INVOKABLE void clearBuffer();
@@ -66,7 +68,7 @@ signals:
 
     void inputsChanged();
 
-    void ragChanged();
+    void documentsChanged();
 
     void processingChanged();
 
@@ -76,6 +78,8 @@ signals:
 
     void optionalChanged();
 
+    void seedChanged();
+
 private Q_SLOTS:
     void dataReceived(QVariant data);
 
@@ -88,6 +92,9 @@ public:
     bool optional() const;
     void setOptional(bool newOptional);
 
+    int seed() const;
+    void setSeed(int newSeed);
+
 private:
     void processCombinedData(QVariant data);
 
@@ -98,9 +105,10 @@ private:
     QString m_inputModel{};
     QAiModel* m_output{nullptr};
     QSharedPointer<AiModelPrivateInterface> m_interface;
-    QVariantList m_rag;
+    QVariantList m_documents;
     AiModelPrivateInterface::AiModelTypes m_type;
     QVariant m_buffer;  // TODO: replace with QVariant
+    int m_seed{0};
     bool m_processing {false};
     bool m_buffered {false};
     bool m_optional {false};
diff --git a/aimodel/qaimodelinterface_p.h b/aimodel/qaimodelinterface_p.h
index 177cc11..d8144bb 100644
--- a/aimodel/qaimodelinterface_p.h
+++ b/aimodel/qaimodelinterface_p.h
@@ -33,7 +33,7 @@ public:
     void init(QAiModel *owner) { m_owner = owner;}
     virtual ~AiModelPrivateInterface() {}
 
-    virtual void pushData(QVariantList data) = 0;
+    virtual void pushData(QVariantList data, int seed) = 0;
     virtual void setRag(QVariantList data) {}
 
 
diff --git a/tests/app/Screen01.ui.qml b/tests/app/Screen01.ui.qml
index a812ab4..511cdde 100644
--- a/tests/app/Screen01.ui.qml
+++ b/tests/app/Screen01.ui.qml
@@ -154,7 +154,7 @@ Rectangle {
 
                     implicitWidth: 300
                     implicitHeight: 100
-                    onEditingFinished: llamaModel.rag = [text]
+                    onEditingFinished: llamaModel.documents = [text]
                 }
                 MultiModal {
                     id: llamaModel
-- 
GitLab


From 6ddbe96e465bcb49ac8a40ce844e3ce62af9eb37 Mon Sep 17 00:00:00 2001
From: Alex Bu <alex.bu@qt.io>
Date: Tue, 15 Apr 2025 14:59:16 +0300
Subject: [PATCH 06/14] piper tts: use raw data rather than saving to temp wav
 file

add MIT license to piper server.
add qt tts model back to readme.
---
 README.md                                     |  1 +
 aimodel/plugins/piper-tts/qpiperaimodel_p.cpp | 31 ++++++++-------
 aimodel/plugins/piper-tts/qpiperaimodel_p.h   | 16 ++++----
 aimodel/tts_server/piper_server.py            | 38 +++++++++++++------
 4 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 8820be2..a54fd99 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ When you declare a model in your code, Qt will infer from the given input and ou
 | Speech     | Text        | QtAsrModel    | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text        |
 | Image      | Json        | QtTritonModel | Uses Triton to load a model for object detection from images                  |
 | Image      | Json        | QtYoloModel   | Uses a YOLO model for object detection from images                            |
+| Text       | Speech      | QtTtsModel    | Uses QtTextToSpeech (QtSpeech) to convert text into speech                    |
 | Text       | Speech      | QtPiperModel  | Uses Piper TTS model to convert text into speech                              |
 
 Note, the Qt backends expect the underlying backend implementation (ollama, Whisper...) to be running, and will not take care of starting them up for you. You need to start them yourself, e.g. in the case of QtOllamaModel, loading the intended model to ollama's memory by running:
diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
index 63d43f3..b773803 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
@@ -1,23 +1,25 @@
-// Copyright (C) 2025 The Qt Company Ltd.
-// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
-
 #include "qpiperaimodel_p.h"
 #include "qaimodel.h"
 #include <QJsonDocument>
 #include <QJsonObject>
 #include <QNetworkReply>
 #include <QRestReply>
-#include <QFile>
+#include <QMediaDevices>
 
 QPiperAiModel::QPiperAiModel()
     : AiModelPrivateInterface()
     , m_manager(this)
     , m_restApi(&m_manager)
 {
-    m_player.setAudioOutput(&m_audioOutput);
+    m_audioFormat.setSampleRate(22050);
+    m_audioFormat.setChannelCount(1);
+    m_audioFormat.setSampleFormat(QAudioFormat::Int16);
+
+    m_audioSink.reset(new QAudioSink(QMediaDevices::defaultAudioOutput(),m_audioFormat));
+    m_audioBuffer.reset(new QBuffer);
 }
 
-void QPiperAiModel::pushData(QVariantList data, int seed)
+void QPiperAiModel::pushData(QVariantList data)
 {
     qDebug() << "QPiperAiModel::pushData(): data:" << data;
 
@@ -34,17 +36,14 @@ void QPiperAiModel::pushData(QVariantList data, int seed)
     doc.setObject(obj);
     m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) {
         if (auto json = reply.readJson()) {
-            //qDebug() << "[\"response\"]=" << json->object()["response"].toString();
-            //emit dataReceived(json->object()["response"].toString().toUtf8());
-            QFile file("test.wav");
-            file.open(QIODevice::WriteOnly);
-            file.write(QByteArray::fromBase64(json->object()["response"].toString().toUtf8()));
-            file.close();
+            m_audioSink->reset();
+            m_audioBuffer->close();
+            m_audioBuffer->setData(QByteArray::fromBase64(json->object()["response"].toString().toUtf8()));
+            m_audioBuffer->open(QIODevice::ReadOnly);
 
-            m_player.stop();
-            m_player.setSource(QUrl::fromLocalFile("test.wav"));
-            m_player.play();
+            // Start playback
+            m_audioSink->start(m_audioBuffer.data());
         }
     });
-
+    emit dataReceived(data.first().toByteArray());
 }
diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
index a220ad2..8431333 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
@@ -1,13 +1,12 @@
-// Copyright (C) 2025 The Qt Company Ltd.
-// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
-
 #ifndef QTEXT2SPEEHCAIMODEL_P_H
 #define QTEXT2SPEEHCAIMODEL_P_H
 
 #include <QObject>
 #include <QRestAccessManager>
-#include <QMediaPlayer>
-#include <QAudioOutput>
+#include <QAudioSink>
+#include <QAudioFormat>
+#include <QScopedPointer>
+#include <QBuffer>
 #include "qaimodelinterface_p.h"
 
 class QPiperAiModel : public AiModelPrivateInterface
@@ -15,13 +14,14 @@ class QPiperAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QPiperAiModel();
-    void pushData(QVariantList data, int seed) override;
+    void pushData(QVariantList data) override;
 
 private:
     QNetworkAccessManager m_manager;
     QRestAccessManager m_restApi;
-    QMediaPlayer m_player;
-    QAudioOutput m_audioOutput;
+    QScopedPointer<QBuffer> m_audioBuffer;
+    QScopedPointer<QAudioSink> m_audioSink;
+    QAudioFormat m_audioFormat;
 };
 
 
diff --git a/aimodel/tts_server/piper_server.py b/aimodel/tts_server/piper_server.py
index 97328a6..53d9597 100644
--- a/aimodel/tts_server/piper_server.py
+++ b/aimodel/tts_server/piper_server.py
@@ -1,7 +1,25 @@
 #!/usr/bin/env python3
-
-# Copyright (C) 2025 The Qt Company Ltd.
-# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only
+#
+# MIT License
+# 
+# Copyright (c) 2022 Michael Hansen
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy of 
+# this software and associated documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation the rights to use, 
+# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
+# Software, and to permit persons to whom the Software is furnished to do so, 
+# subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all 
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
+# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 import argparse
 import io
@@ -113,15 +131,11 @@ def main() -> None:
 
         _LOGGER.debug("get text: %s", text["text"])
         json_response = {}
-        with io.BytesIO() as wav_io:
-            with wave.open(wav_io, "wb") as wav_file:
-                voice.synthesize(text["text"], wav_file, **synthesize_args)
-
-            json_response["response"] = base64.b64encode(wav_io.getvalue()).decode("utf-8")
-        #audio_str = "";
-        #for audio_bytes in voice.synthesize_stream_raw(text["text"], **synthesize_args):
-        #    audio_str += (base64.b64encode(audio_bytes).decode("utf-8"))
-        #json_response["response"] = audio_str
+        audio_chunks = []  # raw audio chunks; joined once below instead of re-copying bytes on every iteration
+        for audio_bytes in voice.synthesize_stream_raw(text["text"], **synthesize_args):
+            _LOGGER.debug("audio chunk: %d bytes", len(audio_bytes))
+            audio_chunks.append(audio_bytes)
+        json_response["response"] = base64.b64encode(b"".join(audio_chunks)).decode("utf-8")
         return jsonify(json_response)
 
     app.run(host=args.host, port=args.port)
-- 
GitLab


From 673a474c5004a638c4d3882e08558e02aaf146bf Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Tue, 15 Apr 2025 17:57:27 +0300
Subject: [PATCH 07/14] Fix application startup on windows

---
 aimodel/CMakeLists.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/aimodel/CMakeLists.txt b/aimodel/CMakeLists.txt
index 605edc9..be119e7 100644
--- a/aimodel/CMakeLists.txt
+++ b/aimodel/CMakeLists.txt
@@ -19,7 +19,7 @@ add_subdirectory(plugins)
 find_package(Qt6 6.8 REQUIRED COMPONENTS Core Qml Quick Network)
 qt_standard_project_setup(REQUIRES 6.8)
 
-qt_add_library(QtAiModelPluginInterface
+qt_add_library(QtAiModelPluginInterface SHARED
     qaimodelinterface_p.h
     chromadb.h chromadb.cpp
 )
@@ -29,15 +29,13 @@ target_link_libraries(QtAiModelPluginInterface
         Qt6::Network
 )
 
-qt_add_qml_module(QtAiModelApi
+qt_add_qml_module(QtAiModelApi STATIC
     URI qtaimodel
     VERSION 1.0
-    SHARED
     SOURCES
         qaimodel.h qaimodel.cpp
     )
 
-qt_import_qml_plugins(QtAiModelApi)
 
 target_link_libraries(QtAiModelApi
     PRIVATE
-- 
GitLab


From f7c6bd40c1c9baf78838db5b511ff39221d4b2fe Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Tue, 15 Apr 2025 17:59:03 +0300
Subject: [PATCH 08/14] Fix piper plugin compilation

---
 aimodel/plugins/piper-tts/qpiperaimodel_p.cpp | 3 ++-
 aimodel/plugins/piper-tts/qpiperaimodel_p.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
index b773803..1970554 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.cpp
@@ -19,7 +19,8 @@ QPiperAiModel::QPiperAiModel()
     m_audioBuffer.reset(new QBuffer);
 }
 
-void QPiperAiModel::pushData(QVariantList data)
+void QPiperAiModel::pushData(
+    QVariantList data, int seed)
 {
     qDebug() << "QPiperAiModel::pushData(): data:" << data;
 
diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
index 8431333..c1b266f 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
@@ -14,7 +14,7 @@ class QPiperAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QPiperAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
 private:
     QNetworkAccessManager m_manager;
-- 
GitLab


From c527a3ae478124e1ae8b9b26a4ef37ef3dd80e0c Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Wed, 16 Apr 2025 12:02:07 +0300
Subject: [PATCH 09/14] Fix buggy debug print

---
 aimodel/plugins/piper-tts/qpiperaimodel_p.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/aimodel/plugins/piper-tts/qpiperaimodel_p.h b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
index c1b266f..bd60c6a 100644
--- a/aimodel/plugins/piper-tts/qpiperaimodel_p.h
+++ b/aimodel/plugins/piper-tts/qpiperaimodel_p.h
@@ -24,8 +24,7 @@ private:
     QAudioFormat m_audioFormat;
 };
 
-
-class QLlmAiModelPlugin : public QAiModelPluginFactory
+class QPiperTtsAiModelPlugin : public QAiModelPluginFactory
 {
     Q_OBJECT
     Q_PLUGIN_METADATA(IID "org.qt-project.Qt.QAiModelPluginFactory/1.0" FILE "plugin.json")
-- 
GitLab


From c45182695ce16e9740f063504a91ca166cec8bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tinja=20Paavosepp=C3=A4?= <tinja.paavoseppa@qt.io>
Date: Thu, 24 Apr 2025 08:33:41 +0300
Subject: [PATCH 10/14] Add the Qt export macro to the C++ library

---
 aimodel/CMakeLists.txt        | 5 ++++-
 aimodel/chromadb.h            | 3 ++-
 aimodel/qaimodelinterface_p.h | 5 +++--
 aimodel/qtaiapiexports_p.h    | 7 +++++++
 4 files changed, 16 insertions(+), 4 deletions(-)
 create mode 100644 aimodel/qtaiapiexports_p.h

diff --git a/aimodel/CMakeLists.txt b/aimodel/CMakeLists.txt
index be119e7..5733bb4 100644
--- a/aimodel/CMakeLists.txt
+++ b/aimodel/CMakeLists.txt
@@ -20,9 +20,12 @@ find_package(Qt6 6.8 REQUIRED COMPONENTS Core Qml Quick Network)
 qt_standard_project_setup(REQUIRES 6.8)
 
 qt_add_library(QtAiModelPluginInterface SHARED
-    qaimodelinterface_p.h
+    qaimodelinterface_p.h qtaiapiexports_p.h
     chromadb.h chromadb.cpp
 )
+
+target_compile_definitions(QtAiModelPluginInterface PRIVATE QTAIAPI_LIBRARY)
+
 target_link_libraries(QtAiModelPluginInterface
     PRIVATE
         Qt6::Core
diff --git a/aimodel/chromadb.h b/aimodel/chromadb.h
index 11b2906..efee875 100644
--- a/aimodel/chromadb.h
+++ b/aimodel/chromadb.h
@@ -6,8 +6,9 @@
 
 #include <QObject>
 #include <QRestAccessManager>
+#include "qtaiapiexports_p.h"
 
-class ChromaDb : public QObject
+class QTAIAPI_EXPORT ChromaDb : public QObject
 {
     Q_OBJECT
     Q_PROPERTY(bool connected READ connected WRITE connect NOTIFY connectedChanged FINAL)
diff --git a/aimodel/qaimodelinterface_p.h b/aimodel/qaimodelinterface_p.h
index d8144bb..4d7742e 100644
--- a/aimodel/qaimodelinterface_p.h
+++ b/aimodel/qaimodelinterface_p.h
@@ -6,10 +6,11 @@
 
 #include <QObject>
 #include <QVariant>
+#include "qtaiapiexports_p.h"
 
 class QAiModel;
 
-class AiModelPrivateInterface : public QObject
+class QTAIAPI_EXPORT AiModelPrivateInterface : public QObject
 {
     Q_OBJECT
 public:
@@ -47,7 +48,7 @@ public:
 };
 Q_DECLARE_OPERATORS_FOR_FLAGS(AiModelPrivateInterface::AiModelTypes)
 
-class QAiModelPluginFactory : public QObject
+class QTAIAPI_EXPORT QAiModelPluginFactory : public QObject
 {
     Q_OBJECT
 public:
diff --git a/aimodel/qtaiapiexports_p.h b/aimodel/qtaiapiexports_p.h
new file mode 100644
index 0000000..7b94d78
--- /dev/null
+++ b/aimodel/qtaiapiexports_p.h
@@ -0,0 +1,7 @@
+#include <QtCore/QtGlobal>
+
+#if defined(QTAIAPI_LIBRARY)
+#  define QTAIAPI_EXPORT Q_DECL_EXPORT
+#else
+#  define QTAIAPI_EXPORT Q_DECL_IMPORT
+#endif
-- 
GitLab


From 2aff06320d00df058b2fd3205bbe94110ba91eb2 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Fri, 2 May 2025 06:56:08 +0000
Subject: [PATCH 11/14] Documentation up-to-date

---
 README.md | 58 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index a54fd99..a3bbef8 100644
--- a/README.md
+++ b/README.md
@@ -9,14 +9,15 @@ This API is in proof-of-concept stage and under active development, and not yet
 
 When you declare a model in your code, Qt will infer from the given input and output type what backend it will set up for the model. The backends are implemented as QPlugins. Currently, the backends are:
 
-| Input type | Output type | Qt backend    | Description                                                                   |
-|------------|-------------|---------------|-------------------------------------------------------------------------------|
-| Text\|Image| Text        | QtOllamaModel | Uses ollama to load LLM models and communicate to them with ollama's REST API |
-| Speech     | Text        | QtAsrModel    | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text        |
-| Image      | Json        | QtTritonModel | Uses Triton to load a model for object detection from images                  |
-| Image      | Json        | QtYoloModel   | Uses a YOLO model for object detection from images                            |
-| Text       | Speech      | QtTtsModel    | Uses QtTextToSpeech (QtSpeech) to convert text into speech                    |
-| Text       | Speech      | QtPiperModel  | Uses Piper TTS model to convert text into speech                              |
+| Input type | Output type | Qt backend      | Description                                                                   |
+|------------|-------------|-----------------|-------------------------------------------------------------------------------|
+| Text\|Image| Text        | QtOllamaModel   | Uses ollama to load LLM models and communicate to them with ollama's REST API |
+| Speech     | Text        | QtAsrModel      | Uses Whisper for Automatic Speech Recognition (ASR), or speech-to-text        |
+| Image      | Json        | QtTritonModel   | Uses Triton to load a model for object detection from images                  |
+| Image      | Json        | QtYoloModel     | Uses a YOLO model for object detection from images                            |
+| Text       | Speech      | QtTtsModel      | Uses QtTextToSpeech (QtSpeech) to convert text into speech                    |
+| Text       | Speech      | QtPiperModel    | Uses Piper TTS model to convert text into speech                              |
+| Text       | Image       | QtDiffuserModel | Uses Diffusers to convert text into images                                    |
 
 Note, the Qt backends expect the underlying backend implementation (ollama, Whisper...) to be running, and will not take care of starting them up for you. You need to start them yourself, e.g. in the case of QtOllamaModel, loading the intended model to ollama's memory by running:
 ```
@@ -55,10 +56,10 @@ A combination of AiModelType flags to tell what type of model to instantiate. Po
 | InputImage  | 0x00008 | The model takes image as input                                           |
 | InputJson   | 0x00010 | The model takes JSON as input                                            |
 | OutputText  | 0x00100 | The model outputs text                                                   |
-| OutputAudio | 0x00200 |The model outputs speech                                                  |
-| OutputVideo | 0x00400 |The model outputs video                                                   |
-| OutputImage | 0x00800 |The model outputs image                                                   |
-| OutputJson  | 0x01000 |The model outputs JSON                                                    |
+| OutputAudio | 0x00200 | The model outputs speech                                                 |
+| OutputVideo | 0x00400 | The model outputs video                                                  |
+| OutputImage | 0x00800 | The model outputs image                                                  |
+| OutputJson  | 0x01000 | The model outputs JSON                                                   |
 
 For supported input-output combinations, see the table under "How it works" section.
 
@@ -125,7 +126,7 @@ MultiModal {
 | Read method:     | QString model()                                     |
 | Notifier signal: | void modelChanged()                                 |
 
-**QVariantList rag**
+**QVariantList documents**
 
 Retrieval-Augmented Generation data to use for the model, if it supports it. RAG supports currently only chromadb, which should be running on background.
 
@@ -138,7 +139,7 @@ import qtaimodel
         type: (MultiModal.InputText | MultiModal.OutputText)
         model: "llama3.2"
         prompt: "Which item has best armor bonus?"
-        rag: ["Cloth of Authority | Armour Class +1",
+        documents: ["Cloth of Authority | Armour Class +1",
               "Drunken Cloth |  Constitution +2 (up to 20)",
               "Icebite Robe | Resistance to Damage Types: Cold damage.",
               "Obsidian Laced Robe | Grants Resistance to Damage Types: Fire damage.",
@@ -149,9 +150,32 @@ import qtaimodel
 
 |                  |                                                     |
 |------------------|-----------------------------------------------------|
-| Write method:    | void setRag(QByteArray)                             |
-| Read method:     | QByteArray rag()                                    |
-| Notifier signal: | void ragChanged()                                   |
+| Write method:    | void setDocuments(QByteArray)                       |
+| Read method:     | QByteArray documents()                              |
+| Notifier signal: | void documentsChanged()                             |
+
+**int seed**
+
+Seed to use with model prompts. Seed reduces randomness in model answers.
+
+Example:
+```
+import qtaimodel
+
+    MultiModal {
+        id: llamaModel
+        type: (MultiModal.InputText | MultiModal.OutputText)
+        model: "gemma3"
+        prompt: "Say hello?"
+        seed: 3453654
+    }
+```
+
+|                  |                                                     |
+|------------------|-----------------------------------------------------|
+| Write method:    | void setSeed(int)                                   |
+| Read method:     | int seed()                                          |
+| Notifier signal: | void seedChanged()                                  |
 
 **QVector<QAiModel*> inputs**
 
-- 
GitLab


From 17aa780935ce82bccec2e22f35c1684e89d5ed19 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Fri, 2 May 2025 06:58:58 +0000
Subject: [PATCH 12/14] Fixes to documentation

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a3bbef8..380ce8e 100644
--- a/README.md
+++ b/README.md
@@ -150,8 +150,8 @@ import qtaimodel
 
 |                  |                                                     |
 |------------------|-----------------------------------------------------|
-| Write method:    | void setDocuments(QByteArray)                       |
-| Read method:     | QByteArray documents()                              |
+| Write method:    | void setDocuments(QVariantList)                     |
+| Read method:     | QVariantList documents()                            |
 | Notifier signal: | void documentsChanged()                             |
 
 **int seed**
@@ -177,7 +177,7 @@ import qtaimodel
 | Read method:     | int seed()                                          |
 | Notifier signal: | void seedChanged()                                  |
 
-**QVector<QAiModel*> inputs**
+**QVector<QAiModel\*> inputs**
 
 A list of models this model will use as its inputs. This allows for chaining models together to create pipelines. You can use the Optional flag with the model's type to tell whether it's an optional or mandatory input. For mandatory inputs, this model will not process any other inputs before the mandatory one has something to offer. For optional ones, other inputs will be processed regardless if that input has data available or not.
 
-- 
GitLab


From 08ef1bf34fbed237fe771d867afa7ce2d2dfd154 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Fri, 2 May 2025 12:56:27 +0000
Subject: [PATCH 13/14] Remove unneeded file

---
 tests/app/CMakeLists copy.txt | 48 -----------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 tests/app/CMakeLists copy.txt

diff --git a/tests/app/CMakeLists copy.txt b/tests/app/CMakeLists copy.txt
deleted file mode 100644
index a6ba292..0000000
--- a/tests/app/CMakeLists copy.txt	
+++ /dev/null
@@ -1,48 +0,0 @@
-
-cmake_minimum_required(VERSION 3.21.1)
-
-add_subdirectory(aimodel)
-
-project(QtAiInferenceApi LANGUAGES CXX)
-
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
-
-set(CMAKE_AUTOMOC ON)
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
-set(QT_QML_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/qml)
-set(QML_IMPORT_PATH ${QT_QML_OUTPUT_DIRECTORY}
-    CACHE STRING "Import paths for Qt Creator's code model"
-    FORCE
-)
-
-find_package(Qt6 6.8 REQUIRED COMPONENTS Core Gui Qml Quick Multimedia)
-qt_standard_project_setup(REQUIRES 6.8)
-
-qt_add_executable(${CMAKE_PROJECT_NAME}
-    main.cpp
-)
-
-qt_add_qml_module(${CMAKE_PROJECT_NAME}
-    URI qtaiinferenceapi
-    VERSION 1.0
-    RESOURCES
-        qtquickcontrols2.conf
-    QML_FILES
-        App.qml
-        Screen01.ui.qml
-    )
-
-target_link_libraries(${CMAKE_PROJECT_NAME}
-    PRIVATE
-        Qt6::Quick
-        Qt6::Multimedia
-        QtAiModelApi
-)
-
-
-include(GNUInstallDirs)
-install(TARGETS ${CMAKE_PROJECT_NAME}
-  BUNDLE DESTINATION .
-  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
-)
-- 
GitLab


From 3e61cfaeb6b708e2e9b1c309736f9cb114f391c5 Mon Sep 17 00:00:00 2001
From: Jari Helaakoski <jari.helaakoski@qt.io>
Date: Sat, 3 May 2025 23:02:46 +0300
Subject: [PATCH 14/14] Fixes for windows

---
 aimodel/plugins/asr/qasraimodel_p.cpp         | 5 +++--
 aimodel/plugins/ollama/qllmaimodel_p.cpp      | 2 +-
 aimodel/plugins/piper-tts/plugin.json         | 2 +-
 aimodel/plugins/tts/qtext2speechaimodel_p.cpp | 2 +-
 aimodel/plugins/tts/qtext2speechaimodel_p.h   | 2 +-
 aimodel/qaimodel.cpp                          | 2 +-
 6 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/aimodel/plugins/asr/qasraimodel_p.cpp b/aimodel/plugins/asr/qasraimodel_p.cpp
index d2d5a24..ae1a331 100644
--- a/aimodel/plugins/asr/qasraimodel_p.cpp
+++ b/aimodel/plugins/asr/qasraimodel_p.cpp
@@ -3,6 +3,7 @@
 
 #include "qaimodel.h"
 #include "qasraimodel_p.h"
+#include <QDir>
 #include <QJsonDocument>
 #include <QJsonObject>
 #include <QNetworkReply>
@@ -20,7 +21,7 @@ void QAsrAiModel::pushData(QVariantList data, int seed)
     qDebug() << "QAsrAiModel::pushData(): data:" << data;
 
     if (data.isEmpty() || data.first().toUrl().isEmpty()) {
-        emit dataReceived(data.first().toUrl());
+        emit dataReceived(QString()); // no usable URL; data may be empty, so data.first() must not be called here
         return;
     }
 
@@ -29,7 +30,7 @@ void QAsrAiModel::pushData(QVariantList data, int seed)
     QJsonDocument doc;
     QJsonObject obj = doc.object();
     obj["model"] = m_owner->model();
-    obj["file"] = data.first().toUrl().path();
+    obj["file"] = data.first().toUrl().toLocalFile();
     //obj["stream"] = false;
     doc.setObject(obj);
     m_restApi.post(request, doc.toJson(), this, [this](QRestReply &reply) {
diff --git a/aimodel/plugins/ollama/qllmaimodel_p.cpp b/aimodel/plugins/ollama/qllmaimodel_p.cpp
index b6caad4..50eb4a2 100644
--- a/aimodel/plugins/ollama/qllmaimodel_p.cpp
+++ b/aimodel/plugins/ollama/qllmaimodel_p.cpp
@@ -55,7 +55,7 @@ void QLlmAiModel::pushData(QVariantList data, int seed)
         query.append(QString::fromLatin1(i.toByteArray()));
 
         if (i.canConvert<QUrl>()) {
-            QFile file(QUrl(i.toUrl()).path());
+            QFile file(QUrl(i.toUrl()).toLocalFile());
             if (file.open(QIODevice::ReadOnly) != 0) {
                 QByteArray ba = file.readAll();
                 QByteArray ba2 = ba.toBase64();
diff --git a/aimodel/plugins/piper-tts/plugin.json b/aimodel/plugins/piper-tts/plugin.json
index bb84b6f..a949a0e 100644
--- a/aimodel/plugins/piper-tts/plugin.json
+++ b/aimodel/plugins/piper-tts/plugin.json
@@ -1,3 +1,3 @@
-{ "name": "ttsplugin",
+{ "name": "ttspiperplugin",
   "supportedTypes": ["InputText", "OutputAudio"]
 }
diff --git a/aimodel/plugins/tts/qtext2speechaimodel_p.cpp b/aimodel/plugins/tts/qtext2speechaimodel_p.cpp
index da18908..505e52d 100644
--- a/aimodel/plugins/tts/qtext2speechaimodel_p.cpp
+++ b/aimodel/plugins/tts/qtext2speechaimodel_p.cpp
@@ -18,7 +18,7 @@ QText2SpeechAiModel::QText2SpeechAiModel()
     });
 }
 
-void QText2SpeechAiModel::pushData(QVariantList data)
+void QText2SpeechAiModel::pushData(QVariantList data, int seed)
 {
     m_speech->stop();
 
diff --git a/aimodel/plugins/tts/qtext2speechaimodel_p.h b/aimodel/plugins/tts/qtext2speechaimodel_p.h
index 2666fa8..0e7b438 100644
--- a/aimodel/plugins/tts/qtext2speechaimodel_p.h
+++ b/aimodel/plugins/tts/qtext2speechaimodel_p.h
@@ -15,7 +15,7 @@ class QText2SpeechAiModel : public AiModelPrivateInterface
     Q_OBJECT
 public:
     QText2SpeechAiModel();
-    void pushData(QVariantList data) override;
+    void pushData(QVariantList data, int seed) override;
 
     QSharedPointer<QTextToSpeech> m_speech;
 };
diff --git a/aimodel/qaimodel.cpp b/aimodel/qaimodel.cpp
index 89b86cc..8f81cdb 100644
--- a/aimodel/qaimodel.cpp
+++ b/aimodel/qaimodel.cpp
@@ -70,7 +70,7 @@ void QAiModel::setType(const AiModelPrivateInterface::AiModelTypes &newType)
                     m_interface->init(this);
                     break;
                 } else {
-                    qDebug() << "Could not convert" << instance << "to AiModelPrivateInterface*";
+                    qDebug() << "Could not convert" << instance << "to QAiModelPluginFactory*";
                 }
             }
     }
-- 
GitLab