diff --git a/src/BUILD b/src/BUILD index 80088630ff..2e7a295cc5 100644 --- a/src/BUILD +++ b/src/BUILD @@ -602,10 +602,6 @@ cc_library( deps = select({ "//:not_disable_python": [ "//src/python:libovmspythonmodule", - # Jinja template processing is done in Python - "//src/llm:llmcalculator", - "//src/llm:genai_servables", - "//src/llm:text_processor", ], "//:disable_python": [] }) + select({ @@ -613,7 +609,8 @@ cc_library( "//:not_disable_mediapipe" : [ "//src/llm:openai_completions_api_handler", "//src/embeddings:embeddingscalculator", - "//src/rerank:rerankcalculator",], + "//src/rerank:rerankcalculator", + "//src/llm:llmcalculator",], }) + select({ "//:enable_drogon": ["libdrogon_http_server"], "//conditions:default" : ["libnet_http_server"], @@ -2756,6 +2753,11 @@ cc_test( "test/get_mediapipe_graph_metadata_response_test.cpp", "test/mediapipe_framework_test.cpp", "test/http_openai_handler_test.cpp", + "test/llm/llmnode_test.cpp", + "test/llm/max_model_length_test.cpp", + "test/llm/text_streamer_test.cpp", + "test/llm/visual_language_model/complete_flow_test.cpp", + "test/llm/visual_language_model/initialization_test.cpp", ], "//:disable_mediapipe" : [ "test/disabled_mediapipe_test.cpp", @@ -2765,13 +2767,8 @@ cc_test( # OvmsPyTensor is currently not used in OVMS core and is just a base for the binding. # "test/python/ovms_py_tensor_test.cpp", "test/pythonnode_test.cpp", - # LLM logic uses Python for processing Jinja templates - "test/llm/llmnode_test.cpp", - "test/llm/max_model_length_test.cpp", + # LLM logic uses Python for processing Jinja templates when built with Python enabled "test/llm/llmtemplate_test.cpp", - "test/llm/text_streamer_test.cpp", - "test/llm/visual_language_model/complete_flow_test.cpp", - "test/llm/visual_language_model/initialization_test.cpp", ], "//:disable_python" : [], }), diff --git a/src/llm/BUILD b/src/llm/BUILD index 3f3ce60ed9..030655350e 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -16,9 +16,9 @@ load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library") load("//:common_settings.bzl", - "COMMON_STATIC_LIBS_COPTS", "COMMON_STATIC_LIBS_LINKOPTS", "COMMON_FUZZER_COPTS", "COMMON_FUZZER_LINKOPTS", "COMMON_LOCAL_DEFINES", "PYBIND_DEPS") + "COMMON_STATIC_LIBS_COPTS", "COMMON_STATIC_LIBS_LINKOPTS", "COMMON_FUZZER_COPTS", "COMMON_FUZZER_LINKOPTS", "COMMON_LOCAL_DEFINES", "PYBIND_DEPS", "COPTS_PYTHON") -COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + select({ +COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + COPTS_PYTHON + select({ "//conditions:default": [], "//:fuzzer_build" : COMMON_FUZZER_COPTS, }) @@ -92,13 +92,30 @@ cc_library( cc_library( name = "genai_servables", - hdrs = ["servable.hpp", "servable_initializer.hpp", - "language_model/continuous_batching/servable.hpp", "language_model/continuous_batching/llm_executor.hpp", "language_model/continuous_batching/servable_initializer.hpp", - "visual_language_model/continuous_batching/servable.hpp", "language_model/legacy/servable.hpp", "language_model/legacy/servable_initializer.hpp", "language_model/legacy/legacy_executor.hpp", - "visual_language_model/legacy/servable.hpp", "visual_language_model/legacy/servable_initializer.hpp", "visual_language_model/legacy/legacy_executor.hpp"], - srcs = ["servable.cpp", "servable_initializer.cpp", "language_model/continuous_batching/servable.cpp", "language_model/continuous_batching/servable_initializer.cpp", - "visual_language_model/continuous_batching/servable.cpp", "language_model/legacy/servable.cpp", 
"language_model/legacy/servable_initializer.cpp", "language_model/legacy/legacy_executor.cpp", - "visual_language_model/legacy/servable.cpp", "visual_language_model/legacy/servable_initializer.cpp", "visual_language_model/legacy/legacy_executor.cpp"], + hdrs = ["servable.hpp", + "servable_initializer.hpp", + "language_model/continuous_batching/servable.hpp", + "language_model/continuous_batching/llm_executor.hpp", + "language_model/continuous_batching/servable_initializer.hpp", + "visual_language_model/continuous_batching/servable.hpp", + "language_model/legacy/servable.hpp", + "language_model/legacy/servable_initializer.hpp", + "language_model/legacy/legacy_executor.hpp", + "visual_language_model/legacy/servable.hpp", + "visual_language_model/legacy/servable_initializer.hpp", + "visual_language_model/legacy/legacy_executor.hpp", + "text_utils.hpp"], + srcs = ["servable.cpp", + "servable_initializer.cpp", + "language_model/continuous_batching/servable.cpp", + "language_model/continuous_batching/servable_initializer.cpp", + "visual_language_model/continuous_batching/servable.cpp", + "language_model/legacy/servable.cpp", + "language_model/legacy/servable_initializer.cpp", + "language_model/legacy/legacy_executor.cpp", + "visual_language_model/legacy/servable.cpp", + "visual_language_model/legacy/servable_initializer.cpp", + "visual_language_model/legacy/legacy_executor.cpp"], deps = [ "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", @@ -110,14 +127,15 @@ cc_library( "//src:libovmsprofiler", "//src:libovmsfilesystem", "llmcalculator_cc_proto", - "//src/python:utils", - ":text_processor", ":openai_completions_api_handler", "//src:httppayload", "//src:libhttpclientconnection", - ] + PYBIND_DEPS + select({ + ] + select({ "//conditions:default": ["//third_party:genai", ":llm_engine"], "//:not_genai_bin" : [":llm_engine"], + }) + select({ + "//:disable_python": [], + "//:not_disable_python" : [":py_jinja_template_processor"], }), visibility = ["//visibility:public"], local_defines = COMMON_LOCAL_DEFINES, @@ -127,9 +145,9 @@ cc_library( ) cc_library( - name = "text_processor", - hdrs = ["text_processor.hpp"], - srcs = ["text_processor.cpp"], + name = "py_jinja_template_processor", + hdrs = ["py_jinja_template_processor.hpp"], + srcs = ["py_jinja_template_processor.cpp"], deps = ["@mediapipe//mediapipe/framework:calculator_framework", "//third_party:openvino", "//src:libovmslogging", diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index d1ee639762..ba9260bcab 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -26,6 +26,7 @@ #pragma warning(pop) #include "../http_payload.hpp" +#include "../logging.hpp" #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" diff --git a/src/llm/language_model/continuous_batching/servable.cpp b/src/llm/language_model/continuous_batching/servable.cpp index fa62fb8d5b..3ab4850bb8 100644 --- a/src/llm/language_model/continuous_batching/servable.cpp +++ b/src/llm/language_model/continuous_batching/servable.cpp @@ -33,7 +33,10 @@ #include "../../../http_payload.hpp" #include "../../../mediapipe_internal/mediapipe_utils.hpp" #include "../../apis/openai_completions.hpp" -#include "../../text_processor.hpp" +#include "../../text_utils.hpp" +#if (PYTHON_DISABLE == 0) +#include "../../py_jinja_template_processor.hpp" +#endif #include "llm_executor.hpp" #include "servable.hpp" diff --git 
a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index ff608b7ac9..905bc16b05 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -115,8 +115,9 @@ Status ContinuousBatchingServableInitializer::initializeExperimental(std::shared SPDLOG_ERROR("Error during llm node initialization for models_path: {}", parsedModelsPath); return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED; } - - loadTextProcessor(properties, parsedModelsPath); +#if (PYTHON_DISABLE == 0) + loadTemplateProcessor(properties, parsedModelsPath); +#endif if (nodeOptions.has_max_tokens_limit()) { properties->maxTokensLimit = nodeOptions.max_tokens_limit(); } @@ -133,15 +134,12 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr(servable->getProperties()); - properties->modelsPath = parsedModelsPath; - properties->schedulerConfig.max_num_batched_tokens = nodeOptions.max_num_batched_tokens(); properties->schedulerConfig.cache_size = nodeOptions.cache_size(); properties->schedulerConfig.dynamic_split_fuse = nodeOptions.dynamic_split_fuse(); properties->schedulerConfig.max_num_seqs = nodeOptions.max_num_seqs(); properties->schedulerConfig.enable_prefix_caching = nodeOptions.enable_prefix_caching(); - properties->device = nodeOptions.device(); properties->isSpeculativePipeline = false; @@ -163,7 +161,6 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptrpluginConfig.insert(draftPipeline); properties->isSpeculativePipeline = true; } else if (nodeOptions.has_draft_max_num_batched_tokens() || nodeOptions.has_draft_cache_size() || nodeOptions.has_draft_dynamic_split_fuse() || nodeOptions.has_draft_max_num_seqs() || nodeOptions.has_draft_block_size() || nodeOptions.has_draft_device()) { - // Consider moving draft parameters to separate structure in node options, so it's validated on the proto level SPDLOG_ERROR("Draft model path is not provided, but draft scheduler options are set."); return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED; } @@ -188,14 +185,16 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptrmaxTokensLimit = nodeOptions.max_tokens_limit(); } properties->bestOfLimit = nodeOptions.best_of_limit(); properties->maxModelLength = parseMaxModelLength(parsedModelsPath); - properties->llmExecutorWrapper = std::make_shared(properties->pipeline); + return StatusCode::OK; } diff --git a/src/llm/language_model/legacy/servable.cpp b/src/llm/language_model/legacy/servable.cpp index 7c3a76acaa..b09f00362d 100644 --- a/src/llm/language_model/legacy/servable.cpp +++ b/src/llm/language_model/legacy/servable.cpp @@ -33,7 +33,10 @@ #include "../../../http_payload.hpp" #include "../../../mediapipe_internal/mediapipe_utils.hpp" #include "../../apis/openai_completions.hpp" -#include "../../text_processor.hpp" +#include "../../text_utils.hpp" +#if (PYTHON_DISABLE == 0) +#include "../../py_jinja_template_processor.hpp" +#endif #include "servable.hpp" namespace ovms { diff --git a/src/llm/language_model/legacy/servable_initializer.cpp b/src/llm/language_model/legacy/servable_initializer.cpp index 10b61d9e39..e28f95bd57 100644 --- a/src/llm/language_model/legacy/servable_initializer.cpp +++ b/src/llm/language_model/legacy/servable_initializer.cpp @@ -96,8 +96,9 @@ Status LegacyServableInitializer::initialize(std::shared_ptr& ser SPDLOG_ERROR("Error during llm node 
initialization for models_path: {}", parsedModelsPath); return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED; } - - loadTextProcessor(properties, parsedModelsPath); +#if (PYTHON_DISABLE == 0) + loadTemplateProcessor(properties, parsedModelsPath); +#endif properties->legacyExecutor = std::make_shared(properties->pipeline); if (nodeOptions.has_max_tokens_limit()) { properties->maxTokensLimit = nodeOptions.max_tokens_limit(); diff --git a/src/llm/text_processor.cpp b/src/llm/py_jinja_template_processor.cpp similarity index 86% rename from src/llm/text_processor.cpp rename to src/llm/py_jinja_template_processor.cpp index ce13fbcdfb..e4e717d074 100644 --- a/src/llm/text_processor.cpp +++ b/src/llm/py_jinja_template_processor.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#include "text_processor.hpp" +#include "py_jinja_template_processor.hpp" #include #include @@ -35,16 +35,16 @@ namespace ovms { -bool TextProcessor::applyChatTemplate(TextProcessor& textProcessor, std::string modelsPath, const std::string& requestBody, std::string& output) { - if (textProcessor.chatTemplate == nullptr) { +bool PyJinjaTemplateProcessor::applyChatTemplate(PyJinjaTemplateProcessor& templateProcessor, std::string modelsPath, const std::string& requestBody, std::string& output) { + if (templateProcessor.chatTemplate == nullptr) { output = "Error: Chat template not loaded correctly, so it cannot be applied"; return false; } py::gil_scoped_acquire acquire; try { - auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = textProcessor.chatTemplate->getObject(), - "bos_token"_a = textProcessor.bosToken, "eos_token"_a = textProcessor.eosToken); + auto locals = py::dict("request_body"_a = requestBody, "chat_template"_a = templateProcessor.chatTemplate->getObject(), + "bos_token"_a = templateProcessor.bosToken, "eos_token"_a = templateProcessor.eosToken); py::exec(R"( output = "" error = "" diff --git a/src/llm/py_jinja_template_processor.hpp b/src/llm/py_jinja_template_processor.hpp new file mode 100644 index 0000000000..4837b69541 --- /dev/null +++ b/src/llm/py_jinja_template_processor.hpp @@ -0,0 +1,41 @@ +//***************************************************************************** +// Copyright 2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include + +#include +#pragma warning(push) +#pragma warning(disable : 6326 28182 6011 28020) +// Python execution for template processing +#include // everything needed for embedding +#include +#pragma warning(pop) + +#include "src/python/utils.hpp" + +namespace ovms { + +class PyJinjaTemplateProcessor { +public: + std::string bosToken = ""; + std::string eosToken = ""; + std::unique_ptr> chatTemplate = nullptr; + + static bool applyChatTemplate(PyJinjaTemplateProcessor& templateProcessor, std::string modelsPath, const std::string& requestBody, std::string& output); +}; +} // namespace ovms diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index 3818b12cfa..821c937d3c 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -32,7 +32,7 @@ #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" -#include "text_processor.hpp" +#include "text_utils.hpp" namespace ovms { absl::Status GenAiServable::loadRequest(std::shared_ptr& executionContext, const ovms::HttpPayload& payload) { @@ -87,15 +87,21 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptrendpoint) { case Endpoint::CHAT_COMPLETIONS: { +#if (PYTHON_DISABLE == 0) bool success; if (executionContext->apiHandler->getProcessedJson().size() > 0) { - success = TextProcessor::applyChatTemplate(getProperties()->textProcessor, getProperties()->modelsPath, executionContext->apiHandler->getProcessedJson(), inputText); + success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, executionContext->apiHandler->getProcessedJson(), inputText); } else { - success = TextProcessor::applyChatTemplate(getProperties()->textProcessor, getProperties()->modelsPath, executionContext->payload.body, inputText); + success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, executionContext->payload.body, inputText); } if (!success) { return absl::Status(absl::StatusCode::kInvalidArgument, inputText); } +#else + ov::genai::ChatHistory& chatHistory = executionContext->apiHandler->getChatHistory(); + constexpr bool add_generation_prompt = true; // confirm it should be hardcoded + inputText = getProperties()->tokenizer.apply_chat_template(chatHistory, add_generation_prompt); +#endif if (inputText.size() == 0) { return absl::Status(absl::StatusCode::kInvalidArgument, "Final prompt after applying chat template is empty"); } diff --git a/src/llm/servable.hpp b/src/llm/servable.hpp index da3cd1fdab..03eb66c41c 100644 --- a/src/llm/servable.hpp +++ b/src/llm/servable.hpp @@ -32,7 +32,9 @@ #include "../http_payload.hpp" #include "apis/openai_completions.hpp" -#include "text_processor.hpp" +#if (PYTHON_DISABLE == 0) +#include "py_jinja_template_processor.hpp" +#endif namespace ovms { // Some pipelines internals rely on request_id, so for now we provide increasing ID @@ -81,12 +83,14 @@ struct GenAiServableProperties { ov::AnyMap tokenizerPluginConfig; // Sampling limits std::optional maxTokensLimit; + std::optional maxModelLength; uint32_t bestOfLimit; bool isSpeculativePipeline; // sampling is generally common, but maybe we could avoid having this field at all // Text processing utilities ov::genai::Tokenizer tokenizer; - TextProcessor textProcessor; - std::optional maxModelLength; +#if (PYTHON_DISABLE == 0) + PyJinjaTemplateProcessor templateProcessor; +#endif }; class GenAiServable { 
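For context (not part of the patch): with this change, chat prompts are produced either by the embedded Python Jinja processor or, in Python-disabled builds, by OpenVINO GenAI's tokenizer, as the prepareInputs hunk above shows. A minimal standalone sketch of the Python-free path follows; the model directory and messages are placeholders, not values taken from the patch.

    #include <iostream>
    #include <string>

    #include <openvino/genai/tokenizer.hpp>

    int main() {
        // Hypothetical model directory containing the tokenizer artifacts and chat template.
        ov::genai::Tokenizer tokenizer("/models/llm");

        // ChatHistory is a vector of {"role": ..., "content": ...} maps in OpenVINO GenAI.
        ov::genai::ChatHistory history;
        history.push_back({{"role", "system"}, {"content", "You are a helpful assistant."}});
        history.push_back({{"role", "user"}, {"content", "Hello!"}});

        // Mirrors the servable code above, which currently hardcodes add_generation_prompt.
        constexpr bool add_generation_prompt = true;
        std::string prompt = tokenizer.apply_chat_template(history, add_generation_prompt);
        std::cout << prompt << std::endl;
        return 0;
    }

This sketch only illustrates the GenAI fallback; the patch itself keeps the pybind11 path (PyJinjaTemplateProcessor::applyChatTemplate) as the default when Python is enabled.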
diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index 96bb6317ea..2ae9c339d5 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -48,9 +48,10 @@ namespace ovms { +#if (PYTHON_DISABLE == 0) static const std::string CHAT_TEMPLATE_WARNING_MESSAGE = "Warning: Chat template has not been loaded properly. Servable will not respond to /chat/completions endpoint."; -void GenAiServableInitializer::loadTextProcessor(std::shared_ptr properties, const std::string& chatTemplateDirectory) { +void GenAiServableInitializer::loadTemplateProcessor(std::shared_ptr properties, const std::string& chatTemplateDirectory) { py::gil_scoped_acquire acquire; try { auto locals = py::dict("templates_directory"_a = chatTemplateDirectory); @@ -108,9 +109,9 @@ void GenAiServableInitializer::loadTextProcessor(std::shared_ptrtextProcessor.bosToken = locals["bos_token"].cast(); - properties->textProcessor.eosToken = locals["eos_token"].cast(); - properties->textProcessor.chatTemplate = std::make_unique>(locals["template"]); + properties->templateProcessor.bosToken = locals["bos_token"].cast(); + properties->templateProcessor.eosToken = locals["eos_token"].cast(); + properties->templateProcessor.chatTemplate = std::make_unique>(locals["template"]); } catch (const pybind11::error_already_set& e) { SPDLOG_INFO(CHAT_TEMPLATE_WARNING_MESSAGE); SPDLOG_DEBUG("Chat template loading failed with error: {}", e.what()); @@ -125,6 +126,7 @@ void GenAiServableInitializer::loadTextProcessor(std::shared_ptr parseMaxModelLength(std::string& modelsPath) { + std::string configPath = FileSystem::appendSlash(modelsPath) + "config.json"; + std::optional maxModelLength; + if (std::filesystem::exists(configPath.c_str())) { + std::ifstream ifs(configPath); + if (!ifs.is_open()) { + return maxModelLength; + } + rapidjson::Document modelConfig; + rapidjson::IStreamWrapper isw(ifs); + rapidjson::ParseResult parseResult = modelConfig.ParseStream(isw); + if (parseResult.Code()) { + return maxModelLength; + } + std::vector maxLengthFields = {"max_position_embeddings", "n_positions", "seq_len", "seq_length", "n_ctx", "sliding_window"}; + for (auto field : maxLengthFields) { + if (modelConfig.HasMember(field.c_str()) && modelConfig[field.c_str()].IsUint()) { + maxModelLength = modelConfig[field.c_str()].GetUint(); + break; + } + } + } + return maxModelLength; +} + Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath) { // Assuming that models_path is always set std::string parsedModelsPath; @@ -278,28 +305,4 @@ Status initializeGenAiServable(std::shared_ptr& servable, const : } return StatusCode::OK; } -std::optional parseMaxModelLength(std::string& modelsPath) { - std::string configPath = FileSystem::appendSlash(modelsPath) + "config.json"; - std::optional maxModelLength; - if (std::filesystem::exists(configPath.c_str())) { - std::ifstream ifs(configPath); - if (!ifs.is_open()) { - return maxModelLength; - } - rapidjson::Document modelConfig; - rapidjson::IStreamWrapper isw(ifs); - rapidjson::ParseResult parseResult = modelConfig.ParseStream(isw); - if (parseResult.Code()) { - return maxModelLength; - } - std::vector maxLengthFields = {"max_position_embeddings", "n_positions", "seq_len", "seq_length", "n_ctx", "sliding_window"}; - for (auto field : maxLengthFields) { - if (modelConfig.HasMember(field.c_str()) && modelConfig[field.c_str()].IsUint()) { - maxModelLength = 
modelConfig[field.c_str()].GetUint(); - break; - } - } - } - return maxModelLength; -} } // namespace ovms diff --git a/src/llm/servable_initializer.hpp b/src/llm/servable_initializer.hpp index ad6803d8c5..a5f46ba6cc 100644 --- a/src/llm/servable_initializer.hpp +++ b/src/llm/servable_initializer.hpp @@ -46,7 +46,7 @@ struct GenAiServableProperties; class GenAiServableInitializer { public: virtual ~GenAiServableInitializer() = default; - static void loadTextProcessor(std::shared_ptr properties, const std::string& chatTemplateDirectory); + static void loadTemplateProcessor(std::shared_ptr properties, const std::string& chatTemplateDirectory); /* initialize method implementation MUST fill servable with all required properties i.e. pipeline, tokenizer, configs etc. based on mediapipe node options. It is strictly connected with the servable, so implementation of this method in a derived class should be aware of the specific servable class structure @@ -55,7 +55,7 @@ class GenAiServableInitializer { virtual Status initialize(std::shared_ptr& servable, const mediapipe::LLMCalculatorOptions& nodeOptions, std::string graphPath) = 0; }; Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath); +std::optional parseMaxModelLength(std::string& modelsPath); Status determinePipelineType(PipelineType& pipelineType, const mediapipe::LLMCalculatorOptions& nodeOptions, const std::string& graphPath); Status initializeGenAiServable(std::shared_ptr& servable, const ::mediapipe::CalculatorGraphConfig::Node& graphNodeConfig, std::string graphPath); -std::optional parseMaxModelLength(std::string& modelsPath); } // namespace ovms diff --git a/src/llm/text_processor.hpp b/src/llm/text_utils.hpp similarity index 75% rename from src/llm/text_processor.hpp rename to src/llm/text_utils.hpp index 4ceed95d6d..fba00730f5 100644 --- a/src/llm/text_processor.hpp +++ b/src/llm/text_utils.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright 2024 Intel Corporation +// Copyright 2025 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,30 +16,9 @@ #pragma once #include -#include #include #include - #include -#pragma warning(push) -#pragma warning(disable : 6326 28182 6011 28020) -// Python execution for template processing -#include // everything needed for embedding -#include -#pragma warning(pop) - -#include "src/python/utils.hpp" - -namespace ovms { - -class TextProcessor { -public: - std::string bosToken = ""; - std::string eosToken = ""; - std::unique_ptr> chatTemplate = nullptr; - - static bool applyChatTemplate(TextProcessor& textProcessor, std::string modelsPath, const std::string& requestBody, std::string& output); -}; template static std::string packPromptTokens(T* input, size_t size) { @@ -81,4 +60,3 @@ static std::string getPromptTokensString(const ov::Tensor& tensor) { } #pragma GCC diagnostic pop #pragma warning(pop) -} // namespace ovms diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp index 515e2c2395..e876690a7a 100644 --- a/src/llm/visual_language_model/continuous_batching/servable.cpp +++ b/src/llm/visual_language_model/continuous_batching/servable.cpp @@ -23,6 +23,7 @@ #include #include "../../../logging.hpp" +#include "../../text_utils.hpp" namespace ovms { diff --git a/src/llm/visual_language_model/legacy/servable.cpp b/src/llm/visual_language_model/legacy/servable.cpp index 75b7c42d25..af29e4a1e7 100644 --- a/src/llm/visual_language_model/legacy/servable.cpp +++ b/src/llm/visual_language_model/legacy/servable.cpp @@ -34,7 +34,10 @@ #include "../../../http_payload.hpp" #include "../../../mediapipe_internal/mediapipe_utils.hpp" #include "../../apis/openai_completions.hpp" -#include "../../text_processor.hpp" +#include "../../text_utils.hpp" +#if (PYTHON_DISABLE == 0) +#include "../../py_jinja_template_processor.hpp" +#endif #include "servable.hpp" namespace ovms { diff --git a/src/llm/visual_language_model/legacy/servable_initializer.cpp b/src/llm/visual_language_model/legacy/servable_initializer.cpp index ba2e1df5c3..c10bf1b3af 100644 --- a/src/llm/visual_language_model/legacy/servable_initializer.cpp +++ b/src/llm/visual_language_model/legacy/servable_initializer.cpp @@ -81,8 +81,9 @@ Status VisualLanguageModelLegacyServableInitializer::initialize(std::shared_ptr< SPDLOG_ERROR("Error during llm node initialization for models_path: {}", parsedModelsPath); return StatusCode::LLM_NODE_RESOURCE_STATE_INITIALIZATION_FAILED; } - - loadTextProcessor(properties, parsedModelsPath); +#if (PYTHON_DISABLE == 0) + loadTemplateProcessor(properties, parsedModelsPath); +#endif properties->legacyExecutor = std::make_shared(properties->pipeline); if (nodeOptions.has_max_tokens_limit()) { properties->maxTokensLimit = nodeOptions.max_tokens_limit(); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index d3bd6b8bb4..dd08768f3a 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -33,9 +33,9 @@ #include "../model_metric_reporter.hpp" #include "../modelmanager.hpp" #include "../ov_utils.hpp" -#if (PYTHON_DISABLE == 0) #include "../llm/servable.hpp" #include "../llm/servable_initializer.hpp" +#if (PYTHON_DISABLE == 0) #include "../python/pythonnoderesources.hpp" #endif #include "../status.hpp" @@ -396,7 +396,6 @@ Status MediapipeGraphDefinition::waitForLoaded(std::unique_ptr class ResourcesCleaningGuard { public: @@ -413,7 +412,6 @@ class ResourcesCleaningGuard { shouldCleanup = false; 
} }; -#endif Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); @@ -445,6 +443,7 @@ Status MediapipeGraphDefinition::initializeNodes() { this->pythonNodeResourcesMap.insert(std::pair>(nodeName, std::move(nodeResources))); pythonResourcesCleaningGuard.disableCleaning(); } +#endif // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { ResourcesCleaningGuard genAiServablesCleaningGuard(this->genAiServableMap); @@ -470,7 +469,6 @@ Status MediapipeGraphDefinition::initializeNodes() { this->genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); genAiServablesCleaningGuard.disableCleaning(); } -#endif } return StatusCode::OK; } diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c785adb9a8..bd2fdf5134 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -134,8 +134,8 @@ class MediapipeGraphExecutor { OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->pythonNodeResourcesMap).At(STARTING_TIMESTAMP); - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->llmNodeResourcesMap).At(STARTING_TIMESTAMP); #endif + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->llmNodeResourcesMap).At(STARTING_TIMESTAMP); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); ::mediapipe::Packet packet; @@ -278,8 +278,8 @@ class MediapipeGraphExecutor { #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->pythonNodeResourcesMap) .At(STARTING_TIMESTAMP); - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->llmNodeResourcesMap).At(STARTING_TIMESTAMP); #endif + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->llmNodeResourcesMap).At(STARTING_TIMESTAMP); } { diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index da2b967b82..0d5a746d51 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -26,10 +26,12 @@ #include #include #include +#if (PYTHON_DISABLE == 0) #pragma warning(push) #pragma warning(disable : 6326 28182 6011 28020) #include #pragma warning(pop) +#endif #include "../../http_rest_api_handler.hpp" #include "../../http_status_code.hpp" @@ -39,7 +41,7 @@ #include "../../llm/language_model/continuous_batching/servable.hpp" #include "../../llm/servable.hpp" #include "../../llm/servable_initializer.hpp" -#include "../../llm/text_processor.hpp" +#include "../../llm/text_utils.hpp" #include "../../ov_utils.hpp" #include "../../server.hpp" #include "rapidjson/document.h" @@ -3235,9 +3237,11 @@ INSTANTIATE_TEST_SUITE_P( // Common tests for all pipeline types (testing logic executed prior pipeline type selection) class LLMConfigHttpTest : public ::testing::Test { +#if (PYTHON_DISABLE == 0) public: void SetUp() { py::initialize_interpreter(); } void TearDown() { py::finalize_interpreter(); } +#endif }; TEST_F(LLMConfigHttpTest, LLMNodeNameMissing) { @@ -3494,9 +3498,11 @@ TEST_F(LLMConfigHttpTest, LLMNodeWorkspacePathToFileNotDir) { } class LLMConfigHttpTestParameterized : 
public ::testing::Test, public ::testing::WithParamInterface> { +#if (PYTHON_DISABLE == 0) public: void SetUp() { py::initialize_interpreter(); } void TearDown() { py::finalize_interpreter(); } +#endif }; TEST_P(LLMConfigHttpTestParameterized, LLMNodeResourceInitFailed) { @@ -3560,9 +3566,11 @@ INSTANTIATE_TEST_SUITE_P( // Those tests are working on Continuous Batching path, since most of the node options are scheduler parameters that are not used in non-CB servables // We could consider adding tests for non-CB path in the future in the separate test suite class LLMOptionsHttpTestPython : public ::testing::Test { +#if (PYTHON_DISABLE == 0) public: static void SetUpTestSuite() { py::initialize_interpreter(); } static void TearDownTestSuite() { py::finalize_interpreter(); } +#endif }; class LLMOptionsHttpTest : public LLMOptionsHttpTestPython { diff --git a/src/test/llm/llmtemplate_test.cpp b/src/test/llm/llmtemplate_test.cpp index 86d1bb24bd..752dec7fbc 100644 --- a/src/test/llm/llmtemplate_test.cpp +++ b/src/test/llm/llmtemplate_test.cpp @@ -33,7 +33,7 @@ #include "../../httpservermodule.hpp" #include "../../llm/language_model/continuous_batching/servable.hpp" #include "../../llm/language_model/continuous_batching/servable_initializer.hpp" -#include "../../llm/text_processor.hpp" +#include "../../llm/py_jinja_template_processor.hpp" #include "../../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../../server.hpp" @@ -77,11 +77,11 @@ TEST_F(LLMChatTemplateTest, ChatTemplateEmptyBody) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = ""; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); std::string errorOutput = "Expecting value: line 1 column 1 (char 0)"; ASSERT_EQ(finalPrompt, errorOutput); } @@ -91,7 +91,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateEmptyMessage) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -102,7 +102,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateEmptyMessage) { } )"; std::string errorOutput = "This servable accepts only single message requests"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, 
payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, errorOutput); } @@ -111,7 +111,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMessageWithEmptyObject) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -121,7 +121,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMessageWithEmptyObject) { "messages": [{}] } )"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, ""); } @@ -130,7 +130,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateDefault) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -139,7 +139,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateDefault) { } )"; std::string expectedOutput = "How can I help you?"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -148,7 +148,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMultiMessage) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -157,7 +157,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateMultiMessage) { } )"; std::string errorOutput = "This servable accepts only single message requests"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, errorOutput); } @@ -166,7 +166,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateComplexMessage) { servable->getProperties()->modelsPath = directoryPath; // default_chat_template = "{% if 
messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -177,7 +177,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateComplexMessage) { } )"; std::string expectedOutput = "hello"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -187,7 +187,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateJinjaUppercase) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -198,7 +198,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateJinjaUppercase) { } )"; std::string expectedOutput = " Hi, HELLO "; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -208,7 +208,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateJinjaException) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -219,7 +219,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateJinjaException) { } )"; std::string errorOutput = "list object has no element 3"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, errorOutput); } @@ -232,7 +232,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerDefault) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -243,7 +243,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerDefault) { } )"; std::string expectedOutput = "hello"; - 
ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -256,7 +256,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerBosNull) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -268,7 +268,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerBosNull) { )"; std::string expectedOutput = "hello"; // Expect no issues with chat template since non string bos token is ignored - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -281,7 +281,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerBosDict) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -293,7 +293,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerBosDict) { )"; std::string expectedError = "Error: Chat template not loaded correctly, so it cannot be applied"; // Expect no issues with chat template since non string bos token is ignored - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, expectedError); } @@ -306,7 +306,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerEosNull) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -318,7 +318,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerEosNull) { )"; std::string expectedOutput = "hello"; // Expect no issues with chat template since non string eos token is ignored - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); 
ASSERT_EQ(finalPrompt, expectedOutput); } @@ -331,7 +331,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerException) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -342,7 +342,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerException) { } )"; std::string expectedOutput = "Error: Chat template not loaded correctly, so it cannot be applied"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -356,7 +356,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerUpperCase) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -367,7 +367,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerUpperCase) { } )"; std::string expectedOutput = "Hi, HELLO"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -381,7 +381,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerTemplateException) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -392,7 +392,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerTemplateException) { } )"; std::string expectedOutput = "list object has no element 3"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, expectedOutput); } @@ -406,7 +406,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerTemplateBadVariable) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string 
payloadBody = R"( @@ -417,7 +417,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTokenizerTemplateBadVariable) { } )"; std::string expectedError = "Error: Chat template not loaded correctly, so it cannot be applied"; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), false); ASSERT_EQ(finalPrompt, expectedError); } @@ -434,7 +434,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTwoConfigs) { std::shared_ptr servable = std::make_shared(); servable->getProperties()->modelsPath = directoryPath; - GenAiServableInitializer::loadTextProcessor(servable->getProperties(), servable->getProperties()->modelsPath); + GenAiServableInitializer::loadTemplateProcessor(servable->getProperties(), servable->getProperties()->modelsPath); std::string finalPrompt = ""; std::string payloadBody = R"( @@ -445,7 +445,7 @@ TEST_F(LLMChatTemplateTest, ChatTemplateTwoConfigs) { } )"; std::string expectedOutput = " Hi, HELLO "; - ASSERT_EQ(TextProcessor::applyChatTemplate(servable->getProperties()->textProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); + ASSERT_EQ(PyJinjaTemplateProcessor::applyChatTemplate(servable->getProperties()->templateProcessor, servable->getProperties()->modelsPath, payloadBody, finalPrompt), true); ASSERT_EQ(finalPrompt, expectedOutput); } diff --git a/src/test/llm/text_streamer_test.cpp b/src/test/llm/text_streamer_test.cpp index 9905db756e..386821f4b3 100644 --- a/src/test/llm/text_streamer_test.cpp +++ b/src/test/llm/text_streamer_test.cpp @@ -40,7 +40,9 @@ class TextStreamerTest : public ::testing::Test { )"; static void SetUpTestSuite() { +#if (PYTHON_DISABLE == 0) py::initialize_interpreter(); +#endif std::string adjustedPbtxt = testPbtxt; adjustConfigForTargetPlatform(adjustedPbtxt); ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(adjustedPbtxt, &config)); @@ -54,7 +56,9 @@ class TextStreamerTest : public ::testing::Test { } static void TearDownTestSuite() { servable.reset(); +#if (PYTHON_DISABLE == 0) py::finalize_interpreter(); +#endif } void assertTokensValues(ov::Tensor generatedTokens, std::vector expectedTokens) { ASSERT_EQ(generatedTokens.get_size(), expectedTokens.size()); diff --git a/src/test/llm/visual_language_model/initialization_test.cpp b/src/test/llm/visual_language_model/initialization_test.cpp index a3c5ca2224..ee31677886 100644 --- a/src/test/llm/visual_language_model/initialization_test.cpp +++ b/src/test/llm/visual_language_model/initialization_test.cpp @@ -34,9 +34,11 @@ Status callDeterminePipelineType(PipelineType& pipelineType, const std::string& // Initialization tests class VLMServableInitializationTest : public ::testing::Test { +#if (PYTHON_DISABLE == 0) public: void SetUp() { py::initialize_interpreter(); } void TearDown() { py::finalize_interpreter(); } +#endif }; TEST_F(VLMServableInitializationTest, determinePipelineTypeDefault) { diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 8f67aecb60..bf814ec0bb 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -3667,8 +3667,8 @@ TEST(WhitelistRegistered, MediapipeCalculatorsList) { "CalculatorRunnerSourceCalculator", "PyTensorOvTensorConverterCalculator", // integral OVMS calculator "PythonExecutorCalculator", // integral OVMS 
calculator - "HttpLLMCalculator", // integral OVMS calculator #endif + "HttpLLMCalculator", // integral OVMS calculator "OpenAIChatCompletionsMockCalculator", // OVMS test calculator "AddHeaderCalculator", "AddNumbersMultiInputsOutputsTestCalculator",
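For context (not part of the patch): the test fixtures above now start the embedded Python interpreter only when the build includes Python support. A minimal standalone sketch of that guard pattern, assuming a hypothetical GoogleTest fixture outside the OVMS tree and that PYTHON_DISABLE is supplied by the build system (e.g. -DPYTHON_DISABLE=1):

    #include <gtest/gtest.h>
    #if (PYTHON_DISABLE == 0)
    #include <pybind11/embed.h>
    namespace py = pybind11;
    #endif

    class ChatTemplateFixture : public ::testing::Test {
    protected:
        static void SetUpTestSuite() {
    #if (PYTHON_DISABLE == 0)
            // Must run before any py:: call, e.g. PyJinjaTemplateProcessor::applyChatTemplate.
            py::initialize_interpreter();
    #endif
        }
        static void TearDownTestSuite() {
    #if (PYTHON_DISABLE == 0)
            py::finalize_interpreter();
    #endif
        }
    };

    TEST_F(ChatTemplateFixture, BuildsWithOrWithoutPython) {
        SUCCEED();  // both build flavors compile and run this suite
    }

pybind11's py::scoped_interpreter would be an alternative way to manage the interpreter lifetime, but explicit initialize/finalize calls match the existing test style and are easy to compile out under the same macro.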