[TensorRT] [ORT] support Stable Diffusion text2img Pipeline (#432)
* update cpp ddimscheduler.h

* add reference github repo

* update clip output to [batch,77,768], and change the clip model onnx input name

* update ort clip code

* update trt clip code

* ort unet code init

* add exec name, avoid multiple definitions

* add unet test code

* add unet code

* add unet code in models

* update unet code #TODO fix NaN error

* del useless func

* update unet to produce correct results; this run is still a single inference and needs to become a loop over time_steps

* add vae test code

* add vae model in models.h

* add vae test code

* update unet code #TODO clean code

* update unet.h code #TODO clean code

* add vae implement code #TODO clean code

* add tensorrt vae implement code #TODO clean code

* add tensorrt vae implement code #TODO clean code

* modify unet code to use ddim scheduler cpp version # TODO clean code

* add tensorrt vae define

* initialize a DDIM sampler first; the ORT one can also be used

* update trt unet in models.h

* trt unet code implement

* target link ddim.so

* update trt unet code

* update trt unet test code

* update trt vae test code

* update trt unet code to fix bug

* update code to fp16

* update code

* Added dynamic library file for ddim scheduler

* Update vae code to remove useless code and functions

* update utils code to add new func, add bin to vector

* Update the test code of VAE

* update trt clip code to clean

* clean trt unet code

* add new func save vector to bin, and random latents generator

* update trt test code #TODO  clean code

* add ort and trt pipeline in model

* ort pipeline implement

* trt pipeline implement

* new inference interface for pipeline

* new inference interface for pipeline

* new inference interface for pipeline

* new inference interface for pipeline

* add pipeline code, include ort and trt

* update test code

* update test code

* add pipeline code

* update the code to clean code

* update the code to clean code

* update onnx path, engine path and save path

* combine all sd test files into one pipeline file

* delete vae unet clip test code

* update pipeline code, opt json file

* delay model init when video memory is low

* update code

* update .so filename

* update test code

* update code, low VRAM mode

* update code
wangzijian1010 authored Sep 4, 2024
1 parent fae5d3d commit 6256f13
Showing 30 changed files with 1,454 additions and 121 deletions.
4 changes: 2 additions & 2 deletions cmake/utils.cmake
@@ -45,13 +45,13 @@ function(add_lite_ai_toolkit_shared_library version soversion)
if (ENABLE_ONNXRUNTIME)
include(cmake/onnxruntime.cmake)
set(LITE_SRCS ${LITE_SRCS} ${ORT_SRCS})
-set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} onnxruntime)
+set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} onnxruntime ddim_scheduler_cpp)
endif ()

if (ENABLE_TENSORRT)
include(cmake/tensorrt.cmake)
set(LITE_SRCS ${LITE_SRCS} ${TRT_SRCS})
-set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} cudart nvinfer nvonnxparser nvinfer_plugin)
+set(LITE_DEPENDENCIES ${LITE_DEPENDENCIES} cudart nvinfer nvonnxparser nvinfer_plugin ddim_scheduler_cpp)
endif ()

if (ENABLE_MNN)
2 changes: 1 addition & 1 deletion examples/lite/CMakeLists.txt
@@ -102,4 +102,4 @@ add_lite_executable(lite_face_parsing_bisenet cv)
add_lite_executable(lite_face_parsing_bisenet_dyn cv)
add_lite_executable(lite_yolov8face cv)
add_lite_executable(lite_yolov8 cv)
-add_lite_executable(lite_clip sd)
+add_lite_executable(lite_sd_pipeline sd)
53 changes: 0 additions & 53 deletions examples/lite/sd/test_lite_clip.cpp

This file was deleted.

69 changes: 69 additions & 0 deletions examples/lite/sd/test_lite_sd_pipeline.cpp
@@ -0,0 +1,69 @@
//
// Created by wangzijian on 8/31/24.
//
#include "lite/lite.h"

static void test_default()
{
std::string clip_onnx_path = "../../../examples/hub/onnx/sd/clip_model.onnx";
std::string unet_onnx_path = "../../../examples/hub/onnx/sd/unet_model.onnx";
std::string vae_onnx_path = "../../../examples/hub/onnx/sd/vae_model.onnx";

auto *pipeline = new lite::onnxruntime::sd::pipeline::Pipeline(clip_onnx_path, unet_onnx_path,
vae_onnx_path,
1);

std::string prompt = "1girl with red hair,blue eyes,smile, looking at viewer";
std::string negative_prompt = "";
std::string save_path = "../../../examples/logs/output_merge.png";
std::string scheduler_config_path = "../../../lite/ort/sd/scheduler_config.json";

pipeline->inference(prompt,negative_prompt,save_path,scheduler_config_path);

delete pipeline;

}


static void test_trt_pipeline()
{
// record the start time
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();

std::string clip_engine_path = "../../../examples/hub/trt/clip_text_model_fp16.engine";
std::string unet_engine_path = "../../../examples/hub/trt/unet_fp16.engine";
std::string vae_engine_path = "../../../examples/hub/trt/vae_model_fp16.engine";


auto *pipeline = new lite::trt::sd::pipeline::PipeLine(
clip_engine_path, unet_engine_path, vae_engine_path
);


std::string prompt = "1girl with red hair,blue eyes,smile, looking at viewer";
std::string negative_prompt = "";
std::string save_path = "../../../examples/logs/output_merge_tensorrt.png";
std::string scheduler_config_path = "../../../lite/ort/sd/scheduler_config.json";
pipeline->inference(prompt,negative_prompt,save_path,scheduler_config_path);

// record the end time and print the elapsed duration
std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_seconds = end_time - start_time;
std::cout << "Elapsed time: " << elapsed_seconds.count() << " seconds" << std::endl;

delete pipeline;

}

static void test_lite()
{
test_trt_pipeline();

// test_default();
}

int main()
{
test_lite();
return 0;
}
Binary file added lite/bin/libddim_scheduler_cpp.so
38 changes: 36 additions & 2 deletions lite/models.h
@@ -114,7 +114,9 @@
#include "lite/ort/cv/face_parsing_bisenet_dyn.h"
#include "lite/ort/cv/yolofacev8.h"
#include "lite/ort/sd/clip.h"

#include "lite/ort/sd/unet.h"
#include "lite/ort/sd/vae.h"
#include "lite/ort/sd/pipeline.h"
#endif


@@ -130,6 +132,9 @@
#include "lite/trt/cv/trt_yolov6.h"
#include "lite/trt/cv/trt_yolov5_blazeface.h"
#include "lite/trt/sd/trt_clip.h"
#include "lite/trt/sd/trt_vae.h"
#include "lite/trt/sd/trt_unet.h"
#include "lite/trt/sd/trt_pipeline.h"
#endif

// ENABLE_MNN
@@ -671,10 +676,25 @@ namespace lite
namespace sd
{
typedef ortsd::Clip _ONNXClip;
typedef ortsd::UNet _ONNXUNet;
typedef ortsd::Vae _ONNXVae;
typedef ortsd::Pipeline _ONNXPipeline;
namespace text_encoder
{
typedef _ONNXClip Clip;
}
namespace denoise
{
typedef _ONNXUNet UNet;
}
namespace image_decoder
{
typedef _ONNXVae Vae;
}
namespace pipeline
{
typedef _ONNXPipeline Pipeline;
}
}

}
@@ -717,12 +737,26 @@ namespace lite{
}
namespace sd
{

typedef trtsd::TRTUNet _TRT_UNet;
typedef trtsd::TRTClip _TRT_Clip;
typedef trtsd::TRTVae _TRT_Vae;
typedef trtsd::TRTPipeline _TRT_Pipeline;
namespace text_encoder
{
typedef _TRT_Clip Clip;
}
namespace image_decoder
{
typedef _TRT_Vae Vae;
}
namespace denoise
{
typedef _TRT_UNet UNet;
}
namespace pipeline
{
typedef _TRT_Pipeline PipeLine;
}
}
}
#endif
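For orientation, a minimal sketch of how the new namespace aliases are consumed by a caller, with the constructor and inference signatures taken from test_lite_sd_pipeline.cpp above; all paths are placeholders, and the meaning of the trailing 1 in the ORT constructor is an assumption:

#include <string>
#include "lite/lite.h"

int main() {
    // All paths below are placeholders for real model files.
    std::string clip_path = "clip_model.onnx";
    std::string unet_path = "unet_model.onnx";
    std::string vae_path = "vae_model.onnx";

    // ORT build; the trailing 1 mirrors test_lite_sd_pipeline.cpp (assumed thread count).
    auto *pipeline = new lite::onnxruntime::sd::pipeline::Pipeline(
            clip_path, unet_path, vae_path, 1);

    std::string prompt = "1girl with red hair,blue eyes,smile, looking at viewer";
    std::string negative_prompt;
    std::string save_path = "output.png";
    std::string scheduler_config = "scheduler_config.json";
    pipeline->inference(prompt, negative_prompt, save_path, scheduler_config);
    delete pipeline;

    // A TensorRT build would instead construct lite::trt::sd::pipeline::PipeLine
    // from three .engine paths, as in test_trt_pipeline() above.
    return 0;
}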
26 changes: 0 additions & 26 deletions lite/ort/cv/yolofacev8.cpp
@@ -153,32 +153,6 @@ void YoloFaceV8::generate_box(std::vector<Ort::Value> &ort_outputs,
}


void save_tensor_to_file(const Ort::Value& tensor, const std::string& filename) {
// get the tensor's type and shape information
auto type_and_shape_info = tensor.GetTensorTypeAndShapeInfo();
std::vector<int64_t> shape = type_and_shape_info.GetShape();
size_t element_count = type_and_shape_info.GetElementCount();

ONNXTensorElementDataType type = type_and_shape_info.GetElementType();
if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
std::cerr << "Unsupported tensor data type. Only float tensors are supported." << std::endl;
return;
}

const float* pdata = tensor.GetTensorData<float>();

std::ofstream file(filename);
if (!file.is_open()) {
std::cerr << "Could not open file for writing: " << filename << std::endl;
return;
}

for (size_t i = 0; i < element_count; ++i) {
file << pdata[i] << "\n";
}
file.close();
}


void YoloFaceV8::detect(const cv::Mat &mat,std::vector<lite::types::Boxf> &boxes,
float conf_threshold, float iou_threshold) {
18 changes: 8 additions & 10 deletions lite/ort/sd/clip.cpp
@@ -45,8 +45,6 @@ void Clip::inference(std::vector<std::string> input, std::vector<std::vector<flo
std::vector<std::vector<int>> output_encode;

encode_text(input,output_encode);


// flat out output_encode
std::vector<int32_t> flat_output_encode;
for (const auto& vec : output_encode) {
@@ -56,7 +54,7 @@

// make tensor
int batch = output_encode.size();
-std::vector<int64_t> input_node_dims1 = {batch, 77};
+std::vector<int64_t> input_node_dims1 = {batch, input_axes};

Ort::MemoryInfo allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

@@ -83,19 +81,19 @@

const float *text_feature_ptr = ort_outputs[0].GetTensorMutableData<float>();


+// update to [b,77,768]
for (int i = 0 ; i < batch ; ++i)
{
std::vector<float> temp;
-for (int j = 0 ; j < 512 ; ++j)
+for (int j = 0 ; j < output_tensor_size ; ++j)
{
-temp.push_back(text_feature_ptr[ i * 512 + j]);
+temp.push_back(text_feature_ptr[ i * output_tensor_size + j]);
}
output.push_back(temp);
temp.clear();
}



}


@@ -111,9 +109,9 @@ void Clip::encode_text(std::vector<std::string> input_text, std::vector<std::vec
for (int i = 0 ; i < input_text.size(); ++i)
{
auto temp = tokenizer.tokenize(input_text[i], on_new_token_cb);
-temp.push_back(49407);
-if (temp.size() < 77) {
-temp.resize(77, 0);
+temp.push_back(end_flag_num);
+if (temp.size() < input_axes) {
+temp.resize(input_axes, 0);
}
output.push_back(temp);
}
7 changes: 5 additions & 2 deletions lite/ort/sd/clip.h
@@ -32,13 +32,16 @@ namespace ortsd
const char *log_id = nullptr;
bool context_is_update = false;
const unsigned int num_threads; // initialize at runtime.
+const int input_axes = 77;
+const int output_tensor_size = 77 * 768;
+const int end_flag_num = 49407;

std::vector<const char *> input_node_names = {
"TEXT"
"input_ids"
};

std::vector<const char *> output_node_names = {
"TEXT_EMBEDDING"
"text_embeddings"
};

public:
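Read together with the clip.cpp hunk above, the new constants pin down the text-encoder contract: each prompt is tokenized, terminated, and zero-padded to a fixed 77-token context (input_axes), and encodes to 77 * 768 floats (output_tensor_size), i.e. one 768-d hidden state per token position. A minimal sketch of the implied padding rule, assuming 49407 is CLIP's end-of-text id and 0 the pad id, as in Clip::encode_text:

#include <vector>

// Pad/terminate one tokenized prompt to CLIP's fixed context length
// (a sketch mirroring Clip::encode_text above, not the library code itself).
std::vector<int> pad_to_context(std::vector<int> tokens) {
    const size_t input_axes = 77;    // fixed context length
    const int end_flag_num = 49407;  // assumed <|endoftext|> token id
    tokens.push_back(end_flag_num);
    if (tokens.size() < input_axes)
        tokens.resize(input_axes, 0); // zero-pad the remainder
    return tokens;
}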
60 changes: 60 additions & 0 deletions lite/ort/sd/ddimscheduler.h
@@ -0,0 +1,60 @@
//
// Created by TalkUHulk on 2024/4/25.
//

// referenced from https://github.com/TalkUHulk/ddim_scheduler_cpp.git

#ifndef DDIM_SCHEDULER_CPP_DDIMSCHEDULER_HPP
#define DDIM_SCHEDULER_CPP_DDIMSCHEDULER_HPP

#include <iostream>
#include <vector>
#include <string>
namespace Scheduler {

#if defined(_MSC_VER)
#if defined(BUILDING_AIENGINE_DLL)
#define DDIM_PUBLIC __declspec(dllexport)
#elif defined(USING_AIENGINE_DLL)
#define DDIM_PUBLIC __declspec(dllimport)
#else
#define DDIM_PUBLIC
#endif
#else
#define DDIM_PUBLIC __attribute__((visibility("default")))
#endif

struct DDIMMeta;
class DDIM_PUBLIC DDIMScheduler {

private:
DDIMMeta* meta_ptr = nullptr;
int num_inference_steps = 0;

public:
explicit DDIMScheduler(const std::string &config);

~DDIMScheduler();

// Sets the discrete timesteps used for the diffusion chain (to be run before inference).
int set_timesteps(int num_inference_steps);

void get_timesteps(std::vector<int> &dst);

float get_init_noise_sigma() const;

int step(std::vector<float> &model_output, const std::vector<int> &model_output_size,
std::vector<float> &sample, const std::vector<int> &sample_size,
std::vector<float> &prev_sample,
int timestep, float eta = 0.0, bool use_clipped_model_output = false);

int add_noise(std::vector<float> &sample, const std::vector<int> &sample_size,
std::vector<float> &noise, const std::vector<int> &noise_size, int timesteps,
std::vector<float> &noisy_samples);
private:
float get_variance(int timestep, int prev_timestep);
};
}


#endif //DDIM_SCHEDULER_CPP_DDIMSCHEDULER_HPP
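
Since this header is the scheduler API that both the ORT and TRT UNet loops link against (see the ddim_scheduler_cpp dependency added in cmake/utils.cmake above), here is a minimal usage sketch of the denoising loop; the config path, step count, latent shape, and the stub standing in for the UNet forward pass are illustrative assumptions, not part of this commit:

#include <vector>
#include "lite/ort/sd/ddimscheduler.h"

int main() {
    Scheduler::DDIMScheduler scheduler("scheduler_config.json"); // assumed config path
    scheduler.set_timesteps(30); // discretize the diffusion chain before inference

    std::vector<int> timesteps;
    scheduler.get_timesteps(timesteps);

    std::vector<int> shape = {1, 4, 64, 64};           // assumed latent shape
    std::vector<float> latents(1 * 4 * 64 * 64, 0.0f); // would start as Gaussian noise,
                                                       // scaled by get_init_noise_sigma()
    for (int t : timesteps) {
        std::vector<float> noise_pred = latents; // stub for the real UNet prediction at t
        std::vector<float> prev_sample;
        scheduler.step(noise_pred, shape, latents, shape, prev_sample, t);
        latents = prev_sample; // x_t -> x_{t-1}
    }
    return 0;
}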