Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix issue#35, update ort to v2 #106

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.cli
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN apt update && \
# Download onnxruntime
RUN mkdir -p /usr/local/lib && \
cd /usr/local/lib && \
wget https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-1.16.1.tgz && \
wget https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-1.17.1.tgz && \
tar xzf ./onnx*.tgz && \
rm -rf ./onnx*.tgz && \
mv ./onnx* ./onnxruntime
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.cli.cuda
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN apt update && \
# Download onnxruntime
RUN mkdir -p /usr/local/lib && \
cd /usr/local/lib && \
wget -q https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-gpu-1.16.1.tgz && \
wget -q https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1.tgz && \
tar xzf ./onnx*.tgz && \
rm -rf ./onnx*.tgz && \
mv ./onnx* ./onnxruntime
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ make install
<summary> Click to expand</summary>

You should have onnxruntime in your system in order to run the extension.
You can download the `onnxruntime` binary release from GitHub https://github.com/microsoft/onnxruntime/releases/tag/v1.16.1 and place it somewhere in your system (e.g. /usr/lib/onnxruntime)
You can download the `onnxruntime` binary release from GitHub https://github.com/microsoft/onnxruntime/releases/tag/v1.17.1 and place it somewhere in your system (e.g. /usr/lib/onnxruntime)

Then you should export these 2 environment variables

Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ function setup_environment() {

function setup_onnx() {
pushd /tmp
ONNX_VERSION="1.16.1"
ONNX_VERSION="1.17.1"
PACKAGE_URL="https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-x64-${ONNX_VERSION}.tgz" && \
if [[ $ARCH == *"arm"* ]]; then PACKAGE_URL="https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-linux-aarch64-${ONNX_VERSION}.tgz"; fi && \
mkdir -p /usr/local/lib && \
Expand Down
2 changes: 1 addition & 1 deletion lantern_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ itertools = "0.11.0"
csv = "1.3.0"
url = "2.2"
num_cpus = "1.15.0"
ort = { version = "1.16.0", features = ["load-dynamic", "cuda", "openvino"] }
ort = { version = "2.0.0-alpha.4", features = ["load-dynamic", "cuda", "openvino"] }
tokenizers = { version = "0.15.2", features = ["default"] }
image = { version = "0.24.9", features = ["jpeg", "png", "webp" ]}
sysinfo = "0.29.11"
Expand Down
77 changes: 40 additions & 37 deletions lantern_cli/src/embeddings/core/ort_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ use image::{imageops::FilterType, io::Reader as ImageReader, GenericImageView};
use isahc::{config::RedirectPolicy, prelude::*, HttpClient};
use itertools::Itertools;
use ndarray::{s, Array2, Array4, ArrayBase, Axis, CowArray, CowRepr, Dim, IxDynImpl};
use ort::session::Session;
use ort::tensor::ort_owned_tensor::ViewHolder;
use ort::{Environment, ExecutionProvider, GraphOptimizationLevel, SessionBuilder, Value};
use ort::CPUExecutionProvider;
use ort::CUDAExecutionProvider;
use ort::OpenVINOExecutionProvider;
use ort::Session;
use ort::ArrayViewHolder;
use ort::{GraphOptimizationLevel, Value};
use serde::Deserialize;
use std::{
cmp,
Expand Down Expand Up @@ -34,7 +37,7 @@ pub enum PoolingStrategy {

impl PoolingStrategy {
fn cls_pooling(
embeddings: ViewHolder<'_, f32, Dim<IxDynImpl>>,
embeddings: ArrayViewHolder<'_, f32>,
output_dims: usize,
) -> Vec<Vec<f32>> {
embeddings
Expand All @@ -48,7 +51,7 @@ impl PoolingStrategy {
}

fn mean_pooling(
embeddings: ViewHolder<'_, f32, Dim<IxDynImpl>>,
embeddings: ArrayViewHolder<'_, f32>,
attention_mask: &SessionInput,
output_dims: usize,
) -> Vec<Vec<f32>> {
Expand Down Expand Up @@ -81,7 +84,7 @@ impl PoolingStrategy {

pub fn pool(
&self,
embeddings: ViewHolder<'_, f32, Dim<IxDynImpl>>,
embeddings: ArrayViewHolder<'_, f32>,
attention_mask: &SessionInput,
output_dims: usize,
) -> Vec<Vec<f32>> {
Expand Down Expand Up @@ -263,23 +266,23 @@ lazy_static! {
}

lazy_static! {
static ref ONNX_ENV: Arc<Environment> = Environment::builder()
.with_name("ldb_extras")
.with_execution_providers([
ExecutionProvider::CUDA(Default::default()),
ExecutionProvider::OpenVINO(Default::default()),
ExecutionProvider::CPU(Default::default()),
])
.build()
.unwrap()
.into_arc();
// ONNX_ENV initializes the ort environment exactly once, when it is first dereferenced.
static ref ONNX_ENV: bool = {
ort::init().with_name("ldb_extras")
.with_execution_providers([
CUDAExecutionProvider::default().build(),
OpenVINOExecutionProvider::default().build(),
CPUExecutionProvider::default().build(),
])
.commit()
.expect("can not initiate ort environment");
true
};
}

static MEM_PERCENT_THRESHOLD: f64 = 80.0;

impl EncoderService {
pub fn new(
environment: &Arc<Environment>,
model_name: &str,
model_params: ModelParams,
model_folder: &PathBuf,
Expand All @@ -306,7 +309,8 @@ impl EncoderService {

let num_cpus = num_cpus::get();

let encoder = SessionBuilder::new(environment)?
let _ = &ONNX_ENV;
let encoder = Session::builder()?
.with_parallel_execution(true)?
.with_intra_threads(num_cpus as i16)?
.with_optimization_level(GraphOptimizationLevel::Level3)?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just tested with GPU server, the default env seems to not work, but I have put

            .with_execution_providers([
                CUDAExecutionProvider::default().build(),
                OpenVINOExecutionProvider::default().build(),
                CPUExecutionProvider::default().build(),
            ])?

on Session::builder()? and removed the ONNX_ENV completely and it started to use the GPU

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you so much!
feel free to edit it in my branch

Expand Down Expand Up @@ -467,17 +471,17 @@ impl EncoderService {
.iter()
.map(|chunk| {
// Iterate over each chunk and create embedding for that chunk
let inputs: Vec<Value<'_>> = chunk
let inputs: Vec<Value> = chunk
.iter()
.map(|v| Value::from_array(session.allocator(), &v).unwrap())
.map(|v| Value::from_array(v).unwrap())
.collect();

let outputs = session.run(inputs).unwrap();
let outputs = session.run(&inputs[..]).unwrap();

let binding = outputs[0].try_extract()?;
let binding = outputs[0].extract_tensor()?;
let embeddings = binding.view();
let attention_mask = &chunk[attention_mask_idx];
let output_dims = session.outputs[0].dimensions.last().unwrap().unwrap() as usize;
let output_dims = *session.outputs[0].output_type.tensor_dimensions().unwrap().last().unwrap() as usize;
let embeddings: Vec<Vec<f32>> = self.model_params.pooling_strategy.pool(
embeddings,
attention_mask,
Expand Down Expand Up @@ -529,12 +533,12 @@ impl EncoderService {
)?)
.into_dyn();

let outputs = session.run(vec![
Value::from_array(session.allocator(), &ids)?,
Value::from_array(session.allocator(), &mask)?,
let outputs = session.run([
Value::from_array(&ids)?,
Value::from_array(&mask)?,
])?;

let binding = outputs[0].try_extract()?;
let binding = outputs[0].extract_tensor()?;
let embeddings = binding.view();

let seq_len = embeddings.shape().get(1).ok_or("not")?;
Expand Down Expand Up @@ -598,11 +602,10 @@ impl EncoderService {

let processed_tokens = pixels.len();

let outputs = session.run(vec![Value::from_array(
session.allocator(),
&pixels.into_dyn(),
let outputs = session.run([Value::from_array(
&pixels,
)?])?;
let binding = outputs[0].try_extract()?;
let binding = outputs[0].extract_tensor()?;
let embeddings = binding.view();

let seq_len = embeddings.shape().get(1).unwrap();
Expand Down Expand Up @@ -777,7 +780,6 @@ impl<'a> OrtRuntime<'a> {

let model_info = map_write.get_mut(model_name).unwrap();
let encoder = EncoderService::new(
&ONNX_ENV,
model_name,
model_info.params.clone(),
&model_folder,
Expand Down Expand Up @@ -943,18 +945,19 @@ impl<'a> EmbeddingRuntime for OrtRuntime<'a> {
// And output should have the dimensions
// This should be checked when adding a new model

let output_dims = model_info
let output_dims = *model_info
.encoder
.as_ref()
.unwrap()
.encoder
.outputs
.last()
.unwrap()
.dimensions()
.last()
.output_type
.tensor_dimensions()
.unwrap()
.unwrap();
.last()
.unwrap() as usize;

let invalid_vec = vec![-1.0; output_dims];
let return_res = buffers
Expand Down
Loading