tembo ai model source #123

Merged: 18 commits, Jun 6, 2024
4 changes: 2 additions & 2 deletions .github/workflows/build-ollama-serve.yml
@@ -24,7 +24,7 @@ defaults:
run:
shell: bash
working-directory: ./ollama-serve/

jobs:
build_and_push:
name: Build and push images
@@ -43,7 +43,7 @@ jobs:
echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
@@ -4,8 +4,8 @@ on:
branches:
- main
paths:
- 'docs/**'
- '.github/workflows/docs.yml'
- "docs/**"
- ".github/workflows/docs.yml"
jobs:
deploy:
runs-on: ubuntu-latest
@@ -19,5 +19,5 @@ jobs:

- name: deploy docs
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: poetry run mkdocs gh-deploy --force
50 changes: 23 additions & 27 deletions .github/workflows/extension_ci.yml
@@ -10,13 +10,13 @@ on:
branches:
- main
paths-ignore:
- 'README.md'
- "README.md"
- docs/**
push:
branches:
- main
paths-ignore:
- 'README.md'
- "README.md"
- docs/**

release:
@@ -39,12 +39,12 @@ jobs:
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk

- name: Cache binaries
uses: actions/cache@v2
@@ -120,11 +120,9 @@
${{ runner.os }}-bins-
- name: setup-tests
run: |
pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
~/.cargo/bin/trunk install pgvector --pg-config ${pgrx15_config}
~/.cargo/bin/trunk install pgmq --pg-config ${pgrx15_config}
~/.cargo/bin/trunk install pg_cron --pg-config ${pgrx15_config}
echo "shared_preload_libraries = 'pg_cron, vectorize'" >> ~/.pgrx/data-15/postgresql.conf
make trunk-dependencies
make setup.urls
make setup.shared_preload_libraries
rm -rf ./target/pgrx-test-data-* || true
- name: unit-test
run: |
@@ -133,8 +131,6 @@
env:
HF_API_KEY: ${{ secrets.HF_API_KEY }}
run: |
pgrx15_config=$(/usr/local/bin/stoml ~/.pgrx/config.toml configs.pg15)
pg_version=$(/usr/local/bin/stoml Cargo.toml features.default)
echo "\q" | make run
make test-integration

@@ -160,12 +156,12 @@ jobs:
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
cargo install pg-trunk
- name: trunk build
working-directory: ./extension
# trunk does not support path dependencies in Cargo.toml that are not within the current working directory
@@ -184,7 +180,7 @@
needs:
- publish
env:
TARGET_PLATFORMS: linux/amd64,linux/arm64
TARGET_PLATFORMS: linux/amd64,linux/arm64
runs-on:
- self-hosted
- dind
@@ -202,11 +198,11 @@
- name: Install stoml and pg-trunk
shell: bash
run: |
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
set -xe
wget https://github.com/freshautomations/stoml/releases/download/v0.7.1/stoml_linux_amd64 &> /dev/null
mv stoml_linux_amd64 stoml
chmod +x stoml
sudo mv stoml /usr/local/bin/
- name: Set version strings
id: versions
run: |
4 changes: 2 additions & 2 deletions core/src/transformers/openai.rs
@@ -6,7 +6,7 @@ use crate::types::{JobParams, VectorizeMeta};
// max token length is 8192
// however, depending on the content of the text, the token count can be higher than the word count
pub const MAX_TOKEN_LEN: usize = 8192;
pub const OPENAI_EMBEDDING_URL: &str = "https://api.openai.com/v1/embeddings";
pub const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

pub fn prepare_openai_request(
vect_meta: VectorizeMeta,
@@ -30,7 +30,7 @@ pub fn prepare_openai_request(
},
};
Ok(EmbeddingRequest {
url: OPENAI_EMBEDDING_URL.to_owned(),
url: format!("{OPENAI_BASE_URL}/embeddings"),
payload,
api_key: Some(apikey),
})
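With the endpoint constant replaced by a base URL, every OpenAI-style route can be derived from the same constant. A minimal sketch of that pattern (the `endpoint_url` helper is an illustration, not part of this PR):

```rust
pub const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

/// Hypothetical helper: join the base URL and an endpoint path,
/// tolerating stray slashes on either side.
fn endpoint_url(base: &str, path: &str) -> String {
    format!("{}/{}", base.trim_end_matches('/'), path.trim_start_matches('/'))
}

fn main() {
    // Same value the PR builds inline via format!("{OPENAI_BASE_URL}/embeddings").
    assert_eq!(
        endpoint_url(OPENAI_BASE_URL, "embeddings"),
        "https://api.openai.com/v1/embeddings"
    );
    // Other routes (e.g. chat completions) can reuse the constant the same way.
    assert_eq!(
        endpoint_url(OPENAI_BASE_URL, "chat/completions"),
        "https://api.openai.com/v1/chat/completions"
    );
}
```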
39 changes: 31 additions & 8 deletions core/src/types.rs
@@ -164,7 +164,10 @@ pub enum ModelError {
impl Model {
pub fn new(input: &str) -> Result<Self, ModelError> {
let mut parts: Vec<&str> = input.split('/').collect();
let missing_source = parts.len() != 2;
let missing_source = parts.len() < 2;
if parts.len() > 3 {
return Err(ModelError::InvalidFormat(input.to_string()));
}
if missing_source && parts[0] == "text-embedding-ada-002" {
// for backwards compatibility, prepend "openai" to text-embedding-ada-002
parts.insert(0, "openai");
@@ -178,10 +181,22 @@ impl Model {
.parse::<ModelSource>()
.map_err(|_| ModelError::InvalidSource(parts[0].to_string()))?;

let name = if source == ModelSource::Tembo {
// remove the leading "tembo/" prefix from the model name
parts.remove(0);
// all others remain the same
parts.join("/")
} else {
parts
.last()
.expect("expected non-empty model name")
.to_string()
};

Ok(Self {
source,
fullname: format!("{}/{}", parts[0], parts[1]),
name: parts[1].to_string(),
fullname: parts.join("/"),
name,
})
}
}
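The net effect of the new parsing rules, sketched as an extra test (behavior matches the tests further down in this file; the test itself is illustrative, not part of this PR):

```rust
#[test]
fn parsing_rules_sketch() {
    // Two segments: the prefix picks the source, the last segment is the name.
    let m = Model::new("openai/text-embedding-ada-002").unwrap();
    assert_eq!(m.source, ModelSource::OpenAI);
    assert_eq!(m.name, "text-embedding-ada-002");
    assert_eq!(m.fullname, "openai/text-embedding-ada-002");

    // Tembo models may carry an org prefix; "tembo/" is stripped and the
    // remaining "org/model" path is kept as both name and fullname.
    let m = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
    assert_eq!(m.source, ModelSource::Tembo);
    assert_eq!(m.name, "meta-llama/Meta-Llama-3-8B-Instruct");

    // Unknown prefixes fall back to SentenceTransformers (see FromStr below),
    // which is what lets private sentence-transformers-compatible models load.
    let m = Model::new("my-org/my-private-model").unwrap();
    assert_eq!(m.source, ModelSource::SentenceTransformers);

    // Anything beyond three segments is rejected outright.
    assert!(Model::new("a/b/c/d").is_err());
}
```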
@@ -199,6 +214,7 @@ pub enum ModelSource {
OpenAI,
SentenceTransformers,
Ollama,
Tembo,
}

impl FromStr for ModelSource {
@@ -209,6 +225,7 @@ impl FromStr for ModelSource {
"ollama" => Ok(ModelSource::Ollama),
"openai" => Ok(ModelSource::OpenAI),
"sentence-transformers" => Ok(ModelSource::SentenceTransformers),
"tembo" => Ok(ModelSource::Tembo),
_ => Ok(ModelSource::SentenceTransformers),
}
}
@@ -220,6 +237,7 @@ impl Display for ModelSource {
ModelSource::Ollama => write!(f, "ollama"),
ModelSource::OpenAI => write!(f, "openai"),
ModelSource::SentenceTransformers => write!(f, "sentence-transformers"),
ModelSource::Tembo => write!(f, "tembo"),
}
}
}
@@ -230,6 +248,7 @@ impl From<String> for ModelSource {
"ollama" => ModelSource::Ollama,
"openai" => ModelSource::OpenAI,
"sentence-transformers" => ModelSource::SentenceTransformers,
"tembo" => ModelSource::Tembo,
// other cases are assumed to be private sentence-transformer compatible model
// and can be hot-loaded
_ => ModelSource::SentenceTransformers,
@@ -242,10 +261,19 @@
mod model_tests {
use super::*;

#[test]
fn test_tembo_parsing() {
let model = Model::new("tembo/meta-llama/Meta-Llama-3-8B-Instruct").unwrap();
assert_eq!(model.source, ModelSource::Tembo);
assert_eq!(model.fullname, "meta-llama/Meta-Llama-3-8B-Instruct");
assert_eq!(model.name, "meta-llama/Meta-Llama-3-8B-Instruct");
}

#[test]
fn test_ollama_parsing() {
let model = Model::new("ollama/wizardlm2:7b").unwrap();
assert_eq!(model.source, ModelSource::Ollama);
assert_eq!(model.fullname, "ollama/wizardlm2:7b");
assert_eq!(model.name, "wizardlm2:7b");
}

@@ -295,11 +323,6 @@ mod model_tests {
assert!(Model::new("openaimodel-name").is_err());
}

#[test]
fn test_invalid_format_extra_slash() {
assert!(Model::new("openai/model/name").is_err());
}

#[test]
fn test_backwards_compatibility() {
let model = Model::new("text-embedding-ada-002").unwrap();
6 changes: 4 additions & 2 deletions core/src/worker/base.rs
@@ -79,7 +79,7 @@ impl Config {
}

/// source a variable from environment - use default if not exists
fn from_env_default(key: &str, default: &str) -> String {
pub fn from_env_default(key: &str, default: &str) -> String {
env::var(key).unwrap_or_else(|_| default.to_owned())
}

@@ -98,7 +98,9 @@ async fn execute_job(
&msg.message.inputs,
cfg.openai_api_key.clone(),
)?,
ModelSource::Ollama => Err(anyhow::anyhow!("Ollama transformer not implemented yet"))?,
ModelSource::Ollama | &ModelSource::Tembo => Err(anyhow::anyhow!(
"Ollama/Tembo transformer not implemented yet"
))?,
ModelSource::SentenceTransformers => generic::prepare_generic_embedding_request(
job_meta.clone(),
&msg.message.inputs,
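`from_env_default` is promoted from private to `pub` here, so other crates in the workspace can reuse the env-with-fallback pattern. A standalone sketch of the same behavior (the variable name is a placeholder):

```rust
use std::env;

/// Same shape as the helper in core/src/worker/base.rs:
/// read an env var, fall back to a default when unset.
pub fn from_env_default(key: &str, default: &str) -> String {
    env::var(key).unwrap_or_else(|_| default.to_owned())
}

fn main() {
    // Placeholder variable name, for illustration only.
    env::remove_var("VECTORIZE_DEMO_URL");
    assert_eq!(
        from_env_default("VECTORIZE_DEMO_URL", "http://localhost:3001"),
        "http://localhost:3001"
    );

    env::set_var("VECTORIZE_DEMO_URL", "http://ollama:11434");
    assert_eq!(
        from_env_default("VECTORIZE_DEMO_URL", "http://ollama:11434"),
        "http://ollama:11434"
    );
}
```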
4 changes: 2 additions & 2 deletions extension/Cargo.toml
@@ -1,14 +1,14 @@
[package]
name = "vectorize"
version = "0.15.1"
version = "0.16.0"
edition = "2021"
publish = false

[lib]
crate-type = ["cdylib"]

[features]
default = ["pg15"]
default = ["pg16"]
pg14 = ["pgrx/pg14", "pgrx-tests/pg14"]
pg15 = ["pgrx/pg15", "pgrx-tests/pg15"]
pg16 = ["pgrx/pg16", "pgrx-tests/pg16"]
18 changes: 13 additions & 5 deletions extension/Makefile
@@ -1,10 +1,10 @@
SQLX_OFFLINE:=true
DATABASE_URL:=postgres://${USER}:${USER}@localhost:28815/postgres
DISTNAME = $(shell grep -m 1 '^name' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
DISTVERSION = $(shell grep -m 1 '^version' Trunk.toml | sed -e 's/[^"]*"\([^"]*\)",\{0,1\}/\1/')
PG_VERSION:=15
PG_VERSION:=16
DATABASE_URL:=postgres://${USER}:${USER}@localhost:288${PG_VERSION}/postgres
PGRX_PG_CONFIG =$(shell cargo pgrx info pg-config pg${PG_VERSION})
UPGRADE_FROM_VER:=0.9.0
UPGRADE_FROM_VER:=0.16.0
BRANCH:=$(git rev-parse --abbrev-ref HEAD)
RUST_LOG:=debug

@@ -40,11 +40,17 @@ setup.shared_preload_libraries:
setup.urls:
echo "vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
echo "vectorize.ollama_service_url = 'http://localhost:3001'" >> ~/.pgrx/data-${PG_VERSION}/postgresql.conf
setup: setup.dependencies setup.shared_preload_libraries
setup: setup.dependencies setup.shared_preload_libraries setup.urls

cat-logs:
cat ~/.pgrx/${PG_VERSION}.log

# install dependencies from trunk (limited OS/arch)
trunk-dependencies:
trunk install pgvector --pg-config ${PGRX_PG_CONFIG}
trunk install pgmq --pg-config ${PGRX_PG_CONFIG}
trunk install pg_cron --pg-config ${PGRX_PG_CONFIG}

install-pg_cron:
git clone https://github.com/citusdata/pg_cron.git && \
cd pg_cron && \
Expand All @@ -65,7 +71,9 @@ install-pgvector:
install-pgmq:
git clone https://github.com/tembo-io/pgmq.git && \
cd pgmq && \
cargo pgrx install --pg-config=${PGRX_PG_CONFIG} && \
PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
PG_CONFIG=${PGRX_PG_CONFIG} make && \
PG_CONFIG=${PGRX_PG_CONFIG} make install && \
cd .. && rm -rf pgmq

test-integration:
2 changes: 1 addition & 1 deletion extension/Trunk.toml
@@ -6,7 +6,7 @@ description = "The simplest way to orchestrate vector search on Postgres."
homepage = "https://github.com/tembo-io/pg_vectorize"
documentation = "https://github.com/tembo-io/pg_vectorize"
categories = ["orchestration", "machine_learning"]
version = "0.15.1"
version = "0.16.0"

[build]
postgres_version = "15"
28 changes: 28 additions & 0 deletions extension/sql/vectorize--0.15.1--0.16.0.sql
@@ -0,0 +1,28 @@
-- src/api.rs:158
-- vectorize::api::generate
CREATE FUNCTION vectorize."generate"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/gpt-3.5-turbo', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'generate_wrapper';

-- src/api.rs:168
-- vectorize::api::env_interpolate_guc
CREATE FUNCTION vectorize."env_interpolate_guc"(
"guc_name" TEXT /* &str */
) RETURNS TEXT /* core::result::Result<alloc::string::String, anyhow::Error> */
STRICT
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'env_interpolate_guc_wrapper';

-- src/api.rs:79
-- vectorize::api::encode
CREATE FUNCTION vectorize."encode"(
"input" TEXT, /* &str */
"model" TEXT DEFAULT 'openai/text-embedding-ada-002', /* alloc::string::String */
"api_key" TEXT DEFAULT NULL /* core::option::Option<alloc::string::String> */
) RETURNS double precision[] /* core::result::Result<alloc::vec::Vec<f64>, anyhow::Error> */
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'encode_wrapper';
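These functions are callable from any Postgres client once the upgrade is applied. A hedged sketch of calling `generate` and `encode` from Rust with sqlx (the connection string and prompts are placeholders; assumes the extension is installed and an API key is configured on the server):

```rust
use sqlx::postgres::PgPoolOptions;

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // Placeholder DSN; point this at a database with vectorize installed.
    let pool = PgPoolOptions::new()
        .max_connections(1)
        .connect("postgres://postgres:postgres@localhost:5432/postgres")
        .await?;

    // vectorize."generate": text completion via the default 'openai/gpt-3.5-turbo' model.
    let answer: String = sqlx::query_scalar("SELECT vectorize.generate($1)")
        .bind("Summarize what pg_vectorize does in one sentence.")
        .fetch_one(&pool)
        .await?;
    println!("generate -> {answer}");

    // vectorize."encode": embeddings returned as double precision[], decoded to Vec<f64>.
    let embedding: Vec<f64> = sqlx::query_scalar("SELECT vectorize.encode($1, $2)")
        .bind("the quick brown fox")
        .bind("openai/text-embedding-ada-002")
        .fetch_one(&pool)
        .await?;
    println!("encode -> {} dims", embedding.len());

    Ok(())
}
```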