Commit 8b67830: Merge branch 'main' into providers-voyage
Authored by ChuckHend, Nov 1, 2024
2 parents: 8d563d2 + 78fb934
Showing 4 changed files with 275 additions and 257 deletions.
CONTRIBUTING.md (6 additions, 0 deletions)

@@ -105,6 +105,12 @@ make run

Once the above command is run, you will be brought into Postgres via `psql`.

Run the following command inside the `psql` console to enable the extensions:

```sql
create extension vectorize cascade;
```

To list out the enabled extensions, run:

```sql
\dx
```
docs/configuration.md (1 addition, 1 deletion)

@@ -17,7 +17,7 @@ All Embedding model and LLM providers can have their base URLs changed.
For example, if you have an OpenAI compliant embedding or LLM server (such as [vLLM](https://github.com/vllm-project/vllm)), running at `https://api.myserver.com/v1`, you can change the base URL with the following SQL command:

```diff
-ALTER SYSTEM SET vectorize.openai_base_url TO 'https://api.myserver.com/v1';
+ALTER SYSTEM SET vectorize.openai_service_url TO 'https://api.myserver.com/v1';
 SELECT pg_reload_conf();
```
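After `pg_reload_conf()`, the active value can be checked from any session. A minimal sketch, assuming the `vectorize.openai_service_url` GUC named in this diff; `SHOW` is standard PostgreSQL:

```sql
SHOW vectorize.openai_service_url;
```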

vector-serve/app/models.py (10 additions, 1 deletion)

```diff
@@ -6,6 +6,13 @@

 from app.metrics import ML_MODEL_COUNT

+LOCAL_FILES_ONLY = os.getenv("LOCAL_FILES_ONLY", "true").lower() in [
+    "true",
+    "1",
+    "t",
+    True,
+]
+
 _HF_ORG = "sentence-transformers"

 MODELS_TO_CACHE = [f"{_HF_ORG}/all-MiniLM-L6-v2"]
@@ -28,7 +35,9 @@ def parse_header(authorization: str) -> str | None:
 def load_model_cache(app: FastAPI) -> dict[str, SentenceTransformer]:
     model_cache = {}
     for m in MODELS_TO_CACHE:
-        model_cache[m] = SentenceTransformer(m, cache_folder=cache_dir)
+        model_cache[m] = SentenceTransformer(
+            m, cache_folder=cache_dir, local_files_only=LOCAL_FILES_ONLY
+        )
     app.state.model_cache = model_cache
```
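The `LOCAL_FILES_ONLY` parsing added in this commit can be exercised in isolation. A minimal sketch, where `parse_bool_env` is a hypothetical helper mirroring the diff's expression; note that the bare `True` entry in the committed list is unreachable, since `str.lower()` always returns a `str`, which never compares equal to the bool `True`:

```python
import os


def parse_bool_env(name: str, default: str = "true") -> bool:
    # Same truthy-string check as LOCAL_FILES_ONLY above:
    # case-insensitive match on "true", "1", or "t".
    return os.getenv(name, default).lower() in ["true", "1", "t"]


os.environ["LOCAL_FILES_ONLY"] = "T"
print(parse_bool_env("LOCAL_FILES_ONLY"))  # True (case-insensitive)

os.environ["LOCAL_FILES_ONLY"] = "false"
print(parse_bool_env("LOCAL_FILES_ONLY"))  # False
```

With the default of `"true"`, an unset variable also yields `True`, so the model server only reaches out to the Hugging Face hub when the flag is explicitly disabled.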