From e9b57eca7593e2bc35a9db1001367f58e5259538 Mon Sep 17 00:00:00 2001 From: Diwank Singh Tomer Date: Wed, 2 Oct 2024 15:05:17 -0400 Subject: [PATCH] wip(deploy): Sample docker-compose files for different scenarios Signed-off-by: Diwank Singh Tomer --- deploy/README.md | 69 +++++++ ...ocker-compose.multi-tenant-cpu-managed.yml | 175 ++++++++++++++++++ ...er-compose.multi-tenant-cpu-selfhosted.yml | 144 ++++++++++++++ ...ocker-compose.multi-tenant-gpu-managed.yml | 84 +++++++++ ...er-compose.multi-tenant-gpu-selfhosted.yml | 149 +++++++++++++++ ...cker-compose.single-tenant-cpu-managed.yml | 75 ++++++++ ...r-compose.single-tenant-cpu-selfhosted.yml | 140 ++++++++++++++ ...cker-compose.single-tenant-gpu-managed.yml | 80 ++++++++ ...r-compose.single-tenant-gpu-selfhosted.yml | 145 +++++++++++++++ 9 files changed, 1061 insertions(+) create mode 100644 deploy/README.md create mode 100644 deploy/docker-compose.multi-tenant-cpu-managed.yml create mode 100644 deploy/docker-compose.multi-tenant-cpu-selfhosted.yml create mode 100644 deploy/docker-compose.multi-tenant-gpu-managed.yml create mode 100644 deploy/docker-compose.multi-tenant-gpu-selfhosted.yml create mode 100644 deploy/docker-compose.single-tenant-cpu-managed.yml create mode 100644 deploy/docker-compose.single-tenant-cpu-selfhosted.yml create mode 100644 deploy/docker-compose.single-tenant-gpu-managed.yml create mode 100644 deploy/docker-compose.single-tenant-gpu-selfhosted.yml diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 000000000..48c004efc --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,69 @@ +# Julep Deployment Configurations + +This directory contains various Docker Compose configurations for deploying Julep in different scenarios. Each configuration is tailored to specific use cases and deployment requirements. + +## Available Configurations + +### 1. 
Single-Tenant Mode with CPU Embeddings & Managed DB +- **File:** `docker-compose.single-tenant-cpu-managed.yml` +- **Description:** Deploys Julep in single-tenant mode using CPU-based embedding services with managed Temporal and LiteLLM databases. +- **Suitable for:** Development, testing, or small-scale deployments prioritizing simplicity and cost-effectiveness. + +### 2. Multi-Tenant Mode with CPU Embeddings & Managed DB +- **File:** `docker-compose.multi-tenant-cpu-managed.yml` +- **Description:** Deploys Julep in multi-tenant mode using CPU-based embedding services with managed Temporal and LiteLLM databases. +- **Suitable for:** Multi-tenant environments requiring manageable complexity with efficient resource usage. + +### 3. Single-Tenant Mode with GPU Embeddings & Managed DB +- **File:** `docker-compose.single-tenant-gpu-managed.yml` +- **Description:** Deploys Julep in single-tenant mode using GPU-based embedding services with managed Temporal and LiteLLM databases. +- **Suitable for:** Single-tenant deployments needing enhanced performance through GPU-powered embeddings. + +### 4. Multi-Tenant Mode with GPU Embeddings & Managed DB +- **File:** `docker-compose.multi-tenant-gpu-managed.yml` +- **Description:** Deploys Julep in multi-tenant mode using GPU-based embedding services with managed Temporal and LiteLLM databases. +- **Suitable for:** Large-scale multi-tenant deployments demanding high-performance embeddings. + +### 5. Single-Tenant Mode with CPU Embeddings & Self-Hosted DB +- **File:** `docker-compose.single-tenant-cpu-selfhosted.yml` +- **Description:** Deploys Julep in single-tenant mode using CPU-based embedding services with self-hosted Temporal and LiteLLM databases. +- **Suitable for:** Deployments where controlling the database infrastructure is preferred over managed services. + +### 6. 
Multi-Tenant Mode with CPU Embeddings & Self-Hosted DB +- **File:** `docker-compose.multi-tenant-cpu-selfhosted.yml` +- **Description:** Deploys Julep in multi-tenant mode using CPU-based embedding services with self-hosted Temporal and LiteLLM databases. +- **Suitable for:** Multi-tenant deployments with greater control over database services, ideal for organizations with specific compliance or customization needs. + +### 7. Single-Tenant Mode with GPU Embeddings & Self-Hosted DB +- **File:** `docker-compose.single-tenant-gpu-selfhosted.yml` +- **Description:** Deploys Julep in single-tenant mode using GPU-based embedding services with self-hosted Temporal and LiteLLM databases. +- **Suitable for:** High-performance single-tenant deployments that require self-managed databases for specialized configurations. + +### 8. Multi-Tenant Mode with GPU Embeddings & Self-Hosted DB +- **File:** `docker-compose.multi-tenant-gpu-selfhosted.yml` +- **Description:** Deploys Julep in multi-tenant mode using GPU-based embedding services with self-hosted Temporal and LiteLLM databases. +- **Suitable for:** High-performance, multi-tenant deployments that require full control over both embedding services and database infrastructure. + +## Configuration Components + +Each configuration file combines the following components: + +- **Tenancy Mode:** Single-tenant or Multi-tenant +- **Embedding Service:** CPU-based or GPU-based +- **Database Management:** Managed or Self-hosted + +## Additional Services + +- **Temporal UI:** Available as an optional add-on for all configurations to provide a web-based interface for monitoring Temporal workflows. + +## Choosing a Configuration + +Select the configuration that best matches your deployment requirements, considering factors such as: + +- Number of tenants +- Performance needs +- Infrastructure control preferences +- Scalability requirements +- Development vs. 
Production environment + +Refer to the individual Docker Compose files for detailed service configurations and environment variable requirements. \ No newline at end of file diff --git a/deploy/docker-compose.multi-tenant-cpu-managed.yml b/deploy/docker-compose.multi-tenant-cpu-managed.yml new file mode 100644 index 000000000..777b5fa07 --- /dev/null +++ b/deploy/docker-compose.multi-tenant-cpu-managed.yml @@ -0,0 +1,175 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api-multi-tenant:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + + agents-api-multi-tenant: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: gateway + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: 
Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + AGENTS_API_MULTI_TENANT_MODE: true + AGENTS_API_PREFIX: "/api" + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + gateway: + condition: service_started + + litellm: + image: ghcr.io/berriai/litellm-database:main-v1.46.6 + environment: + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + DATABASE_URL: ${LITELLM_DATABASE_URL:-postgresql://${LITELLM_POSTGRES_USER:-llmproxy}:${LITELLM_POSTGRES_PASSWORD}@litellm-db:5432/${LITELLM_POSTGRES_DB:-litellm}?sslmode=prefer} + REDIS_URL: ${LITELLM_REDIS_URL:-redis://default:${LITELLM_REDIS_PASSWORD}@litellm-redis:6379} + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + volumes: + - ./litellm-config.yaml:/app/config.yaml + - .keys:/app/.keys:ro + ports: + - "4000:4000" + depends_on: + litellm-db: + condition: service_started + litellm-redis: + condition: service_started + + litellm-db: + image: postgres:16 + restart: unless-stopped + volumes: + - litellm-db-data:/var/lib/postgresql/data + environment: + POSTGRES_DB: ${LITELLM_POSTGRES_DB:-litellm} + POSTGRES_USER: ${LITELLM_POSTGRES_USER:-llmproxy} + POSTGRES_PASSWORD: ${LITELLM_POSTGRES_PASSWORD} + healthcheck: + test: ["CMD-SHELL", "pg_isready -d ${LITELLM_POSTGRES_DB:-litellm} -U ${LITELLM_POSTGRES_USER:-llmproxy}"] + interval: 1s + timeout: 5s + 
retries: 10 + + litellm-redis: + image: redis/redis-stack-server + restart: unless-stopped + environment: + REDIS_ARGS: --requirepass ${LITELLM_REDIS_PASSWORD} + volumes: + - litellm-redis-data:/data + + temporal: + image: temporalio/auto-setup:1.25 + hostname: temporal + environment: + POSTGRES_PWD: ${TEMPORAL_POSTGRES_PASSWORD} + POSTGRES_DB: ${TEMPORAL_POSTGRES_DB:-temporal} + POSTGRES_SEEDS: temporal-db + DB_HOST: temporal-db + DB_PORT: 5432 + POSTGRES_USER: temporal + TEMPORAL_ADDRESS: temporal:7233 + POSTGRES_TLS_ENABLED: false + POSTGRES_TLS_CA_FILE: /cert/ca.crt + SQL_TLS_ENABLED: false + SQL_CA: /cert/ca.crt + POSTGRES_TLS_DISABLE_HOST_VERIFICATION: false + VISIBILITY_DBNAME: temporal_visibility + SKIP_SCHEMA_SETUP: false + SKIP_DB_CREATE: false + DYNAMIC_CONFIG_FILE_PATH: config/dynamicconfig/temporal-postgres.yaml + DB: postgres12 + LOG_LEVEL: info + volumes: + - ./scheduler/dynamicconfig:/etc/temporal/config/dynamicconfig + - ./scheduler/cert:/cert + depends_on: + temporal-db: + condition: service_started + + temporal-db: + image: postgres:16 + restart: unless-stopped + volumes: + - temporal-db-data:/var/lib/postgresql/data + environment: + POSTGRES_DB: ${TEMPORAL_POSTGRES_DB:-temporal} + POSTGRES_USER: temporal + POSTGRES_PASSWORD: ${TEMPORAL_POSTGRES_PASSWORD} + healthcheck: + test: ["CMD-SHELL", "pg_isready -d ${TEMPORAL_POSTGRES_DB:-temporal} -U temporal"] + interval: 1s + timeout: 5s + retries: 10 + +volumes: + cozo_data: + cozo_backup: + litellm-db-data: + litellm-redis-data: + temporal-db-data: \ No newline at end of file diff --git a/deploy/docker-compose.multi-tenant-cpu-selfhosted.yml b/deploy/docker-compose.multi-tenant-cpu-selfhosted.yml new file mode 100644 index 000000000..caf68e46c --- /dev/null +++ b/deploy/docker-compose.multi-tenant-cpu-selfhosted.yml @@ -0,0 +1,144 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: 
/data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api-multi-tenant:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + + agents-api-multi-tenant: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: gateway + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + AGENTS_API_MULTI_TENANT_MODE: true + AGENTS_API_PREFIX: "/api" + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + gateway: + condition: service_started + + litellm: + image: 
ghcr.io/berriai/litellm-database:main-v1.46.6 + environment: + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + DATABASE_URL: postgresql://${LITELLM_POSTGRES_USER:-llmproxy}:${LITELLM_POSTGRES_PASSWORD}@litellm-db:5432/${LITELLM_POSTGRES_DB:-litellm}?sslmode=prefer + REDIS_URL: redis://default:${LITELLM_REDIS_PASSWORD}@litellm-redis:6379 + volumes: + - ./litellm-config.yaml:/app/config.yaml + - .keys:/app/.keys:ro + ports: + - "4000:4000" + depends_on: + litellm-db: + condition: service_healthy + litellm-redis: + condition: service_started + + litellm-db: + image: postgres:16 + environment: + POSTGRES_DB: ${LITELLM_POSTGRES_DB:-litellm} + POSTGRES_USER: ${LITELLM_POSTGRES_USER:-llmproxy} + POSTGRES_PASSWORD: ${LITELLM_POSTGRES_PASSWORD} + volumes: + - litellm-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LITELLM_POSTGRES_USER:-llmproxy}"] + interval: 5s + timeout: 5s + retries: 5 + + litellm-redis: + image: redis:6 + command: redis-server --requirepass ${LITELLM_REDIS_PASSWORD} + volumes: + - litellm-redis-data:/data + + temporal: + image: temporalio/auto-setup:1.25 + environment: + - DB=postgres12 + - DB_PORT=5432 + - POSTGRES_USER=${TEMPORAL_POSTGRES_USER:-temporal} + - POSTGRES_PWD=${TEMPORAL_POSTGRES_PASSWORD} + - POSTGRES_SEEDS=temporal-db + depends_on: + temporal-db: + condition: service_healthy + + temporal-db: + image: postgres:16 + environment: + POSTGRES_PASSWORD: ${TEMPORAL_POSTGRES_PASSWORD} + POSTGRES_USER: ${TEMPORAL_POSTGRES_USER:-temporal} + volumes: + - temporal-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${TEMPORAL_POSTGRES_USER:-temporal}"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + cozo_data: + cozo_backup: + litellm-db-data: + litellm-redis-data: + temporal-db-data: \ No newline at end of file diff --git a/deploy/docker-compose.multi-tenant-gpu-managed.yml b/deploy/docker-compose.multi-tenant-gpu-managed.yml new file mode 100644 index 000000000..e9e1e9bd7 
--- /dev/null +++ b/deploy/docker-compose.multi-tenant-gpu-managed.yml @@ -0,0 +1,84 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api-multi-tenant:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:gpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + + agents-api-multi-tenant: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: gateway + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: 
julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + AGENTS_API_MULTI_TENANT_MODE: true + AGENTS_API_PREFIX: "/api" + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + gateway: + condition: service_started + +volumes: + cozo_data: + cozo_backup: \ No newline at end of file diff --git a/deploy/docker-compose.multi-tenant-gpu-selfhosted.yml b/deploy/docker-compose.multi-tenant-gpu-selfhosted.yml new file mode 100644 index 000000000..737a21e2d --- /dev/null +++ b/deploy/docker-compose.multi-tenant-gpu-selfhosted.yml @@ -0,0 +1,149 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api-multi-tenant:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:gpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + + agents-api-multi-tenant: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + 
AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: gateway + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + AGENTS_API_MULTI_TENANT_MODE: true + AGENTS_API_PREFIX: "/api" + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + gateway: + condition: service_started + + litellm: + image: ghcr.io/berriai/litellm-database:main-v1.46.6 + environment: + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + DATABASE_URL: postgresql://${LITELLM_POSTGRES_USER:-llmproxy}:${LITELLM_POSTGRES_PASSWORD}@litellm-db:5432/${LITELLM_POSTGRES_DB:-litellm}?sslmode=prefer + REDIS_URL: redis://default:${LITELLM_REDIS_PASSWORD}@litellm-redis:6379 + volumes: + - ./litellm-config.yaml:/app/config.yaml + - .keys:/app/.keys:ro + ports: + - "4000:4000" + depends_on: + litellm-db: + condition: service_healthy + litellm-redis: + condition: service_started + + litellm-db: + image: postgres:16 + environment: + POSTGRES_DB: ${LITELLM_POSTGRES_DB:-litellm} + POSTGRES_USER: ${LITELLM_POSTGRES_USER:-llmproxy} + POSTGRES_PASSWORD: ${LITELLM_POSTGRES_PASSWORD} + volumes: + - litellm-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LITELLM_POSTGRES_USER:-llmproxy}"] + interval: 5s + timeout: 5s + retries: 5 + + litellm-redis: + image: redis:6 + command: redis-server --requirepass ${LITELLM_REDIS_PASSWORD} + volumes: + - litellm-redis-data:/data + + temporal: + image: temporalio/auto-setup:1.25 + environment: + - DB=postgres12 + - DB_PORT=5432 + - 
POSTGRES_USER=${TEMPORAL_POSTGRES_USER:-temporal} + - POSTGRES_PWD=${TEMPORAL_POSTGRES_PASSWORD} + - POSTGRES_SEEDS=temporal-db + depends_on: + temporal-db: + condition: service_healthy + + temporal-db: + image: postgres:16 + environment: + POSTGRES_PASSWORD: ${TEMPORAL_POSTGRES_PASSWORD} + POSTGRES_USER: ${TEMPORAL_POSTGRES_USER:-temporal} + volumes: + - temporal-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${TEMPORAL_POSTGRES_USER:-temporal}"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + cozo_data: + cozo_backup: + litellm-db-data: + litellm-redis-data: + temporal-db-data: \ No newline at end of file diff --git a/deploy/docker-compose.single-tenant-cpu-managed.yml b/deploy/docker-compose.single-tenant-cpu-managed.yml new file mode 100644 index 000000000..cdcf2bc59 --- /dev/null +++ b/deploy/docker-compose.single-tenant-cpu-managed.yml @@ -0,0 +1,75 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: 
${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + + agents-api: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: localhost + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + +volumes: + cozo_data: + cozo_backup: \ No newline at end of file diff --git a/deploy/docker-compose.single-tenant-cpu-selfhosted.yml b/deploy/docker-compose.single-tenant-cpu-selfhosted.yml new file mode 100644 index 000000000..9cd63e64e --- /dev/null +++ b/deploy/docker-compose.single-tenant-cpu-selfhosted.yml @@ -0,0 +1,140 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + 
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + + agents-api: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: localhost + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + + litellm: + image: ghcr.io/berriai/litellm-database:main-v1.46.6 + environment: + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + DATABASE_URL: postgresql://${LITELLM_POSTGRES_USER:-llmproxy}:${LITELLM_POSTGRES_PASSWORD}@litellm-db:5432/${LITELLM_POSTGRES_DB:-litellm}?sslmode=prefer + REDIS_URL: redis://default:${LITELLM_REDIS_PASSWORD}@litellm-redis:6379 + volumes: + - ./litellm-config.yaml:/app/config.yaml + - .keys:/app/.keys:ro + ports: + - "4000:4000" + depends_on: + litellm-db: + condition: service_healthy + litellm-redis: + condition: service_started + + litellm-db: + image: postgres:16 + environment: + POSTGRES_DB: ${LITELLM_POSTGRES_DB:-litellm} + POSTGRES_USER: ${LITELLM_POSTGRES_USER:-llmproxy} + POSTGRES_PASSWORD: ${LITELLM_POSTGRES_PASSWORD} + 
volumes: + - litellm-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LITELLM_POSTGRES_USER:-llmproxy}"] + interval: 5s + timeout: 5s + retries: 5 + + litellm-redis: + image: redis:6 + command: redis-server --requirepass ${LITELLM_REDIS_PASSWORD} + volumes: + - litellm-redis-data:/data + + temporal: + image: temporalio/auto-setup:1.25 + environment: + - DB=postgres12 + - DB_PORT=5432 + - POSTGRES_USER=${TEMPORAL_POSTGRES_USER:-temporal} + - POSTGRES_PWD=${TEMPORAL_POSTGRES_PASSWORD} + - POSTGRES_SEEDS=temporal-db + depends_on: + temporal-db: + condition: service_healthy + + temporal-db: + image: postgres:16 + environment: + POSTGRES_PASSWORD: ${TEMPORAL_POSTGRES_PASSWORD} + POSTGRES_USER: ${TEMPORAL_POSTGRES_USER:-temporal} + volumes: + - temporal-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${TEMPORAL_POSTGRES_USER:-temporal}"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + cozo_data: + cozo_backup: + litellm-db-data: + litellm-redis-data: + temporal-db-data: \ No newline at end of file diff --git a/deploy/docker-compose.single-tenant-gpu-managed.yml b/deploy/docker-compose.single-tenant-gpu-managed.yml new file mode 100644 index 000000000..eda0f371d --- /dev/null +++ b/deploy/docker-compose.single-tenant-gpu-managed.yml @@ -0,0 +1,80 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - "9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:gpu-1.5 + 
environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + + agents-api: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: localhost + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + +volumes: + cozo_data: + cozo_backup: \ No newline at end of file diff --git a/deploy/docker-compose.single-tenant-gpu-selfhosted.yml b/deploy/docker-compose.single-tenant-gpu-selfhosted.yml new file mode 100644 index 000000000..c6f42fa98 --- /dev/null +++ b/deploy/docker-compose.single-tenant-gpu-selfhosted.yml @@ -0,0 +1,145 @@ +version: '3.8' + +services: + memory-store: + image: julepai/memory-store:${TAG} + environment: + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_PORT: 9070 + COZO_MNT_DIR: /data + COZO_BACKUP_DIR: /backup + volumes: + - cozo_data:/data + - cozo_backup:/backup + ports: + - 
"9070:9070" + + gateway: + image: julepai/gateway:${TAG} + environment: + GATEWAY_PORT: 80 + JWT_SHARED_KEY: ${JWT_SHARED_KEY} + AGENTS_API_URL: http://agents-api:8080 + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + TRAEFIK_LOG_LEVEL: INFO + ports: + - "80:80" + + embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:gpu-1.5 + environment: + MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + GROQ_API_KEY: ${GROQ_API_KEY} + CLOUDFLARE_API_KEY: ${CLOUDFLARE_API_KEY} + CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID} + NVIDIA_NIM_API_KEY: ${NVIDIA_NIM_API_KEY} + GITHUB_API_KEY: ${GITHUB_API_KEY} + VOYAGE_API_KEY: ${VOYAGE_API_KEY} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS} + TRUNCATE_EMBED_TEXT: True + volumes: + - ~/.cache/huggingface/hub:/data + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + + agents-api: + image: julepai/agents-api:${TAG} + environment: + AGENTS_API_KEY: ${AGENTS_API_KEY} + AGENTS_API_KEY_HEADER_NAME: Authorization + AGENTS_API_HOSTNAME: localhost + AGENTS_API_PUBLIC_PORT: 80 + AGENTS_API_PROTOCOL: http + COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN} + COZO_HOST: http://memory-store:9070 + DEBUG: False + EMBEDDING_MODEL_ID: Alibaba-NLP/gte-large-en-v1.5 + INTEGRATION_SERVICE_URL: http://integrations:8000 + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_URL: http://litellm:4000 + SUMMARIZATION_MODEL_NAME: gpt-4-turbo + TEMPORAL_ENDPOINT: temporal:7233 + TEMPORAL_NAMESPACE: default + TEMPORAL_TASK_QUEUE: julep-task-queue + TEMPORAL_WORKER_URL: temporal:7233 + WORKER_URL: temporal:7233 + ports: + - "8080:8080" + depends_on: + memory-store: + condition: service_started + + litellm: + image: ghcr.io/berriai/litellm-database:main-v1.46.6 + environment: + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + DATABASE_URL: 
postgresql://${LITELLM_POSTGRES_USER:-llmproxy}:${LITELLM_POSTGRES_PASSWORD}@litellm-db:5432/${LITELLM_POSTGRES_DB:-litellm}?sslmode=prefer + REDIS_URL: redis://default:${LITELLM_REDIS_PASSWORD}@litellm-redis:6379 + volumes: + - ./litellm-config.yaml:/app/config.yaml + - .keys:/app/.keys:ro + ports: + - "4000:4000" + depends_on: + litellm-db: + condition: service_healthy + litellm-redis: + condition: service_started + + litellm-db: + image: postgres:16 + environment: + POSTGRES_DB: ${LITELLM_POSTGRES_DB:-litellm} + POSTGRES_USER: ${LITELLM_POSTGRES_USER:-llmproxy} + POSTGRES_PASSWORD: ${LITELLM_POSTGRES_PASSWORD} + volumes: + - litellm-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LITELLM_POSTGRES_USER:-llmproxy}"] + interval: 5s + timeout: 5s + retries: 5 + + litellm-redis: + image: redis:6 + command: redis-server --requirepass ${LITELLM_REDIS_PASSWORD} + volumes: + - litellm-redis-data:/data + + temporal: + image: temporalio/auto-setup:1.25 + environment: + - DB=postgres12 + - DB_PORT=5432 + - POSTGRES_USER=${TEMPORAL_POSTGRES_USER:-temporal} + - POSTGRES_PWD=${TEMPORAL_POSTGRES_PASSWORD} + - POSTGRES_SEEDS=temporal-db + depends_on: + temporal-db: + condition: service_healthy + + temporal-db: + image: postgres:16 + environment: + POSTGRES_PASSWORD: ${TEMPORAL_POSTGRES_PASSWORD} + POSTGRES_USER: ${TEMPORAL_POSTGRES_USER:-temporal} + volumes: + - temporal-db-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${TEMPORAL_POSTGRES_USER:-temporal}"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + cozo_data: + cozo_backup: + litellm-db-data: + litellm-redis-data: + temporal-db-data: \ No newline at end of file