diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py index 8894364b24fcb..28a1e4137bc15 100644 --- a/libs/partners/openai/langchain_openai/embeddings/base.py +++ b/libs/partners/openai/langchain_openai/embeddings/base.py @@ -99,23 +99,90 @@ def _process_batched_chunked_embeddings( class OpenAIEmbeddings(BaseModel, Embeddings): - """OpenAI embedding models. + """OpenAI embedding model integration. - To use, you should have the - environment variable ``OPENAI_API_KEY`` set with your API key or pass it - as a named parameter to the constructor. + Setup: + Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``. - In order to use the library with Microsoft Azure endpoints, use - AzureOpenAIEmbeddings. + .. code-block:: bash - Example: + pip install -U langchain_openai + export OPENAI_API_KEY="your-api-key" + + Key init args — embedding params: + model: str + Name of OpenAI model to use. + dimensions: Optional[int] = None + The number of dimensions the resulting output embeddings should have. + Only supported in `text-embedding-3` and later models. + + Key init args — client params: + api_key: Optional[SecretStr] = None + OpenAI API key. + organization: Optional[str] = None + OpenAI organization ID. If not passed in will be read + from env var OPENAI_ORG_ID. + max_retries: int = 2 + Maximum number of retries to make when generating. + request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None + Timeout for requests to OpenAI completion API + + See full list of supported init args and their descriptions in the params section. + + Instantiate: .. code-block:: python from langchain_openai import OpenAIEmbeddings - model = OpenAIEmbeddings(model="text-embedding-3-large") + embed = OpenAIEmbeddings( + model="text-embedding-3-large", + # With the `text-embedding-3` class + # of models, you can specify the size + # of the embeddings you want returned. + # dimensions=1024 + ) + + Embed multiple texts: + + .. code-block:: python + + vectors = embeddings.embed_documents(['hello', 'goodbye']) + # Showing only the first 3 coordinates + print(len(vectors)) + print(vectors[0][:3]) + + .. code-block:: python + + 2 + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + + Embed single text: + + .. code-block:: python + + input_text = "The meaning of life is 42" + vector = embeddings.embed_query('hello') + print(vector[:3]) + + .. code-block:: python + + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + + Async: + .. code-block:: python + + await embed.aembed_query(input_text) + print(vector[:3]) + + # multiple: + # await embed.aembed_documents(input_texts) + + .. code-block:: python + + [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188] """ + client: Any = Field(default=None, exclude=True) #: :meta private: async_client: Any = Field(default=None, exclude=True) #: :meta private: model: str = "text-embedding-ada-002"