Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community(doc_loaders): allow any credential type in AzureAIDocumentI… #29289

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Iterator, List, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Iterator, List, Optional

from langchain_core.documents import Document

Expand All @@ -8,14 +10,17 @@
AzureAIDocumentIntelligenceParser,
)

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential


class AzureAIDocumentIntelligenceLoader(BaseLoader):
"""Load a PDF with Azure Document Intelligence."""

def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
file_path: Optional[str] = None,
url_path: Optional[str] = None,
bytes_source: Optional[bytes] = None,
Expand All @@ -24,6 +29,7 @@ def __init__(
mode: str = "markdown",
*,
analysis_features: Optional[List[str]] = None,
credentials: Optional["TokenCredential"] = None,
) -> None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we name this `azure_credential`, for consistency with what was done in #28873?

"""
Initialize the object for file processing with Azure Document Intelligence
Expand Down Expand Up @@ -63,6 +69,9 @@ def __init__(
List of optional analysis features, each feature should be passed
as a str that conforms to the enum `DocumentAnalysisFeature` in
`azure-ai-documentintelligence` package. Default value is None.
credentials: Optional[TokenCredential]
The credentials to use for DocumentIntelligenceClient construction, when
using credentials other than api_key (like AD).

Examples:
---------
Expand All @@ -79,6 +88,15 @@ def __init__(
assert (
file_path is not None or url_path is not None or bytes_source is not None
), "file_path, url_path or bytes_source must be provided"

assert (
api_key is not None or credentials is not None
), "Either api_key or credentials must be provided."

assert (
api_key is None or credentials is None
), "Only one of api_key or credentials should be provided."

self.file_path = file_path
self.url_path = url_path
self.bytes_source = bytes_source
Expand All @@ -90,6 +108,7 @@ def __init__(
api_model=api_model,
mode=mode,
analysis_features=analysis_features,
credentials=credentials,
)

def lazy_load(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from __future__ import annotations

import logging
from typing import Any, Iterator, List, Optional
from typing import TYPE_CHECKING, Any, Iterator, List, Optional

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential

logger = logging.getLogger(__name__)


Expand All @@ -16,17 +21,25 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
api_model: str = "prebuilt-layout",
mode: str = "markdown",
analysis_features: Optional[List[str]] = None,
credentials: Optional["TokenCredential"] = None,
):
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature
from azure.core.credentials import AzureKeyCredential

kwargs = {}

if api_key is None and credentials is None:
raise ValueError("Either api_key or credentials must be provided.")

if api_key and credentials:
raise ValueError("Only one of api_key or credentials should be provided.")

if api_version is not None:
kwargs["api_version"] = api_version

Expand All @@ -49,7 +62,7 @@ def __init__(

self.client = DocumentIntelligenceClient(
endpoint=api_endpoint,
credential=AzureKeyCredential(api_key),
credential=credentials or AzureKeyCredential(api_key),
headers={"x-ms-useragent": "langchain-parser/1.0.0"},
features=analysis_features,
**kwargs,
Expand Down
Loading