community(doc_loaders): allow any credential type in Azure DocumentIntel
ianchi committed Jan 18, 2025
1 parent 184ea8a commit 9904324
Showing 2 changed files with 37 additions and 5 deletions.
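
In practice, this change lets callers authenticate the loader with any azure.core TokenCredential (for example Azure AD via azure-identity) instead of a static API key. A minimal usage sketch, assuming azure-identity is installed; the endpoint and file path below are hypothetical placeholders:

from azure.identity import DefaultAzureCredential  # any TokenCredential implementation works
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader

# New in this commit: pass `credentials` instead of `api_key`.
loader = AzureAIDocumentIntelligenceLoader(
    api_endpoint="https://<resource>.cognitiveservices.azure.com/",  # hypothetical endpoint
    credentials=DefaultAzureCredential(),
    file_path="invoice.pdf",  # hypothetical local file; url_path or bytes_source also work
    api_model="prebuilt-layout",
)
docs = loader.load()
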
First changed file: the AzureAIDocumentIntelligenceLoader (document loader module).
@@ -1,4 +1,6 @@
-from typing import Iterator, List, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Iterator, List, Optional
 
 from langchain_core.documents import Document
 
@@ -8,14 +10,17 @@
     AzureAIDocumentIntelligenceParser,
 )
 
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+
 
 class AzureAIDocumentIntelligenceLoader(BaseLoader):
     """Load a PDF with Azure Document Intelligence."""
 
     def __init__(
         self,
         api_endpoint: str,
-        api_key: str,
+        api_key: Optional[str] = None,
         file_path: Optional[str] = None,
         url_path: Optional[str] = None,
         bytes_source: Optional[bytes] = None,
@@ -24,6 +29,7 @@ def __init__(
         mode: str = "markdown",
         *,
         analysis_features: Optional[List[str]] = None,
+        credentials: Optional["TokenCredential"] = None,
     ) -> None:
         """
         Initialize the object for file processing with Azure Document Intelligence
@@ -63,6 +69,9 @@ def __init__(
             List of optional analysis features, each feature should be passed
             as a str that conforms to the enum `DocumentAnalysisFeature` in
             `azure-ai-documentintelligence` package. Default value is None.
+        credentials: Optional[TokenCredential]
+            The credentials to use for DocumentIntelligenceClient construction, when
+            using credentials other than api_key (like AD).
         Examples:
         ---------
@@ -79,6 +88,15 @@ def __init__(
         assert (
             file_path is not None or url_path is not None or bytes_source is not None
         ), "file_path, url_path or bytes_source must be provided"
+
+        assert (
+            api_key is not None or credentials is not None
+        ), "Either api_key or credentials must be provided."
+
+        assert (
+            api_key is None or credentials is None
+        ), "Only one of api_key or credentials should be provided."
+
         self.file_path = file_path
         self.url_path = url_path
         self.bytes_source = bytes_source
@@ -90,6 +108,7 @@ def __init__(
             api_model=api_model,
             mode=mode,
             analysis_features=analysis_features,
+            credentials=credentials,
         )
 
     def lazy_load(
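
Key-based authentication is unchanged; api_key simply becomes optional, and the new asserts require exactly one of the two mechanisms. A sketch of the existing path, with hypothetical endpoint, key, and URL values:

from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader

# Still supported: static key auth, now via an optional parameter.
loader = AzureAIDocumentIntelligenceLoader(
    api_endpoint="https://<resource>.cognitiveservices.azure.com/",  # hypothetical
    api_key="<api-key>",  # hypothetical
    url_path="https://example.com/sample.pdf",  # hypothetical
)

# Passing both api_key and credentials (or neither) now fails fast with an AssertionError,
# e.g. "Only one of api_key or credentials should be provided."
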
Second changed file: the AzureAIDocumentIntelligenceParser (blob parser module).
@@ -1,11 +1,16 @@
+from __future__ import annotations
+
 import logging
-from typing import Any, Iterator, List, Optional
+from typing import TYPE_CHECKING, Any, Iterator, List, Optional
 
 from langchain_core.documents import Document
 
 from langchain_community.document_loaders.base import BaseBlobParser
 from langchain_community.document_loaders.blob_loaders import Blob
 
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+
 logger = logging.getLogger(__name__)


@@ -16,17 +21,25 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
     def __init__(
         self,
         api_endpoint: str,
-        api_key: str,
+        api_key: Optional[str] = None,
         api_version: Optional[str] = None,
         api_model: str = "prebuilt-layout",
         mode: str = "markdown",
         analysis_features: Optional[List[str]] = None,
+        credentials: Optional["TokenCredential"] = None,
     ):
         from azure.ai.documentintelligence import DocumentIntelligenceClient
         from azure.ai.documentintelligence.models import DocumentAnalysisFeature
         from azure.core.credentials import AzureKeyCredential
 
         kwargs = {}
+
+        if api_key is None and credentials is None:
+            raise ValueError("Either api_key or credentials must be provided.")
+
+        if api_key and credentials:
+            raise ValueError("Only one of api_key or credentials should be provided.")
+
         if api_version is not None:
             kwargs["api_version"] = api_version
 
@@ -49,7 +62,7 @@ def __init__(
 
         self.client = DocumentIntelligenceClient(
             endpoint=api_endpoint,
-            credential=AzureKeyCredential(api_key),
+            credential=credentials or AzureKeyCredential(api_key),
             headers={"x-ms-useragent": "langchain-parser/1.0.0"},
             features=analysis_features,
             **kwargs,
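
The parser forwards whichever credential was supplied straight to the Azure SDK, falling back to AzureKeyCredential when only api_key is given. A standalone sketch that mirrors the new selection logic (build_client is a hypothetical helper, not part of the library):

from typing import Optional

from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential, TokenCredential


def build_client(
    api_endpoint: str,
    api_key: Optional[str] = None,
    credentials: Optional[TokenCredential] = None,
) -> DocumentIntelligenceClient:
    # Mirrors the parser's validation: exactly one auth mechanism must be set.
    if api_key is None and credentials is None:
        raise ValueError("Either api_key or credentials must be provided.")
    if api_key and credentials:
        raise ValueError("Only one of api_key or credentials should be provided.")
    # Prefer the TokenCredential; otherwise wrap the key for the SDK.
    return DocumentIntelligenceClient(
        endpoint=api_endpoint,
        credential=credentials or AzureKeyCredential(api_key),
    )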
