Skip to content

Commit

Permalink
Optimise tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pprados committed Jan 17, 2025
1 parent 23a73a9 commit 52e1a02
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ class PyMuPDFParser(BaseBlobParser):
parser = PyMuPDFParser(
# password = None,
mode = "single",
pages_delimitor = "\n\f",
pages_delimiter = "\n\f",
# extract_images = True,
# images_parser = TesseractBlobParser(),
# extract_tables="markdown",
Expand Down
2 changes: 1 addition & 1 deletion libs/community/langchain_community/document_loaders/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ def __init__(
pages_delimiter: str = _DEFAULT_PAGES_DELIMITER,
extract_images: bool = False,
images_parser: Optional[BaseImageBlobParser] = None,
images_inner_format:str="text",
images_inner_format: str = "text",
extract_tables: Union[Literal["csv", "markdown", "html"], None] = None,
headers: Optional[dict] = None,
extract_tables_settings: Optional[dict[str, Any]] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,31 @@ def _analyze_image(self, img: "Image") -> str:


@pytest.mark.parametrize(
"mode",
["single", "page"],
"mode,image_parser",
[("single", EmptyImageBlobParser()), ("page", None)],
)
@pytest.mark.parametrize(
"image_parser",
[EmptyImageBlobParser(), None],
"parser_factory,params",
[
("PyMuPDFParser", {}),
],
)
@pytest.mark.requires("pillow")
# Runs the shared `_test_matrix` check once per parametrized combination of
# parser factory/params and (mode, image_parser) pair, always with
# images_inner_format="text".
# NOTE(review): the parametrization shown in this diff includes ("page", None),
# so `image_parser` can be None even though it is annotated as
# BaseImageBlobParser -- consider Optional[BaseImageBlobParser]; confirm that
# Optional is imported in this test module before changing the annotation.
def test_mode_and_extract_images_variations(
parser_factory: str,
params: dict,
mode: str,
image_parser: BaseImageBlobParser,
) -> None:
# The inner image format is pinned to "text" for this test.
_test_matrix(
parser_factory,
params,
mode,
image_parser,
images_inner_format="text",
)


@pytest.mark.parametrize(
"images_inner_format",
["text", "markdown-img", "html-img"],
Expand All @@ -161,7 +179,24 @@ def _analyze_image(self, img: "Image") -> str:
],
)
@pytest.mark.requires("pillow")
def test_mode_and_extract_images_variations(
# Runs the shared `_test_matrix` check once per parametrized
# `images_inner_format` value (and parser factory/params pair), with the mode
# and the image parser held constant.
def test_mode_and_image_formats_variations(
parser_factory: str,
params: dict,
images_inner_format: str,
) -> None:
# Fixed inputs: only the inner image format varies across the runs of this test.
mode = "single"
image_parser = EmptyImageBlobParser()

# `images_inner_format` is passed positionally here as the fifth argument.
_test_matrix(
parser_factory,
params,
mode,
image_parser,
images_inner_format,
)


def _test_matrix(
parser_factory: str,
params: dict,
mode: str,
Expand Down

0 comments on commit 52e1a02

Please sign in to comment.