From 3b8ecace0b86d15388d102eea50c403ba61974dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zeme=C5=82ka?= Date: Sun, 22 Dec 2024 18:02:29 +0100 Subject: [PATCH] Updated prompt to extract text and format it in Markdown, including additional visual details, instead of only describing the image. --- src/markitdown/_markitdown.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 789c1e5..ee3e59c 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -1047,7 +1047,21 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: def _get_llm_description(self, local_path, extension, client, model, prompt=None): if prompt is None or prompt.strip() == "": - prompt = "Write a detailed caption for this image." + prompt = ''' + Analyze the image and extract all visible text in the original language. + Reproduce the extracted text in a structured Markdown format, preserving + any formatting such as headings, bullet points, and highlights. Ensure + the output accurately reflects the structure and style of the original + document. + + Additionally, if the image includes any visual elements (e.g., diagrams, + logos, or specific layouts) that cannot be represented directly in Markdown, + describe them in plain text as part of the Markdown document under a section + titled "Visual Notes." + + Output only the converted Markdown text without any additional commentary + or explanations. + ''' data_uri = "" with open(local_path, "rb") as image_file: