Skip to content

Commit

Permalink
Merge pull request #27 from ic-xu/main
Browse files Browse the repository at this point in the history
fix: Fix the "sequence item 2: expected str instance, NoneType found" exception raised when table output is set to markdown.
  • Loading branch information
Filimoa authored Apr 19, 2024
2 parents 0ae1fa4 + 9cdd611 commit 106465d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/openparse/tables/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ def _ingest_with_pymupdf(
tabs = page.find_tables()
for i, tab in enumerate(tabs.tables):
headers = tab.header.names
for j, header in enumerate(headers):
if header is None:
headers[j] = ""
else:
headers[j] = header.strip()
lines = tab.extract()

if parsing_args.table_output_format == "str":
Expand Down
2 changes: 1 addition & 1 deletion src/openparse/tables/pymupdf/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def output_to_markdown(headers: List[str], rows: List[List[str]]) -> str:
markdown_output += "|---" * len(headers) + "|\n"

for row in rows:
processed_row = [" " if cell in [None, ""] else cell for cell in row]
processed_row = [" " if cell in [None, ""] else cell.replace("\n", " ") for cell in row]
markdown_output += "| " + " | ".join(processed_row) + " |\n"

return markdown_output
Expand Down

0 comments on commit 106465d

Please sign in to comment.