diff --git a/poetry.lock b/poetry.lock index 41822a4..ee6a2f1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "airium" @@ -458,27 +458,44 @@ files = [ [[package]] name = "curies" -version = "0.7.10" -description = "Idiomatic conversion between URIs and compact URIs (CURIEs)." +version = "0.9.2" +description = "Idiomatic conversion between URIs and compact URIs (CURIEs)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "curies-0.7.10-py3-none-any.whl", hash = "sha256:ad80f420dd76b6f3e921a245370ff6ab7473c48c29c17254970c03cd2e58af5f"}, - {file = "curies-0.7.10.tar.gz", hash = "sha256:98a7ceb94710fab3a02727a7f85ba0719dd22be5fc8b5f2ad1d7d4cfc47d64ce"}, + {file = "curies-0.9.2-py3-none-any.whl", hash = "sha256:102ab6c3e55394744019eeac3b4775c47178a6023cdbb58f58f920b5f571fcc3"}, + {file = "curies-0.9.2.tar.gz", hash = "sha256:938527aab5f6d2d952297848d73ef7d715f4a45d5c9904653e47d33ae60d385f"}, ] [package.dependencies] -pydantic = "*" +pydantic = ">=2.0" pytrie = "*" -requests = "*" +typing-extensions = "*" [package.extras] -docs = ["sphinx", "sphinx-automodapi", "sphinx-rtd-theme"] +docs = ["sphinx (>=8)", "sphinx-rtd-theme (>=3.0)", "sphinx_automodapi"] fastapi = ["defusedxml", "fastapi", "httpx", "python-multipart", "uvicorn"] flask = ["defusedxml", "flask"] pandas = ["pandas"] rdflib = ["rdflib"] -tests = ["coverage", "pytest"] +tests = ["coverage", "pytest", "requests"] + +[[package]] +name = "deepl" +version = "1.20.0" +description = "Python library for the DeepL API." +optional = false +python-versions = "<4,>=3.6.2" +files = [ + {file = "deepl-1.20.0-py3-none-any.whl", hash = "sha256:69f93a26d70bff6e2204a71f2e601c4b826cbad1ff6f0e3065cbf5d17a0662d9"}, + {file = "deepl-1.20.0.tar.gz", hash = "sha256:2eae46d8f0279b00bce4686ec018bb5757a0ad0cc036616ef00ad12f9758955b"}, +] + +[package.dependencies] +requests = ">=2,<3" + +[package.extras] +keyring = ["keyring (>=23.4.1,<24.0.0)"] [[package]] name = "defusedxml" @@ -1533,13 +1550,13 @@ requests = "*" [[package]] name = "llm" -version = "0.13.1" +version = "0.19.1" description = "A CLI utility and Python library for interacting with Large Language Models, including OpenAI, PaLM and local models installed on your own machine." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "llm-0.13.1-py3-none-any.whl", hash = "sha256:05e468e3723097a07dba9d2b87d05f2359e7e436e8b8bd6743074021f4d8169c"}, - {file = "llm-0.13.1.tar.gz", hash = "sha256:fadda395c273cfd199886f41b6380eb37106284754cb21601233217ae764d4f7"}, + {file = "llm-0.19.1-py3-none-any.whl", hash = "sha256:6450fe6ab7b844365da21a8dfacf03d1e26730109f487aec456b872ffc85ae63"}, + {file = "llm-0.19.1.tar.gz", hash = "sha256:64f0c9500ec26a7de61a3a07b1f0f1cdd333a753c4a7aba7791b4ed3cd54117f"}, ] [package.dependencies] @@ -1548,16 +1565,17 @@ click-default-group = ">=1.2.3" openai = ">=1.0" pip = "*" pluggy = "*" +puremagic = "*" pydantic = ">=1.10.2" pyreadline3 = {version = "*", markers = "sys_platform == \"win32\""} python-ulid = "*" PyYAML = "*" setuptools = "*" sqlite-migrate = ">=0.1a2" -sqlite-utils = ">=3.35.0" +sqlite-utils = ">=3.37" [package.extras] -test = ["black (>=24.1.0)", "cogapp", "mypy", "numpy", "pytest", "pytest-httpx", "ruff", "types-PyYAML", "types-click", "types-setuptools"] +test = ["black (>=24.1.0)", "cogapp", "mypy (>=1.10.0)", "numpy", "pytest", "pytest-asyncio", "pytest-httpx (>=0.33.0)", "ruff", "types-PyYAML", "types-click", "types-setuptools"] [[package]] name = "lxml" @@ -2574,6 +2592,17 @@ fastobo = ">=0.12.2,<0.13.0" networkx = ">=2.3,<4.0" python-dateutil = ">=2.8,<3.0" +[[package]] +name = "puremagic" +version = "1.28" +description = "Pure python implementation of magic file detection" +optional = false +python-versions = "*" +files = [ + {file = "puremagic-1.28-py3-none-any.whl", hash = "sha256:e16cb9708ee2007142c37931c58f07f7eca956b3472489106a7245e5c3aa1241"}, + {file = "puremagic-1.28.tar.gz", hash = "sha256:195893fc129657f611b86b959aab337207d6df7f25372209269ed9e303c1a8c0"}, +] + [[package]] name = "pycodestyle" version = "2.12.1" @@ -4398,13 +4427,13 @@ files = [ [[package]] name = "xmltodict" -version = "0.13.0" +version = "0.14.2" description = "Makes working with XML feel like you are working with JSON" optional = false -python-versions = ">=3.4" +python-versions = ">=3.6" files = [ - {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, - {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, + {file = "xmltodict-0.14.2-py2.py3-none-any.whl", hash = "sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac"}, + {file = "xmltodict-0.14.2.tar.gz", hash = "sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553"}, ] [[package]] @@ -4432,4 +4461,4 @@ docs = [] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "9cb522a65377231b892776ac9a2a0f9c965c6d24ae4679d70f31c86e316efa88" +content-hash = "8bfa8fb66b8f81d671a7e836e2fc23c6969901a04e51cb7810c120e3ffa745e1" diff --git a/pyproject.toml b/pyproject.toml index 1070b93..c7c1831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,17 +11,18 @@ python = "^3.11" click = "*" importlib-metadata = ">=4.8.0" rdflib = ">=6.3.2" -jsonasobj2 = "^1.0.4" -xmltodict = "^0.13.0" -pandas = "^2.0.2" -oaklib = "^0.6.0" -tabulate = "^0.9.0" -llm = "^0.13.1" +jsonasobj2 = ">=1.0.4" +xmltodict = ">=0.13.0" +pandas = ">=2.0.2" +deepl = ">=1.20.0" +oaklib = ">=0.6.0" +tabulate = ">=0.9.0" +llm = ">=0.19.1" python-dotenv = "^1.0.1" -curies = "^0.7.7" -sssom = "^0.4.4" -linkml-runtime = "^1.7.1" -linkml = "^1.7.4" +curies = ">=0.7.7" +sssom = ">=0.4.4" +linkml-runtime = ">=1.7.1" +linkml = ">=1.7.4" [tool.poetry.group.docs.dependencies] mkdocs = "^1.4.2" diff --git a/src/babelon/translate.py b/src/babelon/translate.py index 45f4b60..2e5c00c 100644 --- a/src/babelon/translate.py +++ b/src/babelon/translate.py @@ -6,6 +6,7 @@ import string from typing import Dict, List +import deepl import llm import pandas as pd @@ -38,7 +39,7 @@ def translate(self, text, target_language): class OpenAITranslator(Translator): """A specific translator class that uses GPT-4 for translation.""" - def __init__(self, model="gpt-4-turbo-preview"): + def __init__(self, model="gpt-4o"): """Instantiate GPT4 translator.""" self.model = llm.get_model(model) self.model.key = os.environ["OPENAI_API_KEY"] @@ -82,6 +83,40 @@ def translate(self, text_to_translate, language_code): return "" +class DeepLTranslator(Translator): + """A specific translator class that uses DeepL API for translation.""" + + def __init__(self): + """Instantiate DeepL translator with an API key.""" + self.api_key = os.environ["DEEPL_API_KEY"] + self.translator = deepl.Translator(self.api_key) + + def model_name(self): + """Return the unique name of the translation model.""" + return "DeepL" + + def translate(self, text_to_translate, language_code): + """ + Translate text using DeepL API. + + Args: + text_to_translate (str): The text to be translated. + language_code (str): The target language code (e.g., 'DE' for German). + + Returns: + str: The translated text, or an empty string if translation fails. + """ + result = self.translator.translate_text( + text_to_translate, target_lang=language_code.upper() + ) + translation = result.text + if translation: + print(f"Translation: {translation}") + return translation + else: + return "" + + def _get_translation_language(translation_language_df, default_language="en"): if translation_language_df: return translation_language_df @@ -106,9 +141,11 @@ def get_translator_model(model="gpt-4"): ValueError: If the model does not exist. """ if model == "gpt-4": - return OpenAITranslator("gpt-4-turbo-preview") + return OpenAITranslator("gpt-4o") elif model == "gpt-3.5": return OpenAITranslator("gpt-3.5-turbo") + elif model == "deepl": + return DeepLTranslator() else: try: translator = OpenAITranslator(model) diff --git a/tests/test_translate.py b/tests/test_translate.py index 1e51048..c43dbfa 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -7,6 +7,7 @@ from oaklib import get_adapter from babelon.translate import ( + DeepLTranslator, OpenAITranslator, _is_equivalent_string, prepare_translation_for_ontology, @@ -24,13 +25,21 @@ def setUp(self) -> None: """Set up the test case.""" @unittest.skipIf(not os.path.exists(env_file), "Skipping test as .env file does not exist") - def test_translate(self): + def test_translate_openai(self): """Test update_translation_profile.""" load_dotenv() translator = OpenAITranslator() translated_value = translator.translate("fever", "de") self.assertEqual("Fieber", translated_value) + @unittest.skipIf(not os.path.exists(env_file), "Skipping test as .env file does not exist") + def test_translate_deepl(self): + """Test update_translation_profile.""" + load_dotenv() + translator = DeepLTranslator() + translated_value = translator.translate("fever", "de") + self.assertEqual("Fieber", translated_value) + @unittest.skipIf(not os.path.exists(env_file), "Skipping test as .env file does not exist") def test_translate_profile(self): """Test to see if a small babelon profile can be translated."""