Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix support for pyproject.toml dependencies with multiple selectors #597

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions grayskull/strategy/py_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,18 +696,30 @@ def download_sdist_pkg(sdist_url: str, dest: str, name: str | None = None):

def merge_deps_toml_setup(setup_deps: list, toml_deps: list) -> list:
    """Merge dependency lists coming from setup.py and pyproject.toml.

    pyproject.toml dependencies take precedence: a setup.py dependency is only
    kept when no toml dependency exists for the same package name (names are
    compared with ``-`` and ``_`` treated as interchangeable). Duplicate
    entries within ``toml_deps`` — e.g. the same package listed twice with
    different Python-version selectors — are all preserved.

    :param setup_deps: dependency strings collected from setup.py
    :param toml_deps: dependency strings collected from pyproject.toml
    :return: merged list of dependency strings
    """
    # split on whitespace or any version-constraint operator; the first
    # fragment is the bare package name
    re_split = re.compile(r"\s+|>|=|<|~|!")

    # drop any empty deps
    setup_deps = [dep for dep in setup_deps if dep.strip()]
    toml_deps = [dep for dep in toml_deps if dep.strip()]

    # set of toml package names for O(1) membership tests
    toml_dep_names = {re_split.split(dep)[0] for dep in toml_deps}

    # prefer toml over setup; only add setup deps if not found in toml.
    # Copy so we never mutate the caller's list in place.
    merged_deps = list(toml_deps)
    for dep in setup_deps:
        dep_name = re_split.split(dep)[0]
        if not dep_name.strip():
            continue
        # also try the dash/underscore spellings of the setup.py name so
        # "my_pkg" in setup.py matches "my-pkg" in pyproject.toml
        alternatives = (
            dep_name,
            dep_name.replace("_", "-"),
            dep_name.replace("-", "_"),
        )
        # only add the setup dep if no alternative name was found
        if not any(alternative in toml_dep_names for alternative in alternatives):
            merged_deps.append(dep)

    return merged_deps


def merge_setup_toml_metadata(setup_metadata: dict, pyproject_metadata: dict) -> dict:
Expand Down
54 changes: 39 additions & 15 deletions grayskull/strategy/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,16 @@ def merge_requires_dist(pypi_metadata: dict, sdist_metadata: dict) -> list:
:param sdist_metadata: sdist metadata
:return: list with all requirements
"""
all_deps = sdist_metadata.get("install_requires") or []
all_deps += pypi_metadata.get("requires_dist") or []

re_search = re.compile(r";\s*extra")
all_deps = [pkg for pkg in all_deps if not re_search.search(pkg)]
current_pkg = pypi_metadata.get("name", "")

requires_dist = []
pypi_deps_name = set()
with progressbar_with_status(len(all_deps)) as bar:
for pos, sdist_pkg in enumerate(all_deps, 1):
match_deps = RE_DEPS_NAME.match(sdist_pkg)
def _get_pkg_names(deps, current_pkg, bar, pos):
"""
Get pkg names for each dep, dropping deps where no pkg name is found
"""
new_deps = []
pkg_names = []
for dep in deps:
pos += 1
match_deps = RE_DEPS_NAME.match(dep)
if not match_deps:
bar.update(pos)
continue
Expand All @@ -205,11 +203,32 @@ def merge_requires_dist(pypi_metadata: dict, sdist_metadata: dict) -> list:
bar.update(pos, pkg_name=pkg_name)
if current_pkg and current_pkg == pkg_name:
continue
if pkg_name in pypi_deps_name:
continue
new_deps.append(dep.replace(match_deps, pkg_name))
pkg_names.append(pkg_name)
return pkg_names, new_deps

sdist_deps = sdist_metadata.get("install_requires") or []
pypi_deps = pypi_metadata.get("requires_dist") or []

re_search = re.compile(r";\s*extra")
sdist_deps = [pkg for pkg in sdist_deps if not re_search.search(pkg)]
pypi_deps = [pkg for pkg in pypi_deps if not re_search.search(pkg)]
current_pkg = pypi_metadata.get("name", "")

count = len(sdist_deps) + len(pypi_deps)
# progress bar here because calling normalize_pkg_name() on each package
# takes a while
with progressbar_with_status(count) as bar:
pos = 0
sdist_pkgs, sdist_deps = _get_pkg_names(sdist_deps, current_pkg, bar, pos)
pypi_pkgs, pypi_deps = _get_pkg_names(pypi_deps, current_pkg, bar, pos)

# prefer sdist over pypi; only add pypi deps if not found in sdist
requires_dist = sdist_deps
for pkg_name, dep in zip(pypi_pkgs, pypi_deps):
if pkg_name not in sdist_pkgs:
requires_dist.append(dep)

pypi_deps_name.add(pkg_name)
requires_dist.append(sdist_pkg.replace(match_deps, pkg_name))
return requires_dist


Expand Down Expand Up @@ -322,8 +341,13 @@ def get_pypi_metadata(config: Configuration) -> PypiMetadata:
def get_run_req_from_requires_dist(requires_dist: list, config: Configuration) -> list:
"""Get the run requirements looking for the `requires_dist` key
present in the metadata"""
re_selector = re.compile(r"\s+#\s+\[.*\]", re.DOTALL)
run_req = []
for req in requires_dist:
if re_selector.search(req):
# if there's already a selector, make sure we're arch
config.is_arch = True

list_raw_requirements = req.split(";")
selector = ""
if len(list_raw_requirements) > 1:
Expand Down
24 changes: 19 additions & 5 deletions grayskull/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,32 +128,41 @@ def rm_duplicated_deps(all_requirements: list | set | None) -> list | None:
# and underscores converted to dashes. The value is the requirement itself,
# as it should be added.
# (This is order-preserving since dicts are ordered by first insertion.)
new_reqs: dict[str, str] = {}
new_reqs: dict[tuple[str, str], str] = {}
re_split = re.compile(r"\s+(|>|=|<|~|!|#)+")
for dep in all_requirements:
if dep.strip().startswith(("{{", "<{")):
new_reqs[dep] = dep
continue
dep_name, *constrains = re_split.split(dep.strip())
dep_name = dep_name.strip()

if "#" in dep:
selector = dep.split("#")[-1]
else:
selector = ""

constrains = [
c.strip()
for c in constrains
if c.strip() not in {"=*", "==*", "*", "*.*", "*.*.*", ""}
]
canonicalized = dep_name.replace("_", "-").lower()
constrains.insert(0, dep_name)
if canonicalized in new_reqs:
# a canonicalized dependency is only redundant if it also has the same
# selector as a previous dependency
key = (canonicalized, selector)
if key in new_reqs:
# In order to break ties deterministically, we prioritize the requirement
# which is alphanumerically lowest. This happens to prioritize the "-"
# character over "_".
# Example: given "importlib_metadata" and "importlib-metadata", we will
# keep "importlib-metadata" because it is alphabetically lower.
previous_req = new_reqs[canonicalized]
previous_req = new_reqs[key]
if len(dep) > len(previous_req) or "-" in dep_name:
new_reqs[canonicalized] = " ".join(constrains)
new_reqs[key] = " ".join(constrains)
else:
new_reqs[canonicalized] = " ".join(constrains)
new_reqs[key] = " ".join(constrains)
return [re.sub(r"\s+(#)", " \\1", v.strip()) for v in new_reqs.values()]


Expand All @@ -168,6 +177,7 @@ def format_dependencies(all_dependencies: list, name: str) -> list:
formatted_dependencies = []
re_deps = re.compile(r"^\s*([\.a-zA-Z0-9_-]+)\s*(.*)\s*$", re.MULTILINE | re.DOTALL)
re_remove_space = re.compile(r"([<>!=]+)\s+")
re_selector = re.compile(r"\s+#\s+\[.*\]", re.DOTALL)
re_remove_tags = re.compile(r"\s*(\[.*\])", re.DOTALL)
re_remove_comments = re.compile(r"\s+#.*", re.DOTALL)
for req in all_dependencies:
Expand All @@ -184,6 +194,10 @@ def format_dependencies(all_dependencies: list, name: str) -> list:
if len(match_req) > 1:
deps_name = " ".join(match_req)
deps_name = re_remove_space.sub(r"\1", deps_name.strip())
if re_selector.search(deps_name):
# don't want to remove selectors
formatted_dependencies.append(deps_name)
continue
deps_name = re_remove_tags.sub(r" ", deps_name.strip())
deps_name = re_remove_comments.sub("", deps_name)
formatted_dependencies.append(deps_name.strip())
Expand Down
162 changes: 162 additions & 0 deletions tests/test_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,119 @@ def test_merge_pypi_sdist_metadata():
)


def test_pypi_metadata_constraints_for_python_versions():
    # apache-airflow-providers-trino declares pandas twice, once per
    # python_version range -- both entries must survive in requires_dist
    trino_cfg = Configuration(name="apache-airflow-providers-trino", version="6.0.0")
    trino_expected = [
        "apache-airflow-providers-common-sql>=1.20.0",
        "apache-airflow>=2.9.0",
        'pandas<2.2,>=1.5.3; python_version < "3.9"',
        'pandas<2.2,>=2.1.2; python_version >= "3.9"',
        "trino>=0.318.0",
        'apache-airflow-providers-google; extra == "google"',
        'apache-airflow-providers-openlineage; extra == "openlineage"',
    ]
    assert sorted(get_pypi_metadata(trino_cfg)["requires_dist"]) == sorted(trino_expected)

    # databricks-sql-connector declares numpy twice with python_version markers
    dbsql_cfg = Configuration(name="databricks-sql-connector", version="3.7.0")
    dbsql_expected = [
        'alembic<2.0.0,>=1.0.11; extra == "alembic"',
        "lz4<5.0.0,>=4.0.2",
        'numpy>=1.16.6; python_version >= "3.8" and python_version < "3.11"',
        'numpy>=1.23.4; python_version >= "3.11"',
        "oauthlib<4.0.0,>=3.1.0",
        "openpyxl<4.0.0,>=3.0.10",
        'pandas<2.3.0,>=1.2.5; python_version >= "3.8"',
        "pyarrow>=14.0.1",
        "requests<3.0.0,>=2.18.1",
        'sqlalchemy>=2.0.21; extra == "sqlalchemy" or extra == "alembic"',
        "thrift<0.21.0,>=0.16.0",
        "urllib3>=1.26",
    ]
    assert sorted(get_pypi_metadata(dbsql_cfg)["requires_dist"]) == sorted(dbsql_expected)


def test_sdist_metadata_from_toml_project_dependencies():
    # sdist deps read from pyproject.toml [project] dependencies; the two
    # pandas entries carry different python_version environment markers
    cfg = Configuration(name="apache-airflow-providers-trino", version="6.0.0")
    sdist = get_sdist_metadata(get_pypi_metadata(cfg)["sdist_url"], cfg)
    expected = [
        "apache-airflow-providers-common-sql>=1.20.0",
        "apache-airflow>=2.9.0",
        'pandas>=1.5.3,<2.2;python_version<"3.9"',
        'pandas>=2.1.2,<2.2;python_version>="3.9"',
        "trino>=0.318.0",
        "python ~=3.9",
    ]
    assert sorted(sdist["install_requires"]) == sorted(expected)


def test_sdist_metadata_from_toml_poetry_dependencies():
    # sdist deps read from pyproject.toml [tool.poetry.dependencies]; the two
    # numpy entries already carry conda-style selectors at this stage
    cfg = Configuration(name="databricks-sql-connector", version="3.7.0")
    sdist = get_sdist_metadata(get_pypi_metadata(cfg)["sdist_url"], cfg)
    expected = [
        "python >=3.8.0,<4.0.0",
        "thrift >=0.16.0,<0.21.0",
        "pandas >=1.2.5,<2.3.0 # [py>=38]",
        "pyarrow >=14.0.1",
        "lz4 >=4.0.2,<5.0.0",
        "requests >=2.18.1,<3.0.0",
        "oauthlib >=3.1.0,<4.0.0",
        "numpy >=1.16.6 # [py>=38 and py<311]",
        "numpy >=1.23.4 # [py>=311]",
        "openpyxl >=3.0.10,<4.0.0",
        "urllib3 >=1.26",
    ]
    assert sorted(sdist["install_requires"]) == sorted(expected)


def test_merge_pypi_sdist_metadata_from_toml():
    # tests merging pyproject.toml dependencies from poetry with pypi data,
    # including multiple numpy constraints with python version selectors
    poetry_cfg = Configuration(name="databricks-sql-connector", version="3.7.0")
    poetry_pypi = get_pypi_metadata(poetry_cfg)
    poetry_sdist = get_sdist_metadata(poetry_pypi["sdist_url"], poetry_cfg)
    poetry_merged = merge_pypi_sdist_metadata(poetry_pypi, poetry_sdist, poetry_cfg)
    poetry_expected = [
        "python >=3.8.0,<4.0.0",
        "thrift >=0.16.0,<0.21.0",
        "pandas >=1.2.5,<2.3.0 # [py>=38]",
        "pyarrow >=14.0.1",
        "lz4 >=4.0.2,<5.0.0",
        "requests >=2.18.1,<3.0.0",
        "oauthlib >=3.1.0,<4.0.0",
        "numpy >=1.16.6 # [py>=38 and py<311]",
        "numpy >=1.23.4 # [py>=311]",
        "openpyxl >=3.0.10,<4.0.0",
        "urllib3 >=1.26",
    ]
    assert sorted(poetry_merged["requires_dist"]) == sorted(poetry_expected)

    # tests merging pyproject.toml project dependencies with pypi data,
    # including multiple pandas constraints with python version selectors
    project_cfg = Configuration(name="apache-airflow-providers-trino", version="6.0.0")
    project_pypi = get_pypi_metadata(project_cfg)
    project_sdist = get_sdist_metadata(project_pypi["sdist_url"], project_cfg)
    project_merged = merge_pypi_sdist_metadata(project_pypi, project_sdist, project_cfg)
    project_expected = [
        "apache-airflow-providers-common-sql>=1.20.0",
        "apache-airflow>=2.9.0",
        'pandas>=1.5.3,<2.2;python_version<"3.9"',
        'pandas>=2.1.2,<2.2;python_version>="3.9"',
        "trino>=0.318.0",
        "python ~=3.9",
    ]
    assert sorted(project_merged["requires_dist"]) == sorted(project_expected)


def test_update_requirements_with_pin():
req = {
"build": ["<{ compiler('c') }}"],
Expand Down Expand Up @@ -787,6 +900,55 @@ def test_run_requirements_sdist():
]
)

# a more complex example with selectors
config = Configuration(name="apache-airflow-providers-trino", version="6.0.0")
recipe = GrayskullFactory.create_recipe("pypi", config)
assert "noarch" not in recipe["build"]
selectors = {
"python >=3.9,<4.dev0": None,
"apache-airflow-providers-common-sql >=1.20.0": None,
"apache-airflow >=2.9.0": None,
"pandas >=1.5.3,<2.2": "[py<39]",
"pandas >=2.1.2,<2.2": "[py>=39]",
"trino-python-client >=0.318.0": None,
}

for dep in recipe["requirements"]["run"]:
dep_str = str(dep)
assert dep_str in selectors
selector = selectors[dep_str]
if dep.inline_comment is None:
assert selector is None
else:
assert str(dep.inline_comment) == selector

# another example with selectors
config = Configuration(name="databricks-sql-connector", version="3.7.0")
recipe = GrayskullFactory.create_recipe("pypi", config)
assert "noarch" not in recipe["build"]
selectors = {
"python >=3.8.0,<4.0.0": None,
"thrift >=0.16.0,<0.21.0": None,
"pandas >=1.2.5,<2.3.0": "[py>=38]",
"pyarrow >=14.0.1": None,
"lz4 >=4.0.2,<5.0.0": None,
"requests >=2.18.1,<3.0.0": None,
"oauthlib >=3.1.0,<4.0.0": None,
"numpy >=1.16.6": "[py>=38 and py<311]",
"numpy >=1.23.4": "[py>=311]",
"openpyxl >=3.0.10,<4.0.0": None,
"urllib3 >=1.26": None,
}

for dep in recipe["requirements"]["run"]:
dep_str = str(dep)
assert dep_str in selectors
selector = selectors[dep_str]
if dep.inline_comment is None:
assert selector is None
else:
assert str(dep.inline_comment) == selector


def test_format_host_requirements():
assert sorted(format_dependencies(["setuptools>=40.0", "pkg2"], "pkg1")) == sorted(
Expand Down
Loading