From ddb36fd24f8b8333dca0ee22b1412cf1f00d370c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 18 Jul 2024 13:19:09 -0500 Subject: [PATCH] fix return-before-assignment bug in notebook test script, add pre-commit, drop cuspatial notebooks (#690) CI has been failing here for a few weeks, as a result of some failing `cuspatial` notebooks. As described in https://github.com/rapidsai/cuspatial/issues/1406, I observed those notebooks failing like this: ```text cannot access local variable 'execution_time' where it is not associated with a value ``` Turns out that that was coming from the `test_notebooks.py` script in this repo. It calls `return execution_time` unconditionally, even though it's possible for that variable to not exist. This fixes that, and proposes adding a `pre-commit` configuration and corresponding CI job, to help catch such things earlier in the future. This also proposes skipping some `cuspatial` notebooks, to get CI working. I'd hoped to address that in https://github.com/rapidsai/cuspatial/pull/1407, but most `cuspatial` maintainers are busy or unavailable this week. I think we should get CI working here again and deal with the `cuspatial` notebooks as a separate concern. ## Notes for Reviewers Any other changes you see in files here are the result of running `pre-commit run --all-files` (with some manual fixes applied to fix `ruff` warnings). Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) - https://github.com/jakirkham - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/docker/pull/690 --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/workflows/pr.yml | 13 +++++++++ .pre-commit-config.yaml | 12 +++++++++ context/test_notebooks.py | 17 ++++++------ pyproject.toml | 40 ++++++++++++++++++++++++++++ raft-ann-bench/README.md | 2 +- raft-ann-bench/cpu/Dockerfile | 1 - 7 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 7b7e3f62..3923e4d8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -21,7 +21,7 @@ A clear and concise description of what you expected to happen. - Method of cuDF install: [conda, Docker, or from source] - If method of install is [Docker], provide `docker pull` & `docker run` commands used - Please run and attach the output of the `cudf/print_env.sh` script to gather relevant environment details - + **Additional context** Add any other context about the problem here. diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index a29e8bfa..7d9f17c1 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -10,7 +10,20 @@ concurrency: cancel-in-progress: true jobs: + checks: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Run pre-commit + run: | + pip install pre-commit + pre-commit run --all-files docker: + needs: [checks] uses: ./.github/workflows/build-test-publish-images.yml with: build_type: pull-request diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..cb2f78eb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +--- +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.2 + hooks: + - id: ruff + args: ["--config", "pyproject.toml"] diff --git a/context/test_notebooks.py b/context/test_notebooks.py index 96538aad..7b3084fb 100755 --- a/context/test_notebooks.py +++ b/context/test_notebooks.py @@ -5,9 +5,7 @@ import sys import timeit from typing import Iterable -import nbconvert import nbformat -from datetime import datetime from nbconvert.preprocessors import ExecutePreprocessor import yaml @@ -29,12 +27,15 @@ # following nbs are marked as skipped 'cugraph/algorithms/layout/Force-Atlas2.ipynb', 'cuspatial/binary_predicates.ipynb', - 'cuspatial/cuproj_benchmark.ipynb' + 'cuspatial/cuproj_benchmark.ipynb', + # context on these being skipped: https://github.com/rapidsai/cuspatial/pull/1407 + 'cuspatial/cuspatial_api_examples.ipynb', + 'cuspatial/nyc_taxi_years_correlation.ipynb' ] def get_notebooks(directory: str) -> Iterable[str]: - for root, dirs, files in os.walk(directory): + for root, _, files in os.walk(directory): for file in files: if ( file.endswith(".ipynb") @@ -71,12 +72,12 @@ def test_notebook(notebook_file, executed_nb_file): # use nbconvert to run the notebook natively ep = ExecutePreprocessor(timeout=600, kernel_name="python3", allow_errors=True) + task_init = timeit.default_timer() try: - task_init = timeit.default_timer() - nb, nb_resources = ep.preprocess(nb, {"metadata": {"path": ""}}) - execution_time = timeit.default_timer() - task_init + nb, _ = ep.preprocess(nb, {"metadata": {"path": ""}}) except Exception as e: errors.append(e) + execution_time = timeit.default_timer() - task_init with open(executed_nb_file, "w", encoding="utf-8") as f: nbformat.write(nb, f) @@ -152,7 +153,7 @@ def test_notebook(notebook_file, executed_nb_file): print(f"Input must be a directory. Got: {ns.input}") sys.exit(1) - notebooks = sorted(list(get_notebooks(ns.input))) + notebooks = sorted(get_notebooks(ns.input)) print(f"{len(notebooks)} Notebooks to be tested:") for notebook in notebooks: print(notebook) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..20bbdee6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[tool.ruff] +target-version = "py310" + +[tool.ruff.lint] +ignore = [ + # (flake8) line too long + "E501", + # (pylint) too many branches + "PLR0912", +] +select = [ + # flake8-builtins + "A", + # flake8-bugbear + "B", + # flake8-comprehensions + "C4", + # pycodestyle + "E", + # eradicate + "ERA", + # pyflakes + "F", + # flynt + "FLY", + # perflint + "PERF", + # pygrep-hooks + "PGH", + # pylint + "PL", + # flake8-pyi + "PYI", + # flake8-return + "RET", + # ruff-exclusive checks + "RUF", + # flake8-bandit + "S", +] diff --git a/raft-ann-bench/README.md b/raft-ann-bench/README.md index 8f611f0c..33c82c46 100644 --- a/raft-ann-bench/README.md +++ b/raft-ann-bench/README.md @@ -2,7 +2,7 @@ This folder contains the dockerfiles for generating GPU and CPU RAFT ANN benchmark images. -This images are meant to enable end users of RAFT's ANN algorithms to easily run and reproduce benchmarks and comparisons between RAFT and third party libraries. +These images are meant to enable end users of RAFT's ANN algorithms to easily run and reproduce benchmarks and comparisons between RAFT and third party libraries. # Image types: diff --git a/raft-ann-bench/cpu/Dockerfile b/raft-ann-bench/cpu/Dockerfile index ca2fb84e..048bd5e3 100644 --- a/raft-ann-bench/cpu/Dockerfile +++ b/raft-ann-bench/cpu/Dockerfile @@ -60,4 +60,3 @@ RUN /home/rapids/raftannbench/get_datasets.sh CMD ["--dataset fashion-mnist-784-euclidean", "", "--algorithms hnswlib"] ENTRYPOINT ["/bin/bash", "/data/scripts/run_benchmark_preloaded_datasets.sh"] -