From 7a6d0c5f18729683a9cc1e1a51589b84cd4a463b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 18 Dec 2024 15:31:03 -0600 Subject: [PATCH] chore(tests): sync client verification tests (#1046) --- .cross_sync/README.md | 2 + .github/workflows/conformance.yaml | 12 +- .kokoro/conformance.sh | 10 +- noxfile.py | 2 +- test_proxy/README.md | 2 +- .../client_handler_data_sync_autogen.py | 185 ++++++++++++++++++ test_proxy/run_tests.sh | 17 +- test_proxy/test_proxy.py | 5 +- tests/unit/data/test_sync_up_to_date.py | 99 ++++++++++ 9 files changed, 319 insertions(+), 15 deletions(-) create mode 100644 test_proxy/handlers/client_handler_data_sync_autogen.py create mode 100644 tests/unit/data/test_sync_up_to_date.py diff --git a/.cross_sync/README.md b/.cross_sync/README.md index 18a9aafdf..0d8a1cf8c 100644 --- a/.cross_sync/README.md +++ b/.cross_sync/README.md @@ -66,6 +66,8 @@ Generation can be initiated using `nox -s generate_sync` from the root of the project. This will find all classes with the `__CROSS_SYNC_OUTPUT__ = "path/to/output"` annotation, and generate a sync version of classes marked with `@CrossSync.convert_sync` at the output path. +There is a unit test at `tests/unit/data/test_sync_up_to_date.py` that verifies that the generated code is up to date + ## Architecture CrossSync is made up of two parts: diff --git a/.github/workflows/conformance.yaml b/.github/workflows/conformance.yaml index 448e1cc3a..8445240c3 100644 --- a/.github/workflows/conformance.yaml +++ b/.github/workflows/conformance.yaml @@ -26,7 +26,15 @@ jobs: matrix: test-version: [ "v0.0.2" ] py-version: [ 3.8 ] - client-type: [ "async", "legacy" ] + client-type: [ "async", "sync", "legacy" ] + include: + - client-type: "sync" + # sync client does not support concurrent streams + test_args: "-skip _Generic_MultiStream" + - client-type: "legacy" + # legacy client is synchronous and does not support concurrent streams + # legacy client does not expose mutate_row. Disable those tests + test_args: "-skip _Generic_MultiStream -skip TestMutateRow_" fail-fast: false name: "${{ matrix.client-type }} client / python ${{ matrix.py-version }} / test tag ${{ matrix.test-version }}" steps: @@ -53,4 +61,6 @@ jobs: env: CLIENT_TYPE: ${{ matrix.client-type }} PYTHONUNBUFFERED: 1 + TEST_ARGS: ${{ matrix.test_args }} + PROXY_PORT: 9999 diff --git a/.kokoro/conformance.sh b/.kokoro/conformance.sh index e85fc1394..fd585142e 100644 --- a/.kokoro/conformance.sh +++ b/.kokoro/conformance.sh @@ -19,16 +19,7 @@ set -eo pipefail ## cd to the parent directory, i.e. the root of the git repo cd $(dirname $0)/.. -PROXY_ARGS="" -TEST_ARGS="" -if [[ "${CLIENT_TYPE^^}" == "LEGACY" ]]; then - echo "Using legacy client" - # legacy client does not expose mutate_row. Disable those tests - TEST_ARGS="-skip TestMutateRow_" -fi - # Build and start the proxy in a separate process -PROXY_PORT=9999 pushd test_proxy nohup python test_proxy.py --port $PROXY_PORT --client_type=$CLIENT_TYPE & proxyPID=$! @@ -42,6 +33,7 @@ function cleanup() { trap cleanup EXIT # Run the conformance test +echo "running tests with args: $TEST_ARGS" pushd cloud-bigtable-clients-test/tests eval "go test -v -proxy_addr=:$PROXY_PORT $TEST_ARGS" RETURN_CODE=$? diff --git a/noxfile.py b/noxfile.py index 8576fed85..548bfd0ec 100644 --- a/noxfile.py +++ b/noxfile.py @@ -298,7 +298,7 @@ def system_emulated(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) -@nox.parametrize("client_type", ["async"]) +@nox.parametrize("client_type", ["async", "sync", "legacy"]) def conformance(session, client_type): # install dependencies constraints_path = str( diff --git a/test_proxy/README.md b/test_proxy/README.md index 266fba7cd..5c87c729a 100644 --- a/test_proxy/README.md +++ b/test_proxy/README.md @@ -31,7 +31,7 @@ python test_proxy.py --port 8080 ``` By default, the test_proxy targets the async client. You can change this by passing in the `--client_type` flag. -Valid options are `async` and `legacy`. +Valid options are `async`, `sync`, and `legacy`. ``` python test_proxy.py --client_type=legacy diff --git a/test_proxy/handlers/client_handler_data_sync_autogen.py b/test_proxy/handlers/client_handler_data_sync_autogen.py new file mode 100644 index 000000000..eabae0ffa --- /dev/null +++ b/test_proxy/handlers/client_handler_data_sync_autogen.py @@ -0,0 +1,185 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is automatically generated by CrossSync. Do not edit manually. + +""" +This module contains the client handler process for proxy_server.py. +""" +import os +from google.cloud.environment_vars import BIGTABLE_EMULATOR +from google.cloud.bigtable.data._cross_sync import CrossSync +from client_handler_data_async import error_safe + + +class TestProxyClientHandler: + """ + Implements the same methods as the grpc server, but handles the client + library side of the request. + + Requests received in TestProxyGrpcServer are converted to a dictionary, + and supplied to the TestProxyClientHandler methods as kwargs. + The client response is then returned back to the TestProxyGrpcServer + """ + + def __init__( + self, + data_target=None, + project_id=None, + instance_id=None, + app_profile_id=None, + per_operation_timeout=None, + **kwargs + ): + self.closed = False + os.environ[BIGTABLE_EMULATOR] = data_target + self.client = CrossSync._Sync_Impl.DataClient(project=project_id) + self.instance_id = instance_id + self.app_profile_id = app_profile_id + self.per_operation_timeout = per_operation_timeout + + def close(self): + self.closed = True + + @error_safe + async def ReadRows(self, request, **kwargs): + table_id = request.pop("table_name").split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + result_list = table.read_rows(request, **kwargs) + serialized_response = [row._to_dict() for row in result_list] + return serialized_response + + @error_safe + async def ReadRow(self, row_key, **kwargs): + table_id = kwargs.pop("table_name").split("/")[-1] + app_profile_id = self.app_profile_id or kwargs.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + result_row = table.read_row(row_key, **kwargs) + if result_row: + return result_row._to_dict() + else: + return "None" + + @error_safe + async def MutateRow(self, request, **kwargs): + from google.cloud.bigtable.data.mutations import Mutation + + table_id = request["table_name"].split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + row_key = request["row_key"] + mutations = [Mutation._from_dict(d) for d in request["mutations"]] + table.mutate_row(row_key, mutations, **kwargs) + return "OK" + + @error_safe + async def BulkMutateRows(self, request, **kwargs): + from google.cloud.bigtable.data.mutations import RowMutationEntry + + table_id = request["table_name"].split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + entry_list = [ + RowMutationEntry._from_dict(entry) for entry in request["entries"] + ] + table.bulk_mutate_rows(entry_list, **kwargs) + return "OK" + + @error_safe + async def CheckAndMutateRow(self, request, **kwargs): + from google.cloud.bigtable.data.mutations import Mutation, SetCell + + table_id = request["table_name"].split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + row_key = request["row_key"] + true_mutations = [] + for mut_dict in request.get("true_mutations", []): + try: + true_mutations.append(Mutation._from_dict(mut_dict)) + except ValueError: + mutation = SetCell("", "", "", 0) + true_mutations.append(mutation) + false_mutations = [] + for mut_dict in request.get("false_mutations", []): + try: + false_mutations.append(Mutation._from_dict(mut_dict)) + except ValueError: + false_mutations.append(SetCell("", "", "", 0)) + predicate_filter = request.get("predicate_filter", None) + result = table.check_and_mutate_row( + row_key, + predicate_filter, + true_case_mutations=true_mutations, + false_case_mutations=false_mutations, + **kwargs + ) + return result + + @error_safe + async def ReadModifyWriteRow(self, request, **kwargs): + from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule + from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule + + table_id = request["table_name"].split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + row_key = request["row_key"] + rules = [] + for rule_dict in request.get("rules", []): + qualifier = rule_dict["column_qualifier"] + if "append_value" in rule_dict: + new_rule = AppendValueRule( + rule_dict["family_name"], qualifier, rule_dict["append_value"] + ) + else: + new_rule = IncrementRule( + rule_dict["family_name"], qualifier, rule_dict["increment_amount"] + ) + rules.append(new_rule) + result = table.read_modify_write_row(row_key, rules, **kwargs) + if result: + return result._to_dict() + else: + return "None" + + @error_safe + async def SampleRowKeys(self, request, **kwargs): + table_id = request["table_name"].split("/")[-1] + app_profile_id = self.app_profile_id or request.get("app_profile_id", None) + table = self.client.get_table(self.instance_id, table_id, app_profile_id) + kwargs["operation_timeout"] = ( + kwargs.get("operation_timeout", self.per_operation_timeout) or 20 + ) + result = table.sample_row_keys(**kwargs) + return result diff --git a/test_proxy/run_tests.sh b/test_proxy/run_tests.sh index c2e9c6312..b6f1291a6 100755 --- a/test_proxy/run_tests.sh +++ b/test_proxy/run_tests.sh @@ -27,7 +27,7 @@ fi SCRIPT_DIR=$(realpath $(dirname "$0")) cd $SCRIPT_DIR -export PROXY_SERVER_PORT=50055 +export PROXY_SERVER_PORT=$(shuf -i 50000-60000 -n 1) # download test suite if [ ! -d "cloud-bigtable-clients-test" ]; then @@ -43,6 +43,19 @@ function finish { } trap finish EXIT +if [[ $CLIENT_TYPE == "legacy" ]]; then + echo "Using legacy client" + # legacy client does not expose mutate_row. Disable those tests + TEST_ARGS="-skip TestMutateRow_" +fi + +if [[ $CLIENT_TYPE != "async" ]]; then + echo "Using legacy client" + # sync and legacy client do not support concurrent streams + TEST_ARGS="$TEST_ARGS -skip _Generic_MultiStream " +fi + # run tests pushd cloud-bigtable-clients-test/tests -go test -v -proxy_addr=:$PROXY_SERVER_PORT +echo "Running with $TEST_ARGS" +go test -v -proxy_addr=:$PROXY_SERVER_PORT $TEST_ARGS diff --git a/test_proxy/test_proxy.py b/test_proxy/test_proxy.py index 9e03f1e5c..793500768 100644 --- a/test_proxy/test_proxy.py +++ b/test_proxy/test_proxy.py @@ -114,6 +114,9 @@ def format_dict(input_obj): if client_type == "legacy": import client_handler_legacy client = client_handler_legacy.LegacyTestProxyClientHandler(**json_data) + elif client_type == "sync": + import client_handler_data_sync_autogen + client = client_handler_data_sync_autogen.TestProxyClientHandler(**json_data) else: client = client_handler_data_async.TestProxyClientHandlerAsync(**json_data) client_map[client_id] = client @@ -150,7 +153,7 @@ def client_handler_process(request_q, queue_pool, client_type="async"): p = argparse.ArgumentParser() p.add_argument("--port", dest='port', default="50055") -p.add_argument("--client_type", dest='client_type', default="async", choices=["async", "legacy"]) +p.add_argument("--client_type", dest='client_type', default="async", choices=["async", "sync", "legacy"]) if __name__ == "__main__": port = p.parse_args().port diff --git a/tests/unit/data/test_sync_up_to_date.py b/tests/unit/data/test_sync_up_to_date.py new file mode 100644 index 000000000..492d35ddf --- /dev/null +++ b/tests/unit/data/test_sync_up_to_date.py @@ -0,0 +1,99 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import hashlib +import pytest +import ast +import re +from difflib import unified_diff + +# add cross_sync to path +test_dir_name = os.path.dirname(__file__) +repo_root = os.path.join(test_dir_name, "..", "..", "..") +cross_sync_path = os.path.join(repo_root, ".cross_sync") +sys.path.append(cross_sync_path) + +from generate import convert_files_in_dir, CrossSyncOutputFile # noqa: E402 + +sync_files = list(convert_files_in_dir(repo_root)) + + +def test_found_files(): + """ + Make sure sync_test is populated with some of the files we expect to see, + to ensure that later tests are actually running. + """ + assert len(sync_files) > 0, "No sync files found" + assert len(sync_files) > 10, "Unexpectedly few sync files found" + # test for key files + outputs = [os.path.basename(f.output_path) for f in sync_files] + assert "client.py" in outputs + assert "execute_query_iterator.py" in outputs + assert "test_client.py" in outputs + assert "test_system_autogen.py" in outputs, "system tests not found" + assert ( + "client_handler_data_sync_autogen.py" in outputs + ), "test proxy handler not found" + + +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="ast.unparse is only available in 3.9+" +) +@pytest.mark.parametrize("sync_file", sync_files, ids=lambda f: f.output_path) +def test_sync_up_to_date(sync_file): + """ + Generate a fresh copy of each cross_sync file, and compare hashes with the existing file. + + If this test fails, run `nox -s generate_sync` to update the sync files. + """ + path = sync_file.output_path + new_render = sync_file.render(with_formatter=True, save_to_disk=False) + found_render = CrossSyncOutputFile( + output_path="", ast_tree=ast.parse(open(path).read()), header=sync_file.header + ).render(with_formatter=True, save_to_disk=False) + # compare by content + diff = unified_diff(found_render.splitlines(), new_render.splitlines(), lineterm="") + diff_str = "\n".join(diff) + assert ( + not diff_str + ), f"Found differences. Run `nox -s generate_sync` to update:\n{diff_str}" + # compare by hash + new_hash = hashlib.md5(new_render.encode()).hexdigest() + found_hash = hashlib.md5(found_render.encode()).hexdigest() + assert new_hash == found_hash, f"md5 mismatch for {path}" + + +@pytest.mark.parametrize("sync_file", sync_files, ids=lambda f: f.output_path) +def test_verify_headers(sync_file): + license_regex = r""" + \#\ Copyright\ \d{4}\ Google\ LLC\n + \#\n + \#\ Licensed\ under\ the\ Apache\ License,\ Version\ 2\.0\ \(the\ \"License\"\);\n + \#\ you\ may\ not\ use\ this\ file\ except\ in\ compliance\ with\ the\ License\.\n + \#\ You\ may\ obtain\ a\ copy\ of\ the\ License\ at\ + \#\n + \#\s+http:\/\/www\.apache\.org\/licenses\/LICENSE-2\.0\n + \#\n + \#\ Unless\ required\ by\ applicable\ law\ or\ agreed\ to\ in\ writing,\ software\n + \#\ distributed\ under\ the\ License\ is\ distributed\ on\ an\ \"AS\ IS\"\ BASIS,\n + \#\ WITHOUT\ WARRANTIES\ OR\ CONDITIONS\ OF\ ANY\ KIND,\ either\ express\ or\ implied\.\n + \#\ See\ the\ License\ for\ the\ specific\ language\ governing\ permissions\ and\n + \#\ limitations\ under\ the\ License\. + """ + pattern = re.compile(license_regex, re.VERBOSE) + + with open(sync_file.output_path, "r") as f: + content = f.read() + assert pattern.search(content), "Missing license header"