Skip to content

Commit

Permalink
Rate limit download requests globally for HCA (#6740, PR #6749)
Browse files Browse the repository at this point in the history
  • Loading branch information
achave11-ucsc committed Dec 18, 2024
2 parents a045398 + c0eabd5 commit 72ec3ee
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 51 deletions.
14 changes: 14 additions & 0 deletions deployments/prod/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1417,4 +1417,18 @@ def env() -> Mapping[str, Optional[str]]:
}),

'AZUL_ENABLE_REPLICAS': '0',

# HCA allocates a daily budget for file downloads. To avoid exceeding
# that budget, we limit the download rate as follows:
#
# r = b/d/f/24/60*w
#
# where `r` is the rate limit (downloads/window), `b` is the daily
# download budget (dollars/day), `d` is the download cost (dollars/
# gibibyte/download), `f` is the average file size (gibibytes), and `w`
# is the evaluation window in minutes (currently 10). The value for `d` varies by
# region, so a weighted average is calculated based on the observed
# number of daily downloads per region.
#
'AZUL_FILE_DOWNLOAD_RATE_LIMIT': '59/[email protected]'
}
40 changes: 29 additions & 11 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,15 @@ def env() -> Mapping[str, Optional[str]]:

# The Docker registry containing all 3rd party images used by this
# project, including images used locally, in FROM clauses, for CI/CD or
# GitLab. Must be empty or end in a slash. All references to 3rd party
# images must point at the registry defined here, ideally by prefixing
# the image reference with a reference to this variable. The registry
# and the images therein are managed by the `shared` TF component, which
# copies images from the upstream registry into the Azul registry. A
# 3rd-party image at `<registry>/<username>/<repository>:tag`, is stored
# as `${azul_docker_registry>}<registry>/<username>/<repository>:tag` in
# GitLab. Must be empty or end in a slash. All references to images from
# other parties must point at the registry defined here, ideally by
# prefixing the image reference with a reference to this variable. The
# registry and the images therein are managed by the `shared` TF
# component, which copies images from the upstream registry into the
# Azul registry.
#
# The image `<registry>/<username>/<repository>:<tag>` is stored as
# `${azul_docker_registry}<registry>/<username>/<repository>:<tag>` in
# the Azul registry. To disable the use of the Azul registry, set this
# variable to the empty string.
#
Expand Down Expand Up @@ -400,9 +402,9 @@ def env() -> Mapping[str, Optional[str]]:
# IAM role normally assumed by lambda functions in the active Azul
# deployment.
#
# The syntax is <account>[,<role>...][:<account>[,<role>...]...] where
# <account> is the numeric AWS account ID and role is a role name with
# optional * or ? wildcards for the StringLike operator in IAM
# The syntax is `<account>[,<role>...][:<account>[,<role>...]...]` where
# `<account>` is the numeric AWS account ID and `<role>` is a role name
# with optional * or ? wildcards for the StringLike operator in IAM
# conditions. Whitespace around separators and at the beginning or end
# of the value is ignored.
#
Expand Down Expand Up @@ -928,5 +930,21 @@ def env() -> Mapping[str, Optional[str]]:
# not covering any changes to the indexer, since indexing will be
# skipped.
#
'azul_it_flags': None
'azul_it_flags': None,

# A global rate limit on file downloads across all regions and IP
# addresses, enforced by AWS WAF.
#
# The syntax is `<limit>/<window>@<concurrency>` where `<limit>` is the
# maximum allowed number of download requests made every `<window>`
# seconds, and `<concurrency>` is the expected number of distinct IPs
# making at least one download request during that time. The concurrency
# does not need to be an integer. See
#
# https://docs.aws.amazon.com/waf/latest/developerguide/waf-rule-statement-type-rate-based-high-level-settings.html
#
# for restrictions on the supported values for `<limit>` ("Rate limit")
# and `<window>` ("Evaluation window").
#
'AZUL_FILE_DOWNLOAD_RATE_LIMIT': None
}
30 changes: 30 additions & 0 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ClassVar,
NotRequired,
Optional,
Self,
TYPE_CHECKING,
TextIO,
TypeVar,
Expand All @@ -33,6 +34,7 @@
)

import attr
import attrs
from furl import (
furl,
)
Expand Down Expand Up @@ -1590,6 +1592,34 @@ def docker_image_gists_path(self) -> Path:

waf_rate_rule_limit = 1000

@attrs.frozen(auto_attribs=True, kw_only=True)
class FileDownloadLimit:
    """
    A global rate limit on file download requests, as configured by a string
    of the form `<limit>/<window>@<concurrency>`.
    """
    # Maximum number of download requests allowed per evaluation window
    rate_limit: int

    # Length of the evaluation window, in seconds
    evaluation_window: int

    # Expected number of distinct clients making at least one download
    # request during one evaluation window. Does not need to be an integer.
    assumed_request_concurrency: float

    @classmethod
    def parse(cls, s: str) -> Self:
        """
        Create an instance from a string of the form
        `<limit>/<window>@<concurrency>`, e.g. `100/60@2.5`.

        :param s: the string to parse

        :return: the parsed limit

        :raises ValueError: if the string does not match the expected
                            syntax or if any component is not a positive,
                            finite number
        """
        syntax = '<limit>/<window>@<concurrency>'
        try:
            rate, rest = s.split('/')
            window, concurrency = rest.split('@')
            limit = cls(rate_limit=int(rate),
                        evaluation_window=int(window),
                        assumed_request_concurrency=float(concurrency))
        except ValueError as e:
            # Unpacking a split with the wrong number of parts and the
            # failed numeric conversions all raise ValueError. Re-raise
            # with a message that names the expected syntax.
            raise ValueError(f'Expected a value of the form `{syntax}`', s) from e
        # Reject values that would only fail later, when `retry_after` is
        # computed: a zero limit causes a division by zero, and a NaN or
        # infinite concurrency cannot be rounded.
        valid = (limit.rate_limit > 0
                 and limit.evaluation_window > 0
                 and 0 < limit.assumed_request_concurrency < float('inf'))
        if not valid:
            raise ValueError(f'All components of `{syntax}` must be positive '
                             f'and finite', s)
        return limit

    @property
    def retry_after(self) -> int:
        """
        The number of seconds each client should wait between requests so
        that `assumed_request_concurrency` clients together do not exceed
        `rate_limit` requests per `evaluation_window`, rounded to the
        nearest integer. The result may be zero for high rate limits.
        """
        return round(self.evaluation_window /
                     self.rate_limit *
                     self.assumed_request_concurrency)

@property
def waf_file_download_limit(self) -> FileDownloadLimit | None:
    """
    The global file download rate limit parsed from the
    `AZUL_FILE_DOWNLOAD_RATE_LIMIT` environment variable, or None if that
    variable is not set.
    """
    raw = self.environ.get('AZUL_FILE_DOWNLOAD_RATE_LIMIT')
    return None if raw is None else self.FileDownloadLimit.parse(raw)

assert 100 <= waf_rate_rule_limit <= 2_000_000_000 # mandated by AWS

@property
Expand Down
37 changes: 17 additions & 20 deletions terraform/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified terraform/_schema.json.gz
Binary file not shown.
79 changes: 60 additions & 19 deletions terraform/api_gateway.tf.json.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def for_domain(cls, domain):
'status': '$context.status'
}

file_download_limit = config.waf_file_download_limit

emit_tf({
'data': [
{
Expand Down Expand Up @@ -193,17 +195,55 @@ def for_domain(cls, domain):
'rule': [
{**rule, 'priority': i}
for i, rule in enumerate([
*([] if file_download_limit is None else [{
'name': 'FileDownloadRateLimit',
'statement': {
'rate_based_statement': {
'limit': file_download_limit.rate_limit,
'evaluation_window_sec': file_download_limit.evaluation_window,
'aggregate_key_type': 'CONSTANT',
'scope_down_statement': {
'regex_match_statement': {
'regex_string': '^(/fetch)?/repository/files',
'field_to_match': {'uri_path': {}},
'text_transformation': {
'priority': 0,
'type': 'NONE'
}
}
}
}
},
'action': {
'block': {
'custom_response': {
'response_code': 429,
'response_header': [
{
'name': 'Retry-After',
'value': str(file_download_limit.retry_after),
}
]
}
}
},
'visibility_config': {
'metric_name': 'FileDownloadRateLimit',
'sampled_requests_enabled': True,
'cloudwatch_metrics_enabled': True
}
}]),
*[
{
'name': name,
'action': {
action: {}
},
'statement': {
'ip_set_reference_statement': {
'arn': '${data.aws_wafv2_ip_set.%s.arn}' % ip_set_term
}
},
'action': {
action: {}
},
'visibility_config': {
'metric_name': name,
'sampled_requests_enabled': True,
Expand All @@ -218,6 +258,12 @@ def for_domain(cls, domain):
*[
{
'name': name,
'statement': {
'rate_based_statement': {
'limit': limit,
'aggregate_key_type': 'IP'
}
},
'action': {
'block': {
'custom_response': {
Expand All @@ -231,12 +277,6 @@ def for_domain(cls, domain):
}
}
},
'statement': {
'rate_based_statement': {
'limit': limit,
'aggregate_key_type': 'IP'
}
},
'visibility_config': {
'metric_name': name,
'sampled_requests_enabled': True,
Expand All @@ -258,9 +298,6 @@ def for_domain(cls, domain):
],
{
'name': 'AWS-CommonRuleSet',
'override_action': {
'none': {}
},
'statement': {
'managed_rule_group_statement': {
'name': 'AWSManagedRulesCommonRuleSet',
Expand Down Expand Up @@ -298,6 +335,9 @@ def for_domain(cls, domain):
]
}
},
'override_action': {
'none': {}
},
'visibility_config': {
'metric_name': 'AWS-CommonRuleSet',
'sampled_requests_enabled': True,
Expand All @@ -306,15 +346,15 @@ def for_domain(cls, domain):
},
{
'name': 'AWS-AmazonIpReputationList',
'override_action': {
'none': {}
},
'statement': {
'managed_rule_group_statement': {
'name': 'AWSManagedRulesAmazonIpReputationList',
'vendor_name': 'AWS'
}
},
'override_action': {
'none': {}
},
'visibility_config': {
'metric_name': 'AWS-AmazonIpReputationList',
'sampled_requests_enabled': True,
Expand All @@ -323,22 +363,23 @@ def for_domain(cls, domain):
},
{
'name': 'AWS-UnixRuleSet',
'override_action': {
'none': {}
},
'statement': {
'managed_rule_group_statement': {
'name': 'AWSManagedRulesUnixRuleSet',
'vendor_name': 'AWS'
}
},
'override_action': {
'none': {}
},
'visibility_config': {
'metric_name': 'AWS-UnixRuleSet',
'sampled_requests_enabled': True,
'cloudwatch_metrics_enabled': True
}
},
])],
])
],
'scope': 'REGIONAL',
'visibility_config': {
'cloudwatch_metrics_enabled': True,
Expand Down
2 changes: 1 addition & 1 deletion terraform/providers.tf.json.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
'aws': {
'source': 'hashicorp/aws',
'version': '5.49.0'
'version': '5.80.0'
},
# FIXME: Remove the provider
# https://github.com/DataBiosphere/azul/pull/6285
Expand Down

0 comments on commit 72ec3ee

Please sign in to comment.