Skip to content

Commit

Permalink
fetch: Try to make downloads more robust
Browse files (browse the repository at this point in the history)
This adds automatic retries and connection reuse via a `requests.Session`.
  • Loading branch information
jbruechert committed Jan 2, 2025
1 parent 121f513 commit 7d9764c
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/fetch.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@
from zipfile import ZipFile
from typing import Optional, Any
from zoneinfo import ZoneInfo
from requests.adapters import HTTPAdapter, Retry

import email.utils
import requests
Expand Down Expand Up @@ -100,6 +101,14 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool:

return self.fetch_source(dest_path, http_source)
case HttpSource():
session = requests.Session()

retries = Retry(total=5,
backoff_factor=0.1,
status_forcelist=[500, 502, 503, 504])

session.mount('http://', HTTPAdapter(max_retries=retries))

request_options: dict[str, Any] = {
"verify": not source.options.ignore_tls_errors,
"timeout": 30
Expand Down Expand Up @@ -127,7 +136,7 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool:

# Fetch last modification time from the server
server_headers = \
requests.head(download_url, headers=headers,
session.head(download_url, headers=headers,
allow_redirects=True,
**request_options).headers

Expand All @@ -146,7 +155,7 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool:
headers["if-modified-since"] = last_modified \
.strftime("%a, %d %b %Y %X %Z")

response = requests.get(download_url, headers=headers,
response = session.get(download_url, headers=headers,
**request_options)

# If the file was not modified, return
Expand Down

0 comments on commit 7d9764c

Please sign in to comment.