From 7d9764cb05badcd8ad4174371a5aa0f4b58dd49e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonah=20Br=C3=BCchert?= Date: Thu, 2 Jan 2025 02:18:40 +0100 Subject: [PATCH] fetch: Try to make downloads more robust This adds retries and connection reuse --- src/fetch.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/fetch.py b/src/fetch.py index 7205bf57..9554dfdf 100755 --- a/src/fetch.py +++ b/src/fetch.py @@ -10,6 +10,7 @@ from zipfile import ZipFile from typing import Optional, Any from zoneinfo import ZoneInfo +from requests.adapters import HTTPAdapter, Retry import email.utils import requests @@ -100,6 +101,16 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool: return self.fetch_source(dest_path, http_source) case HttpSource(): + session = requests.Session() + + retries = Retry(total=5, + backoff_factor=0.1, + status_forcelist=[500, 502, 503, 504]) + + # 'http://' does not match https:// URLs, so mount both + session.mount('http://', HTTPAdapter(max_retries=retries)) + session.mount('https://', HTTPAdapter(max_retries=retries)) + request_options: dict[str, Any] = { "verify": not source.options.ignore_tls_errors, "timeout": 30 @@ -127,7 +138,7 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool: # Fetch last modification time from the server server_headers = \ - requests.head(download_url, headers=headers, + session.head(download_url, headers=headers, allow_redirects=True, **request_options).headers @@ -146,7 +157,7 @@ def fetch_source(self, dest_path: Path, source: Source) -> bool: headers["if-modified-since"] = last_modified \ .strftime("%a, %d %b %Y %X %Z") - response = requests.get(download_url, headers=headers, + response = session.get(download_url, headers=headers, **request_options) # If the file was not modified, return