Skip to content

Commit

Permalink
workaround for the wrong "Content-Encoding" header (--images)
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss committed Mar 7, 2024
1 parent 5654502 commit e07eee1
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
18 changes: 17 additions & 1 deletion wikiteam3/dumpgenerator/dump/image/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,23 @@ def get_ia_wbm_response() -> Optional[requests.Response]:

if r is None:
Delay(config=config)
r = session.get(url=url, params=modify_params(), headers=modify_headers(), allow_redirects=True)
try:
r = session.get(url=url, params=modify_params(), headers=modify_headers(), allow_redirects=True)
except requests.exceptions.ContentDecodingError as e:
# Workaround for https://fedoraproject.org/w/uploads/5/54/Duffy-f12-banner.svgz
# (see also https://cdn.digitaldragon.dev/wikibot/jobs/b0f52fc3-927b-4d14-aded-89a2795e8d4d/log.txt)
# The server responds with "Content-Encoding: gzip" (or another encoding), but the body is not actually encoded/compressed.
# If this workaround can't get the original file, the file will be thrown into the images_mismatch dir, which is not too bad :)
log_error(
config, to_stdout=True,
text=f"{e} when downloading {filename_underscore} with URL {url} . "
"Retrying with 'Accept-Encoding: identity' header and no transfer auto-decompresion..."
)
_headers = modify_headers()
_headers["Accept-Encoding"] = "identity"
r = session.get(url=url, params=modify_params(), headers=_headers, allow_redirects=True, stream=True)
r._content = r.raw.read()

check_response(r)

# a trick to get original file (fandom)
Expand Down
2 changes: 1 addition & 1 deletion wikiteam3/utils/monkey_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def new_send(request, **kwargs):
self.clear_timeouted_pools()

return self.old_send_method(request, **kwargs)
except KeyboardInterrupt:
except (KeyboardInterrupt, requests.exceptions.ContentDecodingError): # don't retry
raise
except Exception as e:
hard_retries_left -= 1
Expand Down

0 comments on commit e07eee1

Please sign in to comment.