From 100541a6d5a50b429a2f3d0ee05bc3037e99fed4 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 21 Aug 2024 18:07:07 +1000 Subject: [PATCH] Logging module and download module; download progress bar --- filesender/api.py | 93 +++++++++++++++++++++++++----------------- filesender/download.py | 50 +++++++++++++++++++++++ filesender/log.py | 28 +++++++++++++ filesender/main.py | 34 +++++++-------- 4 files changed, 150 insertions(+), 55 deletions(-) create mode 100644 filesender/download.py create mode 100644 filesender/log.py diff --git a/filesender/api.py b/filesender/api.py index bd04701..5990374 100644 --- a/filesender/api.py +++ b/filesender/api.py @@ -1,5 +1,5 @@ from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Set -from bs4 import BeautifulSoup +from filesender.download import files_from_page, DownloadFile import filesender.response_types as response import filesender.request_types as request from urllib.parse import urlparse, urlunparse, unquote @@ -10,7 +10,7 @@ import aiofiles from aiostream import stream from contextlib import contextmanager -from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception +from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed, retry_if_exception import logging from tqdm.asyncio import tqdm @@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool: # Seems to be just a bug in the backend # https://github.com/encode/httpx/discussions/2941 return True - elif isinstance(e, HTTPStatusError) and e.response.status_code == 500 and e.response.json()["message"] == "auth_remote_too_late": + elif isinstance(e, HTTPStatusError) and e.response.status_code == 500: + message = e.response.json()["message"] + if message == "auth_remote_too_late": + return True + if message == "auth_remote_signature_check_failed": + return True # These errors are caused by lag between creating the response and it being received - return True return False @@ -40,6 +44,13 @@ def url_without_scheme(url: str) -> str: """ return unquote(urlunparse(urlparse(url)._replace(scheme="")).lstrip("/")) +def exception_to_message(e: BaseException) -> str: + if isinstance(e, HTTPStatusError): + return f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}." + elif isinstance(e, RequestError): + return f"Request failed for request {e.request.method} {e.request.url}. {repr(e)}" + else: + return repr(e) @contextmanager def raise_status(): @@ -49,16 +60,8 @@ def raise_status(): """ try: yield - except HTTPStatusError as e: - raise Exception( - f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}" - ) from e - except RequestError as e: - # TODO: check for SSL read error - raise Exception( - f"Request failed for request {e.request.method} {e.request.url}" - ) from e - + except BaseException as e: + raise Exception(exception_to_message(e)) from e async def yield_chunks(path: Path, chunk_size: int) -> AsyncIterator[Tuple[bytes, int]]: """ @@ -166,11 +169,20 @@ async def _sign_send(self, request: Request) -> Any: with raise_status(): return await self._sign_send_inner(request) + @staticmethod + def on_retry(state: RetryCallState) -> None: + message = str(state.outcome) + if state.outcome is not None and state.outcome.failed: + e = state.outcome.exception() + message = exception_to_message(e) + + logger.warn(f"Attempt {state.attempt_number}. {message}") + @retry( retry=retry_if_exception(should_retry), wait=wait_fixed(0.1), stop=stop_after_attempt(5), - before_sleep=lambda x: logger.warn(f"Attempt {x.attempt_number}.{x.outcome}") + before_sleep=on_retry ) async def _sign_send_inner(self, request: Request) -> Any: # Needs to be a separate function to handle retry policy correctly @@ -313,19 +325,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest: self.http_client.build_request("POST", f"{self.base_url}/guest", json=body) ) - async def _files_from_token(self, token: str) -> Set[int]: + async def _files_from_token(self, token: str) -> Iterable[DownloadFile]: """ Internal function that returns a list of file IDs for a given guest token """ download_page = await self.http_client.get( "https://filesender.aarnet.edu.au", params={"s": "download", "token": token} ) - files: Set[int] = set() - for file in BeautifulSoup(download_page.content, "html.parser").find_all( - class_="file" - ): - files.add(int(file.attrs["data-id"])) - return files + return files_from_page(download_page.content) async def download_files( self, @@ -342,12 +349,12 @@ async def download_files( out_dir: The path to write the downloaded files. """ - file_ids = await self._files_from_token(token) + file_meta = await self._files_from_token(token) - async def _download_args() -> AsyncIterator[Tuple[str, Any, Path]]: + async def _download_args() -> AsyncIterator[Tuple[str, Any, Path, int, str]]: "Yields tuples of arguments to pass to download_file" - for file_id in file_ids: - yield token, file_id, out_dir + for file in file_meta: + yield token, file["id"], out_dir, file["size"], file["name"] # Each file is downloaded in parallel # Pyright messes this up @@ -358,8 +365,8 @@ async def download_file( token: str, file_id: int, out_dir: Path, - key: Optional[bytes] = None, - algorithm: Optional[str] = None, + file_size: int | float = float("inf"), + file_name: Optional[str] = None ) -> None: """ Downloads a single file. @@ -368,6 +375,8 @@ async def download_file( token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`. file_id: A single file ID indicating the file to be downloaded. out_dir: The path to write the downloaded file. + file_size: The file size in bytes, optionally + file_name: The file name of the file being downloaded. This will impact the name by which it's saved. """ download_endpoint = urlunparse( urlparse(self.base_url)._replace(path="/download.php") @@ -375,16 +384,24 @@ async def download_file( async with self.http_client.stream( "GET", download_endpoint, params={"files_ids": file_id, "token": token} ) as res: - for content_param in res.headers["Content-Disposition"].split(";"): - if "filename" in content_param: - filename = content_param.split("=")[1].lstrip('"').rstrip('"') - break - else: - raise Exception("No filename found") - - async with aiofiles.open(out_dir / filename, "wb") as fp: - async for chunk in res.aiter_raw(chunk_size=8192): - await fp.write(chunk) + # Determine filename from response, if not provided + if file_name is None: + for content_param in res.headers["Content-Disposition"].split(";"): + if "filename" in content_param: + file_name = content_param.split("=")[1].lstrip('"').rstrip('"') + break + else: + raise Exception("No filename found") + + chunk_size = 8192 + chunk_size_mb = chunk_size / 1024 / 1024 + with tqdm(desc=file_name, unit="MB", total=int(file_size / 1024 / 1024)) as progress: + async with aiofiles.open(out_dir / file_name, "wb") as fp: + # We can't add the total here, because we don't know it: + # https://github.com/filesender/filesender/issues/1555 + async for chunk in res.aiter_raw(chunk_size=chunk_size): + await fp.write(chunk) + progress.update(chunk_size_mb) async def get_server_info(self) -> response.ServerInfo: """ diff --git a/filesender/download.py b/filesender/download.py new file mode 100644 index 0000000..aed9b93 --- /dev/null +++ b/filesender/download.py @@ -0,0 +1,50 @@ +from typing import Iterable, TypedDict + +from bs4 import BeautifulSoup + + +class DownloadFile(TypedDict): + client_entropy: str + encrypted: str + encrypted_size: int + fileaead: str + fileiv: str + id: int + key_salt: str + key_version: int + mime: str + #: filename + name: str + password_encoding: str + password_hash_iterations: int + password_version: int + size: int + transferid: int + +def files_from_page(content: bytes) -> Iterable[DownloadFile]: + """ + Yields dictionaries describing the files listed on a FileSender web page + + Params: + content: The HTML content of the FileSender download page + """ + for file in BeautifulSoup(content, "html.parser").find_all( + class_="file" + ): + yield { + "client_entropy": file.attrs[f"data-client-entropy"], + "encrypted": file.attrs["data-encrypted"], + "encrypted_size": int(file.attrs["data-encrypted-size"]), + "fileaead": file.attrs["data-fileaead"], + "fileiv": file.attrs["data-fileiv"], + "id": int(file.attrs["data-id"]), + "key_salt": file.attrs["data-key-salt"], + "key_version": int(file.attrs["data-key-version"]), + "mime": file.attrs["data-mime"], + "name": file.attrs["data-name"], + "password_encoding": file.attrs["data-password-encoding"], + "password_hash_iterations": int(file.attrs["data-password-hash-iterations"]), + "password_version": int(file.attrs["data-password-version"]), + "size": int(file.attrs["data-size"]), + "transferid": int(file.attrs["data-transferid"]), + } diff --git a/filesender/log.py b/filesender/log.py new file mode 100644 index 0000000..8047ee4 --- /dev/null +++ b/filesender/log.py @@ -0,0 +1,28 @@ +from click import ParamType, Context, Parameter +from enum import Enum + +class LogLevel(Enum): + NOTSET = 0 + DEBUG = 10 + VERBOSE = 15 + INFO = 20 + WARNING = 30 + ERROR = 40 + CRITICAL = 50 + +class LogParam(ParamType): + name = "LogParam" + + def convert(self, value: int | str, param: Parameter | None, ctx: Context | None) -> int: + if isinstance(value, int): + return value + + # Convert string representation to int + if not hasattr(LogLevel, value): + self.fail(f"{value!r} is not a valid log level", param, ctx) + + return LogLevel[value].value + + def get_metavar(self, param: Parameter) -> str | None: + # Print out the choices + return "|".join(LogLevel._member_map_) diff --git a/filesender/main.py b/filesender/main.py index 056edda..3752f2a 100644 --- a/filesender/main.py +++ b/filesender/main.py @@ -47,20 +47,20 @@ def version_callback(value: bool): @app.callback(context_settings=context) def common_args( - base_url: Annotated[str, Option(help="The URL of the FileSender REST API")], context: Context, + base_url: Annotated[str, Option(help="The URL of the FileSender REST API")], + log_level: Annotated[ + int, Option(click_type=LogParam(), help="Logging verbosity", ) + ] = LogLevel.INFO.value, version: Annotated[ Optional[bool], Option("--version", callback=version_callback) - ] = None, - log_level: Annotated[ - LogLevel, Option("--log-level", click_type=LogParam(), help="Logging verbosity") - ] = LogLevel.WARNING + ] = None ): context.obj = { "base_url": base_url } logging.basicConfig( - level=log_level.value, format= "%(message)s", datefmt="[%X]", handlers=[RichHandler()] + level=log_level, format= "%(message)s", datefmt="[%X]", handlers=[RichHandler()] ) @@ -110,7 +110,7 @@ def invite( } } })) - logger.info(result) + logger.log(LogLevel.VERBOSE.value, result) logger.info("Invitation successfully sent") @app.command(context_settings=context) @@ -120,8 +120,8 @@ async def upload_voucher( guest_token: Annotated[str, Option(help="The guest token. This is the part of the upload URL after 'vid='")], email: Annotated[str, Option(help="The email address that was invited to upload files")], context: Context, - concurrent_files: ConcurrentFiles = None, - concurrent_chunks: ConcurrentChunks = None, + concurrent_files: ConcurrentFiles = 1, + concurrent_chunks: ConcurrentChunks = 2, chunk_size: ChunkSize = None, ): """ @@ -138,8 +138,8 @@ async def upload_voucher( await auth.prepare(client.http_client) await client.prepare() result: Transfer = await client.upload_workflow(files, {"from": email, "recipients": []}) - logger.info(result) - logger.info("Upload completed successfully") + logger.log(LogLevel.VERBOSE.value, result) + logger.log(LogLevel.INFO.value, "Upload completed successfully") @app.command(context_settings=context) @typer_async @@ -149,8 +149,8 @@ async def upload( files: UploadFiles, recipients: Annotated[List[str], Option(show_default=False, help="One or more email addresses to send the files")], context: Context, - concurrent_files: ConcurrentFiles = None, - concurrent_chunks: ConcurrentChunks = None, + concurrent_files: ConcurrentFiles = 1, + concurrent_chunks: ConcurrentChunks = 2, chunk_size: ChunkSize = None, delay: Delay = 0 ): @@ -170,8 +170,8 @@ async def upload( ) await client.prepare() result: Transfer = await client.upload_workflow(files, {"recipients": recipients, "from": username}) - logger.info(result) - logger.info("Upload completed successfully") + logger.log(LogLevel.VERBOSE.value, result) + logger.log(LogLevel.INFO.value, "Upload completed successfully") @app.command(context_settings=context) def download( @@ -188,7 +188,7 @@ def download( token=token, out_dir=out_dir )) - print(f"Download completed successfully. Files can be found in {out_dir}") + logger.log(LogLevel.INFO.value, f"Download completed successfully. Files can be found in {out_dir}") @app.command(context_settings=context) @typer_async @@ -198,7 +198,7 @@ async def server_info( """Prints out information about the FileSender server you are interfacing with""" client = FileSenderClient(base_url=context.obj["base_url"]) result = await client.get_server_info() - print(result) + logger.log(LogLevel.INFO.value, result) if __name__ == "__main__": app()