Skip to content

Commit

Permalink
Logging module and download module; download progress bar
Browse files Browse the repository at this point in the history
  • Loading branch information
multimeric committed Aug 21, 2024
1 parent 8e1ccdc commit 100541a
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 55 deletions.
93 changes: 55 additions & 38 deletions filesender/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Set

Check failure on line 1 in filesender/api.py

View workflow job for this annotation

GitHub Actions / build (3.9)

Import "Set" is not accessed (reportUnusedImport)

Check failure on line 1 in filesender/api.py

View workflow job for this annotation

GitHub Actions / build (3.10)

Import "Set" is not accessed (reportUnusedImport)

Check failure on line 1 in filesender/api.py

View workflow job for this annotation

GitHub Actions / build (3.11)

Import "Set" is not accessed (reportUnusedImport)

Check failure on line 1 in filesender/api.py

View workflow job for this annotation

GitHub Actions / build (3.12)

Import "Set" is not accessed (reportUnusedImport)
from bs4 import BeautifulSoup
from filesender.download import files_from_page, DownloadFile
import filesender.response_types as response
import filesender.request_types as request
from urllib.parse import urlparse, urlunparse, unquote
Expand All @@ -10,7 +10,7 @@
import aiofiles
from aiostream import stream
from contextlib import contextmanager
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception
from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed, retry_if_exception
import logging
from tqdm.asyncio import tqdm

Expand All @@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool:
# Seems to be just a bug in the backend
# https://github.com/encode/httpx/discussions/2941
return True
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500 and e.response.json()["message"] == "auth_remote_too_late":
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500:
message = e.response.json()["message"]
if message == "auth_remote_too_late":
return True
if message == "auth_remote_signature_check_failed":
return True
# These errors are caused by lag between creating the response and it being received
return True
return False


Expand All @@ -40,6 +44,13 @@ def url_without_scheme(url: str) -> str:
"""
return unquote(urlunparse(urlparse(url)._replace(scheme="")).lstrip("/"))

def exception_to_message(e: BaseException) -> str:
    """
    Builds a human-readable description of an exception.

    Params:
        e: The exception to describe.

    Returns:
        For an `HTTPStatusError`, the response body plus the request method
        and URL. For a `RequestError`, the request method and URL plus the
        exception's repr. For anything else, just the exception's repr.
    """
    if isinstance(e, HTTPStatusError):
        # Read the response body first, then the request, as the message lists them
        body = e.response.text
        req = e.request
        return f"Request failed with content {body} for request {req.method} {req.url}."

    if isinstance(e, RequestError):
        req = e.request
        return f"Request failed for request {req.method} {req.url}. {repr(e)}"

    # Not an HTTP-related exception: there is no request to report
    return repr(e)

@contextmanager
def raise_status():
Expand All @@ -49,16 +60,8 @@ def raise_status():
"""
try:
yield
except HTTPStatusError as e:
raise Exception(
f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}"
) from e
except RequestError as e:
# TODO: check for SSL read error
raise Exception(
f"Request failed for request {e.request.method} {e.request.url}"
) from e

except BaseException as e:
raise Exception(exception_to_message(e)) from e

async def yield_chunks(path: Path, chunk_size: int) -> AsyncIterator[Tuple[bytes, int]]:
"""
Expand Down Expand Up @@ -166,11 +169,20 @@ async def _sign_send(self, request: Request) -> Any:
with raise_status():
return await self._sign_send_inner(request)

@staticmethod
def on_retry(state: RetryCallState) -> None:
    """
    Logs a warning describing a failed attempt before it is retried.

    Params:
        state: The retry state supplied by tenacity for the attempt that just finished.

    The logged message contains the attempt number followed by a description
    of the failure. When the attempt raised, the description comes from
    `exception_to_message`; otherwise it is the string form of the outcome.
    """
    # NOTE(review): this is referenced as `before_sleep=on_retry` inside the class body.
    # staticmethod objects are only directly callable from Python 3.10 onwards;
    # confirm this works on the oldest supported interpreter.
    outcome = state.outcome
    message = str(outcome)
    if outcome is not None and outcome.failed:
        error = outcome.exception()
        # exception() is typed as Optional, so narrow it before passing it on
        if error is not None:
            message = exception_to_message(error)

    # Logger.warn is a deprecated alias of Logger.warning
    logger.warning(f"Attempt {state.attempt_number}. {message}")

@retry(
retry=retry_if_exception(should_retry),
wait=wait_fixed(0.1),
stop=stop_after_attempt(5),
before_sleep=lambda x: logger.warn(f"Attempt {x.attempt_number}.{x.outcome}")
before_sleep=on_retry
)
async def _sign_send_inner(self, request: Request) -> Any:
# Needs to be a separate function to handle retry policy correctly
Expand Down Expand Up @@ -313,19 +325,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest:
self.http_client.build_request("POST", f"{self.base_url}/guest", json=body)
)

async def _files_from_token(self, token: str) -> Set[int]:
async def _files_from_token(self, token: str) -> Iterable[DownloadFile]:
"""
Internal function that returns a list of file IDs for a given guest token
"""
download_page = await self.http_client.get(
"https://filesender.aarnet.edu.au", params={"s": "download", "token": token}
)
files: Set[int] = set()
for file in BeautifulSoup(download_page.content, "html.parser").find_all(
class_="file"
):
files.add(int(file.attrs["data-id"]))
return files
return files_from_page(download_page.content)

async def download_files(
self,
Expand All @@ -342,12 +349,12 @@ async def download_files(
out_dir: The path to write the downloaded files.
"""

file_ids = await self._files_from_token(token)
file_meta = await self._files_from_token(token)

async def _download_args() -> AsyncIterator[Tuple[str, Any, Path]]:
async def _download_args() -> AsyncIterator[Tuple[str, Any, Path, int, str]]:
"Yields tuples of arguments to pass to download_file"
for file_id in file_ids:
yield token, file_id, out_dir
for file in file_meta:
yield token, file["id"], out_dir, file["size"], file["name"]

# Each file is downloaded in parallel
# Pyright messes this up
Expand All @@ -358,8 +365,8 @@ async def download_file(
token: str,
file_id: int,
out_dir: Path,
key: Optional[bytes] = None,
algorithm: Optional[str] = None,
file_size: int | float = float("inf"),

Check failure on line 368 in filesender/api.py

View workflow job for this annotation

GitHub Actions / build (3.9)

Alternative syntax for unions requires Python 3.10 or newer (reportGeneralTypeIssues)
file_name: Optional[str] = None
) -> None:
"""
Downloads a single file.
Expand All @@ -368,23 +375,33 @@ async def download_file(
token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`.
file_id: A single file ID indicating the file to be downloaded.
out_dir: The path to write the downloaded file.
file_size: The file size in bytes, optionally
file_name: The file name of the file being downloaded. This will impact the name by which it's saved.
"""
download_endpoint = urlunparse(
urlparse(self.base_url)._replace(path="/download.php")
)
async with self.http_client.stream(
"GET", download_endpoint, params={"files_ids": file_id, "token": token}
) as res:
for content_param in res.headers["Content-Disposition"].split(";"):
if "filename" in content_param:
filename = content_param.split("=")[1].lstrip('"').rstrip('"')
break
else:
raise Exception("No filename found")

async with aiofiles.open(out_dir / filename, "wb") as fp:
async for chunk in res.aiter_raw(chunk_size=8192):
await fp.write(chunk)
# Determine filename from response, if not provided
if file_name is None:
for content_param in res.headers["Content-Disposition"].split(";"):
if "filename" in content_param:
file_name = content_param.split("=")[1].lstrip('"').rstrip('"')
break
else:
raise Exception("No filename found")

chunk_size = 8192
chunk_size_mb = chunk_size / 1024 / 1024
with tqdm(desc=file_name, unit="MB", total=int(file_size / 1024 / 1024)) as progress:
async with aiofiles.open(out_dir / file_name, "wb") as fp:
# We can't add the total here, because we don't know it:
# https://github.com/filesender/filesender/issues/1555
async for chunk in res.aiter_raw(chunk_size=chunk_size):
await fp.write(chunk)
progress.update(chunk_size_mb)

async def get_server_info(self) -> response.ServerInfo:
"""
Expand Down
50 changes: 50 additions & 0 deletions filesender/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Iterable, TypedDict

from bs4 import BeautifulSoup


class DownloadFile(TypedDict):
    """
    Metadata describing one file listed on a FileSender download page.

    Each key mirrors a `data-*` attribute that `files_from_page` reads from the
    file's HTML element (e.g. `key_salt` comes from `data-key-salt`). Keys
    annotated as `int` are converted from the attribute text with `int()`.
    """
    client_entropy: str
    # Kept as the raw attribute text; it is not converted to a bool
    encrypted: str
    encrypted_size: int
    fileaead: str
    fileiv: str
    # File ID
    id: int
    key_salt: str
    key_version: int
    mime: str
    #: filename
    name: str
    password_encoding: str
    password_hash_iterations: int
    password_version: int
    # File size (presumably in bytes, matching download_file's file_size — confirm)
    size: int
    transferid: int

def files_from_page(content: bytes) -> Iterable[DownloadFile]:
    """
    Yields dictionaries describing the files listed on a FileSender web page.

    Every element with the CSS class `file` produces one `DownloadFile`, built
    from that element's `data-*` attributes. The page is parsed lazily, when
    the first item is requested.

    Params:
        content: The HTML content of the FileSender download page

    Raises:
        ValueError: If a numeric attribute does not contain a valid integer.
        A missing `data-*` attribute surfaces as a lookup error from the
        element's attribute mapping.
    """
    soup = BeautifulSoup(content, "html.parser")
    for file in soup.find_all(class_="file"):
        # Look the attribute mapping up once per element
        attrs = file.attrs
        yield {
            "client_entropy": attrs["data-client-entropy"],
            "encrypted": attrs["data-encrypted"],
            "encrypted_size": int(attrs["data-encrypted-size"]),
            "fileaead": attrs["data-fileaead"],
            "fileiv": attrs["data-fileiv"],
            "id": int(attrs["data-id"]),
            "key_salt": attrs["data-key-salt"],
            "key_version": int(attrs["data-key-version"]),
            "mime": attrs["data-mime"],
            "name": attrs["data-name"],
            "password_encoding": attrs["data-password-encoding"],
            "password_hash_iterations": int(attrs["data-password-hash-iterations"]),
            "password_version": int(attrs["data-password-version"]),
            "size": int(attrs["data-size"]),
            "transferid": int(attrs["data-transferid"]),
        }
28 changes: 28 additions & 0 deletions filesender/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from enum import Enum
from typing import Optional, Union

from click import ParamType, Context, Parameter

class LogLevel(Enum):
    """
    Named logging verbosity levels and their numeric values.

    The values line up with the numeric levels of the standard `logging`
    module, with one extra level, VERBOSE, placed between DEBUG and INFO.
    """
    NOTSET = 0
    DEBUG = 10
    # Additional level: more detailed than INFO, less detailed than DEBUG
    VERBOSE = 15
    INFO = 20
    WARNING = 30
    ERROR = 40
    CRITICAL = 50

class LogParam(ParamType):
    """
    Click parameter type that turns a log level name into its numeric value.

    Integers are passed through unchanged (this covers integer defaults).
    Strings must be the exact name of a `LogLevel` member, such as `"INFO"`.
    """
    name = "LogParam"

    def convert(
        self,
        value: Union[int, str],
        param: Optional[Parameter],
        ctx: Optional[Context],
    ) -> int:
        """
        Converts a command-line value into a numeric log level.

        Params:
            value: Either an integer level or the name of a `LogLevel` member.
            param: The parameter being converted, forwarded to `fail`.
            ctx: The current Click context, forwarded to `fail`.

        Returns:
            The numeric log level.

        A string that is not a `LogLevel` member name is reported through
        `self.fail`, which aborts the conversion with a usage error.
        """
        if isinstance(value, int):
            return value

        # Check the public member mapping rather than hasattr(): the enum class
        # also has non-member attributes (e.g. "mro") that would pass hasattr()
        # and then make the LogLevel[value] lookup raise KeyError.
        if value not in LogLevel.__members__:
            self.fail(f"{value!r} is not a valid log level", param, ctx)

        return LogLevel[value].value

    def get_metavar(
        self, param: Parameter, ctx: Optional[Context] = None
    ) -> Optional[str]:
        """
        Returns the list of valid level names, separated by `|`, for help output.

        Params:
            param: The parameter the metavar is generated for (unused).
            ctx: Optional Click context (unused). NOTE(review): accepted so the
                method works whether or not Click passes a context here —
                confirm against the Click version in use.
        """
        # Print out the choices
        return "|".join(LogLevel.__members__)
34 changes: 17 additions & 17 deletions filesender/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,20 @@ def version_callback(value: bool):

@app.callback(context_settings=context)
def common_args(
base_url: Annotated[str, Option(help="The URL of the FileSender REST API")],
context: Context,
base_url: Annotated[str, Option(help="The URL of the FileSender REST API")],
log_level: Annotated[
int, Option(click_type=LogParam(), help="Logging verbosity", )
] = LogLevel.INFO.value,
version: Annotated[
Optional[bool], Option("--version", callback=version_callback)
] = None,
log_level: Annotated[
LogLevel, Option("--log-level", click_type=LogParam(), help="Logging verbosity")
] = LogLevel.WARNING
] = None
):
context.obj = {
"base_url": base_url
}
logging.basicConfig(
level=log_level.value, format= "%(message)s", datefmt="[%X]", handlers=[RichHandler()]
level=log_level, format= "%(message)s", datefmt="[%X]", handlers=[RichHandler()]
)


Expand Down Expand Up @@ -110,7 +110,7 @@ def invite(
}
}
}))
logger.info(result)
logger.log(LogLevel.VERBOSE.value, result)
logger.info("Invitation successfully sent")

@app.command(context_settings=context)
Expand All @@ -120,8 +120,8 @@ async def upload_voucher(
guest_token: Annotated[str, Option(help="The guest token. This is the part of the upload URL after 'vid='")],
email: Annotated[str, Option(help="The email address that was invited to upload files")],
context: Context,
concurrent_files: ConcurrentFiles = None,
concurrent_chunks: ConcurrentChunks = None,
concurrent_files: ConcurrentFiles = 1,
concurrent_chunks: ConcurrentChunks = 2,
chunk_size: ChunkSize = None,
):
"""
Expand All @@ -138,8 +138,8 @@ async def upload_voucher(
await auth.prepare(client.http_client)
await client.prepare()
result: Transfer = await client.upload_workflow(files, {"from": email, "recipients": []})
logger.info(result)
logger.info("Upload completed successfully")
logger.log(LogLevel.VERBOSE.value, result)
logger.log(LogLevel.INFO.value, "Upload completed successfully")

@app.command(context_settings=context)
@typer_async
Expand All @@ -149,8 +149,8 @@ async def upload(
files: UploadFiles,
recipients: Annotated[List[str], Option(show_default=False, help="One or more email addresses to send the files")],
context: Context,
concurrent_files: ConcurrentFiles = None,
concurrent_chunks: ConcurrentChunks = None,
concurrent_files: ConcurrentFiles = 1,
concurrent_chunks: ConcurrentChunks = 2,
chunk_size: ChunkSize = None,
delay: Delay = 0
):
Expand All @@ -170,8 +170,8 @@ async def upload(
)
await client.prepare()
result: Transfer = await client.upload_workflow(files, {"recipients": recipients, "from": username})
logger.info(result)
logger.info("Upload completed successfully")
logger.log(LogLevel.VERBOSE.value, result)
logger.log(LogLevel.INFO.value, "Upload completed successfully")

@app.command(context_settings=context)
def download(
Expand All @@ -188,7 +188,7 @@ def download(
token=token,
out_dir=out_dir
))
print(f"Download completed successfully. Files can be found in {out_dir}")
logger.log(LogLevel.INFO.value, f"Download completed successfully. Files can be found in {out_dir}")

@app.command(context_settings=context)
@typer_async
Expand All @@ -198,7 +198,7 @@ async def server_info(
"""Prints out information about the FileSender server you are interfacing with"""
client = FileSenderClient(base_url=context.obj["base_url"])
result = await client.get_server_info()
print(result)
logger.log(LogLevel.INFO.value, result)

if __name__ == "__main__":
app()

0 comments on commit 100541a

Please sign in to comment.