From 36f951eb7ec7f750d0dde673a56dde30c6f3b049 Mon Sep 17 00:00:00 2001 From: datawhores Date: Thu, 4 Jul 2024 15:40:57 -0500 Subject: [PATCH] use disk write to measure overall speed and increase max chunk size --- ofscraper/classes/progress/transfercol.py | 3 +- ofscraper/const/values/req/req.py | 6 +-- ofscraper/utils/system/speed.py | 63 +++++++++++------------ 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/ofscraper/classes/progress/transfercol.py b/ofscraper/classes/progress/transfercol.py index ed0dc6f0f..4349026bb 100644 --- a/ofscraper/classes/progress/transfercol.py +++ b/ofscraper/classes/progress/transfercol.py @@ -4,6 +4,7 @@ from rich.text import Text import ofscraper.utils.live.progress as progress_utils from rich.filesize import decimal +from ofscraper.utils.system.speed import get_download_speed class OverallTransferSpeedColumn(TransferSpeedColumn): """Renders human readable transfer speed.""" @@ -12,7 +13,7 @@ def __init__(self, *args, **kwargs): self._process_list=None def render(self, task) -> Text: """Show data transfer speed.""" - speed = self._get_curr_speed_helper(task) + speed = get_download_speed() if not speed: return Text("?", style="progress.data.speed") data_speed = decimal(int(speed)) diff --git a/ofscraper/const/values/req/req.py b/ofscraper/const/values/req/req.py index cf5de1997..72df0de30 100644 --- a/ofscraper/const/values/req/req.py +++ b/ofscraper/const/values/req/req.py @@ -10,7 +10,7 @@ PROXY = None PROXY_MOUNTS = None PROXY_AUTH = None -MAX_CHUNK_SIZE = 1024 * 1024 * 50 +MAX_CHUNK_SIZE = 1024 * 1024 * 100 MIN_CHUNK_SIZE = 4 * 1024 CHUNK_UPDATE_COUNT = 12 CHUNK_SIZE_UPDATE_COUNT=15 @@ -50,7 +50,7 @@ # ideal chunk -CHUNK_MEMORY_SPLIT = 128 -CHUNK_FILE_SPLIT = 128 +CHUNK_MEMORY_SPLIT = 64 +CHUNK_FILE_SPLIT = 64 MAX_READ_SIZE = 1024 * 1024 * 5 diff --git a/ofscraper/utils/system/speed.py b/ofscraper/utils/system/speed.py index a6834a1e8..342a19489 100644 --- a/ofscraper/utils/system/speed.py +++ b/ofscraper/utils/system/speed.py @@ -1,5 +1,5 @@ import psutil -import time +import arrow from ofscraper.utils.system.system import get_all_ofscrapers_processes DOWNLOAD_OBJ=None @@ -8,7 +8,7 @@ def get_download_speed(): if not DOWNLOAD_OBJ: pids=list(map(lambda x:x.pid,get_all_ofscrapers_processes())) DOWNLOAD_OBJ=MultiProcessDownloadSpeed(pids) - return DOWNLOAD_OBJ.get_download_speed() + return DOWNLOAD_OBJ.speed class MultiProcessDownloadSpeed: @@ -22,9 +22,12 @@ class MultiProcessDownloadSpeed: def __init__(self, pids): self.pids = pids self.previous_stats = {pid: None for pid in pids} # Stores previous stats per pid - self.previous_time = None # Stores previous time of measurement - - def get_download_speed(self): + self.previous_time = {pid: None for pid in pids} + self.previous_speed = {pid:0 for pid in pids} # Stores previous time of measurement + self._speed=None + self.previous_run=None + @property + def speed(self): """ Calculates and returns the total download speed (received bytes per second) for all processes in the pids list. @@ -35,41 +38,37 @@ def get_download_speed(self): """ try: # Get current network interface stats - current_stats = psutil.net_io_counters() - current_time = time.time() - # Check if this is the first call (no previous stats) - if self.previous_time is None: - self.previous_stats = {pid: current_stats for pid in self.pids} - self.previous_time = current_time - return 0 - - # Calculate time difference - time_delta = current_time - self.previous_time # Calculate total received bytes since last call - total_received_bytes = 0 + total_bytes_second = 0 for pid in self.pids: try: - if pid in current_stats: # Check if process is still running - previous_stats = self.previous_stats[pid] - total_received_bytes += current_stats.bytes_recv - previous_stats.bytes_recv - self.previous_stats[pid] = current_stats # Update previous stats - else: - print(f"Process {pid} not found. Removing from list.") - self.pids.remove(pid) - if not self.pids: # No processes left, stop tracking - return None - except Exception as E: - print(E) + if not psutil.pid_exists(pid): + continue # Check if process is still running + process=psutil.Process(pid) + curr_stats=process.io_counters() + curr_time=arrow.now().float_timestamp - # Update previous time for next call - self.previous_time = current_time + if not self.previous_time[pid] or not self.previous_stats[pid]: + self.previous_stats[pid]=curr_stats + self.previous_time[pid] = curr_time + continue + previous_stats = self.previous_stats[pid] + previous_time=self.previous_time[pid] + if curr_time-previous_time<1.6: + total_bytes_second=total_bytes_second+self.previous_speed[pid] + else: + self.previous_stats[pid] = curr_stats # Update previous stats + self.previous_time[pid] = curr_time # Update time stats + new_speed=(curr_stats.write_bytes - previous_stats.write_bytes)/(curr_time-previous_time) or self.previous_speed[pid] + total_bytes_second= total_bytes_second+new_speed + self.previous_speed[pid]=new_speed - # Calculate total download speed in bytes per second - total_download_speed = total_received_bytes / time_delta - return total_download_speed + except Exception as E: + print(E) + return total_bytes_second except (psutil.NoSuchProcess, psutil.AccessDenied) as e: print(f"Error getting download speed: {e}") return None \ No newline at end of file