Skip to content

Commit

Permalink
use disk write to measure overall speed
Browse files Browse the repository at this point in the history
and increase max chunk size
  • Loading branch information
datawhores committed Jul 4, 2024
1 parent 8783eac commit 36f951e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 36 deletions.
3 changes: 2 additions & 1 deletion ofscraper/classes/progress/transfercol.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rich.text import Text
import ofscraper.utils.live.progress as progress_utils
from rich.filesize import decimal
from ofscraper.utils.system.speed import get_download_speed

class OverallTransferSpeedColumn(TransferSpeedColumn):
"""Renders human readable transfer speed."""
Expand All @@ -12,7 +13,7 @@ def __init__(self, *args, **kwargs):
self._process_list=None
def render(self, task) -> Text:
"""Show data transfer speed."""
speed = self._get_curr_speed_helper(task)
speed = get_download_speed()
if not speed:
return Text("?", style="progress.data.speed")
data_speed = decimal(int(speed))
Expand Down
6 changes: 3 additions & 3 deletions ofscraper/const/values/req/req.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
PROXY = None
PROXY_MOUNTS = None
PROXY_AUTH = None
MAX_CHUNK_SIZE = 1024 * 1024 * 50
MAX_CHUNK_SIZE = 1024 * 1024 * 100
MIN_CHUNK_SIZE = 4 * 1024
CHUNK_UPDATE_COUNT = 12
CHUNK_SIZE_UPDATE_COUNT=15
Expand Down Expand Up @@ -50,7 +50,7 @@


# ideal chunk
CHUNK_MEMORY_SPLIT = 128
CHUNK_FILE_SPLIT = 128
CHUNK_MEMORY_SPLIT = 64
CHUNK_FILE_SPLIT = 64

MAX_READ_SIZE = 1024 * 1024 * 5
63 changes: 31 additions & 32 deletions ofscraper/utils/system/speed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import psutil
import time
import arrow
from ofscraper.utils.system.system import get_all_ofscrapers_processes

DOWNLOAD_OBJ=None
Expand All @@ -8,7 +8,7 @@ def get_download_speed():
if not DOWNLOAD_OBJ:
pids=list(map(lambda x:x.pid,get_all_ofscrapers_processes()))
DOWNLOAD_OBJ=MultiProcessDownloadSpeed(pids)
return DOWNLOAD_OBJ.get_download_speed()
return DOWNLOAD_OBJ.speed


class MultiProcessDownloadSpeed:
Expand All @@ -22,9 +22,12 @@ class MultiProcessDownloadSpeed:
def __init__(self, pids):
self.pids = pids
self.previous_stats = {pid: None for pid in pids} # Stores previous stats per pid
self.previous_time = None # Stores previous time of measurement

def get_download_speed(self):
self.previous_time = {pid: None for pid in pids}
self.previous_speed = {pid:0 for pid in pids} # Stores previous time of measurement
self._speed=None
self.previous_run=None
@property
def speed(self):
"""
Calculates and returns the total download speed (received bytes per second)
for all processes in the pids list.
Expand All @@ -35,41 +38,37 @@ def get_download_speed(self):
"""
try:
# Get current network interface stats
current_stats = psutil.net_io_counters()
current_time = time.time()

# Check if this is the first call (no previous stats)
if self.previous_time is None:
self.previous_stats = {pid: current_stats for pid in self.pids}
self.previous_time = current_time
return 0

# Calculate time difference
time_delta = current_time - self.previous_time

# Calculate total received bytes since last call
total_received_bytes = 0
total_bytes_second = 0
for pid in self.pids:
try:
if pid in current_stats: # Check if process is still running
previous_stats = self.previous_stats[pid]
total_received_bytes += current_stats.bytes_recv - previous_stats.bytes_recv
self.previous_stats[pid] = current_stats # Update previous stats
else:
print(f"Process {pid} not found. Removing from list.")
self.pids.remove(pid)
if not self.pids: # No processes left, stop tracking
return None
except Exception as E:
print(E)
if not psutil.pid_exists(pid):
continue # Check if process is still running
process=psutil.Process(pid)
curr_stats=process.io_counters()
curr_time=arrow.now().float_timestamp

# Update previous time for next call
self.previous_time = current_time
if not self.previous_time[pid] or not self.previous_stats[pid]:
self.previous_stats[pid]=curr_stats
self.previous_time[pid] = curr_time
continue
previous_stats = self.previous_stats[pid]
previous_time=self.previous_time[pid]
if curr_time-previous_time<1.6:
total_bytes_second=total_bytes_second+self.previous_speed[pid]
else:
self.previous_stats[pid] = curr_stats # Update previous stats
self.previous_time[pid] = curr_time # Update time stats
new_speed=(curr_stats.write_bytes - previous_stats.write_bytes)/(curr_time-previous_time) or self.previous_speed[pid]
total_bytes_second= total_bytes_second+new_speed
self.previous_speed[pid]=new_speed

# Calculate total download speed in bytes per second
total_download_speed = total_received_bytes / time_delta

return total_download_speed
except Exception as E:
print(E)
return total_bytes_second
except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
print(f"Error getting download speed: {e}")
return None

0 comments on commit 36f951e

Please sign in to comment.