Commit
fix: broken large linux downloads and hanging processes
imLinguin committed Mar 2, 2024
1 parent b65d67b commit 8a32d87
Showing 3 changed files with 36 additions and 5 deletions.
4 changes: 2 additions & 2 deletions gogdl/dl/managers/linux.py
@@ -241,7 +241,7 @@ def download(self):
diff = BaseDiff()

final_files = list()
- for file in new:
+ for i, file in enumerate(new):
# Prepare file for download
# Calculate data offsets
handler = None
@@ -254,7 +254,7 @@ def download(self):
print("Orphan file found")
continue

- data_start = handler.start_of_archive_index + file.relative_local_file_offset + 34 + file.file_name_length + file.extra_field_length
+ data_start = handler.start_of_archive_index + file.file_data_offset
c_size = file.compressed_size
size = file.uncompressed_size
method = file.compression_method
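The change above replaces the hand-computed data_start: the old expression added a fixed 34-byte constant plus the file-name and extra-field lengths recorded in the central directory. Per the ZIP specification the fixed portion of a local file header is 30 bytes, and the extra-field length stored in the local header often differs from the central-directory copy, so an offset derived only from central-directory values can miss the real start of the compressed stream, presumably what broke large installers. A minimal sketch of the more defensive alternative, reading the local header itself; read_at is a hypothetical helper (for example a ranged HTTP read), not a function from this repository:

import struct

LOCAL_FILE_HEADER = b"PK\x03\x04"
LOCAL_FILE_HEADER_SIZE = 30  # fixed portion of a local file header per APPNOTE 4.3.7

def data_offset_from_local_header(read_at, local_header_offset):
    """Locate the start of an entry's compressed data by parsing its local
    file header instead of trusting central-directory lengths."""
    header = read_at(local_header_offset, LOCAL_FILE_HEADER_SIZE)
    assert header[:4] == LOCAL_FILE_HEADER
    # Name and extra-field lengths as stored in the *local* header; these
    # can differ from the central-directory copies.
    name_len, extra_len = struct.unpack("<HH", header[26:30])
    return local_header_offset + LOCAL_FILE_HEADER_SIZE + name_len + extra_len

The commit avoids an extra read per file by instead precomputing file_data_offset from neighbouring central-directory entries, as added in gogdl/dl/objects/linux.py below.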
11 changes: 11 additions & 0 deletions gogdl/dl/managers/task_executor.py
@@ -529,18 +529,27 @@ def interrupt_shutdown(self):
self.shared_memory.close()
self.shared_memory.unlink()
self.shared_memory = None
self.manager.shutdown()


def shutdown(self):
self.logger.debug("Stopping progressbar")
self.progress.completed = True

# Clear speed queues
for q in [self.download_speed_updates, self.writer_speed_updates]:
while True:
try:
_ = q.get_nowait()
except Empty:
break

self.logger.debug("Sending terminate instruction to workers")
for _ in range(self.allowed_threads):
self.download_queue.put(generic.TerminateWorker())

self.writer_queue.put(generic.TerminateWorker())

for worker in self.download_workers:
worker.join(timeout=2)
if worker.is_alive():
@@ -568,6 +577,8 @@ def shutdown(self):
with self.shm_cond:
self.shm_cond.notify()

self.manager.shutdown()

try:
if os.path.exists(self.resume_file):
os.remove(self.resume_file)
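These additions address the hanging processes from the commit title. A process that has put items on a multiprocessing.Queue will not exit until its feeder thread has flushed them, so unread speed updates could leave the process tree stuck at shutdown, and the multiprocessing manager was never shut down in either exit path. The new code drains both speed queues and calls self.manager.shutdown() from interrupt_shutdown() and shutdown(). A self-contained sketch of the same drain-then-terminate pattern; drain, stop_workers and make_sentinel are illustrative names, not functions from this file:

from queue import Empty

def drain(q):
    """Discard everything still buffered in a multiprocessing queue so its
    feeder thread cannot keep a process alive at exit."""
    while True:
        try:
            q.get_nowait()
        except Empty:
            break

def stop_workers(download_queue, workers, make_sentinel, timeout=2):
    # One sentinel per worker so every consumer sees a terminate message,
    # mirroring the generic.TerminateWorker() loop in the diff above.
    for _ in workers:
        download_queue.put(make_sentinel())
    for w in workers:
        w.join(timeout=timeout)
        if w.is_alive():
            w.terminate()  # last resort if a worker ignored its sentinel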
26 changes: 23 additions & 3 deletions gogdl/dl/objects/linux.py
@@ -1,4 +1,5 @@
from io import BytesIO
import stat


END_OF_CENTRAL_DIRECTORY = b"\x50\x4b\x05\x06"
@@ -93,6 +94,7 @@ def __init__(self, product):
self.extra_field: BytesIO
self.comment: bytes
self.last_byte: int
self.file_data_offset: int

@classmethod
def from_bytes(cls, data, product):
@@ -114,6 +116,7 @@ def from_bytes(cls, data, product):
cd_file.int_file_attrs = data[36:38]
cd_file.ext_file_attrs = data[38:42]
cd_file.relative_local_file_offset = int.from_bytes(data[42:46], "little")
cd_file.file_data_offset = 0

extra_field_start = 46 + cd_file.file_name_length
cd_file.file_name = bytes(data[46:extra_field_start]).decode()
@@ -128,13 +131,15 @@ def from_bytes(cls, data, product):
size = int.from_bytes(cd_file.extra_field.read(2), "little")

if id == 0x01:
- field = BytesIO(cd_file.extra_field.read(size))
+ if cd_file.extra_field_length - cd_file.extra_field.tell() >= size:
+     field = BytesIO(cd_file.extra_field.read(size))
break

cd_file.extra_field.seek(size, 1)

- if cd_file.extra_field_length - cd_file.extra_field.tell() > 0:
+ if cd_file.extra_field_length - cd_file.extra_field.tell() == 0:
break


if field:
if cd_file.uncompressed_size == 0xFFFFFFFF:
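The hunk above tightens the scan for the zip64 extended-information extra field (header ID 0x0001), which carries the real 64-bit values once the 32-bit fields are saturated at 0xFFFFFFFF; the added bounds checks keep the reader from running past the recorded extra-field length. For reference, a self-contained sketch of the same scan over a raw extra-field buffer (a simplified stand-in, not the project's parser):

import struct
from io import BytesIO

ZIP64_EXTRA_ID = 0x0001  # zip64 extended information, per APPNOTE 4.5.3

def find_zip64_extra(extra_field: bytes):
    """Return the payload of the zip64 extended-information extra field,
    or None if the entry does not carry one."""
    buf = BytesIO(extra_field)
    total = len(extra_field)
    while total - buf.tell() >= 4:  # need at least an id + size pair
        header_id, size = struct.unpack("<HH", buf.read(4))
        if header_id == ZIP64_EXTRA_ID:
            if total - buf.tell() >= size:
                return buf.read(size)
            return None  # declared size overruns the buffer
        buf.seek(size, 1)
    return None

The payload holds the 64-bit uncompressed size, compressed size and local-header offset, but only for the fields whose 32-bit counterparts were 0xFFFFFFFF, which is exactly what the "if cd_file.uncompressed_size == 0xFFFFFFFF:" branch above checks before reading them.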
@@ -156,7 +161,7 @@ def from_bytes(cls, data, product):
return cd_file, comment_start + cd_file.file_comment_length

def is_symlink(self):
- return (int.from_bytes(self.ext_file_attrs, "little") & 1 << 29) != 0
+ return stat.S_ISLNK(int.from_bytes(self.ext_file_attrs, "little") >> 16)

def as_dict(self):
return {'file_name': self.file_name, 'crc32': self.crc32, 'compressed_size': self.compressed_size, 'size': self.uncompressed_size, 'is_symlink': self.is_symlink()}
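The is_symlink() change is subtler than it looks. For archives produced on Unix, the upper 16 bits of the external file attributes carry the original st_mode, and a symlink's type is S_IFLNK (0o120000), which has two bits set. The old single-bit test (& 1 << 29) only looked at one of them, so character devices and sockets would also have been reported as symlinks; stat.S_ISLNK on the full shifted mode compares the whole file-type field. A small illustration (not project code):

import stat

def unix_mode(ext_file_attrs: bytes) -> int:
    # For Unix-origin archives the upper 16 bits of the external
    # attributes field hold the st_mode of the original file.
    return int.from_bytes(ext_file_attrs, "little") >> 16

# S_IFLNK is 0o120000: after the 16-bit shift, bit 13 of the mode is the bit
# the old test checked, but that bit is also set for character devices and
# sockets, hence the switch to stat.S_ISLNK on the full mode.
assert stat.S_ISLNK(0o120777)          # typical symlink mode
assert not stat.S_ISLNK(stat.S_IFCHR)  # old bit test would call this a link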
@@ -184,6 +189,15 @@ def from_bytes(cls, data, n, product):
cd_file, next_offset = central_dir.create_central_dir_file(data, product)
central_dir.files.append(cd_file)
data = data[next_offset:]
if record == 0:
continue

prev_i = record - 1
if not (prev_i >= 0 and prev_i < len(central_dir.files)):
continue
prev = central_dir.files[prev_i]
prev.file_data_offset = cd_file.relative_local_file_offset - prev.compressed_size

return central_dir

class Zip64EndOfCentralDirLocator:
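This is the core of the large-download fix: rather than guessing each entry's payload position from header lengths, CentralDirectory.from_bytes now back-fills file_data_offset from the following entry. Entry i-1's compressed data must end where entry i's local header begins, hence prev.file_data_offset = cd_file.relative_local_file_offset - prev.compressed_size; the last entry gets the same treatment in __find_central_directory further down, using the central directory's own offset as the boundary. This relies on entries being laid out back to back with no data descriptors or padding in between, which GOG's Linux installers apparently satisfy. A toy worked example of the arithmetic:

# Toy numbers: two entries plus the central directory, laid out back to back.
first_local_offset  = 0       # entry 0's local header opens the archive
second_local_offset = 1024    # entry 1's local header
central_dir_offset  = 9216    # where the central directory begins
first_csize  = 900            # compressed sizes from the central directory
second_csize = 8000

# Entry 0's data ends where entry 1's local header begins:
first_data_offset = second_local_offset - first_csize    # 124
# Entry 1 is the last one, so its data ends at the central directory:
second_data_offset = central_dir_offset - second_csize   # 1216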
@@ -323,8 +337,10 @@ def __find_end_of_cd(self):

end_of_cd_header_data_index = end_of_cd_data.find(END_OF_CENTRAL_DIRECTORY)
zip64_end_of_cd_locator_index = end_of_cd_data.find(ZIP_64_END_OF_CD_LOCATOR)
assert end_of_cd_header_data_index != -1
end_of_cd = EndOfCentralDir.from_bytes(end_of_cd_data[end_of_cd_header_data_index:])
if end_of_cd.central_directory_offset == 0xFFFFFFFF:
assert zip64_end_of_cd_locator_index != -1
# We need to find zip64 headers

zip64_end_of_cd_locator = Zip64EndOfCentralDirLocator.from_bytes(end_of_cd_data[zip64_end_of_cd_locator_index:])
@@ -345,9 +361,13 @@ def __find_central_directory(self):
size=self.size_of_central_directory,
)

assert central_directory_data[:4] == CENTRAL_DIRECTORY

self.central_directory = CentralDirectory.from_bytes(
central_directory_data, self.central_directory_records, self.product
)
last_entry = self.central_directory.files[-1]
last_entry.file_data_offset = self.central_directory_offset - last_entry.compressed_size


class LinuxFile:
