From 054bd70581c9535083c231ea7413336b52744dd8 Mon Sep 17 00:00:00 2001 From: datawhores Date: Sun, 17 Dec 2023 10:44:23 -0600 Subject: [PATCH] remove misc --- ofscraper/commands/check.py | 16 +- ofscraper/commands/manual.py | 7 +- ofscraper/commands/scraper.py | 11 +- ofscraper/download/download.py | 231 ++++++------------------ ofscraper/download/downloadnormal.py | 171 ++++++++++++++++++ ofscraper/utils/{misc.py => network.py} | 49 ----- 6 files changed, 244 insertions(+), 241 deletions(-) create mode 100644 ofscraper/download/downloadnormal.py rename ofscraper/utils/{misc.py => network.py} (62%) diff --git a/ofscraper/commands/check.py b/ofscraper/commands/check.py index 3349907d1..6ac5d5dc8 100644 --- a/ofscraper/commands/check.py +++ b/ofscraper/commands/check.py @@ -21,12 +21,12 @@ import ofscraper.commands.manual as manual import ofscraper.constants as constants import ofscraper.db.operations as operations -import ofscraper.download.download as download +import ofscraper.download.downloadnormal as downloadnormal import ofscraper.utils.args as args_ import ofscraper.utils.auth as auth import ofscraper.utils.config as config_ import ofscraper.utils.console as console_ -import ofscraper.utils.misc as misc +import ofscraper.utils.network as network from ..utils.paths import getcachepath @@ -58,7 +58,7 @@ def process_download_cart(): global app while app and not app.row_queue.empty(): if process_download_cart.counter == 0: - if not misc.check_cdm(): + if not network.check_cdm(): log.info( "error was raised by cdm checker\nncdm will not be check again\n\n" ) @@ -104,7 +104,7 @@ def process_download_cart(): operations.create_tables(model_id=model_id, username=username) operations.create_backup(model_id, username) operations.write_profile_table(model_id=model_id, username=username) - values = download.process_dicts( + values = downloadnormal.process_dicts( username, model_id, [media], @@ -199,7 +199,7 @@ def post_checker(): ROWS.extend(row_gather(media, downloaded, user_name)) reset_url() set_count(ROWS) - misc.check_cdm() + network.check_cdm() thread_starters(ROWS) @@ -272,7 +272,7 @@ def message_checker(): reset_url() set_count(ROWS) - misc.check_cdm() + network.check_cdm() thread_starters(ROWS) @@ -298,7 +298,7 @@ def purchase_checker(): ROWS.extend(row_gather(media, downloaded, user_name)) reset_url() set_count(ROWS) - misc.check_cdm() + network.check_cdm() thread_starters(ROWS) @@ -326,7 +326,7 @@ def stories_checker(): ROWS.extend(row_gather(media, downloaded, user_name)) reset_url() set_count(ROWS) - misc.check_cdm() + network.check_cdm() thread_starters(ROWS) diff --git a/ofscraper/commands/manual.py b/ofscraper/commands/manual.py index 2b355c630..9362417a4 100644 --- a/ofscraper/commands/manual.py +++ b/ofscraper/commands/manual.py @@ -9,14 +9,15 @@ import ofscraper.classes.sessionbuilder as sessionbuilder import ofscraper.constants as constants import ofscraper.db.operations as operations +import ofscraper.download.download as download import ofscraper.utils.args as args_ -import ofscraper.utils.misc as misc +import ofscraper.utils.network as network import ofscraper.utils.of as of def manual_download(urls=None): log = logging.getLogger("shared") - misc.check_cdm() + network.check_cdm() media_dict = get_media_from_urls(urls) log.debug(f"Media dict length {len(list(media_dict.values()))}") args = args_.getargs() @@ -32,7 +33,7 @@ def manual_download(urls=None): operations.create_tables(model_id=model_id, username=username) operations.create_backup(model_id, username) operations.write_profile_table(model_id=model_id, username=username) - misc.download_picker(username, model_id, value) + download.download_picker(username, model_id, value) log.info(f"Finished") diff --git a/ofscraper/commands/scraper.py b/ofscraper/commands/scraper.py index 4873c3543..0f32b5b4c 100755 --- a/ofscraper/commands/scraper.py +++ b/ofscraper/commands/scraper.py @@ -26,6 +26,7 @@ import ofscraper.classes.placeholder as placeholder import ofscraper.constants as constants import ofscraper.db.operations as operations +import ofscraper.download.download as download import ofscraper.filters.media.main as filters import ofscraper.filters.models.selector as userselector import ofscraper.interaction.like as like @@ -35,7 +36,7 @@ import ofscraper.utils.config as config import ofscraper.utils.console as console import ofscraper.utils.exit as exit -import ofscraper.utils.misc as misc +import ofscraper.utils.network as network import ofscraper.utils.of as OF import ofscraper.utils.paths as paths import ofscraper.utils.profiles as profiles @@ -166,7 +167,7 @@ def process_post_user_first(): operations.create_tables(model_id=model_id, username=username) operations.create_backup(model_id, username) operations.write_profile_table(model_id=model_id, username=username) - misc.download_picker( + download.download_picker( username, model_id, value, @@ -201,7 +202,7 @@ def normal_post_process(): operations.create_backup(model_id, ele.name) operations.write_profile_table(model_id=model_id, username=ele.name) combined_urls = OF.process_areas(ele, model_id) - misc.download_picker(ele.name, model_id, combined_urls) + download.download_picker(ele.name, model_id, combined_urls) except Exception as e: if isinstance(e, KeyboardInterrupt): raise e @@ -233,7 +234,7 @@ def normal_post_process(): operations.create_tables(model_id=model_id, username=username) operations.create_backup(model_id, username) operations.write_profile_table(model_id=model_id, username=username) - misc.download_picker( + download.download_picker( username, model_id, value, @@ -450,7 +451,7 @@ def main(): print_start() paths.cleanup() paths.cleanDB() - misc.check_cdm() + network.check_cdm() scrapper() paths.cleanup() diff --git a/ofscraper/download/download.py b/ofscraper/download/download.py index ffce52d5f..d6cb5a482 100644 --- a/ofscraper/download/download.py +++ b/ofscraper/download/download.py @@ -1,177 +1,56 @@ -r""" - - _____ - _____/ ____\______ ________________ ____ ___________ - / _ \ __\/ ___// ___\_ __ \__ \ / _ \_/ __ \_ __ \ -( <_> ) | \___ \\ \___| | \// __ \( <_> ) ___/| | \/ - \____/|__| /____ >\___ >__| (____ /\____/ \___ >__| - \/ \/ \/ \/ -""" -import asyncio import logging -import pathlib -import traceback - -from rich.live import Live -from rich.table import Column - -try: - from win32_setctime import setctime # pylint: disable=import-error -except ModuleNotFoundError: - pass -import ofscraper.classes.placeholder as placeholder -import ofscraper.classes.sessionbuilder as sessionbuilder -import ofscraper.constants as constants -import ofscraper.download.common as common -import ofscraper.utils.console as console -import ofscraper.utils.exit as exit -import ofscraper.utils.paths as paths -import ofscraper.utils.stdout as stdout -from ofscraper.classes.semaphoreDelayed import semaphoreDelayed -from ofscraper.download.alt_download import alt_download -from ofscraper.download.common import ( - convert_num_bytes, - get_medialog, - reset_globals, - setDirectoriesDate, - setupProgressBar, -) -from ofscraper.download.main_download import main_download -from ofscraper.utils.run_async import run - - -@run -async def process_dicts(username, model_id, medialist): - with stdout.lowstdout(): - progress_group, overall_progress, job_progress = setupProgressBar() - # This need to be here: https://stackoverflow.com/questions/73599594/asyncio-works-in-python-3-10-but-not-in-python-3-8 - reset_globals() - common.log = logging.getLogger("ofscraper-download") - - try: - with Live( - progress_group, - refresh_per_second=constants.refreshScreen, - console=console.shared_console, - ): - aws = [] - photo_count = 0 - video_count = 0 - audio_count = 0 - skipped = 0 - forced_skipped = 0 - total_downloaded = 0 - sum = 0 - desc = "Progress: ({p_count} photos, {v_count} videos, {a_count} audios, {forced_skipped} skipped, {skipped} failed || {sumcount}/{mediacount}||{total_bytes_download}/{total_bytes})" - - async with sessionbuilder.sessionBuilder() as c: - for ele in medialist: - aws.append( - asyncio.create_task( - download(c, ele, model_id, username, job_progress) - ) - ) - task1 = overall_progress.add_task( - desc.format( - p_count=photo_count, - v_count=video_count, - a_count=audio_count, - skipped=skipped, - mediacount=len(medialist), - forced_skipped=forced_skipped, - sumcount=0, - total_bytes_download=0, - total_bytes=0, - ), - total=len(aws), - visible=True, - ) - for coro in asyncio.as_completed(aws): - try: - pack = await coro - common.log.debug(f"unpack {pack} count {len(pack)}") - media_type, num_bytes_downloaded = pack - except Exception as e: - common.log.traceback_(e) - common.log.traceback_(traceback.format_exc()) - media_type = "skipped" - num_bytes_downloaded = 0 - - total_downloaded += num_bytes_downloaded - total_bytes_downloaded = convert_num_bytes(total_downloaded) - total_bytes = convert_num_bytes(common.total_data) - if media_type == "images": - photo_count += 1 - - elif media_type == "videos": - video_count += 1 - elif media_type == "audios": - audio_count += 1 - elif media_type == "skipped": - skipped += 1 - elif media_type == "forced_skipped": - forced_skipped += 1 - sum += 1 - overall_progress.update( - task1, - description=desc.format( - p_count=photo_count, - v_count=video_count, - a_count=audio_count, - skipped=skipped, - forced_skipped=forced_skipped, - mediacount=len(medialist), - sumcount=sum, - total_bytes=total_bytes, - total_bytes_download=total_bytes_downloaded, - ), - refresh=True, - advance=1, - ) - overall_progress.remove_task(task1) - setDirectoriesDate() - common.log.warning( - f"[bold]{username}[/bold] ({photo_count+audio_count+video_count} downloads total [{video_count} videos, {audio_count} audios, {photo_count} photos] {forced_skipped} skipped, {skipped} failed)" - ) - return photo_count, video_count, audio_count, forced_skipped, skipped - - except Exception as E: - with exit.DelayedKeyboardInterrupt(): - raise E - finally: - await asyncio.get_event_loop().run_in_executor( - common.cache_thread, common.cache.close - ) - common.cache_thread.shutdown() - - -async def download(c, ele, model_id, username, progress): - async with common.maxfile_sem: - with paths.set_directory( - placeholder.Placeholders().getmediadir(ele, username, model_id) - ): - try: - if ele.url: - return await main_download( - c, - ele, - pathlib.Path(".").absolute(), - username, - model_id, - progress, - ) - elif ele.mpd: - return await alt_download( - c, - ele, - pathlib.Path(".").absolute(), - username, - model_id, - progress, - ) - except Exception as E: - common.log.debug(f"{get_medialog(ele)} exception {E}") - common.log.debug( - f"{get_medialog(ele)} exception {traceback.format_exc()}" - ) - return "skipped", 0 +import time + +import httpx + +import ofscraper.db.operations as operations +import ofscraper.download.downloadbatch as batchdownloader +import ofscraper.download.downloadnormal as normaldownloader +import ofscraper.utils.args as args_ +import ofscraper.utils.config as config_ +import ofscraper.utils.separate as seperate +import ofscraper.utils.system as system + + +def medialist_filter(medialist, model_id, username): + log = logging.getLogger("shared") + if not args_.getargs().dupe: + media_ids = set( + operations.get_media_ids_downloaded(model_id=model_id, username=username) + ) + log.debug( + f"Number of unique media ids in database for {username}: {len(media_ids)}" + ) + medialist = seperate.separate_by_id(medialist, media_ids) + log.debug(f"Number of new mediaids with dupe ids removed: {len(medialist)}") + medialist = seperate.seperate_avatars(medialist) + log.debug(f"Removed previously downloaded avatars/headers") + log.debug(f"Final Number of media to download {len(medialist)}") + + else: + log.info(f"forcing all downloads media count {len(medialist)}") + return medialist + + +def download_picker(username, model_id, medialist): + medialist = medialist_filter(medialist, model_id, username) + + if len(medialist) == 0: + logging.getLogger("shared").error( + f"[bold]{username}[/bold] ({0} photos, {0} videos, {0} audios, {0} skipped, {0} failed)" + ) + return 0, 0, 0, 0, 0 + elif ( + system.getcpu_count() > 1 + and ( + len(medialist) >= config_.get_download_semaphores(config_.read_config()) * 5 + ) + and ( + args_.getargs().downloadthreads + or config_.get_threads(config_.read_config()) + ) + > 0 + ): + return batchdownloader.process_dicts(username, model_id, medialist) + else: + return normaldownloader.process_dicts(username, model_id, medialist) diff --git a/ofscraper/download/downloadnormal.py b/ofscraper/download/downloadnormal.py new file mode 100644 index 000000000..127e05a66 --- /dev/null +++ b/ofscraper/download/downloadnormal.py @@ -0,0 +1,171 @@ +r""" + + _____ + _____/ ____\______ ________________ ____ ___________ + / _ \ __\/ ___// ___\_ __ \__ \ / _ \_/ __ \_ __ \ +( <_> ) | \___ \\ \___| | \// __ \( <_> ) ___/| | \/ + \____/|__| /____ >\___ >__| (____ /\____/ \___ >__| + \/ \/ \/ \/ +""" +import asyncio +import logging +import pathlib +import traceback + +from rich.live import Live + +import ofscraper.classes.placeholder as placeholder +import ofscraper.classes.sessionbuilder as sessionbuilder +import ofscraper.constants as constants +import ofscraper.download.common as common +import ofscraper.utils.console as console +import ofscraper.utils.exit as exit +import ofscraper.utils.paths as paths +import ofscraper.utils.stdout as stdout +from ofscraper.download.alt_download import alt_download +from ofscraper.download.common import ( + convert_num_bytes, + get_medialog, + reset_globals, + setDirectoriesDate, + setupProgressBar, +) +from ofscraper.download.main_download import main_download +from ofscraper.utils.run_async import run + + +@run +async def process_dicts(username, model_id, medialist): + with stdout.lowstdout(): + progress_group, overall_progress, job_progress = setupProgressBar() + # This need to be here: https://stackoverflow.com/questions/73599594/asyncio-works-in-python-3-10-but-not-in-python-3-8 + reset_globals() + common.log = logging.getLogger("ofscraper-download") + + try: + with Live( + progress_group, + refresh_per_second=constants.refreshScreen, + console=console.shared_console, + ): + aws = [] + photo_count = 0 + video_count = 0 + audio_count = 0 + skipped = 0 + forced_skipped = 0 + total_downloaded = 0 + sum = 0 + desc = "Progress: ({p_count} photos, {v_count} videos, {a_count} audios, {forced_skipped} skipped, {skipped} failed || {sumcount}/{mediacount}||{total_bytes_download}/{total_bytes})" + + async with sessionbuilder.sessionBuilder() as c: + for ele in medialist: + aws.append( + asyncio.create_task( + download(c, ele, model_id, username, job_progress) + ) + ) + task1 = overall_progress.add_task( + desc.format( + p_count=photo_count, + v_count=video_count, + a_count=audio_count, + skipped=skipped, + mediacount=len(medialist), + forced_skipped=forced_skipped, + sumcount=0, + total_bytes_download=0, + total_bytes=0, + ), + total=len(aws), + visible=True, + ) + for coro in asyncio.as_completed(aws): + try: + pack = await coro + common.log.debug(f"unpack {pack} count {len(pack)}") + media_type, num_bytes_downloaded = pack + except Exception as e: + common.log.traceback_(e) + common.log.traceback_(traceback.format_exc()) + media_type = "skipped" + num_bytes_downloaded = 0 + + total_downloaded += num_bytes_downloaded + total_bytes_downloaded = convert_num_bytes(total_downloaded) + total_bytes = convert_num_bytes(common.total_data) + if media_type == "images": + photo_count += 1 + + elif media_type == "videos": + video_count += 1 + elif media_type == "audios": + audio_count += 1 + elif media_type == "skipped": + skipped += 1 + elif media_type == "forced_skipped": + forced_skipped += 1 + sum += 1 + overall_progress.update( + task1, + description=desc.format( + p_count=photo_count, + v_count=video_count, + a_count=audio_count, + skipped=skipped, + forced_skipped=forced_skipped, + mediacount=len(medialist), + sumcount=sum, + total_bytes=total_bytes, + total_bytes_download=total_bytes_downloaded, + ), + refresh=True, + advance=1, + ) + overall_progress.remove_task(task1) + setDirectoriesDate() + common.log.warning( + f"[bold]{username}[/bold] ({photo_count+audio_count+video_count} downloads total [{video_count} videos, {audio_count} audios, {photo_count} photos] {forced_skipped} skipped, {skipped} failed)" + ) + return photo_count, video_count, audio_count, forced_skipped, skipped + + except Exception as E: + with exit.DelayedKeyboardInterrupt(): + raise E + finally: + await asyncio.get_event_loop().run_in_executor( + common.cache_thread, common.cache.close + ) + common.cache_thread.shutdown() + + +async def download(c, ele, model_id, username, progress): + async with common.maxfile_sem: + with paths.set_directory( + placeholder.Placeholders().getmediadir(ele, username, model_id) + ): + try: + if ele.url: + return await main_download( + c, + ele, + pathlib.Path(".").absolute(), + username, + model_id, + progress, + ) + elif ele.mpd: + return await alt_download( + c, + ele, + pathlib.Path(".").absolute(), + username, + model_id, + progress, + ) + except Exception as E: + common.log.debug(f"{get_medialog(ele)} exception {E}") + common.log.debug( + f"{get_medialog(ele)} exception {traceback.format_exc()}" + ) + return "skipped", 0 diff --git a/ofscraper/utils/misc.py b/ofscraper/utils/network.py similarity index 62% rename from ofscraper/utils/misc.py rename to ofscraper/utils/network.py index 582d9a097..b6198dd2b 100644 --- a/ofscraper/utils/misc.py +++ b/ofscraper/utils/network.py @@ -5,59 +5,10 @@ import ofscraper.classes.sessionbuilder as sessionbuilder import ofscraper.constants as constants -import ofscraper.db.operations as operations -import ofscraper.download.download as download -import ofscraper.download.downloadbatch as batchdownloader import ofscraper.utils.args as args_ import ofscraper.utils.config as config_ import ofscraper.utils.console as console_ -import ofscraper.utils.separate as seperate import ofscraper.utils.stdout as stdout -import ofscraper.utils.system as system - - -def medialist_filter(medialist, model_id, username): - log = logging.getLogger("shared") - if not args_.getargs().dupe: - media_ids = set( - operations.get_media_ids_downloaded(model_id=model_id, username=username) - ) - log.debug( - f"Number of unique media ids in database for {username}: {len(media_ids)}" - ) - medialist = seperate.separate_by_id(medialist, media_ids) - log.debug(f"Number of new mediaids with dupe ids removed: {len(medialist)}") - medialist = seperate.seperate_avatars(medialist) - log.debug(f"Removed previously downloaded avatars/headers") - log.debug(f"Final Number of media to download {len(medialist)}") - - else: - log.info(f"forcing all downloads media count {len(medialist)}") - return medialist - - -def download_picker(username, model_id, medialist): - medialist = medialist_filter(medialist, model_id, username) - - if len(medialist) == 0: - logging.getLogger("shared").error( - f"[bold]{username}[/bold] ({0} photos, {0} videos, {0} audios, {0} skipped, {0} failed)" - ) - return 0, 0, 0, 0, 0 - elif ( - system.getcpu_count() > 1 - and ( - len(medialist) >= config_.get_download_semaphores(config_.read_config()) * 5 - ) - and ( - args_.getargs().downloadthreads - or config_.get_threads(config_.read_config()) - ) - > 0 - ): - return batchdownloader.process_dicts(username, model_id, medialist) - else: - return download.process_dicts(username, model_id, medialist) def check_cdm():