diff --git a/ofscraper/actions/process.py b/ofscraper/actions/process.py
index 0652bb022..75baac370 100644
--- a/ofscraper/actions/process.py
+++ b/ofscraper/actions/process.py
@@ -170,6 +170,7 @@ def normal_post_process():
         model_id = ele.id
         operations.table_init_create(model_id, ele.name)
         combined_urls, posts = asyncio.run(OF.process_areas(ele, model_id))
+
         return download.download_process(
             ele.name, model_id, combined_urls, posts=posts
         )
diff --git a/ofscraper/api/archive.py b/ofscraper/api/archive.py
index 259157192..a71b9f8ce 100644
--- a/ofscraper/api/archive.py
+++ b/ofscraper/api/archive.py
@@ -322,7 +322,7 @@ async def scrape_archived_posts(
                     scrape_archived_posts(
                         c,
                         model_id,
-                        job_progress=None,
+                        job_progress=job_progress,
                         timestamp=posts[-1]["postedAtPrecise"],
                     )
                 )
@@ -341,7 +341,7 @@ async def scrape_archived_posts(
                     scrape_archived_posts(
                         c,
                         model_id,
-                        job_progress=None,
+                        job_progress=job_progress,
                         timestamp=posts[-1]["postedAtPrecise"],
                         required_ids=required_ids,
                     )
@@ -361,5 +361,7 @@ async def scrape_archived_posts(
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return posts, new_tasks
diff --git a/ofscraper/api/highlights.py b/ofscraper/api/highlights.py
index f5191eeef..1b5b063c3 100644
--- a/ofscraper/api/highlights.py
+++ b/ofscraper/api/highlights.py
@@ -213,7 +213,9 @@ async def scrape_stories(c, user_id, job_progress=None) -> list:
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return stories, new_tasks
@@ -382,9 +384,12 @@ async def get_highlight_list(model_id, c=None):
 
 
 async def get_highlights_via_list(highlightLists, c):
+    job_progress = None
     tasks = []
     [
-        tasks.append(asyncio.create_task(scrape_highlights(c, i, job_progress=None)))
+        tasks.append(
+            asyncio.create_task(scrape_highlights(c, i, job_progress=job_progress))
+        )
         for i in highlightLists
     ]
@@ -487,7 +492,9 @@ async def scrape_highlight_list(c, user_id, job_progress=None, offset=0) -> list
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return data, new_tasks
@@ -540,7 +547,9 @@ async def scrape_highlights(c, id, job_progress=None) -> list:
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return resp_data["stories"], new_tasks
diff --git a/ofscraper/api/labels.py b/ofscraper/api/labels.py
index 067a12ced..0f772ea00 100644
--- a/ofscraper/api/labels.py
+++ b/ofscraper/api/labels.py
@@ -173,7 +173,9 @@ async def scrape_labels(c, model_id, job_progress=None, offset=0):
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
 
 
 @run
@@ -344,7 +346,9 @@ async def scrape_labelled_posts(c, label, model_id, job_progress=None, offset=0)
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return label, posts, new_tasks
diff --git a/ofscraper/api/messages.py b/ofscraper/api/messages.py
index 57f8aa334..f0b7723e5 100644
--- a/ofscraper/api/messages.py
+++ b/ofscraper/api/messages.py
@@ -136,7 +136,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
     if len(IDArray) <= 2:
         tasks.append(
             asyncio.create_task(
-                scrape_messages(c, model_id, job_progress, message_id=None)
+                scrape_messages(c, model_id, progress=job_progress, message_id=None)
             )
         )
@@ -156,7 +156,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     message_id=None,
                     required_ids=set(splitArraysTime[0]),
                 )
@@ -168,7 +168,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     message_id=splitArraysID[0][0],
                     required_ids=set(splitArraysTime[0]),
                 )
@@ -181,7 +181,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     required_ids=set(splitArraysTime[i]),
                     message_id=splitArraysID[i - 1][-1],
                 )
@@ -195,7 +195,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     message_id=splitArraysID[-2][-1],
                 )
             )
@@ -206,7 +206,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     message_id=splitArraysID[-1][-1],
                 )
             )
@@ -217,7 +217,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
                 scrape_messages(
                     c,
                     model_id,
-                    job_progress,
+                    progress=job_progress,
                     message_id=IDArray[0],
                     required_ids=set(postedAtArray[1:]),
                 )
@@ -282,7 +282,7 @@ async def get_messages(model_id, username, forced_after=None, c=None):
     global sem
     sem = sems.get_req_sem()
     global after
-
+    job_progress = None
     tasks = []
     responseArray = []
     # require a min num of posts to be returned
@@ -371,7 +371,7 @@ async def get_messages(model_id, username, forced_after=None, c=None):
     if len(IDArray) <= 2:
         tasks.append(
             asyncio.create_task(
-                scrape_messages(c, model_id, job_progress=None, message_id=None)
+                scrape_messages(c, model_id, progress=job_progress, message_id=None)
            )
        )
diff --git a/ofscraper/api/paid.py b/ofscraper/api/paid.py
index b57559cc5..c6f602f62 100644
--- a/ofscraper/api/paid.py
+++ b/ofscraper/api/paid.py
@@ -101,11 +101,14 @@ async def get_paid_posts(model_id, username, c=None):
     sem = sems.get_req_sem()
     responseArray = []
     tasks = []
+    job_progress = None
 
     # async with c or sessionbuilder.sessionBuilder(
     #     limit=constants.getattr("API_MAX_CONNECTION")
     # ) as c:
-    tasks.append(asyncio.create_task(scrape_paid(c, username, job_progress=None)))
+    tasks.append(
+        asyncio.create_task(scrape_paid(c, username, job_progress=job_progress))
+    )
     while bool(tasks):
         new_tasks = []
         try:
diff --git a/ofscraper/api/pinned.py b/ofscraper/api/pinned.py
index b1f5d43b5..7328c36b7 100644
--- a/ofscraper/api/pinned.py
+++ b/ofscraper/api/pinned.py
@@ -49,7 +49,7 @@ async def get_pinned_post(model_id, c=None):
             scrape_pinned_posts(
                 c,
                 model_id,
-                job_progress,
+                job_progress=job_progress,
                 timestamp=read_args.retriveArgs().after.float_timestamp
                 if read_args.retriveArgs().after
                 else None,
@@ -102,7 +102,9 @@ async def get_pinned_post(model_id, c=None):
     return list(outdict.values())
 
 
-async def scrape_pinned_posts(c, model_id, progress, timestamp=None, count=0) -> list:
+async def scrape_pinned_posts(
+    c, model_id, job_progress=None, timestamp=None, count=0
+) -> list:
     global sem
     sem = semaphoreDelayed(constants.getattr("AlT_SEM"))
     posts = None
diff --git a/ofscraper/api/timeline.py b/ofscraper/api/timeline.py
index f9e56955f..f1ca349d7 100644
--- a/ofscraper/api/timeline.py
+++ b/ofscraper/api/timeline.py
@@ -159,8 +159,9 @@ async def scrape_timeline_posts(
     finally:
         sem.release()
-        job_progress.remove_task(task) if job_progress and task else None
-
+        job_progress.remove_task(
+            task
+        ) if job_progress and task is not None else None
     return posts, new_tasks
@@ -172,7 +173,6 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
     min_posts = 50
     responseArray = []
     page_count = 0
-    counter = None
     if not read_args.retriveArgs().no_cache:
         oldtimeline = operations.get_timeline_postdates(
             model_id=model_id, username=username
@@ -199,6 +199,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
         """
     )
     filteredArray = list(filter(lambda x: x >= after, postedAtArray))
+    filteredArray = filteredArray[1:]
 
     job_progress = progress_utils.timeline_progress
     overall_progress = progress_utils.overall_progress
@@ -241,7 +242,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
                     scrape_timeline_posts(
                         c,
                         model_id,
-                        job_progress=None,
+                        job_progress=job_progress,
                         timestamp=splitArrays[-2][-1],
                     )
                 )
@@ -252,7 +253,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
                    scrape_timeline_posts(
                        c,
                        model_id,
-                        job_progress=None,
+                        job_progress=job_progress,
                        timestamp=splitArrays[-1][-1],
                    )
                )
@@ -261,7 +262,9 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
     else:
         tasks.append(
             asyncio.create_task(
-                scrape_timeline_posts(c, model_id, job_progress=None, timestamp=after)
+                scrape_timeline_posts(
+                    c, model_id, job_progress=job_progress, timestamp=after
+                )
             )
         )
     page_task = overall_progress.add_task(
@@ -319,14 +322,12 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
 @run
 async def get_timeline_media(model_id, username, forced_after=None, c=None):
     global sem
-    global data_queue
-    global counter
+    job_progress = None
     sem = sems.get_req_sem()
 
     tasks = []
     min_posts = 50
     responseArray = []
-    data_queue = queue.Queue(maxsize=0)
     if not read_args.retriveArgs().no_cache:
         oldtimeline = operations.get_timeline_postdates(
             model_id=model_id, username=username
@@ -369,7 +370,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
                 scrape_timeline_posts(
                     c,
                     model_id,
-                    job_progress=None,
+                    job_progress=job_progress,
                     required_ids=set(splitArrays[0]),
                     timestamp=after,
                 )
@@ -381,7 +382,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
                 scrape_timeline_posts(
                     c,
                     model_id,
-                    job_progress=None,
+                    job_progress=job_progress,
                     required_ids=set(splitArrays[i]),
                     timestamp=splitArrays[i - 1][-1],
                 )
@@ -395,7 +396,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
                 scrape_timeline_posts(
                     c,
                     model_id,
-                    job_progress=None,
+                    job_progress=job_progress,
                     timestamp=splitArrays[-2][-1],
                 )
             )
@@ -406,7 +407,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
                 scrape_timeline_posts(
                     c,
                     model_id,
-                    job_progress=None,
+                    job_progress=job_progress,
                     timestamp=splitArrays[-1][-1],
                 )
             )
@@ -415,7 +416,9 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
     else:
         tasks.append(
             asyncio.create_task(
-                scrape_timeline_posts(c, model_id, job_progress=None, timestamp=after)
+                scrape_timeline_posts(
+                    c, model_id, job_progress=job_progress, timestamp=after
+                )
             )
         )
diff --git a/ofscraper/const/config.py b/ofscraper/const/config.py
index 7a0f94257..d747d09e3 100644
--- a/ofscraper/const/config.py
+++ b/ofscraper/const/config.py
@@ -53,3 +53,4 @@
 HASHED_DEFAULT = False
 EMPTY_MEDIA_DEFAULT = {}
 DEFAULT_LOG_LEVEL = "DEBUG"
+INCLUDED_LABELS_ALL = False
diff --git a/ofscraper/utils/args/areas.py b/ofscraper/utils/args/areas.py
index b8e603d48..674443257 100644
--- a/ofscraper/utils/args/areas.py
+++ b/ofscraper/utils/args/areas.py
@@ -1,4 +1,5 @@
 import ofscraper.utils.args.read as read_args
+import ofscraper.utils.constants as const
 
 
 def get_like_area():
@@ -7,14 +8,19 @@ def get_like_area():
         "Archived",
         "Timeline",
         "Pinned",
-        "Labels",
     ]
+    all_choices.append("Labels") if const.getattr("INCLUDED_LABELS_ALL") else None
     if len(read_args.retriveArgs().like_area) == 0:
         post = set(read_args.retriveArgs().posts)
     else:
         post = set(read_args.retriveArgs().like_area)
     if "All" in post:
         post.update(set(all_choices))
+    elif "Labels*" in post or "Labels+" in post:
+        post.update(set(all_choices))
+        post.update({"Labels"})
+        post.discard("Labels*")
+        post.discard("Labels+")
     return list(
         filter(
             lambda x: x != "All"
@@ -38,13 +44,14 @@ def get_download_area():
         "Purchased",
         "Profile",
     ]
+    all_choices.append("Labels") if const.getattr("INCLUDED_LABELS_ALL") else None
     if len(read_args.retriveArgs().download_area) == 0:
         post = set(read_args.retriveArgs().posts)
     else:
         post = set(read_args.retriveArgs().download_area)
     if "All" in post:
         post.update(set(all_choices))
-    elif "Labels*" or "Labels+":
+    elif "Labels*" in post or "Labels+" in post:
         post.update(set(all_choices))
         post.update({"Labels"})
         post.discard("Labels*")
diff --git a/ofscraper/utils/args/helpers.py b/ofscraper/utils/args/helpers.py
index d5ed8feb9..e3d0ecfe6 100644
--- a/ofscraper/utils/args/helpers.py
+++ b/ofscraper/utils/args/helpers.py
@@ -86,7 +86,7 @@ def like_helper(x):
     choices = set(["All", "Archived", "Timeline", "Pinned", "Labels"])
     if isinstance(x, str):
         words = re.split(",| ", x)
-        words = list(map(lambda x: re.sub("[^a-zA-Z-]", "", str.title(x)), words))
+        words = list(map(lambda x: re.sub(r"[^a-zA-Z-\*\+]", "", str.title(x)), words))
     if (
         len(list(filter(lambda y: y not in choices and y[1:] not in choices, words)))
         > 0
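
Notes on the patch (illustrative sketches, not part of the diff):

The `Labels*` / `Labels+` handling in areas.py depends on testing each selector
for membership separately. Python parses `"Labels*" or "Labels+" in post` as
`("Labels*") or ("Labels+" in post)`, and a non-empty string literal is always
truthy, so that form takes the branch for every input. A minimal sketch of the
corrected branch; the `post` and `all_choices` values below are illustrative
stand-ins for what the real helpers read from read_args.retriveArgs():

    # Stand-in values; the real ones come from the parsed CLI args.
    all_choices = ["Archived", "Timeline", "Pinned"]
    post = {"Labels*", "Timeline"}

    # The always-truthy form: ("Labels*") or ("Labels+" in post) == "Labels*".
    assert ("Labels*" or "Labels+" in post) == "Labels*"

    # Corrected form: each selector is tested for membership explicitly.
    if "All" in post:
        post.update(all_choices)
    elif "Labels*" in post or "Labels+" in post:
        post.update(all_choices)
        post.add("Labels")
        post.discard("Labels*")
        post.discard("Labels+")

    print(sorted(post))  # ['Archived', 'Labels', 'Pinned', 'Timeline']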
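
The explicit None test in the scrapers' `finally:` cleanup matters because,
assuming `job_progress` is a `rich.progress.Progress` instance (as ofscraper's
progress_utils provides), `Progress.add_task` returns integer task ids starting
at 0, so the earlier `job_progress and task` condition silently skipped removing
the first task. A short sketch under that assumption:

    from rich.progress import Progress

    job_progress = Progress()
    task = job_progress.add_task("scraping", total=None)  # first task id is 0

    # Truthiness check: task id 0 is falsy, so cleanup is wrongly skipped.
    if job_progress and task:
        pass  # not reached for the first task

    # Explicit None check removes the task regardless of its id.
    if job_progress and task is not None:
        job_progress.remove_task(task)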