From bc03f3878efde80fb4dc4f66bc9d08656d5cc584 Mon Sep 17 00:00:00 2001
From: datawhores
Date: Mon, 11 Mar 2024 20:33:46 -0500
Subject: [PATCH] fix issue with check and manual mode caused by changes to api

---
 ofscraper/api/highlights.py  |  22 +--
 ofscraper/api/messages.py    |   4 +-
 ofscraper/api/paid.py        |   1 +
 ofscraper/api/timeline.py    |  22 ++-
 ofscraper/commands/check.py  | 364 +++++++++++++++++++----------------
 ofscraper/commands/manual.py | 108 ++++++-----
 6 files changed, 279 insertions(+), 242 deletions(-)

diff --git a/ofscraper/api/highlights.py b/ofscraper/api/highlights.py
index 6d8256c52..e3a904232 100644
--- a/ofscraper/api/highlights.py
+++ b/ofscraper/api/highlights.py
@@ -495,8 +495,8 @@ def get_highlightList(data):
     return []


-def get_individual_highlights(id, c=None):
-    return get_individual_stories(id, c)
+def get_individual_highlights(id):
+    return get_individual_stories(id)
     # with c.requests(constants.getattr("highlightSPECIFIC").format(id))() as r:
     #     if r.ok:
     #         log.trace(f"highlight raw highlight individua; {r.json()}")
@@ -508,12 +508,12 @@ def get_individual_highlights(id, c=None):


 def get_individual_stories(id, c=None):
-    # with c or sessionbuilder.sessionBuilder(backend="httpx") as c:
-    with c.requests(constants.getattr("storiesSPECIFIC").format(id))() as r:
-        if r.ok:
-            log.trace(f"highlight raw highlight individua; {r.json_()}")
-            return r.json()
-        else:
-            log.debug(f"[bold]highlight response status code:[/bold]{r.status}")
-            log.debug(f"[bold]highlightresponse:[/bold] {r.text_()}")
-            log.debug(f"[bold]highlight headers:[/bold] {r.headers}")
+    with sessionbuilder.sessionBuilder(backend="httpx") as c:
+        with c.requests(constants.getattr("storiesSPECIFIC").format(id))() as r:
+            if r.ok:
+                log.trace(f"highlight raw highlight individua; {r.json_()}")
+                return r.json()
+            else:
+                log.debug(f"[bold]highlight response status code:[/bold]{r.status}")
+                log.debug(f"[bold]highlightresponse:[/bold] {r.text_()}")
+                log.debug(f"[bold]highlight headers:[/bold] {r.headers}")
diff --git a/ofscraper/api/messages.py b/ofscraper/api/messages.py
index d1ca087e5..7d75f93b3 100644
--- a/ofscraper/api/messages.py
+++ b/ofscraper/api/messages.py
@@ -630,8 +630,8 @@ async def scrape_messages(
     return messages, new_tasks


-def get_individual_post(model_id, postid, c=None):
-    with c or sessionbuilder.sessionBuilder(
+def get_individual_post(model_id, postid):
+    with sessionbuilder.sessionBuilder(
         backend="httpx",
     ) as c:
         with c.requests(
diff --git a/ofscraper/api/paid.py b/ofscraper/api/paid.py
index eba372f8c..7cc77bf99 100644
--- a/ofscraper/api/paid.py
+++ b/ofscraper/api/paid.py
@@ -144,6 +144,7 @@ def set_check(unduped, model_id):
     cache.close()


+@run
 async def scrape_paid(c, username, job_progress=None, offset=0):
    """Takes headers to access onlyfans as an argument and then checks the purchased content
    url to look for any purchased content. If it finds some it will return it as a list.
diff --git a/ofscraper/api/timeline.py b/ofscraper/api/timeline.py index 743f44957..df5b4c065 100644 --- a/ofscraper/api/timeline.py +++ b/ofscraper/api/timeline.py @@ -455,16 +455,18 @@ def set_check(unduped, model_id, after): cache.close() -def get_individual_post(id, c=None): - # c=c or sessionbuilder.sessionBuilder(backend="httpx") - with c.requests(constants.getattr("INDIVIDUAL_TIMELINE").format(id))() as r: - if r.ok: - log.trace(f"post raw individual {r.json()}") - return r.json() - else: - log.debug(f"[bold]individual post response status code:[/bold]{r.status}") - log.debug(f"[bold]individual post response:[/bold] {r.text_()}") - log.debug(f"[bold]individual post headers:[/bold] {r.headers}") +def get_individual_post(id): + with sessionbuilder.sessionBuilder(backend="httpx") as c: + with c.requests(constants.getattr("INDIVIDUAL_TIMELINE").format(id))() as r: + if r.ok: + log.trace(f"post raw individual {r.json()}") + return r.json() + else: + log.debug( + f"[bold]individual post response status code:[/bold]{r.status}" + ) + log.debug(f"[bold]individual post response:[/bold] {r.text_()}") + log.debug(f"[bold]individual post headers:[/bold] {r.headers}") def get_after(model_id, username, forced_after=None): diff --git a/ofscraper/commands/check.py b/ofscraper/commands/check.py index 30adcc21a..7c1501ed1 100644 --- a/ofscraper/commands/check.py +++ b/ofscraper/commands/check.py @@ -30,6 +30,7 @@ import ofscraper.utils.settings as settings import ofscraper.utils.system.network as network from ofscraper.download.common.common import textDownloader +from ofscraper.utils.context.run_async import run log = logging.getLogger("shared") console = console_.get_shared_console() @@ -150,78 +151,83 @@ def checker(): def post_checker(): - user_dict = {} + ROWS = post_check_helper() + start_helper(ROWS) - links = list(url_helper()) - for ele in links: - name_match = re.search( - f"onlyfans.com/({constants.getattr('USERNAME_REGEX')}+$)", ele - ) - name_match2 = re.search(f"^{constants.getattr('USERNAME_REGEX')}+$", ele) - if name_match: - user_name = name_match.group(1) - log.info(f"Getting Full Timeline for {user_name}") - model_id = profile.get_id(user_name) - elif name_match2: - user_name = name_match2.group(0) - model_id = profile.get_id(user_name) - else: - continue - if user_dict.get(user_name): - continue - - oldtimeline = cache.get(f"timeline_check_{model_id}", default=[]) - user_dict[user_name] = {} - user_dict[user_name] = user_dict[user_name] or [] - if len(oldtimeline) > 0 and not read_args.retriveArgs().force: - user_dict[user_name].extend(oldtimeline) - else: - user_dict[user_name] = {} - user_dict[user_name] = user_dict[user_name] or [] - c = sessionbuilder.sessionBuilder(backend="httpx") - data = timeline.get_timeline_media(model_id, user_name, forced_after=0, c=c) - user_dict[user_name].extend(data) - cache.set( - f"timeline_check_{model_id}", - data, - expire=constants.getattr("DAY_SECONDS"), - ) - cache.close() - - # individual links - for ele in list( - filter( - lambda x: re.search( - f"onlyfans.com/{constants.getattr('NUMBER_REGEX')}+/{constants.getattr('USERNAME_REGEX')}+$", - x, - ), - links, +@run +async def post_check_helper(): + user_dict = {} + links = list(url_helper()) + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + for ele in links: + name_match = re.search( + f"onlyfans.com/({constants.getattr('USERNAME_REGEX')}+$)", ele ) - ): - name_match = re.search(f"/({constants.getattr('USERNAME_REGEX')}+$)", ele) - num_match = 
re.search(f"/({constants.getattr('NUMBER_REGEX')}+)", ele) - if name_match and num_match: + name_match2 = re.search(f"^{constants.getattr('USERNAME_REGEX')}+$", ele) + + if name_match: user_name = name_match.group(1) - post_id = num_match.group(1) + log.info(f"Getting Full Timeline for {user_name}") + model_id = profile.get_id(user_name) + elif name_match2: + user_name = name_match2.group(0) model_id = profile.get_id(user_name) - log.info(f"Getting Invidiual Link for {user_name}") - if not user_dict.get(user_name): - user_dict[name_match.group(1)] = {} - c = sessionbuilder.sessionBuilder(backend="httpx") - data = timeline.get_individual_post(post_id, c) + else: + continue + if user_dict.get(user_name): + continue + + oldtimeline = cache.get(f"timeline_check_{model_id}", default=[]) + user_dict[user_name] = {} + user_dict[user_name] = user_dict[user_name] or [] + if len(oldtimeline) > 0 and not read_args.retriveArgs().force: + user_dict[user_name].extend(oldtimeline) + else: + user_dict[user_name] = {} user_dict[user_name] = user_dict[user_name] or [] - user_dict[user_name].append(data) + data = await timeline.get_timeline_media( + model_id, user_name, forced_after=0, c=c + ) + user_dict[user_name].extend(data) + cache.set( + f"timeline_check_{model_id}", + data, + expire=constants.getattr("DAY_SECONDS"), + ) + cache.close() + + # individual links + for ele in list( + filter( + lambda x: re.search( + f"onlyfans.com/{constants.getattr('NUMBER_REGEX')}+/{constants.getattr('USERNAME_REGEX')}+$", + x, + ), + links, + ) + ): + name_match = re.search( + f"/({constants.getattr('USERNAME_REGEX')}+$)", ele + ) + num_match = re.search(f"/({constants.getattr('NUMBER_REGEX')}+)", ele) + if name_match and num_match: + user_name = name_match.group(1) + post_id = num_match.group(1) + model_id = profile.get_id(user_name) + log.info(f"Getting Invidiual Link for {user_name}") + if not user_dict.get(user_name): + user_dict[name_match.group(1)] = {} + data = timeline.get_individual_post(post_id) + user_dict[user_name] = user_dict[user_name] or [] + user_dict[user_name].append(data) ROWS = [] for user_name in user_dict.keys(): - downloaded = get_downloaded(user_name, model_id, True) + downloaded = await get_downloaded(user_name, model_id, True) media = get_all_found_media(user_name, user_dict[user_name]) ROWS.extend(row_gather(media, downloaded, user_name)) - reset_url() - set_count(ROWS) - network.check_cdm() - thread_starters(ROWS) + return ROWS def reset_url(): @@ -242,121 +248,142 @@ def set_count(ROWS): ele[0] = count + 1 -def message_checker(): - links = list(url_helper()) - ROWS = [] - for item in links: - num_match = re.search( - f"({constants.getattr('NUMBER_REGEX')}+)", item - ) or re.search(f"^({constants.getattr('NUMBER_REGEX')}+)$", item) - name_match = re.search(f"^{constants.getattr('USERNAME_REGEX')}+$", item) - if num_match: - model_id = num_match.group(1) - user_name = profile.scrape_profile(model_id)["username"] - elif name_match: - user_name = name_match.group(0) - model_id = profile.get_id(user_name) - else: - continue - log.info(f"Getting Messages/Paid content for {user_name}") - # messages - messages = None - oldmessages = cache.get(f"message_check_{model_id}", default=[]) - log.debug(f"Number of messages in cache {len(oldmessages)}") - - if len(oldmessages) > 0 and not read_args.retriveArgs().force: - messages = oldmessages - else: - messages = messages_.get_messages(model_id, user_name, forced_after=0) - cache.set( - f"message_check_{model_id}", - messages, - 
expire=constants.getattr("DAY_SECONDS"), - ) - oldpaid = cache.get(f"purchased_check_{model_id}", default=[]) - paid = None - # paid content - if len(oldpaid) > 0 and not read_args.retriveArgs().force: - paid = oldpaid - else: - paid = paid_.get_paid_posts(model_id, user_name) - cache.set( - f"purchased_check_{model_id}", - paid, - expire=constants.getattr("DAY_SECONDS"), - ) - media = get_all_found_media(user_name, messages + paid) - unduped = [] - id_set = set() - for ele in media: - if ele.id == None or ele.id not in id_set: - unduped.append(ele) - id_set.add(ele.id) - downloaded = get_downloaded(user_name, model_id, True) - - ROWS.extend(row_gather(unduped, downloaded, user_name)) - +def start_helper(ROWS): reset_url() set_count(ROWS) network.check_cdm() thread_starters(ROWS) +def message_checker(): + ROWS = message_checker_helper() + start_helper(ROWS) + + +@run +async def message_checker_helper(): + links = list(url_helper()) + ROWS = [] + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + for item in links: + num_match = re.search( + f"({constants.getattr('NUMBER_REGEX')}+)", item + ) or re.search(f"^({constants.getattr('NUMBER_REGEX')}+)$", item) + name_match = re.search(f"^{constants.getattr('USERNAME_REGEX')}+$", item) + if num_match: + model_id = num_match.group(1) + user_name = profile.scrape_profile(model_id)["username"] + elif name_match: + user_name = name_match.group(0) + model_id = profile.get_id(user_name) + else: + continue + log.info(f"Getting Messages/Paid content for {user_name}") + # messages + messages = None + oldmessages = cache.get(f"message_check_{model_id}", default=[]) + log.debug(f"Number of messages in cache {len(oldmessages)}") + + if len(oldmessages) > 0 and not read_args.retriveArgs().force: + messages = oldmessages + else: + messages = await messages_.get_messages( + model_id, user_name, forced_after=0 + ) + cache.set( + f"message_check_{model_id}", + messages, + expire=constants.getattr("DAY_SECONDS"), + ) + oldpaid = cache.get(f"purchased_check_{model_id}", default=[]) + paid = None + # paid content + if len(oldpaid) > 0 and not read_args.retriveArgs().force: + paid = oldpaid + else: + paid = await paid_.get_paid_posts(model_id, user_name, c=c) + cache.set( + f"purchased_check_{model_id}", + paid, + expire=constants.getattr("DAY_SECONDS"), + ) + media = get_all_found_media(user_name, messages + paid) + unduped = [] + id_set = set() + for ele in media: + if ele.id == None or ele.id not in id_set: + unduped.append(ele) + id_set.add(ele.id) + downloaded = await get_downloaded(user_name, model_id, True) + + ROWS.extend(row_gather(unduped, downloaded, user_name)) + return ROWS + + def purchase_checker(): + ROWS = purchase_checker_helper() + start_helper(ROWS) + + +@run +async def purchase_checker_helper(): user_dict = {} auth_requests.make_headers() ROWS = [] - for user_name in read_args.retriveArgs().username: - user_name = profile.scrape_profile(user_name)["username"] - user_dict[user_name] = user_dict.get(user_name, []) - model_id = profile.get_id(user_name) - oldpaid = cache.get(f"purchased_check_{model_id}", default=[]) - paid = None - - if len(oldpaid) > 0 and not read_args.retriveArgs().force: - paid = oldpaid - else: - paid = paid_.get_paid_posts(model_id, user_name) - cache.set( - f"purchased_check_{model_id}", - paid, - expire=constants.getattr("DAY_SECONDS"), - ) - downloaded = get_downloaded(user_name, model_id) - media = get_all_found_media(user_name, paid) - ROWS.extend(row_gather(media, downloaded, user_name)) - reset_url() - 
set_count(ROWS) - network.check_cdm() - thread_starters(ROWS) + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + for user_name in read_args.retriveArgs().username: + user_name = profile.scrape_profile(user_name)["username"] + user_dict[user_name] = user_dict.get(user_name, []) + model_id = profile.get_id(user_name) + oldpaid = cache.get(f"purchased_check_{model_id}", default=[]) + paid = None + + if len(oldpaid) > 0 and not read_args.retriveArgs().force: + paid = oldpaid + else: + paid = await paid_.get_paid_posts(model_id, user_name, c=c) + cache.set( + f"purchased_check_{model_id}", + paid, + expire=constants.getattr("DAY_SECONDS"), + ) + downloaded = await get_downloaded(user_name, model_id) + media = get_all_found_media(user_name, paid) + ROWS.extend(row_gather(media, downloaded, user_name)) + return ROWS def stories_checker(): + ROWS = stories_checker_helper() + start_helper(ROWS) + + +@run +async def stories_checker_helper(): user_dict = {} ROWS = [] - for user_name in read_args.retriveArgs().username: - user_name = profile.scrape_profile(user_name)["username"] - user_dict[user_name] = user_dict.get(user_name, []) - model_id = profile.get_id(user_name) - stories = highlights.get_stories_post(model_id) - highlights_ = highlights.get_highlight_post(model_id) - highlights_ = list( - map( - lambda x: posts_.Post(x, model_id, user_name, "highlights"), highlights_ + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + for user_name in read_args.retriveArgs().username: + user_name = profile.scrape_profile(user_name)["username"] + user_dict[user_name] = user_dict.get(user_name, []) + model_id = profile.get_id(user_name) + stories = await highlights.get_stories_post(model_id, c=c) + highlights_ = await highlights.get_highlight_post(model_id, c=c) + highlights_ = list( + map( + lambda x: posts_.Post(x, model_id, user_name, "highlights"), + highlights_, + ) + ) + stories = list( + map(lambda x: posts_.Post(x, model_id, user_name, "stories"), stories) ) - ) - stories = list( - map(lambda x: posts_.Post(x, model_id, user_name, "stories"), stories) - ) - downloaded = get_downloaded(user_name, model_id) - media = [] - [media.extend(ele.all_media) for ele in stories + highlights_] - ROWS.extend(row_gather(media, downloaded, user_name)) - reset_url() - set_count(ROWS) - network.check_cdm() - thread_starters(ROWS) + downloaded = await get_downloaded(user_name, model_id) + media = [] + [media.extend(ele.all_media) for ele in stories + highlights_] + ROWS.extend(row_gather(media, downloaded, user_name)) + return ROWS def url_helper(): @@ -374,11 +401,12 @@ def get_all_found_media(user_name, posts): return temp -def get_downloaded(user_name, model_id, paid=False): +@run +async def get_downloaded(user_name, model_id, paid=False): downloaded = {} operations.table_init_create(model_id=model_id, username=user_name) - paid = get_paid_ids(model_id, user_name) if paid else [] + paid = await get_paid_ids(model_id, user_name) if paid else [] [ downloaded.update({ele: downloaded.get(ele, 0) + 1}) for ele in operations.get_media_ids_downloaded( @@ -390,17 +418,21 @@ def get_downloaded(user_name, model_id, paid=False): return downloaded -def get_paid_ids(model_id, user_name): +@run +async def get_paid_ids(model_id, user_name): oldpaid = cache.get(f"purchased_check_{model_id}", default=[]) paid = None if len(oldpaid) > 0 and not read_args.retriveArgs().force: paid = oldpaid else: - paid = paid_.get_paid_posts(model_id, user_name) - cache.set( - f"purchased_check_{model_id}", paid, 
expire=constants.getattr("DAY_SECONDS") - ) + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + paid = await paid_.get_paid_posts(model_id, user_name, c=c) + cache.set( + f"purchased_check_{model_id}", + paid, + expire=constants.getattr("DAY_SECONDS"), + ) media = get_all_found_media(user_name, paid) media = list(filter(lambda x: x.canview == True, media)) return list(map(lambda x: x.id, media)) diff --git a/ofscraper/commands/manual.py b/ofscraper/commands/manual.py index ddf04e0ae..24e2d7e9c 100644 --- a/ofscraper/commands/manual.py +++ b/ofscraper/commands/manual.py @@ -17,6 +17,7 @@ import ofscraper.utils.constants as constants import ofscraper.utils.system.network as network from ofscraper.download.common.common import textDownloader +from ofscraper.utils.context.run_async import run def manual_download(urls=None): @@ -59,49 +60,48 @@ def get_media_from_urls(urls): user_name_dict = {} media_dict = {} post_dict = {} - with sessionbuilder.sessionBuilder(backend="httpx") as c: - for url in url_helper(urls): - response = get_info(url) - model = response[0] - postid = response[1] - type = response[2] - if type == "post": - model_id = user_name_dict.get(model) or profile.get_id(model) - value = timeline.get_individual_post(postid, c=c) - media_dict.update(get_all_media(postid, model_id, value)) - post_dict.update(get_post_item(model_id, value)) - elif type == "msg": - model_id = model - value = messages_.get_individual_post(model_id, postid, c=c) - media_dict.update(get_all_media(postid, model_id, value)) - post_dict.update(get_post_item(model_id, value)) - elif type == "msg2": - model_id = user_name_dict.get(model) or profile.get_id(model) - value = messages_.get_individual_post(model_id, postid, c=c) - media_dict.update(get_all_media(postid, model_id, value)) - post_dict.update(get_post_item(model_id, value)) - elif type == "unknown": - value = unknown_type_helper(postid, c=c) or {} - model_id = value.get("author", {}).get("id") - media_dict.update(get_all_media(postid, model_id, value)) - post_dict.update(get_post_item(model_id, value)) - elif type == "highlights": - value = highlights_.get_individual_highlights(postid, c=c) or {} - model_id = value.get("userId") - media_dict.update(get_all_media(postid, model_id, value, "highlights")) - post_dict.update(get_post_item(model_id, value, "highlights")) - # special case - elif type == "stories": - value = highlights_.get_individual_stories(postid, c=c) or {} - model_id = value.get("userId") - media_dict.update(get_all_media(postid, model_id, value, "stories")) - post_dict.update(get_post_item(model_id, value, "stories")) - # special case + for url in url_helper(urls): + response = get_info(url) + model = response[0] + postid = response[1] + type = response[2] + if type == "post": + model_id = user_name_dict.get(model) or profile.get_id(model) + value = timeline.get_individual_post(postid) + media_dict.update(get_all_media(postid, model_id, value)) + post_dict.update(get_post_item(model_id, value)) + elif type == "msg": + model_id = model + value = messages_.get_individual_post(model_id, postid) + media_dict.update(get_all_media(postid, model_id, value)) + post_dict.update(get_post_item(model_id, value)) + elif type == "msg2": + model_id = user_name_dict.get(model) or profile.get_id(model) + value = messages_.get_individual_post(model_id, postid) + media_dict.update(get_all_media(postid, model_id, value)) + post_dict.update(get_post_item(model_id, value)) + elif type == "unknown": + value = unknown_type_helper(postid) or {} + 
model_id = value.get("author", {}).get("id") + media_dict.update(get_all_media(postid, model_id, value)) + post_dict.update(get_post_item(model_id, value)) + elif type == "highlights": + value = highlights_.get_individual_highlights(postid) or {} + model_id = value.get("userId") + media_dict.update(get_all_media(postid, model_id, value, "highlights")) + post_dict.update(get_post_item(model_id, value, "highlights")) + # special case + elif type == "stories": + value = highlights_.get_individual_stories(postid) or {} + model_id = value.get("userId") + media_dict.update(get_all_media(postid, model_id, value, "stories")) + post_dict.update(get_post_item(model_id, value, "stories")) + # special case return media_dict, post_dict -def unknown_type_helper(postid, c=None): - return timeline.get_individual_post(postid, c=c) +def unknown_type_helper(postid): + return timeline.get_individual_post(postid) def get_post_item(model_id, value, inputtype=None): @@ -134,24 +134,26 @@ def get_all_media(posts_id, model_id, value, inputtype=None): return media_dict -def paid_failback(post_id, model_id, username): +@run +async def paid_failback(post_id, model_id, username): logging.getLogger("shared").debug( "Using failback search because query return 0 media" ) post_id = str(post_id) - data = paid.get_paid_posts(id, username) or [] - posts = list( - map(lambda x: posts_.Post(x, model_id, username, responsetype="paid"), data) - ) - output = [] - [output.extend(post.media) for post in posts] - return list( - filter( - lambda x: isinstance(x, media_.Media) - and (str(x.id) == post_id or str(x.postid) == post_id), - output, + async with sessionbuilder.sessionBuilder(backend="httpx") as c: + data = await paid.get_paid_posts(id, username, c=c) or [] + posts = list( + map(lambda x: posts_.Post(x, model_id, username, responsetype="paid"), data) + ) + output = [] + [output.extend(post.media) for post in posts] + return list( + filter( + lambda x: isinstance(x, media_.Media) + and (str(x.id) == post_id or str(x.postid) == post_id), + output, + ) ) - ) def get_info(url):