From bd16f80f6a399f3cf4e80ee5b86a47607e1d85c6 Mon Sep 17 00:00:00 2001 From: datawhores Date: Mon, 11 Mar 2024 11:36:41 -0500 Subject: [PATCH] make some changes to paid failback use --- ofscraper/commands/manual.py | 40 +++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/ofscraper/commands/manual.py b/ofscraper/commands/manual.py index c5b8974f1..59950e9bf 100644 --- a/ofscraper/commands/manual.py +++ b/ofscraper/commands/manual.py @@ -6,6 +6,7 @@ import ofscraper.api.paid as paid import ofscraper.api.profile as profile import ofscraper.api.timeline as timeline +import ofscraper.classes.media as media_ import ofscraper.classes.posts as posts_ import ofscraper.classes.sessionbuilder as sessionbuilder import ofscraper.db.operations as operations @@ -68,21 +69,21 @@ def get_media_from_urls(urls): value = timeline.get_individual_post( postid, c=sessionbuilder.sessionBuilder(backend="httpx") ) - media_dict.update(get_all_media(model_id, value)) + media_dict.update(get_all_media(postid, model_id, value)) post_dict.update(get_post_item(model_id, value)) elif type == "msg": model_id = model value = messages_.get_individual_post( model_id, postid, c=sessionbuilder.sessionBuilder(backend="httpx") ) - media_dict.update(get_all_media(model_id, value)) + media_dict.update(get_all_media(postid, model_id, value)) post_dict.update(get_post_item(model_id, value)) elif type == "msg2": model_id = user_name_dict.get(model) or profile.get_id(model) value = messages_.get_individual_post( model_id, postid, c=sessionbuilder.sessionBuilder(backend="httpx") ) - media_dict.update(get_all_media(model_id, value)) + media_dict.update(get_all_media(postid, model_id, value)) post_dict.update(get_post_item(model_id, value)) elif type == "unknown": value = ( @@ -92,7 +93,7 @@ def get_media_from_urls(urls): or {} ) model_id = value.get("author", {}).get("id") - media_dict.update(get_all_media(model_id, value)) + media_dict.update(get_all_media(postid, model_id, value)) post_dict.update(get_post_item(model_id, value)) elif type == "highlights": value = ( @@ -102,7 +103,7 @@ def get_media_from_urls(urls): or {} ) model_id = value.get("userId") - media_dict.update(get_all_media(model_id, value, "highlights")) + media_dict.update(get_all_media(postid, model_id, value, "highlights")) post_dict.update(get_post_item(model_id, value, "highlights")) # special case elif type == "stories": @@ -113,7 +114,7 @@ def get_media_from_urls(urls): or {} ) model_id = value.get("userId") - media_dict.update(get_all_media(model_id, value, "stories")) + media_dict.update(get_all_media(postid, model_id, value, "stories")) post_dict.update(get_post_item(model_id, value, "stories")) # special case return media_dict, post_dict @@ -131,7 +132,7 @@ def get_post_item(model_id, value, inputtype=None): return {post.id: post} -def get_all_media(model_id, value, inputtype=None): +def get_all_media(posts_id, model_id, value, inputtype=None): media_dict = {} value = value or {} media = [] @@ -140,18 +141,37 @@ def get_all_media(model_id, value, inputtype=None): user_name = profile.scrape_profile(model_id)["username"] post_item = posts_.Post(value, model_id, user_name, responsetype=inputtype) media = post_item.media + media = list( + filter( + lambda x: isinstance(x, media_.Media) + and (str(x.id) == str(posts_id) or str(x.postid) == str(posts_id)), + media, + ) + ) if len(media) == 0: - media.extend(paid_failback(model_id, user_name)) + media.extend(paid_failback(posts_id, model_id, user_name)) media_dict[model_id] = media return media_dict -def paid_failback(id, username): +def paid_failback(post_id, model_id, username): logging.getLogger("shared").debug( "Using failback search because query return 0 media" ) + post_id = str(post_id) data = paid.get_paid_posts(id, username) or [] - return list(map(lambda x: posts_.Post(x, id, username, responsetype="paid"), data)) + posts = list( + map(lambda x: posts_.Post(x, model_id, username, responsetype="paid"), data) + ) + output = [] + [output.extend(post.media) for post in posts] + return list( + filter( + lambda x: isinstance(x, media_.Media) + and (str(x.id) == post_id or str(x.postid) == post_id), + output, + ) + ) def get_info(url):