Commit 45c68de

fix issue with progress bar and add consts for label in all
datawhores committed Mar 22, 2024
1 parent 752f3e9 commit 45c68de
Showing 11 changed files with 70 additions and 38 deletions.
1 change: 1 addition & 0 deletions ofscraper/actions/process.py
@@ -170,6 +170,7 @@ def normal_post_process():
model_id = ele.id
operations.table_init_create(model_id, ele.name)
combined_urls, posts = asyncio.run(OF.process_areas(ele, model_id))
return
download.download_process(
ele.name, model_id, combined_urls, posts=posts
)
8 changes: 5 additions & 3 deletions ofscraper/api/archive.py
@@ -322,7 +322,7 @@ async def scrape_archived_posts(
scrape_archived_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=posts[-1]["postedAtPrecise"],
)
)
@@ -341,7 +341,7 @@ async def scrape_archived_posts(
scrape_archived_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=posts[-1]["postedAtPrecise"],
required_ids=required_ids,
)
@@ -361,5 +361,7 @@ async def scrape_archived_posts(

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None
return posts, new_tasks
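The cleanup this hunk reflows is a conditional expression used only for its side effect. A minimal sketch of the equivalent logic, reusing the diff's names (the plain if-statement form below is an illustration, not what the commit contains):

# Sketch: drop the per-request progress task once a scraper finishes, but only
# when a progress bar was actually passed in and a task was created.
def _cleanup_progress(job_progress, task):
    # Same effect as: job_progress.remove_task(task) if job_progress and task != None else None
    if job_progress is not None and task is not None:
        job_progress.remove_task(task)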
17 changes: 13 additions & 4 deletions ofscraper/api/highlights.py
@@ -213,7 +213,9 @@ async def scrape_stories(c, user_id, job_progress=None) -> list:

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None

return stories, new_tasks

@@ -382,9 +384,12 @@ async def get_highlight_list(model_id, c=None):


async def get_highlights_via_list(highlightLists, c):
job_progress = None
tasks = []
[
tasks.append(asyncio.create_task(scrape_highlights(c, i, job_progress=None)))
tasks.append(
asyncio.create_task(scrape_highlights(c, i, job_progress=job_progress))
)
for i in highlightLists
]

@@ -487,7 +492,9 @@ async def scrape_highlight_list(c, user_id, job_progress=None, offset=0) -> list

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None

return data, new_tasks

@@ -540,7 +547,9 @@ async def scrape_highlights(c, id, job_progress=None) -> list:

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None

return resp_data["stories"], new_tasks
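get_highlights_via_list now seeds its tasks from the shared job_progress variable (still None at this point) rather than a hard-coded None. A short sketch of the same fan-out written as a plain list comprehension instead of append-inside-a-comprehension; scrape_highlights and highlightLists are the diff's names, the wrapper function is illustrative:

import asyncio

async def build_highlight_tasks(c, highlightLists, job_progress=None):
    # One scrape task per highlight list; equivalent to the
    # [tasks.append(...) for i in highlightLists] form in the diff.
    return [
        asyncio.create_task(scrape_highlights(c, i, job_progress=job_progress))
        for i in highlightLists
    ]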

8 changes: 6 additions & 2 deletions ofscraper/api/labels.py
@@ -173,7 +173,9 @@ async def scrape_labels(c, model_id, job_progress=None, offset=0):

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None


@run
@@ -344,7 +346,9 @@ async def scrape_labelled_posts(c, label, model_id, job_progress=None, offset=0)

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None
job_progress.remove_task(
task
) if job_progress and task != None else None

return label, posts, new_tasks

18 changes: 9 additions & 9 deletions ofscraper/api/messages.py
@@ -136,7 +136,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
if len(IDArray) <= 2:
tasks.append(
asyncio.create_task(
scrape_messages(c, model_id, job_progress, message_id=None)
scrape_messages(c, model_id, progress=job_progress, message_id=None)
)
)

@@ -156,7 +156,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
message_id=None,
required_ids=set(splitArraysTime[0]),
)
@@ -168,7 +168,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
message_id=splitArraysID[0][0],
required_ids=set(splitArraysTime[0]),
)
@@ -181,7 +181,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
required_ids=set(splitArraysTime[i]),
message_id=splitArraysID[i - 1][-1],
)
@@ -195,7 +195,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
message_id=splitArraysID[-2][-1],
)
)
@@ -206,7 +206,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
message_id=splitArraysID[-1][-1],
)
)
@@ -217,7 +217,7 @@ async def get_messages_progress(model_id, username, forced_after=None, c=None):
scrape_messages(
c,
model_id,
job_progress,
progress=job_progress,
message_id=IDArray[0],
required_ids=set(postedAtArray[1:]),
)
@@ -282,7 +282,7 @@ async def get_messages(model_id, username, forced_after=None, c=None):
global sem
sem = sems.get_req_sem()
global after

job_progress = None
tasks = []
responseArray = []
# require a min num of posts to be returned
@@ -371,7 +371,7 @@ async def get_messages(model_id, username, forced_after=None, c=None):
if len(IDArray) <= 2:
tasks.append(
asyncio.create_task(
scrape_messages(c, model_id, job_progress=None, message_id=None)
scrape_messages(c, model_id, progress=job_progress, message_id=None)
)
)

5 changes: 4 additions & 1 deletion ofscraper/api/paid.py
@@ -101,11 +101,14 @@ async def get_paid_posts(model_id, username, c=None):
sem = sems.get_req_sem()
responseArray = []
tasks = []
job_progress = None

# async with c or sessionbuilder.sessionBuilder(
# limit=constants.getattr("API_MAX_CONNECTION")
# ) as c:
tasks.append(asyncio.create_task(scrape_paid(c, username, job_progress=None)))
tasks.append(
asyncio.create_task(scrape_paid(c, username, job_progress=job_progress))
)
while bool(tasks):
new_tasks = []
try:
6 changes: 4 additions & 2 deletions ofscraper/api/pinned.py
@@ -49,7 +49,7 @@ async def get_pinned_post(model_id, c=None):
scrape_pinned_posts(
c,
model_id,
job_progress,
job_progress=job_progress,
timestamp=read_args.retriveArgs().after.float_timestamp
if read_args.retriveArgs().after
else None,
@@ -102,7 +102,9 @@ async def get_pinned_post(model_id, c=None):
return list(outdict.values())


async def scrape_pinned_posts(c, model_id, progress, timestamp=None, count=0) -> list:
async def scrape_pinned_posts(
c, model_id, job_progress=None, timestamp=None, count=0
) -> list:
global sem
sem = semaphoreDelayed(constants.getattr("AlT_SEM"))
posts = None
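scrape_pinned_posts changes from a required positional progress parameter to a keyword job_progress=None, matching the keyword call above (job_progress=job_progress). A standalone illustration of why the names have to agree once callers switch to keyword form (toy functions, not project code):

# Keyword arguments are matched by parameter name, so renaming the parameter is
# what lets callers pass job_progress=... without a TypeError.
def old_style(c, model_id, progress, timestamp=None):
    return progress

def new_style(c, model_id, job_progress=None, timestamp=None):
    return job_progress

# old_style("c", 1, job_progress="bar")  # TypeError: unexpected keyword argument 'job_progress'
new_style("c", 1, job_progress="bar")    # OK; omitting it falls back to None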
31 changes: 17 additions & 14 deletions ofscraper/api/timeline.py
@@ -159,8 +159,9 @@ async def scrape_timeline_posts(

finally:
sem.release()
job_progress.remove_task(task) if job_progress and task else None

job_progress.remove_task(
task
) if job_progress and task != None else None
return posts, new_tasks


@@ -172,7 +173,6 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
min_posts = 50
responseArray = []
page_count = 0
counter = None
if not read_args.retriveArgs().no_cache:
oldtimeline = operations.get_timeline_postdates(
model_id=model_id, username=username
@@ -199,6 +199,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
"""
)
filteredArray = list(filter(lambda x: x >= after, postedAtArray))
filteredArray[1:]

job_progress = progress_utils.timeline_progress
overall_progress = progress_utils.overall_progress
@@ -241,7 +242,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=splitArrays[-2][-1],
)
)
@@ -252,7 +253,7 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=splitArrays[-1][-1],
)
)
@@ -261,7 +262,9 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
else:
tasks.append(
asyncio.create_task(
scrape_timeline_posts(c, model_id, job_progress=None, timestamp=after)
scrape_timeline_posts(
c, model_id, job_progress=job_progress, timestamp=after
)
)
)
page_task = overall_progress.add_task(
@@ -319,14 +322,12 @@ async def get_timeline_media_progress(model_id, username, forced_after=None, c=N
@run
async def get_timeline_media(model_id, username, forced_after=None, c=None):
global sem
global data_queue
global counter
job_progress = None

sem = sems.get_req_sem()
tasks = []
min_posts = 50
responseArray = []
data_queue = queue.Queue(maxsize=0)
if not read_args.retriveArgs().no_cache:
oldtimeline = operations.get_timeline_postdates(
model_id=model_id, username=username
@@ -369,7 +370,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
required_ids=set(splitArrays[0]),
timestamp=after,
)
@@ -381,7 +382,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
required_ids=set(splitArrays[i]),
timestamp=splitArrays[i - 1][-1],
)
@@ -395,7 +396,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=splitArrays[-2][-1],
)
)
@@ -406,7 +407,7 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
scrape_timeline_posts(
c,
model_id,
job_progress=None,
job_progress=job_progress,
timestamp=splitArrays[-1][-1],
)
)
@@ -415,7 +416,9 @@ async def get_timeline_media(model_id, username, forced_after=None, c=None):
else:
tasks.append(
asyncio.create_task(
scrape_timeline_posts(c, model_id, job_progress=None, timestamp=after)
scrape_timeline_posts(
c, model_id, job_progress=job_progress, timestamp=after
)
)
)
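Both timeline entry points now hand their local job_progress through to scrape_timeline_posts instead of None. A condensed sketch of the fan-out pattern the hunks above repeat; splitArrays, scrape_timeline_posts and job_progress are the diff's names, and the real code additionally issues the last two chunks without required_ids:

import asyncio

async def fan_out_timeline(c, model_id, splitArrays, after, job_progress=None):
    # Each chunk of cached post dates becomes one task; every later chunk
    # resumes from the last timestamp of the chunk before it.
    tasks = [
        asyncio.create_task(
            scrape_timeline_posts(
                c,
                model_id,
                job_progress=job_progress,
                required_ids=set(splitArrays[0]),
                timestamp=after,
            )
        )
    ]
    for i in range(1, len(splitArrays)):
        tasks.append(
            asyncio.create_task(
                scrape_timeline_posts(
                    c,
                    model_id,
                    job_progress=job_progress,
                    required_ids=set(splitArrays[i]),
                    timestamp=splitArrays[i - 1][-1],
                )
            )
        )
    return tasks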

1 change: 1 addition & 0 deletions ofscraper/const/config.py
@@ -53,3 +53,4 @@
HASHED_DEFAULT = False
EMPTY_MEDIA_DEFAULT = {}
DEFAULT_LOG_LEVEL = "DEBUG"
INCLUDED_LABELS_ALL = False
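INCLUDED_LABELS_ALL joins the other defaults in ofscraper/const/config.py and is read in areas.py through const.getattr. A rough sketch of how such a default is commonly resolved; the override-then-default order shown here is an assumption, not the actual implementation of ofscraper.utils.constants:

# Hypothetical lookup: prefer a user-supplied override when present, otherwise
# fall back to the module-level default (e.g. INCLUDED_LABELS_ALL = False).
import ofscraper.const.config as config_defaults

def lookup_const(name, overrides=None):
    overrides = overrides or {}
    return overrides.get(name, getattr(config_defaults, name))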
11 changes: 9 additions & 2 deletions ofscraper/utils/args/areas.py
@@ -1,4 +1,5 @@
import ofscraper.utils.args.read as read_args
import ofscraper.utils.constants as const


def get_like_area():
@@ -7,14 +8,19 @@ def get_like_area():
"Archived",
"Timeline",
"Pinned",
"Labels",
]
all_choices.append("Label") if const.getattr("INCLUDED_LABELS_ALL") else None
if len(read_args.retriveArgs().like_area) == 0:
post = set(read_args.retriveArgs().posts)
else:
post = set(read_args.retriveArgs().like_area)
if "All" in post:
post.update(set(all_choices))
elif "Labels*" or "Labels+" in post:
post.update(set(all_choices))
post.update({"Labels"})
post.discard("Labels*")
post.discard("Laabels+")
return list(
filter(
lambda x: x != "All"
@@ -38,13 +44,14 @@ def get_download_area():
"Purchased",
"Profile",
]
all_choices.append("Label") if const.getattr("INCLUDED_LABELS_ALL") else None
if len(read_args.retriveArgs().download_area) == 0:
post = set(read_args.retriveArgs().posts)
else:
post = set(read_args.retriveArgs().download_area)
if "All" in post:
post.update(set(all_choices))
elif "Labels*" or "Labels+":
elif "Labels*" or "Labels+" in post:
post.update(set(all_choices))
post.update({"Labels"})
post.discard("Labels*")
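A caution on the new elif branches in this file: because or binds more loosely than in, the condition "Labels*" or "Labels+" in post evaluates the non-empty string "Labels*" first and is therefore always true, and one of the discards spells the alias as "Laabels+". A precedence-safe sketch of the apparent intent, purely illustrative and not what the commit contains:

def resolve_label_aliases(post, all_choices):
    # Illustrative only: treat either alias as a request for every area plus
    # the plain "Labels" choice, then drop the alias spellings.
    post = set(post)
    if "All" in post:
        post.update(all_choices)
    elif {"Labels*", "Labels+"} & post:
        post.update(all_choices)
        post.add("Labels")
        post.discard("Labels*")
        post.discard("Labels+")
    return post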
2 changes: 1 addition & 1 deletion ofscraper/utils/args/helpers.py
@@ -86,7 +86,7 @@ def like_helper(x):
choices = set(["All", "Archived", "Timeline", "Pinned", "Labels"])
if isinstance(x, str):
words = re.split(",| ", x)
words = list(map(lambda x: re.sub("[^a-zA-Z-]", "", str.title(x)), words))
words = list(map(lambda x: re.sub("[^a-zA-Z-\*\+]", "", str.title(x)), words))
if (
len(list(filter(lambda y: y not in choices and y[1:] not in choices, words)))
> 0
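The widened character class in like_helper keeps * and + so the Labels* and Labels+ aliases survive normalization instead of collapsing to Labels. A small standalone demonstration of the before-and-after behaviour (illustrative, not project code):

import re

def normalize(word, pattern):
    # Title-case the word, then strip every character the class rejects.
    return re.sub(pattern, "", str.title(word))

print(normalize("labels*", r"[^a-zA-Z-]"))      # old class -> Labels
print(normalize("labels*", r"[^a-zA-Z-\*\+]"))  # new class -> Labels*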
