Skip to content

Commit

Permalink
split highlight function
Browse files Browse the repository at this point in the history
  • Loading branch information
datawhores committed Mar 21, 2024
1 parent 6315f4e commit e788d78
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
1 change: 1 addition & 0 deletions ofscraper/actions/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def normal_post_process():
model_id = ele.id
operations.table_init_create(model_id, ele.name)
combined_urls, posts = asyncio.run(OF.process_areas(ele, model_id))
return
download.download_process(
ele.name, model_id, combined_urls, posts=posts
)
Expand Down
27 changes: 23 additions & 4 deletions ofscraper/api/highlights.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,11 @@ async def scrape_stories(c, user_id, job_progress=None) -> list:

@run
async def get_highlight_post_progress(model_id, c=None):
    """Fetch a model's highlights in two stages, with progress reporting.

    First awaits ``get_highlight_list_progress`` to obtain the highlight
    lists for ``model_id``, then hands those lists to
    ``get_highlights_via_list_progress`` and returns its result.

    Args:
        model_id: Identifier of the model whose highlights are requested.
        c: Session/client object forwarded unchanged to both stages.
            NOTE(review): its exact type is not visible here; confirm
            against the callers.

    Returns:
        Whatever ``get_highlights_via_list_progress`` returns.
    """
    # Stage 1: collect the highlight lists for this model.
    collected_lists = await get_highlight_list_progress(model_id, c)
    # Stage 2: resolve those lists into the final result.
    resolved = await get_highlights_via_list_progress(collected_lists, c)
    return resolved


async def get_highlight_list_progress(model_id, c=None):
global sem
sem = semaphoreDelayed(1)

Expand Down Expand Up @@ -265,8 +270,14 @@ async def get_highlight_post_progress(model_id, c=None):
log.traceback_(traceback.format_exc())

tasks = new_tasks
tasks = []
overall_progress.remove_task(page_task)
return highlightLists


async def get_highlights_via_list_progress(highlightLists, c=None):
job_progress = progress_utils.highlights_progress
overall_progress = progress_utils.overall_progress
tasks = []
[
tasks.append(
asyncio.create_task(scrape_highlights(c, i, job_progress=job_progress))
Expand Down Expand Up @@ -303,9 +314,6 @@ async def get_highlight_post_progress(model_id, c=None):
log.traceback_(E)
log.traceback_(traceback.format_exc())
tasks = new_tasks
overall_progress.remove_task(page_task)
progress_utils.highlights_layout.visible = False

log.trace(
"highlight raw unduped {posts}".format(
posts="\n\n".join(
Expand All @@ -324,11 +332,18 @@ async def get_highlight_post_progress(model_id, c=None):
log.debug(
f"[bold]highlight Count with Dupes[/bold] {len(list(outdict.values()))} found"
)
overall_progress.remove_task(page_task)
progress_utils.highlights_layout.visible = False
return list(outdict.values())


@run
async def get_highlight_post(model_id, c=None):
    """Fetch a model's highlights in two stages, without progress reporting.

    First awaits ``get_highlight_list`` to obtain the highlight lists for
    ``model_id``, then hands those lists to ``get_highlights_via_list``
    and returns its result.

    Args:
        model_id: Identifier of the model whose highlights are requested.
        c: Session/client object forwarded unchanged to both stages.
            NOTE(review): its exact type is not visible here; confirm
            against the callers.

    Returns:
        Whatever ``get_highlights_via_list`` returns.
    """
    highlight_lists = await get_highlight_list(model_id, c)
    # ``get_highlights_via_list`` declares ``c`` as a required positional
    # parameter; omitting it raises TypeError, so the session must be
    # forwarded explicitly.
    return await get_highlights_via_list(highlight_lists, c)


async def get_highlight_list(model_id, c=None):
global sem
sem = semaphoreDelayed(1)

Expand Down Expand Up @@ -363,6 +378,10 @@ async def get_highlight_post(model_id, c=None):
log.traceback_(traceback.format_exc())

tasks = new_tasks
return highlightLists


async def get_highlights_via_list(highlightLists, c):
tasks = []
[
tasks.append(asyncio.create_task(scrape_highlights(c, i, job_progress=None)))
Expand Down
2 changes: 1 addition & 1 deletion ofscraper/const/req.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@
LOGIN_NUM_TRIES = 3
MAX_REQUEST_WORKERS = 20
API_REQUEST_THREADONLY = ["Windows", "Linux", "Mac"]
API_MAX_AREAS = 3
API_MAX_AREAS = 2
API_TIMEOUT_PER_TASKS = 35

0 comments on commit e788d78

Please sign in to comment.