From f6fa34649d0a27976b17687a1205589216116f6c Mon Sep 17 00:00:00 2001 From: datawhores Date: Thu, 30 Nov 2023 12:31:47 -0600 Subject: [PATCH] add scrape more users --- ofscraper/api/subscriptions.py | 47 ++++++++++++++++++++++++++-------- wiki | 2 +- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/ofscraper/api/subscriptions.py b/ofscraper/api/subscriptions.py index 16acf04e1..61ca362d7 100644 --- a/ofscraper/api/subscriptions.py +++ b/ofscraper/api/subscriptions.py @@ -25,24 +25,51 @@ async def get_subscriptions(headers, subscribe_count): - offsets = range(0, subscribe_count, 10) - tasks = [scrape_subscriptions(headers, offset) for offset in offsets] - subscriptions = await asyncio.gather(*tasks) - return list(chain.from_iterable(subscriptions)) + global tasks + global new_tasks + tasks = [asyncio.create_task(scrape_subscriptions(headers, offset)) for offset in range(0, subscribe_count+1, 10)] + tasks.extend([asyncio.create_task(scrape_subscriptions(headers, subscribe_count+1,recurs=True))] ) + new_tasks=[] + out=[] + while tasks: + done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) + for result in done: + try: + result=await result + except Exception as E: + log.debug(E) + continue + out.extend(result) + tasks = list(pending) + tasks.extend(new_tasks) + new_tasks=[] + return out + + + @retry(stop=stop_after_attempt(NUM_TRIES),wait=wait_random(min=constants.OF_MIN, max=constants.OF_MAX),reraise=True) -async def scrape_subscriptions(headers, offset=0) -> list: +async def scrape_subscriptions(headers, offset=0,recurs=False) -> list: async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=constants.API_REEQUEST_TIMEOUT, connect=None,sock_connect=None, sock_read=None)) as c: url = subscriptionsEP.format(offset) headers=auth.make_headers(auth.read_auth()) headers=auth.create_sign(url, headers) - async with c.request("get",url,ssl=ssl.create_default_context(cafile=certifi.where()),cookies=auth.add_cookies_aio(),headers=headers) as r: - if r.ok: - subscriptions = await r.json() - log.debug(f"usernames offset {offset}: usernames retrived -> {list(map(lambda x:x.get('username'),subscriptions))}") - return subscriptions + try: + async with c.request("get",url,ssl=ssl.create_default_context(cafile=certifi.where()),cookies=auth.add_cookies_aio(),headers=headers) as r: + if r.ok: + data=await r.json() + if len(data)==0: + None + elif recurs: + new_tasks.append(asyncio.create_task(scrape_subscriptions(c,recurs=True,offset=offset+len(data)))) + log.debug(f"usernames offset {offset}: usernames retrived -> {list(map(lambda x:x.get('username'),data))}") + return data + r.raise_for_status() + except Exception as E: + log.debug(E) + raise E def parse_subscriptions(subscriptions: list) -> list: datenow=arrow.now() diff --git a/wiki b/wiki index 947488840..ea4011b23 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 9474888402f8771c55559e48fc9d15bc05f2020f +Subproject commit ea4011b23b55786a24fcf2dd6156edd32fb35946