fix some issues with previous commits
split up large utilities to reduce the chance of circular imports
datawhores committed Jan 19, 2024
1 parent 9895613 commit 6dec3b3
Showing 108 changed files with 2,022 additions and 1,909 deletions.
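
Nearly all of the changed files follow one pattern: a monolithic utility module becomes a package of focused submodules, and each caller updates its import to the specific piece it uses. A sketch of the idea follows; the directory layout is illustrative, and only the two import lines at the end actually appear in this diff:

```python
# Old layout: one module per concern-cluster, so importing any utility pulls
# in all of them, and a utility that reaches back into the app risks a cycle:
#
#   ofscraper/utils/system.py   # space_checker + process helpers together
#   ofscraper/utils/args.py     # arg parsing + storage of parsed args together
#
# New layout (as implied by the import paths in this commit): packages of
# leaf modules with narrow responsibilities and few or no project imports:
#
#   ofscraper/utils/system/system.py      # process-level helpers
#   ofscraper/utils/system/free.py        # disk-space checks only
#   ofscraper/utils/args/globals.py       # read access to parsed args only
#   ofscraper/utils/context/stdout.py     # stdout-suppressing context manager
#   ofscraper/utils/context/run_async.py  # sync wrapper for async functions
#
# Callers then import only the leaf they need, as this commit does everywhere:
import ofscraper.utils.system.system as system
import ofscraper.utils.args.globals as global_args
```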
2 changes: 1 addition & 1 deletion ofscraper/__main__.py
@@ -2,7 +2,7 @@
 import multiprocessing

 import ofscraper.start as start
-import ofscraper.utils.system as system
+import ofscraper.utils.system.system as system


 def main():
File renamed without changes.
6 changes: 3 additions & 3 deletions ofscraper/interaction/like.py → ofscraper/actions/like.py
@@ -36,17 +36,17 @@
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 from ..api import timeline

 sem = semaphoreDelayed(1)
 log = logging.getLogger("shared")
-import ofscraper.utils.args as args_
+import ofscraper.utils.args.globals as global_args


 def get_posts(model_id, username):
-    args = args_.getargs()
+    args = global_args.getArgs()
     pinned_posts = []
     timeline_posts = []
     archived_posts = []
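
The other recurring change in this commit, `args_.getargs()` → `global_args.getArgs()`, suggests the parsed CLI arguments now live behind a small read-only accessor module. A hypothetical sketch of such a module; only the `getArgs()` name and the attributes read through it (`.after`, `.label`, `.user_list`, ...) come from the diff, the rest is assumed:

```python
# Hypothetical ofscraper/utils/args/globals.py: holds the argparse namespace
# produced once at startup, so any module can read it without re-parsing and
# without importing the heavier parser module (which could create a cycle).
import argparse

_args = None  # set once by the CLI entry point (assumed)


def setArgs(args: argparse.Namespace) -> None:
    """Assumed setter called after parsing; not shown in this diff."""
    global _args
    _args = args


def getArgs() -> argparse.Namespace:
    """Accessor used throughout this commit, e.g. getArgs().after."""
    if _args is None:
        raise RuntimeError("CLI arguments have not been parsed yet")
    return _args
```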
39 changes: 20 additions & 19 deletions ofscraper/utils/of.py → ofscraper/actions/scraper.py
@@ -24,16 +24,15 @@
 import ofscraper.classes.posts as posts_
 import ofscraper.db.operations as operations
 import ofscraper.filters.media.main as filters
-import ofscraper.utils.args as args_
+import ofscraper.utils.args.globals as global_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.config as config_
-import ofscraper.utils.stdout as stdout
-import ofscraper.utils.system as system
+import ofscraper.utils.context.stdout as stdout
+import ofscraper.utils.system.free as free

 log = logging.getLogger("shared")

-
-@system.space_checker
+@free.space_checker
 def process_messages(model_id, username):
     with stdout.lowstdout():
         messages_ = messages.get_messages(
@@ -70,13 +69,14 @@ def process_messages(model_id, username):
         # Update after database
         cache.set(
             "{model_id}_scrape_messages",
-            args_.getargs().after is not None and args_.getargs().after != 0,
+            global_args.getArgs().after is not None
+            and global_args.getArgs().after != 0,
         )

         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_paid_post(model_id, username):
     with stdout.lowstdout():
         paid_content = paid.get_paid_posts(username, model_id)
@@ -107,7 +107,7 @@ def process_paid_post(model_id, username):
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_stories(model_id, username):
     with stdout.lowstdout():
         stories = highlights.get_stories_post(model_id)
@@ -140,7 +140,7 @@ def process_stories(model_id, username):
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_highlights(model_id, username):
     with stdout.lowstdout():
         highlights_ = highlights.get_highlight_post(model_id)
@@ -173,7 +173,7 @@ def process_highlights(model_id, username):
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_timeline_posts(model_id, username, individual=False):
     with stdout.lowstdout():
         timeline_posts = (
@@ -218,11 +218,11 @@ def process_timeline_posts(model_id, username, individual=False):
                 username=username,
                 downloaded=False,
             )
-        cache.set("{model_id}_scrape_timeline", args_.getargs().after is not None)
+        cache.set("{model_id}_scrape_timeline", global_args.getArgs().after is not None)
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_archived_posts(model_id, username):
     with stdout.lowstdout():
         archived_posts = archive.get_archived_media(
@@ -264,11 +264,11 @@ def process_archived_posts(model_id, username):
                 username=username,
                 downloaded=False,
             )
-        cache.set("{model_id}_scrape_archived", args_.getargs().after is not None)
+        cache.set("{model_id}_scrape_archived", global_args.getArgs().after is not None)
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_pinned_posts(model_id, username):
     with stdout.lowstdout():
         pinned_posts = pinned.get_pinned_post(model_id)
@@ -307,7 +307,7 @@ def process_pinned_posts(model_id, username):
         return list(filter(lambda x: isinstance(x, media.Media), output))


-@system.space_checker
+@free.space_checker
 def process_profile(username) -> list:
     with stdout.lowstdout():
         user_profile = profile.scrape_profile(username)
@@ -329,7 +329,7 @@ def process_profile(username) -> list:
         return output


-@system.space_checker
+@free.space_checker
 def process_all_paid():
     with stdout.lowstdout():
         paid_content = paid.get_all_paid_posts()
@@ -399,17 +399,18 @@ def process_all_paid():
         return output


-@system.space_checker
+@free.space_checker
 def process_labels(model_id, username):
     with stdout.lowstdout():
         labels_ = labels_api.get_labels(model_id)

         labels_ = (
             labels_
-            if not args_.getargs().label
+            if not global_args.getArgs().label
             else list(
                 filter(
-                    lambda x: x.get("name").lower() in args_.getargs().label, labels_
+                    lambda x: x.get("name").lower() in global_args.getArgs().label,
+                    labels_,
                 )
             )
         )
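
Every `process_*` function above swaps `@system.space_checker` for `@free.space_checker`; the diff shows only the decorator's name and new module. A plausible sketch of what a disk-space guard like this does, with an invented threshold (the real limit and behavior are not part of this diff):

```python
import functools
import logging
import shutil

log = logging.getLogger("shared")
MIN_FREE_BYTES = 500 * 1024**2  # assumed limit; the real value is not shown here


def space_checker(func):
    """Refuse to start a scrape step when the working drive is nearly full."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        free = shutil.disk_usage(".").free
        if free < MIN_FREE_BYTES:
            log.warning("%s skipped: only %s bytes free", func.__name__, free)
            return []  # the decorated process_* functions return lists of media
        return func(*args, **kwargs)

    return wrapper
```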
24 changes: 13 additions & 11 deletions ofscraper/api/archive.py
@@ -29,12 +29,12 @@

 import ofscraper.classes.sessionbuilder as sessionbuilder
 import ofscraper.db.operations as operations
-import ofscraper.utils.args as args_
+import ofscraper.utils.args.globals as global_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 log = logging.getLogger("shared")
 attempt = contextvars.ContextVar("attempt")
@@ -52,7 +52,7 @@ async def scrape_archived_posts(
     attempt.set(0)
     sem = semaphoreDelayed(constants.getattr("AlT_SEM"))
     if timestamp and (
-        float(timestamp) > (args_.getargs().before or arrow.now()).float_timestamp
+        float(timestamp) > (global_args.getArgs().before or arrow.now()).float_timestamp
     ):
         return []
     if timestamp:
@@ -182,7 +182,9 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
     responseArray = []
     page_count = 0
     setCache = (
-        True if (args_.getargs().after == 0 or not args_.getargs().after) else False
+        True
+        if (global_args.getArgs().after == 0 or not global_args.getArgs().after)
+        else False
     )

     with Live(
@@ -193,7 +195,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
             operations.get_archived_postinfo(
                 model_id=model_id, username=username
             )
-            if not args_.getargs().no_cache
+            if not global_args.getArgs().no_cache
             else []
         )

@@ -210,7 +212,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
         rescan = (
             rescan
             or cache.get("{model_id}_scrape_archived")
-            and not args_.getargs().after
+            and not global_args.getArgs().after
         )
         after = after = (
             0 if rescan else forced_after or get_after(model_id, username)
@@ -243,8 +245,8 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
                         required_ids=set(
                             list(map(lambda x: x[0], splitArrays[0]))
                         ),
-                        timestamp=args_.getargs().after.float_timestamp
-                        if args_.getargs().after
+                        timestamp=global_args.getArgs().after.float_timestamp
+                        if global_args.getArgs().after
                         else None,
                     )
                 )
@@ -323,7 +325,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
             )
         )
         log.debug(f"[bold]Archived Count without Dupes[/bold] {len(unduped)} found")
-        if setCache and not args_.getargs().after:
+        if setCache and not global_args.getArgs().after:
             newCheck = {}
             for post in cache.get(f"archived_check_{model_id}", []) + list(
                 unduped.values()
@@ -340,8 +342,8 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)


 def get_after(model_id, username):
-    if args_.getargs().after:
-        return args_.getargs().after.float_timestamp
+    if global_args.getArgs().after:
+        return global_args.getArgs().after.float_timestamp
     curr = operations.get_archived_media(model_id=model_id, username=username)
     if cache.get(f"{model_id}_scrape_archived"):
         log.debug(
2 changes: 1 addition & 1 deletion ofscraper/api/highlights.py
@@ -31,7 +31,7 @@
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 log = logging.getLogger("shared")
 sem = None
2 changes: 1 addition & 1 deletion ofscraper/api/init.py
@@ -13,7 +13,7 @@

 from rich.console import Console

-import ofscraper.utils.stdout as stdout
+import ofscraper.utils.context.stdout as stdout

 from . import me

2 changes: 1 addition & 1 deletion ofscraper/api/labels.py
@@ -30,7 +30,7 @@
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 log = logging.getLogger("shared")
 attempt = contextvars.ContextVar("attempt")
15 changes: 8 additions & 7 deletions ofscraper/api/lists.py
@@ -27,11 +27,11 @@
 )

 import ofscraper.classes.sessionbuilder as sessionbuilder
-import ofscraper.utils.args as args_
+import ofscraper.utils.args.globals as global_args
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 log = logging.getLogger("shared")
 attempt = contextvars.ContextVar("attempt")
@@ -42,12 +42,13 @@
 async def get_otherlist():
     out = []
     if (
-        len(args_.getargs().user_list) >= 2
-        or constants.getattr("OFSCRAPER_RESERVED_LIST") not in args_.getargs().user_list
+        len(global_args.getArgs().user_list) >= 2
+        or constants.getattr("OFSCRAPER_RESERVED_LIST")
+        not in global_args.getArgs().user_list
     ):
         out.extend(await get_lists())
         out = list(
-            filter(lambda x: x.get("name").lower() in args_.getargs().user_list, out)
+            filter(lambda x: x.get("name").lower() in global_args.getArgs().user_list, out)
         )
         log.debug(
             f"User lists found on profile {list(map(lambda x:x.get('name').lower(),out))}"
@@ -58,10 +59,10 @@ async def get_otherlist():
 @run
 async def get_blacklist():
     out = []
-    if len(args_.getargs().black_list) >= 1:
+    if len(global_args.getArgs().black_list) >= 1:
         out.extend(await get_lists())
         out = list(
-            filter(lambda x: x.get("name").lower() in args_.getargs().black_list, out)
+            filter(lambda x: x.get("name").lower() in global_args.getArgs().black_list, out)
         )
         log.debug(
             f"Black lists found on profile {list(map(lambda x:x.get('name').lower(),out))}"
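
`get_otherlist` and `get_blacklist` are `async def` yet callable from synchronous code because of the `run` decorator, now imported from `ofscraper.utils.context.run_async`. A minimal sketch of what such a decorator typically looks like; the real implementation is not part of this diff:

```python
import asyncio
import functools


def run(coro_func):
    """Let synchronous callers invoke an async function as a plain function."""

    @functools.wraps(coro_func)
    def wrapper(*args, **kwargs):
        # Each call drives the coroutine to completion on a fresh event loop.
        return asyncio.run(coro_func(*args, **kwargs))

    return wrapper
```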
2 changes: 1 addition & 1 deletion ofscraper/api/me.py
@@ -22,9 +22,9 @@

 import ofscraper.classes.sessionbuilder as sessionbuilder
 import ofscraper.utils.constants as constants
+import ofscraper.utils.context.stdout as stdout
 import ofscraper.utils.encoding as encoding
 import ofscraper.utils.logger as logger
-import ofscraper.utils.stdout as stdout

 log = logging.getLogger("shared")

14 changes: 7 additions & 7 deletions ofscraper/api/messages.py
@@ -29,12 +29,12 @@

 import ofscraper.classes.sessionbuilder as sessionbuilder
 import ofscraper.db.operations as operations
-import ofscraper.utils.args as args_
+import ofscraper.utils.args.globals as global_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 log = logging.getLogger("shared")
 attempt = contextvars.ContextVar("attempt")
@@ -73,7 +73,7 @@ async def get_messages(model_id, username, forced_after=None, rescan=None):
     async with sessionbuilder.sessionBuilder() as c:
         oldmessages = (
             operations.get_messages_data(model_id=model_id, username=username)
-            if not args_.getargs().no_cache
+            if not global_args.getArgs().no_cache
             else []
         )
         log.trace(
@@ -97,11 +97,11 @@ async def get_messages(model_id, username, forced_after=None, rescan=None):
             {"date": arrow.now().float_timestamp, "id": None}
         ] + oldmessages

-        before = (args_.getargs().before or arrow.now()).float_timestamp
+        before = (global_args.getArgs().before or arrow.now()).float_timestamp
         rescan = (
             rescan
             or cache.get("{model_id}_scrape_messages")
-            and not args_.getargs().after
+            and not global_args.getArgs().after
         )
         after = after = (
             0 if rescan else forced_after or get_after(model_id, username)
@@ -441,8 +441,8 @@ def get_individual_post(model_id, postid, c=None):


 def get_after(model_id, username):
-    if args_.getargs().after:
-        return args_.getargs().after.float_timestamp
+    if global_args.getArgs().after:
+        return global_args.getArgs().after.float_timestamp
     if cache.get(f"{model_id}_scrape_messages"):
         log.debug(
             "Used after previously scraping entire timeline to make sure content is not missing"
2 changes: 1 addition & 1 deletion ofscraper/api/paid.py
@@ -31,7 +31,7 @@
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
-from ofscraper.utils.run_async import run
+from ofscraper.utils.context.run_async import run

 paid_content_list_name = "list"
 log = logging.getLogger("shared")
(The remaining changed files are not shown.)
