Commit c957303

fix log date format
split frequent utils into multiple files
add runner for start
lots of refactoring

datawhores committed Jan 21, 2024
1 parent 6dec3b3 commit c957303

Showing 63 changed files with 628 additions and 481 deletions.
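The recurring change across these files is mechanical: every call site that formerly used ofscraper.utils.args.globals.getArgs() now goes through the split-out ofscraper.utils.args.read.retriveArgs() (spelling as committed). The diff confirms only the new module path and function name; a minimal sketch of what such a cached accessor could look like, with placeholder flags, is:

# Hypothetical sketch of ofscraper/utils/args/read.py; only the module path
# and the retriveArgs() name are confirmed by the hunks below.
import argparse

_args = None  # module-level cache so every caller shares one parsed namespace


def retriveArgs():
    # Parse argv on first use, then return the cached namespace.
    global _args
    if _args is None:
        parser = argparse.ArgumentParser(prog="ofscraper")
        # placeholder flags for illustration; the real option set is larger
        parser.add_argument("--after", default=None)
        parser.add_argument("--before", default=None)
        parser.add_argument("--no-cache", dest="no_cache", action="store_true")
        _args, _ = parser.parse_known_args()
    return _args

A cached accessor of this shape would explain why the hunks below call retriveArgs() repeatedly without re-parsing argv on every use.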
.gitignore: 2 changes (2 additions & 0 deletions)

@@ -27,6 +27,7 @@ share/python-wheels/
 *.egg
 MANIFEST
 
+
 # PyInstaller
 # Usually these files are written by a python script from a template
 # before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -147,3 +148,4 @@ core
 doc*
 out*
 *.txt
+*.log
ofscraper/__main__.py: 4 changes (2 additions & 2 deletions)

@@ -1,13 +1,13 @@
 #!/root/OF-Scraper/.venv/bin/python
 import multiprocessing
 
-import ofscraper.start as start
+import ofscraper.runner.load as load
 import ofscraper.utils.system.system as system
 
 
 def main():
     if system.get_parent():
-        start.main()
+        load.main()
 
 
 if __name__ == "__main__":
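Per the commit message line "add runner for start", the entry point now delegates to ofscraper.runner.load. The diff confirms only the module path and that it exposes main(); everything in this sketch body is an assumption:

# Hypothetical sketch of ofscraper/runner/load.py; only the module path and
# the main() entry point are confirmed by the __main__.py hunk above.
import ofscraper.utils.args.read as read_args  # args module introduced in this commit


def main():
    # assumed wiring: resolve CLI args once, then run the refactored startup
    args = read_args.retriveArgs()
    run(args)


def run(args):
    # placeholder for the real startup sequence
    print(f"starting ofscraper with {args}")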
ofscraper/actions/like.py: 4 changes (2 additions & 2 deletions)

@@ -42,11 +42,11 @@
 
 sem = semaphoreDelayed(1)
 log = logging.getLogger("shared")
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 
 
 def get_posts(model_id, username):
-    args = global_args.getArgs()
+    args = read_args.retriveArgs()
     pinned_posts = []
     timeline_posts = []
     archived_posts = []
ofscraper/actions/scraper.py: 21 changes (13 additions & 8 deletions)

@@ -24,7 +24,8 @@
 import ofscraper.classes.posts as posts_
 import ofscraper.db.operations as operations
 import ofscraper.filters.media.main as filters
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.areas as areas
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.context.stdout as stdout
 import ofscraper.utils.system.free as free
@@ -69,8 +70,8 @@ def process_messages(model_id, username):
     # Update after database
     cache.set(
         "{model_id}_scrape_messages",
-        global_args.getArgs().after is not None
-        and global_args.getArgs().after != 0,
+        read_args.retriveArgs().after is not None
+        and read_args.retriveArgs().after != 0,
     )
 
     return list(filter(lambda x: isinstance(x, media.Media), output))
@@ -218,7 +219,9 @@ def process_timeline_posts(model_id, username, individual=False):
         username=username,
         downloaded=False,
     )
-    cache.set("{model_id}_scrape_timeline", global_args.getArgs().after is not None)
+    cache.set(
+        "{model_id}_scrape_timeline", read_args.retriveArgs().after is not None
+    )
     return list(filter(lambda x: isinstance(x, media.Media), output))
 
 
@@ -264,7 +267,9 @@ def process_archived_posts(model_id, username):
         username=username,
         downloaded=False,
     )
-    cache.set("{model_id}_scrape_archived", global_args.getArgs().after is not None)
+    cache.set(
+        "{model_id}_scrape_archived", read_args.retriveArgs().after is not None
+    )
     return list(filter(lambda x: isinstance(x, media.Media), output))
 
 
@@ -406,10 +411,10 @@ def process_labels(model_id, username):
 
     labels_ = (
         labels_
-        if not global_args.getArgs().label
+        if not read_args.retriveArgs().label
        else list(
            filter(
-                lambda x: x.get("name").lower() in global_args.getArgs().label,
+                lambda x: x.get("name").lower() in read_args.retriveArgs().label,
                labels_,
            )
        )
@@ -461,7 +466,7 @@ def process_areas(ele, model_id) -> list:
     labels_dicts = []
 
     username = ele.name
-    final_post_areas = set(args_.get_download_area())
+    final_post_areas = set(areas.get_download_area())
     if "Profile" in final_post_areas:
         profile_dicts = process_profile(username)
     if "Pinned" in final_post_areas:
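process_areas() also resolves download areas through the new areas helper instead of args_. Only the module path and get_download_area() appear in the diff; a hypothetical sketch under that assumption:

# Hypothetical sketch of ofscraper/utils/args/areas.py; the `posts` attribute
# and the default list are invented for illustration.
import ofscraper.utils.args.read as read_args

# "Profile" and "Pinned" appear in process_areas above; the rest are assumed
DEFAULT_AREAS = ["Profile", "Pinned", "Timeline", "Archived", "Messages", "Labels"]


def get_download_area():
    # assumed behavior: honor an explicit selection, else scrape every area
    selected = read_args.retriveArgs().posts  # hypothetical flag
    return selected or DEFAULT_AREAS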
ofscraper/api/archive.py: 21 changes (11 additions & 10 deletions)

@@ -29,7 +29,7 @@
 
 import ofscraper.classes.sessionbuilder as sessionbuilder
 import ofscraper.db.operations as operations
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
@@ -52,7 +52,8 @@ async def scrape_archived_posts(
     attempt.set(0)
     sem = semaphoreDelayed(constants.getattr("AlT_SEM"))
     if timestamp and (
-        float(timestamp) > (global_args.getArgs().before or arrow.now()).float_timestamp
+        float(timestamp)
+        > (read_args.retriveArgs().before or arrow.now()).float_timestamp
     ):
         return []
     if timestamp:
@@ -183,7 +184,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
     page_count = 0
     setCache = (
         True
-        if (global_args.getArgs().after == 0 or not global_args.getArgs().after)
+        if (read_args.retriveArgs().after == 0 or not read_args.retriveArgs().after)
        else False
    )
 
@@ -195,7 +196,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
             operations.get_archived_postinfo(
                 model_id=model_id, username=username
             )
-            if not global_args.getArgs().no_cache
+            if not read_args.retriveArgs().no_cache
             else []
         )
 
@@ -212,7 +213,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
         rescan = (
             rescan
             or cache.get("{model_id}_scrape_archived")
-            and not global_args.getArgs().after
+            and not read_args.retriveArgs().after
         )
         after = after = (
             0 if rescan else forced_after or get_after(model_id, username)
@@ -245,8 +246,8 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
                         required_ids=set(
                             list(map(lambda x: x[0], splitArrays[0]))
                         ),
-                        timestamp=global_args.getArgs().after.float_timestamp
-                        if global_args.getArgs().after
+                        timestamp=read_args.retriveArgs().after.float_timestamp
+                        if read_args.retriveArgs().after
                         else None,
                     )
                 )
@@ -325,7 +326,7 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
             )
         )
         log.debug(f"[bold]Archived Count without Dupes[/bold] {len(unduped)} found")
-        if setCache and not global_args.getArgs().after:
+        if setCache and not read_args.retriveArgs().after:
             newCheck = {}
             for post in cache.get(f"archived_check_{model_id}", []) + list(
                 unduped.values()
@@ -342,8 +343,8 @@ async def get_archived_media(model_id, username, forced_after=None, rescan=None)
 
 
 def get_after(model_id, username):
-    if global_args.getArgs().after:
-        return global_args.getArgs().after.float_timestamp
+    if read_args.retriveArgs().after:
+        return read_args.retriveArgs().after.float_timestamp
     curr = operations.get_archived_media(model_id=model_id, username=username)
     if cache.get(f"{model_id}_scrape_archived"):
         log.debug(
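Several hunks in this file reshape the same timestamp-window check without changing its behavior: a post's cursor is compared against --before, falling back to "now" when the flag is unset. A self-contained rendering of the pattern using the arrow library (values invented):

# Demo of the `--before or now` window check reformatted above.
import arrow

before = None  # stands in for read_args.retriveArgs().before
timestamp = arrow.now().shift(days=-1).float_timestamp  # yesterday, as a cursor

# with --before unset the bound falls back to "now", so nothing is skipped
if float(timestamp) > (before or arrow.now()).float_timestamp:
    print("skip: newer than the requested window")
else:
    print("scrape: inside the window")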
ofscraper/api/lists.py: 16 changes (10 additions & 6 deletions)

@@ -27,7 +27,7 @@
 )
 
 import ofscraper.classes.sessionbuilder as sessionbuilder
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
@@ -42,13 +42,15 @@
 async def get_otherlist():
     out = []
     if (
-        len(global_args.getArgs().user_list) >= 2
+        len(read_args.retriveArgs().user_list) >= 2
         or constants.getattr("OFSCRAPER_RESERVED_LIST")
-        not in global_args.getArgs().user_list
+        not in read_args.retriveArgs().user_list
     ):
         out.extend(await get_lists())
     out = list(
-        filter(lambda x: x.get("name").lower() in global_args.getArgs().user_list, out)
+        filter(
+            lambda x: x.get("name").lower() in read_args.retriveArgs().user_list, out
+        )
     )
     log.debug(
         f"User lists found on profile {list(map(lambda x:x.get('name').lower(),out))}"
@@ -59,10 +61,12 @@ async def get_otherlist():
 @run
 async def get_blacklist():
     out = []
-    if len(global_args.getArgs().black_list) >= 1:
+    if len(read_args.retriveArgs().black_list) >= 1:
         out.extend(await get_lists())
     out = list(
-        filter(lambda x: x.get("name").lower() in global_args.getArgs().black_list, out)
+        filter(
+            lambda x: x.get("name").lower() in read_args.retriveArgs().black_list, out
+        )
     )
     log.debug(
         f"Black lists found on profile {list(map(lambda x:x.get('name').lower(),out))}"
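Both list helpers end with the same normalization: keep only the lists whose lowercased names the user asked for. A tiny standalone version of that filter (sample data invented):

# Standalone version of the name filter in get_otherlist/get_blacklist.
lists = [{"name": "Friends"}, {"name": "VIP"}, {"name": "Work"}]
user_list = ["vip", "work"]  # stands in for read_args.retriveArgs().user_list

out = list(filter(lambda x: x.get("name").lower() in user_list, lists))
print([x["name"] for x in out])  # ['VIP', 'Work']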
ofscraper/api/messages.py: 12 changes (6 additions & 6 deletions)

@@ -29,7 +29,7 @@
 
 import ofscraper.classes.sessionbuilder as sessionbuilder
 import ofscraper.db.operations as operations
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.cache as cache
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
@@ -73,7 +73,7 @@ async def get_messages(model_id, username, forced_after=None, rescan=None):
     async with sessionbuilder.sessionBuilder() as c:
         oldmessages = (
             operations.get_messages_data(model_id=model_id, username=username)
-            if not global_args.getArgs().no_cache
+            if not read_args.retriveArgs().no_cache
             else []
         )
         log.trace(
@@ -97,11 +97,11 @@ async def get_messages(model_id, username, forced_after=None, rescan=None):
             {"date": arrow.now().float_timestamp, "id": None}
         ] + oldmessages
 
-        before = (global_args.getArgs().before or arrow.now()).float_timestamp
+        before = (read_args.retriveArgs().before or arrow.now()).float_timestamp
         rescan = (
             rescan
             or cache.get("{model_id}_scrape_messages")
-            and not global_args.getArgs().after
+            and not read_args.retriveArgs().after
         )
         after = after = (
             0 if rescan else forced_after or get_after(model_id, username)
@@ -441,8 +441,8 @@ def get_individual_post(model_id, postid, c=None):
 
 
 def get_after(model_id, username):
-    if global_args.getArgs().after:
-        return global_args.getArgs().after.float_timestamp
+    if read_args.retriveArgs().after:
+        return read_args.retriveArgs().after.float_timestamp
     if cache.get(f"{model_id}_scrape_messages"):
         log.debug(
             "Used after previously scraping entire timeline to make sure content is not missing"
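One subtlety in the rescan expression above survives the rename: `and` binds tighter than `or`, so it evaluates as rescan or (cached-full-scrape and not --after). A two-line demonstration:

# `and` binds tighter than `or`: the cache flag only forces a rescan
# when --after was not supplied.
rescan, cached_full_scrape, after = False, True, None
print(rescan or cached_full_scrape and not after)  # True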
ofscraper/api/pinned.py: 9 changes (5 additions & 4 deletions)

@@ -28,7 +28,7 @@
 )
 
 import ofscraper.classes.sessionbuilder as sessionbuilder
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.console as console
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
@@ -49,7 +49,8 @@ async def scrape_pinned_posts(c, model_id, progress, timestamp=None, count=0) ->
     attempt.set(0)
 
     if timestamp and (
-        float(timestamp) > (global_args.getArgs().before or arrow.now()).float_timestamp
+        float(timestamp)
+        > (read_args.retriveArgs().before or arrow.now()).float_timestamp
     ):
         return []
     url = constants.getattr("timelinePinnedEP").format(model_id, count)
@@ -172,8 +173,8 @@ async def get_pinned_post(model_id):
                 c,
                 model_id,
                 job_progress,
-                timestamp=global_args.getArgs().after.float_timestamp
-                if global_args.getArgs().after
+                timestamp=read_args.retriveArgs().after.float_timestamp
+                if read_args.retriveArgs().after
                 else None,
             )
         )
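get_pinned_post() forwards the --after cursor only when it exists, via a conditional expression in the keyword argument. A compact standalone equivalent (the sample date is invented):

# Standalone equivalent of the conditional keyword argument above; `after`
# stands in for read_args.retriveArgs().after (an arrow datetime or None).
import arrow

after = arrow.get("2024-01-01")  # or None when --after was not passed


def scrape(timestamp=None):
    print("cursor:", timestamp)


scrape(timestamp=after.float_timestamp if after else None)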
ofscraper/api/subscriptions.py: 20 changes (11 additions & 9 deletions)

@@ -26,7 +26,7 @@
 )
 
 import ofscraper.classes.sessionbuilder as sessionbuilder
-import ofscraper.utils.args.globals as global_args
+import ofscraper.utils.args.read as read_args
 import ofscraper.utils.constants as constants
 from ofscraper.classes.semaphoreDelayed import semaphoreDelayed
 from ofscraper.utils.context.run_async import run
@@ -69,16 +69,17 @@ async def activeHelper(subscribe_count, c):
     global new_tasks
 
     if (
-        constants.getattr("OFSCRAPER_RESERVED_LIST") in global_args.getArgs().black_list
+        constants.getattr("OFSCRAPER_RESERVED_LIST")
+        in read_args.retriveArgs().black_list
         or constants.getattr("OFSCRAPER_ACTIVE_LIST")
-        in global_args.getArgs().black_list
+        in read_args.retriveArgs().black_list
     ):
         return []
     if (
         constants.getattr("OFSCRAPER_RESERVED_LIST")
-        not in global_args.getArgs().user_list
+        not in read_args.retriveArgs().user_list
         and constants.getattr("OFSCRAPER_ACTIVE_LIST")
-        not in global_args.getArgs().user_list
+        not in read_args.retriveArgs().user_list
     ):
         return []
     funct = scrape_subscriptions_active
@@ -111,16 +112,17 @@ async def expiredHelper(subscribe_count, c):
     global new_tasks
 
     if (
-        constants.getattr("OFSCRAPER_RESERVED_LIST") in global_args.getArgs().black_list
+        constants.getattr("OFSCRAPER_RESERVED_LIST")
+        in read_args.retriveArgs().black_list
         or constants.getattr("OFSCRAPER_EXPIRED_LIST")
-        in global_args.getArgs().black_list
+        in read_args.retriveArgs().black_list
     ):
         return []
     if (
         constants.getattr("OFSCRAPER_RESERVED_LIST")
-        not in global_args.getArgs().user_list
+        not in read_args.retriveArgs().user_list
         and constants.getattr("OFSCRAPER_EXPIRED_LIST")
-        not in global_args.getArgs().user_list
+        not in read_args.retriveArgs().user_list
    ):
        return []
    funct = scrape_subscriptions_disabled
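activeHelper and expiredHelper share a two-stage gate: bail out if a reserved scope name is blacklisted, then bail out unless one of the scope names was requested. A plain sketch of that logic with invented stand-ins for the constants.getattr values:

# Plain-Python sketch of the gating in activeHelper/expiredHelper; the
# literal names stand in for constants.getattr("OFSCRAPER_*_LIST") values.
RESERVED, ACTIVE = "ofscraper.main", "ofscraper.active"
black_list = []                 # read_args.retriveArgs().black_list
user_list = ["ofscraper.main"]  # read_args.retriveArgs().user_list

if RESERVED in black_list or ACTIVE in black_list:
    print("skip: scope excluded by the blacklist")
elif RESERVED not in user_list and ACTIVE not in user_list:
    print("skip: scope was not selected")
else:
    print("scrape active subscriptions")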