fix issue with downloader, check modes, manual and update package version
datawhores committed Mar 12, 2024
1 parent 9cb6474 commit f316155
Showing 5 changed files with 147 additions and 148 deletions.
8 changes: 5 additions & 3 deletions ofscraper/commands/check.py
@@ -5,6 +5,7 @@
import re
import threading
import time
import traceback

import arrow

@@ -102,15 +103,15 @@ def process_download_cart():
list(media_dict.values())[0],
)
)
media = medialist[0] if len(medialist) > 0 else None
media = medialist[0]
model_id = media.post.model_id
username = media.post.username
args = read_args.retriveArgs()
args.username = set([username])
write_args.setArgs(args)
selector.all_subs_helper()
log.info(
f"Downloading individual media for {username} {media.filename}"
f"Downloading individual media ({media.filename}) to disk for {username}"
)
operations.table_init_create(model_id=model_id, username=username)
values = downloadnormal.process_dicts(
@@ -126,7 +127,8 @@

except Exception as E:
app.update_downloadcart_cell(key, "[failed]")
log.debug(E)
log.traceback_(E)
log.traceback_(traceback.format_exc())
time.sleep(10)


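The check.py change drops the None fallback when taking the first cart item (a None there would have crashed on the very next attribute access anyway), rewords the per-item log message, and upgrades the failure path from a bare log.debug(E) to the project's log.traceback_ helper plus a formatted stack trace. A minimal sketch of the same logging pattern using the stdlib logger (log.traceback_ is assumed to behave like an ordinary leveled log call, and process_cart_item is a hypothetical stand-in for the cart body):

import logging
import traceback

log = logging.getLogger("shared")

def process_cart_item():
    raise RuntimeError("simulated cart failure")  # hypothetical failing work

try:
    process_cart_item()
except Exception as E:
    log.debug(E)                       # old behavior: exception message only
    log.debug(traceback.format_exc())  # new behavior: full stack trace as well

Logging only the exception object showed what failed but not where; the formatted traceback preserves the full call chain.
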
35 changes: 25 additions & 10 deletions ofscraper/commands/manual.py
@@ -7,6 +7,7 @@
import ofscraper.api.paid as paid
import ofscraper.api.profile as profile
import ofscraper.api.timeline as timeline
import ofscraper.classes.media as media_
import ofscraper.classes.posts as posts_
import ofscraper.classes.sessionbuilder as sessionbuilder
import ofscraper.db.operations as operations
@@ -64,28 +65,28 @@ def get_media_from_urls(urls):
if type == "post":
model_id = user_name_dict.get(model) or profile.get_id(model)
value = timeline.get_individual_post(postid, c=c)
media_dict.update(get_all_media(model_id, value))
media_dict.update(get_all_media(postid, model_id, value))
elif type == "msg":
model_id = model
value = messages_.get_individual_post(model_id, postid, c=c)
media_dict.update(get_all_media(model_id, value))
media_dict.update(get_all_media(postid, model_id, value))
elif type == "msg2":
model_id = user_name_dict.get(model) or profile.get_id(model)
value = messages_.get_individual_post(model_id, postid, c=c)
media_dict.update(get_all_media(model_id, value))
media_dict.update(get_all_media(postid, model_id, value))
elif type == "unknown":
value = unknown_type_helper(postid, c) or {}
model_id = value.get("author", {}).get("id")
media_dict.update(get_all_media(model_id, value))
media_dict.update(get_all_media(postid, model_id, value))
elif type == "highlights":
value = highlights_.get_individual_highlights(postid, c) or {}
model_id = value.get("userId")
media_dict.update(get_all_media(model_id, value, "highlights"))
media_dict.update(get_all_media(postid, model_id, value, "highlights"))
# special case
elif type == "stories":
value = highlights_.get_individual_stories(postid, c) or {}
model_id = value.get("userId")
media_dict.update(get_all_media(model_id, value, "stories"))
media_dict.update(get_all_media(postid, model_id, value, "stories"))
# special case
return media_dict

@@ -94,7 +95,7 @@ def unknown_type_helper(postid, client):
return timeline.get_individual_post(postid, client)


def get_all_media(model_id, value, inputtype=None):
def get_all_media(posts_id, model_id, value, inputtype=None):
media_dict = {}
value = value or {}
media = []
@@ -103,17 +104,31 @@ def get_all_media(model_id, value, inputtype=None):
user_name = profile.scrape_profile(model_id)["username"]
post_item = posts_.Post(value, model_id, user_name, responsetype=inputtype)
media = post_item.media
media = list(
filter(
lambda x: isinstance(x, media_.Media)
and (str(x.id) == str(posts_id) or str(x.postid) == str(posts_id)),
media,
)
)
if len(media) == 0:
media.extend(paid_failback(model_id, user_name))
media.extend(paid_failback(posts_id, model_id, user_name))
media_dict[model_id] = media
return media_dict


def paid_failback(id, username):
def paid_failback(post_id, id, username):
logging.getLogger("shared").debug(
"Using failback search because query return 0 media"
)
return of.process_paid_post(id, username)
data = of.process_paid_post(id, username)
return list(
filter(
lambda x: isinstance(x, media_.Media)
and (str(x.id) == post_id or str(x.postid) == post_id),
data,
)
)


def get_info(url):
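The manual.py changes thread the id parsed from the URL through get_all_media and paid_failback, then filter the resulting lists down to Media objects whose own id or parent post id matches it, which keeps stray items out of the media_dict (useful for the paid fallback, which can return media from more than one post). A self-contained sketch of the matching predicate (Media here is a hypothetical stand-in for ofscraper.classes.media.Media, assumed to expose id and postid attributes):

from dataclasses import dataclass

@dataclass
class Media:
    id: int
    postid: int

def matches(media, posts_id):
    # compare as strings so ids parsed from a URL match integer ids from the API
    return str(media.id) == str(posts_id) or str(media.postid) == str(posts_id)

items = [Media(id=1, postid=10), Media(id=2, postid=20)]
wanted = [m for m in items if isinstance(m, Media) and matches(m, "20")]
assert [m.id for m in wanted] == [2]

Note the asymmetry in the committed code: the filter in get_all_media coerces both sides with str(), while the one in paid_failback compares str(x.id) against post_id directly and therefore assumes the caller always passes a string.
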
86 changes: 41 additions & 45 deletions ofscraper/download/alt_downloadbatch.py
@@ -285,53 +285,48 @@ async def send_req_inner(c, ele, item, placeholderObj):
}
base_url = re.sub("[0-9a-z]*\.mpd$", "", ele.mpd, re.IGNORECASE)
url = f"{base_url}{item['origname']}"
old_total = total
try:
await common.send_msg((None, 0, total)) if total else None
async with sem_wrapper(common_globals.req_sem):
async with c.requests(url=url, headers=headers, params=params)() as l:
if l.ok:
await asyncio.get_event_loop().run_in_executor(
common_globals.cache_thread,
partial(
cache.set,
f"{item['name']}_headers",
{
"content-length": l.headers.get("content-length"),
"content-type": l.headers.get("content-type"),
},
),
)
new_total = int(l.headers["content-length"])
await common.send_msg((None, 0, new_total)) if not total else None
temp_file_logger(placeholderObj, ele, common_globals.innerlog.get())
item["total"] = new_total
total = item["total"]
if await check_forced_skip(ele, total) == 0:
item["total"] = 0
return item
elif total == resume_size:
None
else:
await download_fileobject_writerr(total, l, ele, placeholderObj)

await common.send_msg((None, 0, total)) if total else None
async with sem_wrapper(common_globals.req_sem):
async with c.requests(url=url, headers=headers, params=params)() as l:
if l.ok:
await asyncio.get_event_loop().run_in_executor(
common_globals.cache_thread,
partial(
cache.set,
f"{item['name']}_headers",
{
"content-length": l.headers.get("content-length"),
"content-type": l.headers.get("content-type"),
},
),
)
new_total = int(l.headers["content-length"])
await common.send_msg((None, 0, new_total)) if not total else None
temp_file_logger(placeholderObj, ele, common_globals.innerlog.get())
item["total"] = new_total
total = item["total"]
if await check_forced_skip(ele, total) == 0:
item["total"] = 0
return item
elif total == resume_size:
None
else:
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder status[/bold]: {l.status}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder text [/bold]: {await l.text_()}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder headers [/bold]: {l.headers}"
)
l.raise_for_status()
await download_fileobject_writerr(total, l, ele, placeholderObj)

await size_checker(placeholderObj.tempfilepath, ele, total)
return item
except Exception as E:
await common.send_msg((None, 0, -(new_total if not old_total else old_total)))
raise
else:
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder status[/bold]: {l.status}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder text [/bold]: {await l.text_()}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download data finder headers [/bold]: {l.headers}"
)
l.raise_for_status()

await size_checker(placeholderObj.tempfilepath, ele, total)
return item


async def download_fileobject_writerr(total, l, ele, placeholderObj):
@@ -376,6 +371,7 @@ async def download_fileobject_writerr(total, l, ele, placeholderObj):
(await asyncio.sleep(download_sleep)) if download_sleep else None
except Exception as E:
# reset download data
await common.send_msg((None, 0, -total))
raise E
finally:
# Close file if needed
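In alt_downloadbatch.py the try/except wrapped around the whole request is removed from send_req_inner, along with the old_total bookkeeping it needed: the old handler sent -(new_total if not old_total else old_total) to unwind the shared progress total on any failure. That rollback now lives in download_fileobject_writerr's own exception handler (the second hunk above), right next to the loop that actually writes data. A simplified sketch of the resulting shape; send_msg stands in for common.send_msg, and a message of (None, 0, n) is assumed to add n bytes to the shared progress total, with negative n subtracting:

import asyncio

async def send_msg(msg):
    print("progress update:", msg)  # stand-in for common.send_msg

async def failing_chunks():
    yield b"data"
    raise IOError("simulated mid-stream failure")

async def send_req_inner(total, chunks):
    await send_msg((None, 0, total))  # advertise this file's size up front
    await download_fileobject_writerr(total, chunks)

async def download_fileobject_writerr(total, chunks):
    try:
        async for chunk in chunks:
            pass  # write the chunk to the temp file and advance the progress bar
    except Exception:
        await send_msg((None, 0, -total))  # reset download data on failure
        raise

try:
    asyncio.run(send_req_inner(100, failing_chunks()))
except IOError:
    pass  # the rollback message was already sent before the re-raise

Pairing the subtraction with the write loop means the writer always rolls back exactly the total it was given, instead of send_req_inner guessing between the old and re-read totals.
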
94 changes: 44 additions & 50 deletions ofscraper/download/main_downloadbatch.py
@@ -230,59 +230,52 @@ async def send_req_inner(c, ele, tempholderObj, placeholderObj=None, total=None)
if resume_size == 0 or not total
else {"Range": f"bytes={resume_size}-{total}"}
)
old_total = total
try:
await common.send_msg((None, 0, total)) if total else None
async with sem_wrapper(common_globals.req_sem):
async with c.requests(url=ele.url, headers=headers)() as r:
if r.ok:
await asyncio.get_event_loop().run_in_executor(
common_globals.cache_thread,
partial(
cache.set,
f"{ele.id}_headers",
{
"content-length": r.headers.get("content-length"),
"content-type": r.headers.get("content-type"),
},
),
)
new_total = int(r.headers["content-length"])
await common.send_msg((None, 0, new_total)) if not total else None
total = new_total
content_type = r.headers.get("content-type").split("/")[-1]
content_type = get_unknown_content_type(ele)
if not placeholderObj:
placeholderObj = placeholder.Placeholders(ele, content_type)
await placeholderObj.init()
path_to_file_logger(
placeholderObj, ele, common_globals.innerlog.get()
)
if await check_forced_skip(ele, total) == 0:
total = 0
elif total == resume_size:
None
else:
await download_fileobject_writer(
r, ele, total, tempholderObj, placeholderObj
)
await common.send_msg((None, 0, total)) if total else None
async with sem_wrapper(common_globals.req_sem):
async with c.requests(url=ele.url, headers=headers)() as r:
if r.ok:
await asyncio.get_event_loop().run_in_executor(
common_globals.cache_thread,
partial(
cache.set,
f"{ele.id}_headers",
{
"content-length": r.headers.get("content-length"),
"content-type": r.headers.get("content-type"),
},
),
)
new_total = int(r.headers["content-length"])
await common.send_msg((None, 0, new_total)) if not total else None
total = new_total
content_type = r.headers.get("content-type").split("/")[-1]
content_type = get_unknown_content_type(ele)
if not placeholderObj:
placeholderObj = placeholder.Placeholders(ele, content_type)
await placeholderObj.init()
path_to_file_logger(placeholderObj, ele, common_globals.innerlog.get())
if await check_forced_skip(ele, total) == 0:
total = 0
elif total == resume_size:
None
else:
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download response status code [/bold]: {r.status}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download response text [/bold]: {await r.text_()}"
await download_fileobject_writer(
r, ele, total, tempholderObj, placeholderObj
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)}main download headers [/bold]: {r.headers}"
)
r.raise_for_status()
else:
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download response status code [/bold]: {r.status}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)} main download response text [/bold]: {await r.text_()}"
)
common_globals.innerlog.get().debug(
f"[bold] {get_medialog(ele)}main download headers [/bold]: {r.headers}"
)
r.raise_for_status()

await size_checker(tempholderObj.tempfilepath, ele, total)
return (total, tempholderObj.tempfilepath, placeholderObj)
except Exception as E:
await common.send_msg((None, 0, -(new_total if not old_total else old_total)))
raise E
await size_checker(tempholderObj.tempfilepath, ele, total)
return (total, tempholderObj.tempfilepath, placeholderObj)


async def download_fileobject_writer(r, ele, total, tempholderObj, placeholderObj):
Expand Down Expand Up @@ -331,6 +324,7 @@ async def download_fileobject_writer(r, ele, total, tempholderObj, placeholderOb
(await asyncio.sleep(download_sleep)) if download_sleep else None
except Exception as E:
# reset download data
await common.send_msg((None, 0, -total))
raise E
finally:
try:
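main_downloadbatch.py gets the same restructuring, and its old rollback expression shows why the pattern was fragile: new_total was only assigned after a successful header read, so a request that failed early, when no resume total was passed in, would hit an UnboundLocalError inside the except block and mask the original error. A minimal reproduction of that failure mode (send_msg is again a stand-in for common.send_msg):

import asyncio

async def send_msg(msg):
    print("progress update:", msg)

async def old_style(total=None):
    old_total = total
    try:
        raise ConnectionError("request failed before headers arrived")
        new_total = 100  # in the old code, only assigned once r.ok was true
    except Exception:
        # with old_total falsy, this reads new_total before assignment
        await send_msg((None, 0, -(new_total if not old_total else old_total)))
        raise

try:
    asyncio.run(old_style())
except UnboundLocalError:
    print("rollback crashed and masked the ConnectionError")

Moving the reset into download_fileobject_writer, where total is already final by the time any bytes are written, removes that window.
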