From 7bf4e4b8df8f353addba2df4fd7f7de888f5434e Mon Sep 17 00:00:00 2001 From: datawhores Date: Wed, 24 Jul 2024 17:24:11 -0500 Subject: [PATCH] partial commit for hls --- ofscraper/classes/media.py | 12 ++++- ofscraper/download/alt_download.py | 73 ++++++++++++++++---------- ofscraper/download/utils/alt/params.py | 8 +++ 3 files changed, 62 insertions(+), 31 deletions(-) diff --git a/ofscraper/classes/media.py b/ofscraper/classes/media.py index b0fe628c3..f853c1121 100644 --- a/ofscraper/classes/media.py +++ b/ofscraper/classes/media.py @@ -35,6 +35,7 @@ def __init__(self, media, count, post): self._cached_parse_mpd = None self._mpd = None self._log = None + self._hls=None def __eq__(self, other): return self.postid == other.postid @@ -198,8 +199,8 @@ def mpd(self): @property def hls(self): - if self._mpd: - return self._mpd + if self._hls: + return self._hls elif self.protected is False: return None return ( @@ -277,6 +278,13 @@ def hls_signature(self): .get("hls", {}) .get("CloudFront-Signature") ) + @property + def hls_header(self): + return f"CloudFront-Policy={self.hls_policy}; CloudFront-Signature={self.hls_signature}; CloudFront-Key-Pair-Id={self.hls_keypair}" + + @property + def hls_base(self): + return re.sub(r"[a-z0-9]+.m3u8$","",self.hls) @property def mpdout(self): diff --git a/ofscraper/download/alt_download.py b/ofscraper/download/alt_download.py index bcf7e5dfc..5137f2362 100644 --- a/ofscraper/download/alt_download.py +++ b/ofscraper/download/alt_download.py @@ -34,7 +34,7 @@ media_item_keys_alt, media_item_post_process_alt, ) -from ofscraper.download.utils.alt.params import get_alt_params +from ofscraper.download.utils.alt.params import get_alt_params,get_alt_params_hls from ofscraper.download.utils.log import get_medialog from ofscraper.download.utils.check.forced import ( @@ -69,6 +69,7 @@ FORCED_NEW, SIGN, ) +import ofscraper.utils.auth.request as auth_requests async def alt_download(c, ele, username, model_id): common_globals.log.debug( @@ -88,7 +89,7 @@ async def alt_download(c, ele, username, model_id): path_to_file_logger(sharedPlaceholderObj, ele) audio = await alt_download_downloader(audio, c, ele) - video = await alt_download_downloader(video, c, ele) + # video = await alt_download_downloader(video, c, ele) post_result = await media_item_post_process_alt( audio, video, ele, username, model_id @@ -176,44 +177,58 @@ async def send_req_inner(c, ele, item, placeholderObj): resume_size = get_resume_size(placeholderObj, mediatype=ele.mediatype) headers = get_resume_header(resume_size, item["total"]) common_globals.log.debug(f"{get_medialog(ele)} resume header {headers}") - params = get_alt_params(ele) + # params = get_alt_params(ele) + params=get_alt_params_hls(ele) base_url = re.sub("[0-9a-z]*\.mpd$", "", ele.mpd, re.IGNORECASE) url = f"{base_url}{item['origname']}" - + url=ele.hls + headers={"Cookie":f"{ele.hls_header}{auth_requests.get_cookies_str()}"} common_globals.log.debug( f"{get_medialog(ele)} [attempt {alt_attempt_get(item).get()}/{get_download_retries()}] Downloading media with url {ele.mpd}" ) + import m3u8 async with c.requests_async( url=url, headers=headers, params=params, - action=[FORCED_NEW,SIGN] if constants.getattr("ALT_FORCE_KEY") else None + # action=[FORCED_NEW,SIGN] if constants.getattr("ALT_FORCE_KEY") else None ) as l: - item["total"] = int(l.headers.get("content-length")) - total = item["total"] - - data = { - "content-total": total, - "content-type": l.headers.get("content-type"), - } - - common_globals.log.debug(f"{get_medialog(ele)} data from request {data}") - common_globals.log.debug( - f"{get_medialog(ele)} total from request {format_size(data.get('content-total')) if data.get('content-total') else 'unknown'}" - ) - await total_change_helper(None, total) - await set_data(ele,item,data) - - temp_file_logger(placeholderObj, ele) - if await check_forced_skip(ele, total) == 0: - item["total"] = 0 - total = item["total"] - await total_change_helper(total, 0) - return item - elif total != resume_size: - await download_fileobject_writer(total, l, ele, placeholderObj,item) - + # item["total"] = int(l.headers.get("content-length")) + # total = item["total"] + data=m3u8.loads(await l.text_()) + + # data = { + # "content-total": total, + # "content-type": l.headers.get("content-type"), + # } + + # common_globals.log.debug(f"{get_medialog(ele)} data from request {data}") + # common_globals.log.debug( + # f"{get_medialog(ele)} total from request {format_size(data.get('content-total')) if data.get('content-total') else 'unknown'}" + # ) + # await total_change_helper(None, total) + # await set_data(ele,item,data) + + # temp_file_logger(placeholderObj, ele) + # if await check_forced_skip(ele, total) == 0: + # item["total"] = 0 + # total = item["total"] + # await total_change_helper(total, 0) + # return item + # elif total != resume_size: + # await download_fileobject_writer(total, l, ele, placeholderObj,item) + from urllib.parse import urljoin + async with c.requests_async( + url=urljoin(ele.hls_base,data.playlists[1].uri), + headers=headers, + params=params, + # action=[FORCED_NEW,SIGN] if constants.getattr("ALT_FORCE_KEY") else None + ) as k: + r=await k.text_() + b=await k.json_() + print("ddd") + await size_checker(placeholderObj.tempfilepath, ele, total) return item diff --git a/ofscraper/download/utils/alt/params.py b/ofscraper/download/utils/alt/params.py index 8a4a7f135..7fd9c9191 100644 --- a/ofscraper/download/utils/alt/params.py +++ b/ofscraper/download/utils/alt/params.py @@ -4,3 +4,11 @@ def get_alt_params(ele): "Key-Pair-Id": ele.keypair, "Signature": ele.signature, } + + +def get_alt_params_hls(ele): + return { + "Policy": ele.hls_policy, + "Key-Pair-Id": ele.hls_keypair, + "Signature": ele.hls_signature, + } \ No newline at end of file