From e793d8407ed029c51048d917d2abfba18f1da4f5 Mon Sep 17 00:00:00 2001 From: Igor Popov Date: Sun, 18 Dec 2022 19:07:46 +0300 Subject: [PATCH] Fix empty vid from new page update --- bot/api/tiktok.py | 40 ++++++++++++++++++++++++++-------------- bot/exception.py | 6 ------ bot/handlers/messages.py | 5 ----- 3 files changed, 26 insertions(+), 25 deletions(-) delete mode 100644 bot/exception.py diff --git a/bot/api/tiktok.py b/bot/api/tiktok.py index 820a23f..7a8a9bd 100644 --- a/bot/api/tiktok.py +++ b/bot/api/tiktok.py @@ -1,7 +1,7 @@ import asyncio +import json import logging import random -import re import string from datetime import datetime from functools import wraps @@ -13,6 +13,10 @@ from bs4 import BeautifulSoup +class Retrying(Exception): + pass + + def retries(times: int): def decorator(func): @wraps(func) @@ -30,9 +34,8 @@ async def wrapper(*args, **kwargs): @define class TikTokAPI: headers: dict = field(converter=dict) - link: str = field(converter=str) - regexp_key: str = field(converter=str) - description_selector: str = field(converter=str) + link: str = field(default='tiktok.com', converter=str) + script_selector: str = field(default='script[id="SIGI_STATE"]', converter=str) async def handle_message(self, message: Message) -> AsyncIterator[tuple[str, str, bytes]]: entries = (message.text[e.offset:e.offset + e.length] for e in message.entities) @@ -49,18 +52,27 @@ async def download_video(self, url: str) -> tuple[str, bytes]: async with httpx.AsyncClient(headers=self.headers, timeout=30, cookies=self._tt_webid_v2, follow_redirects=True) as client: page = await client.get(url, headers=self._user_agent) - tid = page.url.path.rsplit('/', 1)[-1] - for vid, link in re.findall(self.regexp_key, page.text): - if vid != tid: raise Exception("Retrying") - soup = BeautifulSoup(page.text, 'html.parser') - if div := soup.select_one(self.description_selector): - title = div.text - else: - title = "" - link = link.encode('utf-8').decode('unicode_escape') + page_id = page.url.path.rsplit('/', 1)[-1] + + soup = BeautifulSoup(page.text, 'html.parser') + + if script := soup.select_one(self.script_selector): + script = json.loads(script.text) + else: + raise Retrying("no script") + + modules = tuple(script.get("ItemModule").values()) + if not modules: + raise Retrying("no modules") + + for data in modules: + if data["id"] != page_id: + raise Retrying("video_id is different from page_id") + description = data["desc"] + link = data["video"]["downloadAddr"].encode('utf-8').decode('unicode_escape') if video := await client.get(link, headers=self._user_agent): video.raise_for_status() - return title, video.content + return description, video.content @property def _user_agent(self) -> dict: diff --git a/bot/exception.py b/bot/exception.py deleted file mode 100644 index 0afde39..0000000 --- a/bot/exception.py +++ /dev/null @@ -1,6 +0,0 @@ - -class HandleException(Exception): - pass - -class DownloadException(Exception): - pass \ No newline at end of file diff --git a/bot/handlers/messages.py b/bot/handlers/messages.py index 9b735a6..ebe5fa3 100644 --- a/bot/handlers/messages.py +++ b/bot/handlers/messages.py @@ -4,11 +4,7 @@ from bot.api import TikTokAPI from settings import USER_ID - TikTok = TikTokAPI( - link='tiktok.com', - regexp_key=r'"video":{"id":"(.*?)",.*?"downloadAddr":"(.*?)",.*?}', - description_selector='div[data-e2e="browse-video-desc"]', headers={ "Referer": "https://www.tiktok.com/", } @@ -37,6 +33,5 @@ async def get_message(message: Message): message.chat.id, video, caption=f"{description}\n\n{url}", - parse_mode="Markdown", reply_to_message_id=message.message_id, )