Skip to content

Commit

Permalink
Fix empty vid from new page update
Browse files Browse the repository at this point in the history
  • Loading branch information
captaincolonelfox committed Dec 18, 2022
1 parent 3ab8040 commit e793d84
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 25 deletions.
40 changes: 26 additions & 14 deletions bot/api/tiktok.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import asyncio
import json
import logging
import random
import re
import string
from datetime import datetime
from functools import wraps
Expand All @@ -13,6 +13,10 @@
from bs4 import BeautifulSoup


class Retrying(Exception):
    """Signal that a fetched TikTok page did not yield usable video data.

    Raised by ``TikTokAPI.download_video`` when the page has no state
    script, when its ``ItemModule`` mapping is empty, or when the video id
    found in the page differs from the id in the page URL.

    NOTE(review): presumably the ``retries`` decorator catches this to
    re-attempt the download -- its wrapper body is not visible here, confirm.
    """


def retries(times: int):
def decorator(func):
@wraps(func)
Expand All @@ -30,9 +34,8 @@ async def wrapper(*args, **kwargs):
@define
class TikTokAPI:
headers: dict = field(converter=dict)
link: str = field(converter=str)
regexp_key: str = field(converter=str)
description_selector: str = field(converter=str)
link: str = field(default='tiktok.com', converter=str)
script_selector: str = field(default='script[id="SIGI_STATE"]', converter=str)

async def handle_message(self, message: Message) -> AsyncIterator[tuple[str, str, bytes]]:
entries = (message.text[e.offset:e.offset + e.length] for e in message.entities)
Expand All @@ -49,18 +52,27 @@ async def download_video(self, url: str) -> tuple[str, bytes]:
async with httpx.AsyncClient(headers=self.headers, timeout=30,
cookies=self._tt_webid_v2, follow_redirects=True) as client:
page = await client.get(url, headers=self._user_agent)
tid = page.url.path.rsplit('/', 1)[-1]
for vid, link in re.findall(self.regexp_key, page.text):
if vid != tid: raise Exception("Retrying")
soup = BeautifulSoup(page.text, 'html.parser')
if div := soup.select_one(self.description_selector):
title = div.text
else:
title = ""
link = link.encode('utf-8').decode('unicode_escape')
page_id = page.url.path.rsplit('/', 1)[-1]

soup = BeautifulSoup(page.text, 'html.parser')

if script := soup.select_one(self.script_selector):
script = json.loads(script.text)
else:
raise Retrying("no script")

modules = tuple(script.get("ItemModule").values())
if not modules:
raise Retrying("no modules")

for data in modules:
if data["id"] != page_id:
raise Retrying("video_id is different from page_id")
description = data["desc"]
link = data["video"]["downloadAddr"].encode('utf-8').decode('unicode_escape')
if video := await client.get(link, headers=self._user_agent):
video.raise_for_status()
return title, video.content
return description, video.content

@property
def _user_agent(self) -> dict:
Expand Down
6 changes: 0 additions & 6 deletions bot/exception.py

This file was deleted.

5 changes: 0 additions & 5 deletions bot/handlers/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@
from bot.api import TikTokAPI
from settings import USER_ID


TikTok = TikTokAPI(
link='tiktok.com',
regexp_key=r'"video":{"id":"(.*?)",.*?"downloadAddr":"(.*?)",.*?}',
description_selector='div[data-e2e="browse-video-desc"]',
headers={
"Referer": "https://www.tiktok.com/",
}
Expand Down Expand Up @@ -37,6 +33,5 @@ async def get_message(message: Message):
message.chat.id,
video,
caption=f"{description}\n\n{url}",
parse_mode="Markdown",
reply_to_message_id=message.message_id,
)

0 comments on commit e793d84

Please sign in to comment.