Skip to content

Commit

Permalink
move format checking to outer statement
Browse files Browse the repository at this point in the history
  • Loading branch information
mazen-r committed Aug 12, 2024
1 parent 5ab9df4 commit e72ae15
Showing 1 changed file with 27 additions and 27 deletions.
54 changes: 27 additions & 27 deletions scrapfly/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,34 +735,32 @@ def sink(self, api_response:ScrapeApiResponse, content:Optional[Union[str, bytes

def _handle_scrape_large_objects(
self,
body: Dict
body: Dict,
format: Literal['clob', 'blob']
) -> Dict:
content_format = body['result']['format']
if content_format in ['clob', 'blob']:

request_data = {
'method': 'GET',
'url': body['result']['content'],
'verify': self.verify,
'timeout': (self.connect_timeout, self.read_timeout),
'headers': {
'accept-encoding': self.body_handler.content_encoding,
'accept': self.body_handler.accept,
'user-agent': self.ua
},
'params': {'key': self.key}
}
response = self._http_handler(**request_data)
if self.body_handler.support(headers=response.headers):
content = self.body_handler(content=response.content, content_type=response.headers['content-type'])
else:
content = response.content.decode('utf-8')
request_data = {
'method': 'GET',
'url': body['result']['content'],
'verify': self.verify,
'timeout': (self.connect_timeout, self.read_timeout),
'headers': {
'accept-encoding': self.body_handler.content_encoding,
'accept': self.body_handler.accept,
'user-agent': self.ua
},
'params': {'key': self.key}
}
response = self._http_handler(**request_data)
if self.body_handler.support(headers=response.headers):
content = self.body_handler(content=response.content, content_type=response.headers['content-type'])
else:
content = response.content.decode('utf-8')

body['result']['content'] = content
if content_format == 'clob':
body['result']['format'] = 'text'
if content_format == 'blob':
body['result']['format'] = 'binary'
body['result']['content'] = content
if format == 'clob':
body['result']['format'] = 'text'
if format == 'blob':
body['result']['format'] = 'binary'

return body

Expand All @@ -781,7 +779,9 @@ def _handle_api_response(
else:
body = response.content.decode('utf-8')

body = self._handle_scrape_large_objects(body=body)
content_format = body['result']['format']
if content_format in ['clob', 'blob']:
body = self._handle_scrape_large_objects(body=body, format=content_format)

api_response:ScrapeApiResponse = ScrapeApiResponse(
response=response,
Expand Down

0 comments on commit e72ae15

Please sign in to comment.