Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed issue with links not being found #298

Open
wants to merge 44 commits into
base: master
Choose a base branch
from
Open
Changes from 3 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
aa1f012
Fixed issue with links not being found
Joeclinton1 Feb 5, 2020
66f69d6
Fixed None type
Joeclinton1 Feb 9, 2020
8b794e0
Merge branch 'patch-1' into master
Joeclinton1 Feb 9, 2020
a36a378
Update google_images_download.py
Joeclinton1 Feb 9, 2020
fbc4a16
Fix more none type errors
Joeclinton1 Feb 10, 2020
ef577fc
Fix download of >100 items
voins Mar 14, 2020
90e52a4
Intercept ajax calls
voins Mar 24, 2020
7db9a46
Decode data from ajax calls
voins Mar 24, 2020
2cd6817
Get image format from Content-Type returned by server
voins Mar 25, 2020
068712b
changed start_line (ds:2 to ds:1)
Joeclinton1 Mar 25, 2020
d8dd8a9
google changed their format a little. again
Jun 17, 2020
18b0e45
Merge pull request #2 from Joeclinton1/master
Joeclinton1 Jun 27, 2020
36f798f
Merge branch 'patch-1' of https://github.com/voins/google-images-down…
Joeclinton1 Jun 27, 2020
620e7f5
removed unused get_next_item function
Joeclinton1 Jun 27, 2020
bcb2af3
Fixed end_object find code
Joeclinton1 Sep 6, 2020
58a190b
Improved exception handling
Joeclinton1 Sep 6, 2020
aa817df
Updated user agent to use newer browser.
Joeclinton1 Jan 31, 2021
2a310f1
Add files via upload
estuhr1206 May 25, 2021
c17c55d
Delete google_images_download.py
estuhr1206 May 25, 2021
4c5e6a4
Add files via upload
estuhr1206 May 25, 2021
dd0b83d
Merge pull request #6 from estuhr1206/patch-1
Joeclinton1 Jun 1, 2021
2f9f801
Get more than 400 images
NicolasGrosjean Jun 16, 2021
df2e289
Fix JSONDecodeError: Extra Data
matthewlehew Jun 30, 2021
a8e28e2
Manage API change
NicolasGrosjean Aug 25, 2021
375b6bb
Fix time_range argument
NicolasGrosjean Sep 20, 2021
a0c18fd
Remove time range from directory names
NicolasGrosjean Sep 22, 2021
7c91e00
Merge pull request #7 from NicolasGrosjean/patch-3
Joeclinton1 Sep 26, 2021
9a0008d
Merge pull request #8 from matthewlehew/patch-1
Joeclinton1 Sep 26, 2021
9070776
Merge pull request #9 from NicolasGrosjean/patch-4
Joeclinton1 Sep 26, 2021
e13cc55
Merge pull request #10 from NicolasGrosjean/patch-6
Joeclinton1 Sep 26, 2021
c773e1c
Fix "None type error" by changing location of image_host string in in…
Joeclinton1 Sep 26, 2021
36e5c06
Fix exact_size parameter #11
NicolasGrosjean Feb 23, 2022
ce512d9
Merge pull request #12 from NicolasGrosjean/patch-7
Joeclinton1 Mar 3, 2022
cf190d8
Support Firefox
voronaam Aug 5, 2022
ae03d01
Merge pull request #21 from voronaam/patch-1
Joeclinton1 Aug 15, 2022
dcb4619
Bypass "Before you continue" and ignore empty ajax data
Joeclinton1 Aug 18, 2022
03671f3
Merge pull request #23 from Joeclinton1/patch-3
Joeclinton1 Aug 18, 2022
945aeff
Merge pull request #1 from Joeclinton1/patch-1
ellisbrown Sep 23, 2022
dffca08
fix breaking change due to google's response format
ellisbrown Sep 23, 2022
3f58a9a
update error message to point to this PR
ellisbrown Sep 23, 2022
219b850
fix chromium downloads
ellisbrown Sep 24, 2022
1421a43
fix again after new update 9/26
ellisbrown Sep 26, 2022
2e117f3
revert rollback from 9/26
ellisbrown Sep 30, 2022
e91e6a3
Merge pull request #26 from ellisbrown/patch-1
Joeclinton1 Sep 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 33 additions & 22 deletions google_images_download/google_images_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,24 @@ def get_all_tabs(self,page):

#Format the object in readable format
def format_object(self,object):
# Build a flat dict of image metadata (size, link, format, description,
# host, source, thumbnail URL) from one raw image record.
# NOTE(review): this text is a flattened diff hunk -- indentation is lost and
# removed/added lines appear together, so read it as two versions of the body.
# Positional version: the record lives at index 1 of `object`.
data = object[1]
main = data[3]
info = data[9]
# Fall back to slot 11 when slot 9 is None.
if info is None:
info = data[11]
formatted_object = {}
# NOTE(review): the eight string-keyed lookups below ('ity', 'oh', ...) index
# `object` like a dict, while the lines above index it like a sequence; they
# look like the removed side of the diff -- TODO confirm against the raw patch.
formatted_object['image_format'] = object['ity']
formatted_object['image_height'] = object['oh']
formatted_object['image_width'] = object['ow']
formatted_object['image_link'] = object['ou']
formatted_object['image_description'] = object['pt']
formatted_object['image_host'] = object['rh']
formatted_object['image_source'] = object['ru']
formatted_object['image_thumbnail_url'] = object['tu']
# Any failure while reading the nested record is printed and reported to the
# caller as None instead of propagating.
try:
formatted_object['image_height'] = main[2]
formatted_object['image_width'] = main[1]
formatted_object['image_link'] = main[0]
# Format = the text after the last "." in the link; when the link contains no
# "." (or ends with one) the slice yields the whole link.
formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):]
formatted_object['image_description'] = info['2003'][3]
formatted_object['image_host'] = info['183836587'][0]
formatted_object['image_source'] = info['2003'][2]
formatted_object['image_thumbnail_url'] = data[2][0]
except Exception as e:
print(e)
return None
return formatted_object


Expand Down Expand Up @@ -482,7 +491,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only):
try:
if not os.path.exists(main_directory):
os.makedirs(main_directory)
time.sleep(0.2)
time.sleep(0.15)
path = (dir_name)
sub_directory = os.path.join(main_directory, path)
if not os.path.exists(sub_directory):
Expand Down Expand Up @@ -740,24 +749,30 @@ def _get_next_item(self,s):


# Getting all links with the help of '_images_get_next_image'
def _get_image_objects(self, s):
    """Extract the raw image records embedded in a results page.

    The page text is searched for the ``AF_initDataCallback`` block keyed
    ``ds:2``. The JSON array that follows it (up to four characters before
    the next ``</script>``) is unescaped, parsed, and the list found at
    ``[31][0][12][2]`` is filtered down to entries whose first element is 1.

    Args:
        s: Page source as text; the marker is matched with literal
            backslash-escaped quotes (``\\'ds:2\\'``).

    Returns:
        A list of image records, or an empty list when the page does not
        contain the marker, an opening ``[`` after it, or a closing
        ``</script>`` tag.

    Raises:
        ValueError, IndexError, TypeError: if the payload is present but is
            not valid JSON or does not have the expected nesting.
    """
    marker = "AF_initDataCallback({key: \\'ds:2\\'"
    marker_pos = s.find(marker)
    if marker_pos == -1:
        # No data block on this page: report "no images" rather than slicing
        # with a -1 offset and failing later with an unrelated JSON error.
        return []

    # Search from the marker itself; the marker text contains no '[' so the
    # first hit is the start of the data array.
    start_object = s.find('[', marker_pos)
    if start_object == -1:
        return []

    script_end = s.find('</script>', start_object + 1)
    if script_end == -1:
        return []

    # Drop the 4 characters between the array and the closing tag
    # (presumably the callback's closing "}});" -- confirm against a live page).
    end_object = script_end - 4

    object_raw = str(s[start_object:end_object])
    # Turn backslash escape sequences in the page text into real characters
    # before handing the payload to the JSON parser.
    object_decode = bytes(object_raw, "utf-8").decode("unicode_escape")
    image_objects = json.loads(object_decode)[31][0][12][2]
    return [x for x in image_objects if x[0] == 1]

def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
items = []
abs_path = []
errorCount = 0
i = 0
count = 1
while count < limit+1:
object, end_content = self._get_next_item(page)
if object == "no_links":
image_objects = self._get_image_objects(page)
while count < limit+1 and i<len(image_objects):
if len(image_objects) == 0:
print("no_links")
break
elif object == "":
page = page[end_content:]
elif arguments['offset'] and count < int(arguments['offset']):
count += 1
page = page[end_content:]
else:
#format the item for readability
object = self.format_object(object)
object = self.format_object(image_objects[i])
if arguments['metadata']:
if not arguments["silent_mode"]:
print("\nImage Metadata: " + str(object))
Expand All @@ -784,8 +799,6 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
#delay param
if arguments['delay']:
time.sleep(int(arguments['delay']))

page = page[end_content:]
i += 1
if count < limit:
print("\n\nUnfortunately all " + str(
Expand Down Expand Up @@ -1007,5 +1020,3 @@ def main():

if __name__ == "__main__":
main()

# In[ ]: