From aa1f01254407ba49617e834850f82d6e4ce4eedc Mon Sep 17 00:00:00 2001 From: Joeclinton1 <48254978+Joeclinton1@users.noreply.github.com> Date: Wed, 5 Feb 2020 23:56:38 +0100 Subject: [PATCH 01/31] Fixed issue with links not being found Google recently changed the way they present the image data, and so the links were no longer being scraped. I figured out how to get the image urls with the new system and made the appropriate changes so it would work. Unfortunately, google no longer provides file format data so I had to try and retrieve it from the url of the image, which does not work in some cases. --- .../google_images_download.py | 45 +++++++++++-------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index fd89a3a9..4eebe057 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -6,6 +6,7 @@ # Import Libraries import sys +import ast version = (3, 0) cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above @@ -271,15 +272,18 @@ def get_all_tabs(self,page): #Format the object in readable format def format_object(self,object): + data = object[1] + main = data[3] + info = data[9] formatted_object = {} - formatted_object['image_format'] = object['ity'] - formatted_object['image_height'] = object['oh'] - formatted_object['image_width'] = object['ow'] - formatted_object['image_link'] = object['ou'] - formatted_object['image_description'] = object['pt'] - formatted_object['image_host'] = object['rh'] - formatted_object['image_source'] = object['ru'] - formatted_object['image_thumbnail_url'] = object['tu'] + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] return formatted_object @@ -482,7 +486,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): try: if not os.path.exists(main_directory): os.makedirs(main_directory) - time.sleep(0.2) + time.sleep(0.15) path = (dir_name) sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): @@ -740,24 +744,29 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' + def _get_image_objects(self,s): + start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_raw = str(s[start_object:end_object]) + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + image_objects = json.loads(object_decode)[31][0][12][2] + return image_objects + def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items = [] abs_path = [] errorCount = 0 i = 0 count = 1 + image_objects = self._get_image_objects(page) while count < limit+1: - object, end_content = self._get_next_item(page) - if object == "no_links": + if len(image_objects) == 0: + print("no_links") break - elif object == "": - page = page[end_content:] - elif arguments['offset'] and count < int(arguments['offset']): - count += 1 - page = page[end_content:] else: #format the item 
for readability - object = self.format_object(object) + object = self.format_object(image_objects[i]) if arguments['metadata']: if not arguments["silent_mode"]: print("\nImage Metadata: " + str(object)) @@ -784,8 +793,6 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): #delay param if arguments['delay']: time.sleep(int(arguments['delay'])) - - page = page[end_content:] i += 1 if count < limit: print("\n\nUnfortunately all " + str( From 66f69d670db194e1d6b50c70a0940379ff963409 Mon Sep 17 00:00:00 2001 From: Joe Clinton <48254978+Joeclinton1@users.noreply.github.com> Date: Sun, 9 Feb 2020 16:53:23 +0100 Subject: [PATCH 02/31] Fixed None type By filtering out the image objects which had data[0]==2, I have removed the null items and it will no longer give the error: "TypeError: 'NoneType' object is not subscriptable". --- .../google_images_download.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index fd89a3a9..5447de5f 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -271,15 +271,18 @@ def get_all_tabs(self,page): #Format the object in readable format def format_object(self,object): + data = object[1] + main = data[3] + info = data[9] formatted_object = {} - formatted_object['image_format'] = object['ity'] - formatted_object['image_height'] = object['oh'] - formatted_object['image_width'] = object['ow'] - formatted_object['image_link'] = object['ou'] - formatted_object['image_description'] = object['pt'] - formatted_object['image_host'] = object['rh'] - formatted_object['image_source'] = object['ru'] - formatted_object['image_thumbnail_url'] = object['tu'] + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] return formatted_object @@ -482,7 +485,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): try: if not os.path.exists(main_directory): os.makedirs(main_directory) - time.sleep(0.2) + time.sleep(0.15) path = (dir_name) sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): @@ -740,24 +743,30 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' + def _get_image_objects(self,s): + start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_raw = str(s[start_object:end_object]) + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + image_objects = json.loads(object_decode)[31][0][12][2] + image_objects = [x for x in image_objects if x[0]==1] + return image_objects + def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items = [] abs_path = [] errorCount = 0 i = 0 count = 1 - while count < limit+1: - object, end_content = self._get_next_item(page) - if object == "no_links": + image_objects = self._get_image_objects(page) + while count < limit+1 and i Date: Sun, 9 Feb 2020 17:08:49 +0100 Subject: [PATCH 03/31] Update 
google_images_download.py --- google_images_download/google_images_download.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 4eebe057..bbceba8c 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -751,6 +751,7 @@ def _get_image_objects(self,s): object_raw = str(s[start_object:end_object]) object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") image_objects = json.loads(object_decode)[31][0][12][2] + image_objects = [x for x in image_objects if x[0]==1] return image_objects def _get_all_items(self,page,main_directory,dir_name,limit,arguments): @@ -760,7 +761,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): i = 0 count = 1 image_objects = self._get_image_objects(page) - while count < limit+1: + while count < limit+1 and i < len(image_objects): Date: Mon, 10 Feb 2020 08:02:02 +0100 Subject: [PATCH 04/31] Fix more none type errors This system is not very flexible; it seems Google does not keep the same positions of target items, so sometimes it doesn't work. I added a try-except just in case there are more problems. --- .../google_images_download.py | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bbceba8c..d7cf91e6 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -6,7 +6,6 @@ # Import Libraries import sys -import ast version = (3, 0) cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above @@ -275,15 +274,21 @@ def format_object(self,object): data = object[1] main = data[3] info = data[9] + if info is None: + info = data[11] formatted_object = {} - formatted_object['image_height'] = main[2] - formatted_object['image_width'] = main[1] - formatted_object['image_link'] = main[0] - formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] - formatted_object['image_description'] = info['2003'][3] - formatted_object['image_host'] = info['183836587'][0] - formatted_object['image_source'] = info['2003'][2] - formatted_object['image_thumbnail_url'] = data[2][0] + try: + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] + except Exception as e: + print(e) + return None return formatted_object @@ -1015,5 +1020,3 @@ def main(): if __name__ == "__main__": main() - -# In[ ]: From ef577fc0f7a8558073a9a8bc227fdaca0136b2ab Mon Sep 17 00:00:00 2001 From: Alexey Voinov Date: Sat, 14 Mar 2020 22:38:18 +0100 Subject: [PATCH 05/31] Fix download of >100 items It is based on a patch by https://github.com/Joeclinton1, but for some reason we get an escaped string when getting the results page directly (limit < 101) and an unescaped one when getting the results page using selenium. This is not the most elegant solution, but it works for me.
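In sketch form the two extraction paths look roughly like this (a minimal, hypothetical helper: it assumes the find('') calls in these patches search for the closing '</script>' tag, as the PATCH 12/31 message below confirms, and it drops the original's -10/-11 offset bookkeeping):

    import json

    def get_image_objects(page):
        # Escaped variant: what urllib returns when the page is fetched directly.
        start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'")
        escaped = start_line != -1
        if not escaped:
            # Unescaped variant: what selenium's page_source returns.
            start_line = page.find("AF_initDataCallback({key: 'ds:1'")
        start_object = page.find('[', start_line + 1)
        end_object = page.find('</script>', start_object + 1) - 4
        data = page[start_object:end_object]
        if escaped:
            # Only the directly fetched page needs the unicode_escape round-trip.
            data = bytes(data, "utf-8").decode("unicode_escape")
        image_objects = json.loads(data)[31][0][12][2]
        return [x for x in image_objects if x[0] == 1]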
--- google_images_download/google_images_download.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index d7cf91e6..3dfc609a 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -750,11 +750,17 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' def _get_image_objects(self,s): - start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 - start_object = s.find('[', start_line + 1) - end_object = s.find('', start_object + 1) - 4 - object_raw = str(s[start_object:end_object]) - object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + start_line = s.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 + if start_line == -11: + start_line = s.find("AF_initDataCallback({key: 'ds:1'") - 10 + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_decode = str(s[start_object:end_object]) + else: + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_raw = str(s[start_object:end_object]) + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") image_objects = json.loads(object_decode)[31][0][12][2] image_objects = [x for x in image_objects if x[0]==1] return image_objects From 90e52a4a35c3861fddcc751439064909b166da7d Mon Sep 17 00:00:00 2001 From: Alexey Voinov Date: Tue, 24 Mar 2020 09:02:20 +0100 Subject: [PATCH 06/31] Intercept ajax calls --- .../google_images_download.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 3dfc609a..de39e04a 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -184,6 +184,38 @@ def download_extended_page(self,url,chromedriver): # Open the link browser.get(url) + browser.execute_script(""" + (function(XHR){ + "use strict"; + var open = XHR.prototype.open; + var send = XHR.prototype.send; + var data = []; + + XHR.prototype.open = function(method, url, async, user, pass) { + this._url = url; + open.call(this, method, url, async, user, pass); + } + + XHR.prototype.send = function(data) { + var self = this; + var url = this._url; + + function stateChanged() { + if (self.readyState == 4) { + console.log("data available for: " + url) + XHR.prototype._data.push(self.response); + } + } + if (url.includes("/batchexecute?")) { + this.addEventListener("readystatechange", stateChanged, false); + } + send.call(this, data); + }; + + XHR.prototype._data = []; + })(XMLHttpRequest); + """) + time.sleep(1) print("Getting you a lot of images. 
This may take a few moments...") @@ -207,6 +239,8 @@ def download_extended_page(self,url,chromedriver): time.sleep(0.5) source = browser.page_source #page source + ajax = browser.execute_script("return XMLHttpRequest.prototype._data") + #close the browser browser.close() From 7db9a4608f584ae6925e3ebff001579dce284b39 Mon Sep 17 00:00:00 2001 From: Alexey Voinov Date: Tue, 24 Mar 2020 20:51:51 +0100 Subject: [PATCH 07/31] Decode data from ajax calls --- .../google_images_download.py | 68 ++++++++++--------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index de39e04a..bac53f00 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -127,6 +127,28 @@ class googleimagesdownload: def __init__(self): pass + def _extract_data_pack(self, page): + start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 + start_object = page.find('[', start_line + 1) + end_object = page.find('', start_object + 1) - 4 + object_raw = str(page[start_object:end_object]) + return bytes(object_raw, "utf-8").decode("unicode_escape") + + def _extract_data_pack_extended(self, page): + start_line = page.find("AF_initDataCallback({key: 'ds:1'") - 10 + start_object = page.find('[', start_line + 1) + end_object = page.find('', start_object + 1) - 4 + return str(page[start_object:end_object]) + + def _extract_data_pack_ajax(self, data): + lines = data.split('\n') + return json.loads(lines[3] + lines[4])[0][2] + + def _image_objects_from_pack(self, data): + image_objects = json.loads(data)[31][0][12][2] + image_objects = [x for x in image_objects if x[0]==1] + return image_objects + # Downloading entire Web Document (Raw Page Content) def download_page(self,url): version = (3, 0) @@ -138,7 +160,7 @@ def download_page(self,url): req = urllib.request.Request(url, headers=headers) resp = urllib.request.urlopen(req) respData = str(resp.read()) - return respData + return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData) except Exception as e: print("Could not open URL. Please check your internet connection and/or ssl settings \n" "If you are using proxy, make sure your proxy settings is configured correctly") @@ -154,7 +176,7 @@ def download_page(self,url): context = ssl._create_unverified_context() response = urlopen(req, context=context) page = response.read() - return page + return self._image_objects_from_pack(self._extract_data_pack(page)), self.get_all_tabs(page) except: print("Could not open URL. 
Please check your internet connection and/or ssl settings \n" "If you are using proxy, make sure your proxy settings is configured correctly") @@ -239,12 +261,16 @@ def download_extended_page(self,url,chromedriver): time.sleep(0.5) source = browser.page_source #page source - ajax = browser.execute_script("return XMLHttpRequest.prototype._data") + images = self._image_objects_from_pack(self._extract_data_pack_extended(source)) + + ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data") + for chunk in ajax_data: + images += self._image_objects_from_pack(self._extract_data_pack_ajax(chunk)) #close the browser browser.close() - return source + return images, self.get_all_tabs(source) #Correcting the escape characters for python2 @@ -781,31 +807,12 @@ def _get_next_item(self,s): final_object = "" return final_object, end_object - - # Getting all links with the help of '_images_get_next_image' - def _get_image_objects(self,s): - start_line = s.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 - if start_line == -11: - start_line = s.find("AF_initDataCallback({key: 'ds:1'") - 10 - start_object = s.find('[', start_line + 1) - end_object = s.find('', start_object + 1) - 4 - object_decode = str(s[start_object:end_object]) - else: - start_object = s.find('[', start_line + 1) - end_object = s.find('', start_object + 1) - 4 - object_raw = str(s[start_object:end_object]) - object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") - image_objects = json.loads(object_decode)[31][0][12][2] - image_objects = [x for x in image_objects if x[0]==1] - return image_objects - - def _get_all_items(self,page,main_directory,dir_name,limit,arguments): + def _get_all_items(self,image_objects,main_directory,dir_name,limit,arguments): items = [] abs_path = [] errorCount = 0 i = 0 count = 1 - image_objects = self._get_image_objects(page) while count < limit+1 and i Date: Wed, 25 Mar 2020 09:53:58 +0100 Subject: [PATCH 08/31] Get image format from Content-Type returned by server --- .../google_images_download.py | 48 ++++++++++++------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bac53f00..e5242f62 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -667,29 +667,43 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri response = urlopen(req, None, timeout) data = response.read() + info = response.info() response.close() - extensions = [".jpg", ".jpeg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico"] - # keep everything after the last '/' - image_name = str(image_url[(image_url.rfind('/')) + 1:]) - if format: - if not image_format or image_format != format: - download_status = 'fail' - download_message = "Wrong image format returned. Skipping..." - return_image_name = '' - absolute_path = '' - return download_status, download_message, return_image_name, absolute_path - - if image_format == "" or not image_format or "." 
+ image_format not in extensions: + qmark = image_url.rfind('?') + if qmark == -1: + qmark = len(image_url) + slash = image_url.rfind('/', 0, qmark) + 1 + image_name = str(image_url[slash:qmark]).lower() + + type = info.get_content_type() + if type == "image/jpeg" or type == "image/jpg": + if not image_name.endswith(".jpg") and not image_name.endswith(".jpeg"): + image_name += ".jpg" + elif type == "image/png": + if not image_name.endswith(".png"): + image_name += ".png" + elif type == "image/webp": + if not image_name.endswith(".webp"): + image_name += ".webp" + elif type == "image/gif": + if not image_name.endswith(".gif"): + image_name += ".gif" + elif type == "image/bmp" or type == "image/x-windows-bmp": + if not image_name.endswith(".bmp"): + image_name += ".bmp" + elif type == "image/x-icon" or type == "image/vnd.microsoft.icon": + if not image_name.endswith(".ico"): + image_name += ".ico" + elif type == "image/svg+xml": + if not image_name.endswith(".svg"): + image_name += ".svg" + else: download_status = 'fail' - download_message = "Invalid or missing image format. Skipping..." + download_message = "Invalid image format '" + type + "'. Skipping..." return_image_name = '' absolute_path = '' return download_status, download_message, return_image_name, absolute_path - elif image_name.lower().find("." + image_format) < 0: - image_name = image_name + "." + image_format - else: - image_name = image_name[:image_name.lower().find("." + image_format) + (len(image_format) + 1)] # prefix name in image if prefix: From 068712b4681bddcdc305b8824789361d576793ef Mon Sep 17 00:00:00 2001 From: Joe Clinton <48254978+Joeclinton1@users.noreply.github.com> Date: Wed, 25 Mar 2020 12:47:31 +0100 Subject: [PATCH 09/31] changed start_line (ds:2 to ds:1) --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bbceba8c..345f4c87 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -745,7 +745,7 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' def _get_image_objects(self,s): - start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_line = s.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 start_object = s.find('[', start_line + 1) end_object = s.find('', start_object + 1) - 4 object_raw = str(s[start_object:end_object]) From d8dd8a984282065396696322d6c10e7a32ea1cec Mon Sep 17 00:00:00 2001 From: Alexey Voinov Date: Wed, 17 Jun 2020 16:09:09 +0200 Subject: [PATCH 10/31] google changed their format a little. 
again --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index e5242f62..b24953e0 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -130,7 +130,7 @@ def __init__(self): def _extract_data_pack(self, page): start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 start_object = page.find('[', start_line + 1) - end_object = page.find('', start_object + 1) - 4 + end_object = page.find('', start_object + 1) - 5 object_raw = str(page[start_object:end_object]) return bytes(object_raw, "utf-8").decode("unicode_escape") From 620e7f54ea29631e5e7d8a2b82a16a40932bf42a Mon Sep 17 00:00:00 2001 From: Joe Clinton Date: Sat, 27 Jun 2020 15:58:24 +0200 Subject: [PATCH 11/31] removed unused get_next_item function --- .../google_images_download.py | 407 ++++++++++-------- 1 file changed, 237 insertions(+), 170 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index b24953e0..4a6cf9eb 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -6,6 +6,7 @@ # Import Libraries import sys + version = (3, 0) cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above @@ -15,6 +16,7 @@ from urllib.parse import quote import http.client from http.client import IncompleteRead, BadStatusLine + http.client._MAXHEADERS = 1000 else: # If the Current Version of Python is 2.x import urllib2 @@ -23,6 +25,7 @@ from urllib import quote import httplib from httplib import IncompleteRead, BadStatusLine + httplib._MAXHEADERS = 1000 import time # Importing the time library to check the time of code execution import os @@ -39,8 +42,9 @@ "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", - "offset", "no_download","save_source","silent_mode","ignore_urls"] + "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", + "no_numbering", + "offset", "no_download", "save_source", "silent_mode", "ignore_urls"] def user_input(): @@ -52,7 +56,7 @@ def user_input(): if object_check['config_file'] != '': records = [] json_file = json.load(open(config_file_check[0].config_file)) - for record in range(0,len(json_file['Records'])): + for record in range(0, len(json_file['Records'])): arguments = {} for i in args_list: arguments[i] = None @@ -64,57 +68,104 @@ def user_input(): # Taking command line arguments from users parser = argparse.ArgumentParser() parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) - parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) - parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False) - parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', 
type=str, required=False) + parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, + required=False) + parser.add_argument('-sk', '--suffix_keywords', + help='comma separated additional words added after to main keyword', type=str, + required=False) + parser.add_argument('-pk', '--prefix_keywords', + help='comma separated additional words added before main keyword', type=str, required=False) parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) - parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False) - parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, required=False) - parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, required=False) - parser.add_argument('-n', '--no_directory', default=False, help='download images in the main directory but no sub-directory', action="store_true") - parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, required=False) + parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, + required=False) + parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, + required=False) + parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, + required=False) + parser.add_argument('-n', '--no_directory', default=False, + help='download images in the main directory but no sub-directory', action="store_true") + parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, + required=False) parser.add_argument('-co', '--color', help='filter on color', type=str, required=False, - choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown']) + choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', + 'gray', 'black', 'brown']) parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, choices=['full-color', 'black-and-white', 'transparent']) parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, - choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse']) + choices=['labeled-for-reuse-with-modifications', 'labeled-for-reuse', + 'labeled-for-noncommercial-reuse-with-modification', + 'labeled-for-nocommercial-reuse']) parser.add_argument('-s', '--size', help='image size', type=str, required=False, - choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) - parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, required=False) + choices=['large', 'medium', 'icon', '>400*300', '>640*480', '>800*600', '>1024*768', '>2MP', + '>4MP', '>6MP', '>8MP', '>10MP', '>12MP', '>15MP', '>20MP', '>40MP', '>70MP']) + 
parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, + required=False) parser.add_argument('-t', '--type', help='image type', type=str, required=False, - choices=['face','photo','clipart','line-drawing','animated']) + choices=['face', 'photo', 'clipart', 'line-drawing', 'animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours','past-7-days','past-month','past-year']) - parser.add_argument('-wr', '--time_range', help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) - parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, + choices=['past-24-hours', 'past-7-days', 'past-month', 'past-year']) + parser.add_argument('-wr', '--time_range', + help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', + type=str, required=False) + parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, + required=False, choices=['tall', 'square', 'wide', 'panoramic']) - parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) - parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) - parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") - parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") - parser.add_argument('-pp', '--print_paths', default=False, help="Prints the list of absolute paths of the images",action="store_true") - parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") - parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") - parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) - parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") - parser.add_argument('-tho', '--thumbnail_only', default=False, help="Downloads only thumbnail without downloading actual images", action="store_true") - parser.add_argument('-la', '--language', default=False, help="Defines the language filter. 
The search results are authomatically returned in that language", type=str, required=False, - choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) - parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) + parser.add_argument('-si', '--similar_images', + help='downloads images very similar to the image URL you provide', type=str, required=False) + parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', + type=str, required=False) + parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", + action="store_true") + parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", + action="store_true") + parser.add_argument('-pp', '--print_paths', default=False, + help="Prints the list of absolute paths of the images", action="store_true") + parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", + action="store_true") + parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", + action="store_true") + parser.add_argument('-st', '--socket_timeout', default=False, + help="Connection timeout waiting for the image to download", type=float) + parser.add_argument('-th', '--thumbnail', default=False, + help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-tho', '--thumbnail_only', default=False, + help="Downloads only thumbnail without downloading actual images", action="store_true") + parser.add_argument('-la', '--language', default=False, + help="Defines the language filter. 
The search results are authomatically returned in that language", + type=str, required=False, + choices=['Arabic', 'Chinese (Simplified)', 'Chinese (Traditional)', 'Czech', 'Danish', + 'Dutch', 'English', 'Estonian', 'Finnish', 'French', 'German', 'Greek', 'Hebrew', + 'Hungarian', 'Icelandic', 'Italian', 'Japanese', 'Korean', 'Latvian', 'Lithuanian', + 'Norwegian', 'Portuguese', 'Polish', 'Romanian', 'Russian', 'Spanish', 'Swedish', + 'Turkish']) + parser.add_argument('-pr', '--prefix', default=False, + help="A word that you would want to prefix in front of each image name", type=str, + required=False) parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) - parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) - parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") - parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true") - parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") + parser.add_argument('-cd', '--chromedriver', + help='specify the path to chromedriver executable in your local machine', type=str, + required=False) + parser.add_argument('-ri', '--related_images', default=False, + help="Downloads images that are similar to the keyword provided", action="store_true") + parser.add_argument('-sa', '--safe_search', default=False, + help="Turns on the safe search filter while searching for images", action="store_true") + parser.add_argument('-nn', '--no_numbering', default=False, + help="Allows you to exclude the default numbering of images", action="store_true") parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) - parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") - parser.add_argument('-iu', '--ignore_urls', default=False, help="delimited list input of image urls/keywords to ignore", type=str) - parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. Does not print notification messages on the terminal", action="store_true") - parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False) + parser.add_argument('-nd', '--no_download', default=False, + help="Prints the URLs of the images and/or thumbnails without downloading them", + action="store_true") + parser.add_argument('-iu', '--ignore_urls', default=False, + help="delimited list input of image urls/keywords to ignore", type=str) + parser.add_argument('-sil', '--silent_mode', default=False, + help="Remains silent. 
Does not print notification messages on the terminal", + action="store_true") + parser.add_argument('-is', '--save_source', + help="creates a text file containing a list of downloaded images along with source page url", + type=str, required=False) args = parser.parse_args() arguments = vars(args) @@ -146,17 +197,18 @@ def _extract_data_pack_ajax(self, data): def _image_objects_from_pack(self, data): image_objects = json.loads(data)[31][0][12][2] - image_objects = [x for x in image_objects if x[0]==1] + image_objects = [x for x in image_objects if x[0] == 1] return image_objects # Downloading entire Web Document (Raw Page Content) - def download_page(self,url): + def download_page(self, url): version = (3, 0) cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above try: headers = {} - headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + headers[ + 'User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" req = urllib.request.Request(url, headers=headers) resp = urllib.request.urlopen(req) respData = str(resp.read()) @@ -168,7 +220,8 @@ def download_page(self,url): else: # If the Current Version of Python is 2.x try: headers = {} - headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + headers[ + 'User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" req = urllib2.Request(url, headers=headers) try: response = urllib2.urlopen(req) @@ -183,9 +236,8 @@ def download_page(self,url): sys.exit() return "Page Not found" - # Download Page for more than 100 images - def download_extended_page(self,url,chromedriver): + def download_extended_page(self, url, chromedriver): from selenium import webdriver from selenium.webdriver.common.keys import Keys if sys.version_info[0] < 3: @@ -260,35 +312,33 @@ def download_extended_page(self,url,chromedriver): print("Reached end of Page.") time.sleep(0.5) - source = browser.page_source #page source + source = browser.page_source # page source images = self._image_objects_from_pack(self._extract_data_pack_extended(source)) ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data") for chunk in ajax_data: images += self._image_objects_from_pack(self._extract_data_pack_ajax(chunk)) - #close the browser + # close the browser browser.close() return images, self.get_all_tabs(source) - - #Correcting the escape characters for python2 - def replace_with_byte(self,match): + # Correcting the escape characters for python2 + def replace_with_byte(self, match): return chr(int(match.group(0)[1:], 8)) - def repair(self,brokenjson): + def repair(self, brokenjson): invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF return invalid_escape.sub(self.replace_with_byte, brokenjson) - # Finding 'Next Image' from the given raw page - def get_next_tab(self,s): + def get_next_tab(self, s): start_line = s.find('class="dtviD"') if start_line == -1: # If no links are found then give an error! 
end_quote = 0 link = "no_tabs" - return link,'',end_quote + return link, '', end_quote else: start_line = s.find('class="dtviD"') start_content = s.find('href="', start_line + 1) @@ -307,16 +357,15 @@ def get_next_tab(self,s): if chars_end == -1: updated_item_name = (url_item_name[chars + 5:]).replace("+", " ") else: - updated_item_name = (url_item_name[chars+5:chars_end]).replace("+", " ") + updated_item_name = (url_item_name[chars + 5:chars_end]).replace("+", " ") return url_item, updated_item_name, end_content - # Getting all links with the help of '_images_get_next_image' - def get_all_tabs(self,page): + def get_all_tabs(self, page): tabs = {} while True: - item,item_name,end_content = self.get_next_tab(page) + item, item_name, end_content = self.get_next_tab(page) if item == "no_tabs": break else: @@ -328,9 +377,8 @@ def get_all_tabs(self,page): page = page[end_content:] return tabs - - #Format the object in readable format - def format_object(self,object): + # Format the object in readable format + def format_object(self, object): data = object[1] main = data[3] info = data[9] @@ -341,7 +389,7 @@ def format_object(self,object): formatted_object['image_height'] = main[2] formatted_object['image_width'] = main[1] formatted_object['image_link'] = main[0] - formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_format'] = main[0][-1 * (len(main[0]) - main[0].rfind(".") - 1):] formatted_object['image_description'] = info['2003'][3] formatted_object['image_host'] = info['183836587'][0] formatted_object['image_source'] = info['2003'][2] @@ -351,9 +399,8 @@ def format_object(self,object): return None return formatted_object - - #function to download single image - def single_image(self,image_url): + # function to download single image + def single_image(self, image_url): main_directory = "downloads" extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") url = image_url @@ -391,14 +438,15 @@ def single_image(self,image_url): print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) return - def similar_images(self,similar_images): + def similar_images(self, similar_images): version = (3, 0) cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above try: searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images headers = {} - headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + headers[ + 'User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" req1 = urllib.request.Request(searchUrl, headers=headers) resp1 = urllib.request.urlopen(req1) @@ -420,7 +468,8 @@ def similar_images(self,similar_images): try: searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images headers = {} - headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + headers[ + 'User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" req1 = urllib2.Request(searchUrl, headers=headers) resp1 = urllib2.urlopen(req1) @@ -435,16 +484,23 @@ def similar_images(self,similar_images): l3 = content.find('/search?sa=X&q=') l4 = content.find(';', l3 + 19) urll2 = content[l3 + 19:l4] - return(urll2) + return (urll2) except: return "Cloud not connect to 
Google Images endpoint" - #Building URL parameters - def build_url_parameters(self,arguments): + # Building URL parameters + def build_url_parameters(self, arguments): if arguments['language']: lang = "&lr=" - lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} - lang_url = lang+lang_param[arguments['language']] + lang_param = {"Arabic": "lang_ar", "Chinese (Simplified)": "lang_zh-CN", + "Chinese (Traditional)": "lang_zh-TW", "Czech": "lang_cs", "Danish": "lang_da", + "Dutch": "lang_nl", "English": "lang_en", "Estonian": "lang_et", "Finnish": "lang_fi", + "French": "lang_fr", "German": "lang_de", "Greek": "lang_el", "Hebrew": "lang_iw ", + "Hungarian": "lang_hu", "Icelandic": "lang_is", "Italian": "lang_it", "Japanese": "lang_ja", + "Korean": "lang_ko", "Latvian": "lang_lv", "Lithuanian": "lang_lt", "Norwegian": "lang_no", + "Portuguese": "lang_pt", "Polish": "lang_pl", "Romanian": "lang_ro", "Russian": "lang_ru", + "Spanish": "lang_es", "Swedish": "lang_sv", "Turkish": "lang_tr"} + lang_url = lang + lang_param[arguments['language']] else: lang_url = '' @@ -463,14 +519,34 @@ def build_url_parameters(self,arguments): built_url = "&tbs=" counter = 0 - params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], - 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], - 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], - 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], - 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], - 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m','past-year':'qdr:y'}], - 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], - 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico','raw':'ift:craw'}]} + params = {'color': 
[arguments['color'], {'red': 'ic:specific,isc:red', 'orange': 'ic:specific,isc:orange', + 'yellow': 'ic:specific,isc:yellow', 'green': 'ic:specific,isc:green', + 'teal': 'ic:specific,isc:teel', 'blue': 'ic:specific,isc:blue', + 'purple': 'ic:specific,isc:purple', 'pink': 'ic:specific,isc:pink', + 'white': 'ic:specific,isc:white', 'gray': 'ic:specific,isc:gray', + 'black': 'ic:specific,isc:black', 'brown': 'ic:specific,isc:brown'}], + 'color_type': [arguments['color_type'], + {'full-color': 'ic:color', 'black-and-white': 'ic:gray', 'transparent': 'ic:trans'}], + 'usage_rights': [arguments['usage_rights'], + {'labeled-for-reuse-with-modifications': 'sur:fmc', 'labeled-for-reuse': 'sur:fc', + 'labeled-for-noncommercial-reuse-with-modification': 'sur:fm', + 'labeled-for-nocommercial-reuse': 'sur:f'}], + 'size': [arguments['size'], + {'large': 'isz:l', 'medium': 'isz:m', 'icon': 'isz:i', '>400*300': 'isz:lt,islt:qsvga', + '>640*480': 'isz:lt,islt:vga', '>800*600': 'isz:lt,islt:svga', + '>1024*768': 'visz:lt,islt:xga', '>2MP': 'isz:lt,islt:2mp', '>4MP': 'isz:lt,islt:4mp', + '>6MP': 'isz:lt,islt:6mp', '>8MP': 'isz:lt,islt:8mp', '>10MP': 'isz:lt,islt:10mp', + '>12MP': 'isz:lt,islt:12mp', '>15MP': 'isz:lt,islt:15mp', '>20MP': 'isz:lt,islt:20mp', + '>40MP': 'isz:lt,islt:40mp', '>70MP': 'isz:lt,islt:70mp'}], + 'type': [arguments['type'], {'face': 'itp:face', 'photo': 'itp:photo', 'clipart': 'itp:clipart', + 'line-drawing': 'itp:lineart', 'animated': 'itp:animated'}], + 'time': [arguments['time'], {'past-24-hours': 'qdr:d', 'past-7-days': 'qdr:w', 'past-month': 'qdr:m', + 'past-year': 'qdr:y'}], + 'aspect_ratio': [arguments['aspect_ratio'], + {'tall': 'iar:t', 'square': 'iar:s', 'wide': 'iar:w', 'panoramic': 'iar:xw'}], + 'format': [arguments['format'], + {'jpg': 'ift:jpg', 'gif': 'ift:gif', 'png': 'ift:png', 'bmp': 'ift:bmp', 'svg': 'ift:svg', + 'webp': 'webp', 'ico': 'ift:ico', 'raw': 'ift:craw'}]} for key, value in params.items(): if value[0] is not None: ext_param = value[1][value[0]] @@ -482,13 +558,12 @@ def build_url_parameters(self,arguments): else: built_url = built_url + ',' + ext_param counter += 1 - built_url = lang_url+built_url+exact_size+time_range + built_url = lang_url + built_url + exact_size + time_range return built_url - - #building main search URL - def build_search_url(self,search_term,params,url,similar_images,specific_site,safe_search): - #check safe_search + # building main search URL + def build_search_url(self, search_term, params, url, similar_images, specific_site, safe_search): + # check safe_search safe_search_string = "&safe=active" # check the args and choose the URL if url: @@ -499,20 +574,21 @@ def build_search_url(self,search_term,params,url,similar_images,specific_site,sa url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' elif specific_site: url = 'https://www.google.com/search?q=' + quote( - search_term.encode('utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term.encode( + 'utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' else: url = 'https://www.google.com/search?q=' + quote( - search_term.encode('utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + 
'&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term.encode( + 'utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - #safe search check + # safe search check if safe_search: url = url + safe_search_string return url - - #measures the file size - def file_size(self,file_path): + # measures the file size + def file_size(self, file_path): if os.path.isfile(file_path): file_info = os.stat(file_path) size = file_info.st_size @@ -522,8 +598,8 @@ def file_size(self,file_path): size /= 1024.0 return size - #keywords from file - def keywords_from_file(self,file_name): + # keywords from file + def keywords_from_file(self, file_name): search_keyword = [] with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: if '.csv' in file_name: @@ -545,7 +621,7 @@ def keywords_from_file(self,file_name): return search_keyword # make directories - def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): + def create_directories(self, main_directory, dir_name, thumbnail, thumbnail_only): dir_name_thumbnail = dir_name + " - thumbnail" # make a search keyword directory try: @@ -575,13 +651,13 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): pass return - # Download Image thumbnails - def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls): + def download_image_thumbnail(self, image_url, main_directory, dir_name, return_image_name, print_urls, + socket_timeout, print_size, no_download, save_source, img_src, ignore_urls): if print_urls or no_download: print("Image URL: " + image_url) if no_download: - return "success","Printed url without downloading" + return "success", "Printed url without downloading" try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) @@ -604,7 +680,7 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image output_file.close() if save_source: list_path = main_directory + "/" + save_source + ".txt" - list_file = open(list_path,'a') + list_file = open(list_path, 'a') list_file.write(path + '\t' + img_src + '\n') list_file.close() except OSError as e: @@ -642,9 +718,10 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) return download_status, download_message - # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls): + def download_image(self, image_url, image_format, main_directory, dir_name, count, print_urls, socket_timeout, + prefix, print_size, no_numbering, no_download, save_source, img_src, silent_mode, thumbnail_only, + format, ignore_urls): if not silent_mode: if print_urls or no_download: print("Image URL: " + image_url) @@ -654,7 +731,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri if thumbnail_only: return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url if no_download: - return "success","Printed url without downloading",None,image_url + return "success", "Printed url without downloading", None, image_url try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) @@ -722,7 +799,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri output_file.close() if save_source: list_path = main_directory + "/" + save_source + ".txt" - list_file = open(list_path,'a') + list_file = open(list_path, 'a') list_file.write(path + '\t' + img_src + '\n') list_file.close() absolute_path = os.path.abspath(path) @@ -732,7 +809,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri return_image_name = '' absolute_path = '' - #return image name back to calling method to use it for thumbnail downloads + # return image name back to calling method to use it for thumbnail downloads download_status = 'success' download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name return_image_name = prefix + str(count) + "." + image_name @@ -753,7 +830,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_message = "URLError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' absolute_path = '' - + except BadStatusLine as e: download_status = 'fail' download_message = "BadStatusLine on an image...trying next one..." + " Error: " + str(e) @@ -790,63 +867,43 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri return_image_name = '' absolute_path = '' - return download_status,download_message,return_image_name,absolute_path - - - # Finding 'Next Image' from the given raw page - def _get_next_item(self,s): - start_line = s.find('rg_meta notranslate') - if start_line == -1: # If no links are found then give an error! 
- end_quote = 0 - link = "no_links" - return link, end_quote - else: - start_line = s.find('class="rg_meta notranslate">') - start_object = s.find('{', start_line + 1) - end_object = s.find('', start_object + 1) - object_raw = str(s[start_object:end_object]) - #remove escape characters based on python version - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: #python3 - try: - object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") - final_object = json.loads(object_decode) - except: - final_object = "" - else: #python2 - try: - final_object = (json.loads(self.repair(object_raw))) - except: - final_object = "" - return final_object, end_object + return download_status, download_message, return_image_name, absolute_path - def _get_all_items(self,image_objects,main_directory,dir_name,limit,arguments): + def _get_all_items(self, image_objects, main_directory, dir_name, limit, arguments): items = [] abs_path = [] errorCount = 0 i = 0 count = 1 - while count < limit+1 and i" + " Item name = " + (pky) + (search_keyword[i]) + (sky) + while i < len(search_keyword): # 3.for every main keyword + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + ( + search_keyword[i]) + (sky) if not arguments["silent_mode"]: print(iteration.encode('raw_unicode_escape').decode('utf-8')) print("Evaluating...") @@ -1004,40 +1063,45 @@ def download_executor(self,arguments): elif arguments['no_directory']: dir_name = '' else: - dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory + dir_name = search_term + ( + '-' + arguments['color'] if arguments['color'] else '') # sub-directory if not arguments["no_download"]: - self.create_directories(main_directory,dir_name,arguments['thumbnail'],arguments['thumbnail_only']) #create directories in OS + self.create_directories(main_directory, dir_name, arguments['thumbnail'], + arguments['thumbnail_only']) # create directories in OS - params = self.build_url_parameters(arguments) #building URL with params + params = self.build_url_parameters(arguments) # building URL with params - url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site'],arguments['safe_search']) #building main search url + url = self.build_search_url(search_term, params, arguments['url'], arguments['similar_images'], + arguments['specific_site'], + arguments['safe_search']) # building main search url if limit < 101: images, tabs = self.download_page(url) # download page else: - images, tabs = self.download_extended_page(url,arguments['chromedriver']) + images, tabs = self.download_extended_page(url, arguments['chromedriver']) if not arguments["silent_mode"]: if arguments['no_download']: print("Getting URLs without downloading images...") else: print("Starting Download...") - items,errorCount,abs_path = self._get_all_items(images,main_directory,dir_name,limit,arguments) #get all image items and download images + items, errorCount, abs_path = self._get_all_items(images, main_directory, dir_name, limit, + arguments) # get all image items and download images paths[pky + search_keyword[i] + sky] = abs_path - #dumps into a json file + # dumps into a json file if arguments['extract_metadata']: try: if not os.path.exists("logs"): os.makedirs("logs") except OSError as e: print(e) - json_file = open("logs/"+search_keyword[i]+".json", "w") + json_file = open("logs/" + search_keyword[i] + ".json", "w") json.dump(items, json_file, indent=4, 
sort_keys=True)
 json_file.close()
- #Related images
+ # Related images
 if arguments['related_images']:
 print("\nGetting list of related keywords...this may take a few moments")
 for key, value in tabs.items():
@@ -1046,9 +1110,10 @@ def download_executor(self,arguments):
 if limit < 101:
 images, _ = self.download_page(value) # download page
 else:
- images, _ = self.download_extended_page(value,arguments['chromedriver'])
- self.create_directories(main_directory, final_search_term,arguments['thumbnail'],arguments['thumbnail_only'])
- self._get_all_items(images, main_directory, search_term + " - " + key, limit,arguments)
+ images, _ = self.download_extended_page(value, arguments['chromedriver'])
+ self.create_directories(main_directory, final_search_term, arguments['thumbnail'],
+ arguments['thumbnail_only'])
+ self._get_all_items(images, main_directory, search_term + " - " + key, limit, arguments)
 i += 1
 total_errors = total_errors + errorCount
@@ -1056,7 +1121,8 @@
 print("\nErrors: " + str(errorCount) + "\n")
 return paths, total_errors
-#------------- Main Program -------------#
+
+# ------------- Main Program -------------#
 def main():
 records = user_input()
 total_errors = 0
@@ -1068,7 +1134,7 @@ def main():
 response.single_image(arguments['single_image'])
 else: # or download multiple images based on keywords/keyphrase search
 response = googleimagesdownload()
- paths,errors = response.download(arguments) #wrapping response in a variable just for consistency
+ paths, errors = response.download(arguments) # wrapping response in a variable just for consistency
 total_errors = total_errors + errors
 t1 = time.time() # stop the timer
@@ -1078,5 +1144,6 @@
 print("Total errors: " + str(total_errors))
 print("Total time taken: " + str(total_time) + " Seconds")
+
 if __name__ == "__main__":
 main()

From bcb2af34a96ab3fc249ac605f2b951227c4aef6a Mon Sep 17 00:00:00 2001
From: Joe Clinton <48254978+Joeclinton1@users.noreply.github.com>
Date: Sun, 6 Sep 2020 15:39:20 +0100
Subject: [PATCH 12/31] Fixed end_object find code

Previously the end_object for the data pack was found by searching for
'</script>' and then going 4 characters back. However, Google in a recent
update added ', sideChannel: {}});' to the end of the data pack, which
throws that fixed offset off. To fix this, the end_object-finding code now
searches for '</script>' first and then for the first ']' to the left of
that closing script tag. This should be more flexible.
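
For illustration, the new lookup behaves roughly like this (a minimal
sketch; the sample page string below is invented for the example, and the
real method also anchors the search on the "AF_initDataCallback({key: ..."
marker first):

    # The data pack now ends with ", sideChannel: {}});" before the closing
    # script tag, so anchor on the last ']' before '</script>' instead of
    # stepping back a fixed number of characters.
    sample_page = "AF_initDataCallback({key: 'ds:1', data: [[1, 2], [3]], sideChannel: {}});</script>"
    start_object = sample_page.find('[')
    end_object = sample_page.rfind(']', 0, sample_page.find('</script>', start_object + 1)) + 1
    print(sample_page[start_object:end_object])  # prints: [[1, 2], [3]]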
---
 google_images_download/google_images_download.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 4a6cf9eb..2f2dfa01 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -181,14 +181,14 @@ def __init__(self):
 def _extract_data_pack(self, page):
 start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10
 start_object = page.find('[', start_line + 1)
- end_object = page.find('</script>', start_object + 1) - 5
+ end_object = page.rfind(']',0,page.find('</script>', start_object + 1)) + 1
 object_raw = str(page[start_object:end_object])
 return bytes(object_raw, "utf-8").decode("unicode_escape")

 def _extract_data_pack_extended(self, page):
 start_line = page.find("AF_initDataCallback({key: 'ds:1'") - 10
 start_object = page.find('[', start_line + 1)
- end_object = page.find('</script>', start_object + 1) - 4
+ end_object = page.rfind(']',0,page.find('</script>', start_object + 1)) + 1
 return str(page[start_object:end_object])

 def _extract_data_pack_ajax(self, data):
@@ -196,6 +196,7 @@ def _extract_data_pack_ajax(self, data):
 return json.loads(lines[3] + lines[4])[0][2]

 def _image_objects_from_pack(self, data):
+ print(data)
 image_objects = json.loads(data)[31][0][12][2]
 image_objects = [x for x in image_objects if x[0] == 1]
 return image_objects
@@ -214,6 +215,7 @@ def download_page(self, url):
 respData = str(resp.read())
 return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData)
 except Exception as e:
+ print(e)
 print("Could not open URL. Please check your internet connection and/or ssl settings \n"
 "If you are using proxy, make sure your proxy settings is configured correctly")
 sys.exit()
@@ -264,16 +266,13 @@ def download_extended_page(self, url, chromedriver):
 var open = XHR.prototype.open;
 var send = XHR.prototype.send;
 var data = [];
-
 XHR.prototype.open = function(method, url, async, user, pass) {
 this._url = url;
 open.call(this, method, url, async, user, pass);
 }
-
 XHR.prototype.send = function(data) {
 var self = this;
 var url = this._url;
-
 function stateChanged() {
 if (self.readyState == 4) {
 console.log("data available for: " + url)
@@ -285,7 +284,6 @@ def download_extended_page(self, url, chromedriver):
 }
 send.call(this, data);
 };
-
 XHR.prototype._data = [];
 })(XMLHttpRequest);
 """)

From 58a190b584bd690c6eaab745d519179a11c7c484 Mon Sep 17 00:00:00 2001
From: Joe Clinton <48254978+Joeclinton1@users.noreply.github.com>
Date: Sun, 6 Sep 2020 16:05:35 +0100
Subject: [PATCH 13/31] Improved exception handling

Previously, if the data unpacking failed, the user was told that the URL
could not be opened, which is the wrong exception. I fixed this by
splitting the data unpacking and the URL opening into separate parts so
that each can have its own exception. This should make it easier to
identify what has gone wrong.
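
In outline, download_page now separates the two failure modes like this
(a simplified sketch, not the exact method body; the unpack and get_tabs
parameters stand in for the real _image_objects_from_pack /
_extract_data_pack and get_all_tabs calls):

    import sys
    from urllib.request import Request, urlopen

    def download_page_sketch(url, headers, unpack, get_tabs):
        # Failure mode 1: the request itself (connection, SSL, proxy).
        try:
            resp_data = str(urlopen(Request(url, headers=headers)).read())
        except Exception:
            print("Could not open URL. Please check your internet connection and/or ssl settings")
            sys.exit()
        # Failure mode 2: the page opened fine, but the data-pack layout
        # changed, so unpacking the image objects fails.
        try:
            return unpack(resp_data), get_tabs(resp_data)
        except Exception as e:
            print(e)
            print("Image objects data unpacking failed.")
            sys.exit()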
---
 google_images_download/google_images_download.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 2f2dfa01..ab00f783 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -181,7 +181,7 @@ def __init__(self):
 def _extract_data_pack(self, page):
 start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10
 start_object = page.find('[', start_line + 1)
- end_object = page.rfind(']',0,page.find('</script>', start_object + 1)) + 1
+ end_object = page.rfind(']',0,page.find('</script>', start_object + 1))+1
 object_raw = str(page[start_object:end_object])
 return bytes(object_raw, "utf-8").decode("unicode_escape")
@@ -196,7 +196,6 @@ def _extract_data_pack_ajax(self, data):
 return json.loads(lines[3] + lines[4])[0][2]

 def _image_objects_from_pack(self, data):
- print(data)
 image_objects = json.loads(data)[31][0][12][2]
 image_objects = [x for x in image_objects if x[0] == 1]
 return image_objects
@@ -213,9 +212,7 @@ def download_page(self, url):
 req = urllib.request.Request(url, headers=headers)
 resp = urllib.request.urlopen(req)
 respData = str(resp.read())
- return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData)
- except Exception as e:
- print(e)
+ except:
 print("Could not open URL. Please check your internet connection and/or ssl settings \n"
 "If you are using proxy, make sure your proxy settings is configured correctly")
 sys.exit()
@@ -230,13 +227,18 @@ def download_page(self, url):
 except URLError: # Handling SSL certificate failed
 context = ssl._create_unverified_context()
 response = urlopen(req, context=context)
- page = response.read()
- return self._image_objects_from_pack(self._extract_data_pack(page)), self.get_all_tabs(page)
+ respData = response.read()
 except:
 print("Could not open URL. Please check your internet connection and/or ssl settings \n"
 "If you are using proxy, make sure your proxy settings is configured correctly")
 sys.exit()
 return "Page Not found"
+ try:
+ return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData)
+ except Exception as e:
+ print(e)
+ print('Image objects data unpacking failed. Please leave a comment with the above error at https://github.com/hardikvasa/google-images-download/pull/298')
+ sys.exit()

 # Download Page for more than 100 images
 def download_extended_page(self, url, chromedriver):

From aa817df6701b4c9fd64cea1f517d863a9a558693 Mon Sep 17 00:00:00 2001
From: Joe Clinton
Date: Sun, 31 Jan 2021 16:29:03 +0000
Subject: [PATCH 14/31] Updated user agent to use newer browser.

---
 README.rst | 4 ++--
 docs/arguments.rst | 2 +-
 docs/index.rst | 4 ++--
 google_images_download/google_images_download.py | 9 +++------
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/README.rst b/README.rst
index b1e08f65..14d86228 100644
--- a/README.rst
+++ b/README.rst
@@ -23,7 +23,7 @@ The original creators of the images own the copyrights.
 Images published in the United States are automatically copyrighted by their owners,
 even if they do not explicitly carry a copyright warning.
-You may not reproduce copyright images without their owner's permission,
+You may not reproduce copyright images without their owner'self permission,
 except in "fair use" cases,
-or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits.
+or you could risk running into lawyer'self warnings, cease-and-desist letters, and copyright suits. Please be very careful before its usage! Use this script/code only for educational purposes. diff --git a/docs/arguments.rst b/docs/arguments.rst index e18035c9..ead86ab1 100644 --- a/docs/arguments.rst +++ b/docs/arguments.rst @@ -85,7 +85,7 @@ Link to `Documentation Homepage 400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | | | | >12MP, >15MP, >20MP, >40MP, >70MP` | diff --git a/docs/index.rst b/docs/index.rst index 3e276d00..001fb252 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -127,7 +127,7 @@ Disclaimer Images published in the United States are automatically copyrighted by their owners, even if they do not explicitly carry a copyright warning. - You may not reproduce copyright images without their owner's permission, + You may not reproduce copyright images without their owner'self permission, except in "fair use" cases, - or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. + or you could risk running into lawyer'self warnings, cease-and-desist letters, and copyright suits. Please be very careful before its usage! diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index ab00f783..49dcee77 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -204,11 +204,11 @@ def _image_objects_from_pack(self, data): def download_page(self, url): version = (3, 0) cur_version = sys.version_info + headers = {} + headers[ + 'User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36" if cur_version >= version: # If the Current Version of Python is 3.0 or above try: - headers = {} - headers[ - 'User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" req = urllib.request.Request(url, headers=headers) resp = urllib.request.urlopen(req) respData = str(resp.read()) @@ -218,9 +218,6 @@ def download_page(self, url): sys.exit() else: # If the Current Version of Python is 2.x try: - headers = {} - headers[ - 'User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" req = urllib2.Request(url, headers=headers) try: response = urllib2.urlopen(req) From 2a310f1b3386d63dfd0661e45c513ac2e4e38f2b Mon Sep 17 00:00:00 2001 From: explosion1206 <40578236+estuhr1206@users.noreply.github.com> Date: Tue, 25 May 2021 02:26:21 -0400 Subject: [PATCH 15/31] Add files via upload --- google_images_download.py | 1152 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1152 insertions(+) create mode 100644 google_images_download.py diff --git a/google_images_download.py b/google_images_download.py new file mode 100644 index 00000000..51bb251b --- /dev/null +++ b/google_images_download.py @@ -0,0 +1,1152 @@ +#!/usr/bin/env python +# In[ ]: +# coding: utf-8 + +###### Searching and Downloading Google Images to the local disk ###### + +# Import Libraries +import sys + +version = (3, 0) +cur_version = sys.version_info +if cur_version >= version: # If the Current Version of Python is 3.0 or above + import urllib.request + from urllib.request import Request, urlopen + from urllib.request import URLError, HTTPError + from urllib.parse import quote + import http.client + from http.client import IncompleteRead, BadStatusLine + + 
http.client._MAXHEADERS = 1000 +else: # If the Current Version of Python is 2.x + import urllib2 + from urllib2 import Request, urlopen + from urllib2 import URLError, HTTPError + from urllib import quote + import httplib + from httplib import IncompleteRead, BadStatusLine + + httplib._MAXHEADERS = 1000 +import time # Importing the time library to check the time of code execution +import os +import argparse +import ssl +import datetime +import json +import re +import codecs +import socket + +args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", + "limit", "format", "color", "color_type", "usage_rights", "size", + "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", + "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", + "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", + "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", + "no_numbering", + "offset", "no_download", "save_source", "silent_mode", "ignore_urls"] + + +def user_input(): + config = argparse.ArgumentParser() + config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False) + config_file_check = config.parse_known_args() + object_check = vars(config_file_check[0]) + + if object_check['config_file'] != '': + records = [] + json_file = json.load(open(config_file_check[0].config_file)) + for record in range(0, len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + records_count = len(records) + else: + # Taking command line arguments from users + parser = argparse.ArgumentParser() + parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) + parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, + required=False) + parser.add_argument('-sk', '--suffix_keywords', + help='comma separated additional words added after to main keyword', type=str, + required=False) + parser.add_argument('-pk', '--prefix_keywords', + help='comma separated additional words added before main keyword', type=str, required=False) + parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) + parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, + choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) + parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) + parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, + required=False) + parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, + required=False) + parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, + required=False) + parser.add_argument('-n', '--no_directory', default=False, + help='download images in the main directory but no sub-directory', action="store_true") + parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, + required=False) + parser.add_argument('-co', '--color', help='filter on color', type=str, required=False, + choices=['red', 'orange', 'yellow', 'green', 
'teal', 'blue', 'purple', 'pink', 'white', + 'gray', 'black', 'brown']) + parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, + choices=['full-color', 'black-and-white', 'transparent']) + parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, + choices=['labeled-for-reuse-with-modifications', 'labeled-for-reuse', + 'labeled-for-noncommercial-reuse-with-modification', + 'labeled-for-nocommercial-reuse']) + parser.add_argument('-s', '--size', help='image size', type=str, required=False, + choices=['large', 'medium', 'icon', '>400*300', '>640*480', '>800*600', '>1024*768', '>2MP', + '>4MP', '>6MP', '>8MP', '>10MP', '>12MP', '>15MP', '>20MP', '>40MP', '>70MP']) + parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, + required=False) + parser.add_argument('-t', '--type', help='image type', type=str, required=False, + choices=['face', 'photo', 'clipart', 'line-drawing', 'animated']) + parser.add_argument('-w', '--time', help='image age', type=str, required=False, + choices=['past-24-hours', 'past-7-days', 'past-month', 'past-year']) + parser.add_argument('-wr', '--time_range', + help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', + type=str, required=False) + parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, + required=False, + choices=['tall', 'square', 'wide', 'panoramic']) + parser.add_argument('-si', '--similar_images', + help='downloads images very similar to the image URL you provide', type=str, required=False) + parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', + type=str, required=False) + parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", + action="store_true") + parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", + action="store_true") + parser.add_argument('-pp', '--print_paths', default=False, + help="Prints the list of absolute paths of the images", action="store_true") + parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", + action="store_true") + parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", + action="store_true") + parser.add_argument('-st', '--socket_timeout', default=False, + help="Connection timeout waiting for the image to download", type=float) + parser.add_argument('-th', '--thumbnail', default=False, + help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-tho', '--thumbnail_only', default=False, + help="Downloads only thumbnail without downloading actual images", action="store_true") + parser.add_argument('-la', '--language', default=False, + help="Defines the language filter. 
The search results are authomatically returned in that language", + type=str, required=False, + choices=['Arabic', 'Chinese (Simplified)', 'Chinese (Traditional)', 'Czech', 'Danish', + 'Dutch', 'English', 'Estonian', 'Finnish', 'French', 'German', 'Greek', 'Hebrew', + 'Hungarian', 'Icelandic', 'Italian', 'Japanese', 'Korean', 'Latvian', 'Lithuanian', + 'Norwegian', 'Portuguese', 'Polish', 'Romanian', 'Russian', 'Spanish', 'Swedish', + 'Turkish']) + parser.add_argument('-pr', '--prefix', default=False, + help="A word that you would want to prefix in front of each image name", type=str, + required=False) + parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) + parser.add_argument('-cd', '--chromedriver', + help='specify the path to chromedriver executable in your local machine', type=str, + required=False) + parser.add_argument('-ri', '--related_images', default=False, + help="Downloads images that are similar to the keyword provided", action="store_true") + parser.add_argument('-sa', '--safe_search', default=False, + help="Turns on the safe search filter while searching for images", action="store_true") + parser.add_argument('-nn', '--no_numbering', default=False, + help="Allows you to exclude the default numbering of images", action="store_true") + parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) + parser.add_argument('-nd', '--no_download', default=False, + help="Prints the URLs of the images and/or thumbnails without downloading them", + action="store_true") + parser.add_argument('-iu', '--ignore_urls', default=False, + help="delimited list input of image urls/keywords to ignore", type=str) + parser.add_argument('-sil', '--silent_mode', default=False, + help="Remains silent. 
Does not print notification messages on the terminal", + action="store_true") + parser.add_argument('-is', '--save_source', + help="creates a text file containing a list of downloaded images along with source page url", + type=str, required=False) + + args = parser.parse_args() + arguments = vars(args) + records = [] + records.append(arguments) + return records + + +class googleimagesdownload: + def __init__(self): + pass + + def _extract_data_pack(self, page): + start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 + start_object = page.find('[', start_line + 1) + end_object = page.rfind(']',0,page.find('', start_object + 1))+1 + object_raw = str(page[start_object:end_object]) + return bytes(object_raw, "utf-8").decode("unicode_escape") + + def _extract_data_pack_extended(self, page): + start_line = page.find("AF_initDataCallback({key: 'ds:1'") - 10 + start_object = page.find('[', start_line + 1) + end_object = page.rfind(']',0,page.find('', start_object + 1)) + 1 + return str(page[start_object:end_object]) + + def _extract_data_pack_ajax(self, data): + lines = data.split('\n') + return json.loads(lines[3] + lines[4])[0][2] + + def _image_objects_from_pack(self, data): + image_objects = json.loads(data)[31][0][12][2] + image_objects = [x for x in image_objects if x[0] == 1] + return image_objects + + # Downloading entire Web Document (Raw Page Content) + def download_page(self, url): + version = (3, 0) + cur_version = sys.version_info + headers = {} + headers[ + 'User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36" + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + req = urllib.request.Request(url, headers=headers) + resp = urllib.request.urlopen(req) + respData = str(resp.read()) + except: + print("Could not open URL. Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() + else: # If the Current Version of Python is 2.x + try: + req = urllib2.Request(url, headers=headers) + try: + response = urllib2.urlopen(req) + except URLError: # Handling SSL certificate failed + context = ssl._create_unverified_context() + response = urlopen(req, context=context) + respData = response.read() + except: + print("Could not open URL. Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() + return "Page Not found" + try: + return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData) + except Exception as e: + print(e) + print('Image objects data unpacking failed. Please leave a comment with the above error at https://github.com/hardikvasa/google-images-download/pull/298') + sys.exit() + + # Download Page for more than 100 images + def download_extended_page(self, url, chromedriver): + from selenium import webdriver + from selenium.webdriver.common.keys import Keys + if sys.version_info[0] < 3: + reload(sys) + sys.setdefaultencoding('utf8') + options = webdriver.ChromeOptions() + options.add_argument('--no-sandbox') + options.add_argument("--headless") + + try: + browser = webdriver.Chrome(chromedriver, chrome_options=options) + except Exception as e: + print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " + "argument to specify the path to the executable.) 
or google chrome browser is not " + "installed on your machine (exception: %s)" % e) + sys.exit() + browser.set_window_size(1024, 768) + + # Open the link + browser.get(url) + browser.execute_script(""" + (function(XHR){ + "use strict"; + var open = XHR.prototype.open; + var send = XHR.prototype.send; + var data = []; + XHR.prototype.open = function(method, url, async, user, pass) { + this._url = url; + open.call(this, method, url, async, user, pass); + } + XHR.prototype.send = function(data) { + var self = this; + var url = this._url; + function stateChanged() { + if (self.readyState == 4) { + console.log("data available for: " + url) + XHR.prototype._data.push(self.response); + } + } + if (url.includes("/batchexecute?")) { + this.addEventListener("readystatechange", stateChanged, false); + } + send.call(this, data); + }; + XHR.prototype._data = []; + })(XMLHttpRequest); + """) + + time.sleep(1) + print("Getting you a lot of images. This may take a few moments...") + + element = browser.find_element_by_tag_name("body") + # Scroll down + for i in range(30): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) + + try: + browser.find_element_by_id("smb").click() + for i in range(50): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + except: + for i in range(10): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + + print("Reached end of Page.") + time.sleep(0.5) + + source = browser.page_source # page source + images = self._image_objects_from_pack(self._extract_data_pack_extended(source)) + + ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data") + for chunk in ajax_data: + images += self._image_objects_from_pack(self._extract_data_pack_ajax(chunk)) + + # close the browser + browser.close() + + return images, self.get_all_tabs(source) + + # Correcting the escape characters for python2 + def replace_with_byte(self, match): + return chr(int(match.group(0)[1:], 8)) + + def repair(self, brokenjson): + invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF + return invalid_escape.sub(self.replace_with_byte, brokenjson) + + # Finding 'Next Image' from the given raw page + def get_next_tab(self, s): + start_line = s.find('class="dtviD"') + if start_line == -1: # If no links are found then give an error! 
+ end_quote = 0 + link = "no_tabs" + return link, '', end_quote + else: + start_line = s.find('class="dtviD"') + start_content = s.find('href="', start_line + 1) + end_content = s.find('">', start_content + 1) + url_item = "https://www.google.com" + str(s[start_content + 6:end_content]) + url_item = url_item.replace('&', '&') + + start_line_2 = s.find('class="dtviD"') + s = s.replace('&', '&') + start_content_2 = s.find(':', start_line_2 + 1) + end_content_2 = s.find('&usg=', start_content_2 + 1) + url_item_name = str(s[start_content_2 + 1:end_content_2]) + + chars = url_item_name.find(',g_1:') + chars_end = url_item_name.find(":", chars + 6) + if chars_end == -1: + updated_item_name = (url_item_name[chars + 5:]).replace("+", " ") + else: + updated_item_name = (url_item_name[chars + 5:chars_end]).replace("+", " ") + + return url_item, updated_item_name, end_content + + # Getting all links with the help of '_images_get_next_image' + def get_all_tabs(self, page): + tabs = {} + while True: + item, item_name, end_content = self.get_next_tab(page) + if item == "no_tabs": + break + else: + if len(item_name) > 100 or item_name == "background-color": + break + else: + tabs[item_name] = item # Append all the links in the list named 'Links' + time.sleep(0.1) # Timer could be used to slow down the request for image downloads + page = page[end_content:] + return tabs + + # Format the object in readable format + def format_object(self, object): + data = object[1] + main = data[3] + info = data[9] + if info is None: + info = data[11] + formatted_object = {} + try: + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format'] = main[0][-1 * (len(main[0]) - main[0].rfind(".") - 1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] + except Exception as e: + print(e) + return None + return formatted_object + + # function to download single image + def single_image(self, image_url): + main_directory = "downloads" + extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") + url = image_url + try: + os.makedirs(main_directory) + except OSError as e: + if e.errno != 17: + raise + pass + req = Request(url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + + response = urlopen(req, None, 10) + data = response.read() + response.close() + + image_name = str(url[(url.rfind('/')) + 1:]) + if '?' 
in image_name: + image_name = image_name[:image_name.find('?')] + # if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + if any(map(lambda extension: extension in image_name, extensions)): + file_name = main_directory + "/" + image_name + else: + file_name = main_directory + "/" + image_name + ".jpg" + image_name = image_name + ".jpg" + + try: + output_file = open(file_name, 'wb') + output_file.write(data) + output_file.close() + except IOError as e: + raise e + except OSError as e: + raise e + print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) + return + + def similar_images(self, similar_images): + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers[ + 'User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + + req1 = urllib.request.Request(searchUrl, headers=headers) + resp1 = urllib.request.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + req2 = urllib.request.Request(newurl, headers=headers) + resp2 = urllib.request.urlopen(req2) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return urll2 + except: + return "Cloud not connect to Google Images endpoint" + else: # If the Current Version of Python is 2.x + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers[ + 'User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + + req1 = urllib2.Request(searchUrl, headers=headers) + resp1 = urllib2.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + req2 = urllib2.Request(newurl, headers=headers) + resp2 = urllib2.urlopen(req2) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return (urll2) + except: + return "Cloud not connect to Google Images endpoint" + + # Building URL parameters + def build_url_parameters(self, arguments): + if arguments['language']: + lang = "&lr=" + lang_param = {"Arabic": "lang_ar", "Chinese (Simplified)": "lang_zh-CN", + "Chinese (Traditional)": "lang_zh-TW", "Czech": "lang_cs", "Danish": "lang_da", + "Dutch": "lang_nl", "English": "lang_en", "Estonian": "lang_et", "Finnish": "lang_fi", + "French": "lang_fr", "German": "lang_de", "Greek": "lang_el", "Hebrew": "lang_iw ", + "Hungarian": "lang_hu", "Icelandic": "lang_is", "Italian": "lang_it", "Japanese": "lang_ja", + "Korean": "lang_ko", "Latvian": "lang_lv", "Lithuanian": "lang_lt", "Norwegian": "lang_no", + "Portuguese": "lang_pt", "Polish": "lang_pl", "Romanian": "lang_ro", "Russian": "lang_ru", + "Spanish": "lang_es", "Swedish": "lang_sv", "Turkish": "lang_tr"} + lang_url = lang + lang_param[arguments['language']] + else: + lang_url = '' + + if arguments['time_range']: + json_acceptable_string = arguments['time_range'].replace("'", "\"") + d 
= json.loads(json_acceptable_string) + time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max'] + else: + time_range = '' + + if arguments['exact_size']: + size_array = [x.strip() for x in arguments['exact_size'].split(',')] + exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1]) + else: + exact_size = '' + + built_url = "&tbs=" + counter = 0 + params = {'color': [arguments['color'], {'red': 'ic:specific,isc:red', 'orange': 'ic:specific,isc:orange', + 'yellow': 'ic:specific,isc:yellow', 'green': 'ic:specific,isc:green', + 'teal': 'ic:specific,isc:teel', 'blue': 'ic:specific,isc:blue', + 'purple': 'ic:specific,isc:purple', 'pink': 'ic:specific,isc:pink', + 'white': 'ic:specific,isc:white', 'gray': 'ic:specific,isc:gray', + 'black': 'ic:specific,isc:black', 'brown': 'ic:specific,isc:brown'}], + 'color_type': [arguments['color_type'], + {'full-color': 'ic:color', 'black-and-white': 'ic:gray', 'transparent': 'ic:trans'}], + 'usage_rights': [arguments['usage_rights'], + {'labeled-for-reuse-with-modifications': 'sur:fmc', 'labeled-for-reuse': 'sur:fc', + 'labeled-for-noncommercial-reuse-with-modification': 'sur:fm', + 'labeled-for-nocommercial-reuse': 'sur:f'}], + 'size': [arguments['size'], + {'large': 'isz:l', 'medium': 'isz:m', 'icon': 'isz:i', '>400*300': 'isz:lt,islt:qsvga', + '>640*480': 'isz:lt,islt:vga', '>800*600': 'isz:lt,islt:svga', + '>1024*768': 'visz:lt,islt:xga', '>2MP': 'isz:lt,islt:2mp', '>4MP': 'isz:lt,islt:4mp', + '>6MP': 'isz:lt,islt:6mp', '>8MP': 'isz:lt,islt:8mp', '>10MP': 'isz:lt,islt:10mp', + '>12MP': 'isz:lt,islt:12mp', '>15MP': 'isz:lt,islt:15mp', '>20MP': 'isz:lt,islt:20mp', + '>40MP': 'isz:lt,islt:40mp', '>70MP': 'isz:lt,islt:70mp'}], + 'type': [arguments['type'], {'face': 'itp:face', 'photo': 'itp:photo', 'clipart': 'itp:clipart', + 'line-drawing': 'itp:lineart', 'animated': 'itp:animated'}], + 'time': [arguments['time'], {'past-24-hours': 'qdr:d', 'past-7-days': 'qdr:w', 'past-month': 'qdr:m', + 'past-year': 'qdr:y'}], + 'aspect_ratio': [arguments['aspect_ratio'], + {'tall': 'iar:t', 'square': 'iar:s', 'wide': 'iar:w', 'panoramic': 'iar:xw'}], + 'format': [arguments['format'], + {'jpg': 'ift:jpg', 'gif': 'ift:gif', 'png': 'ift:png', 'bmp': 'ift:bmp', 'svg': 'ift:svg', + 'webp': 'webp', 'ico': 'ift:ico', 'raw': 'ift:craw'}]} + for key, value in params.items(): + if value[0] is not None: + ext_param = value[1][value[0]] + # counter will tell if it is first param added or not + if counter == 0: + # add it to the built url + built_url = built_url + ext_param + counter += 1 + else: + built_url = built_url + ',' + ext_param + counter += 1 + built_url = lang_url + built_url + exact_size + time_range + return built_url + + # building main search URL + def build_search_url(self, search_term, params, url, similar_images, specific_site, safe_search): + # check safe_search + safe_search_string = "&safe=active" + # check the args and choose the URL + if url: + url = url + elif similar_images: + print(similar_images) + keywordem = self.similar_images(similar_images) + url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + elif specific_site: + url = 'https://www.google.com/search?q=' + quote( + search_term.encode( + 'utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + else: + url = 'https://www.google.com/search?q=' + 
quote( + search_term.encode( + 'utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + + # safe search check + if safe_search: + url = url + safe_search_string + + return url + + # measures the file size + def file_size(self, file_path): + if os.path.isfile(file_path): + file_info = os.stat(file_path) + size = file_info.st_size + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if size < 1024.0: + return "%3.1f %s" % (size, x) + size /= 1024.0 + return size + + # keywords from file + def keywords_from_file(self, file_name): + search_keyword = [] + with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: + if '.csv' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + elif '.txt' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + else: + print("Invalid file type: Valid file types are either .txt or .csv \n" + "exiting...") + sys.exit() + return search_keyword + + # make directories + def create_directories(self, main_directory, dir_name, thumbnail, thumbnail_only): + dir_name_thumbnail = dir_name + " - thumbnail" + # make a search keyword directory + try: + if not os.path.exists(main_directory): + os.makedirs(main_directory) + time.sleep(0.15) + path = (dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail or thumbnail_only: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + else: + path = (dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail or thumbnail_only: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + except OSError as e: + if e.errno != 17: + raise + pass + return + + # Download Image thumbnails + def download_image_thumbnail(self, image_url, main_directory, dir_name, return_image_name, print_urls, + socket_timeout, print_size, no_download, save_source, img_src, ignore_urls): + if print_urls or no_download: + print("Image URL: " + image_url) + if no_download: + return "success", "Printed url without downloading" + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + + response = urlopen(req, None, timeout) + data = response.read() + response.close() + + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path, 'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) + + download_status = 'success' + download_message = "Completed Image Thumbnail ====> " + return_image_name + + # image size parameter + if print_size: + print("Image Size: " + str(self.file_size(path))) + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return download_status, download_message + + # Download Images + def download_image(self, image_url, image_format, main_directory, dir_name, count, print_urls, socket_timeout, + prefix, print_size, no_numbering, no_download, save_source, img_src, silent_mode, thumbnail_only, + format, ignore_urls): + if not silent_mode: + if print_urls or no_download: + print("Image URL: " + image_url) + if ignore_urls: + if any(url in image_url for url in ignore_urls.split(',')): + return "fail", "Image ignored due to 'ignore url' parameter", None, image_url + if thumbnail_only: + return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url + if no_download: + return "success", "Printed url without downloading", None, image_url + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + + response = urlopen(req, None, timeout) + data = response.read() + info = response.info() + response.close() + + qmark = image_url.rfind('?') + if qmark == -1: + qmark = len(image_url) + slash = image_url.rfind('/', 0, qmark) + 1 + image_name = str(image_url[slash:qmark]).lower() + + type = info.get_content_type() + if type == "image/jpeg" or type == "image/jpg": + if not image_name.endswith(".jpg") and not image_name.endswith(".jpeg"): + image_name += ".jpg" + elif type == "image/png": + if not image_name.endswith(".png"): + image_name += ".png" + elif type == "image/webp": + if not image_name.endswith(".webp"): + image_name += ".webp" + elif type == "image/gif": + if not image_name.endswith(".gif"): + image_name += ".gif" + elif type == "image/bmp" or type == "image/x-windows-bmp": + if not image_name.endswith(".bmp"): + image_name += ".bmp" + elif type == "image/x-icon" or type == "image/vnd.microsoft.icon": + if not image_name.endswith(".ico"): + image_name += ".ico" + elif type == "image/svg+xml": + if not image_name.endswith(".svg"): + image_name += ".svg" + else: + download_status = 'fail' + download_message = "Invalid image format '" + type + "'. Skipping..." 
+ return_image_name = '' + absolute_path = '' + return download_status, download_message, return_image_name, absolute_path + + # prefix name in image + if prefix: + prefix = prefix + " " + else: + prefix = '' + + if no_numbering: + path = main_directory + "/" + dir_name + "/" + prefix + image_name + else: + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + "." + image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path, 'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() + absolute_path = os.path.abspath(path) + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + # return image name back to calling method to use it for thumbnail downloads + download_status = 'success' + download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name + return_image_name = prefix + str(count) + "." + image_name + + # image size parameter + if not silent_mode: + if print_size: + print("Image Size: " + str(self.file_size(path))) + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except BadStatusLine as e: + download_status = 'fail' + download_message = "BadStatusLine on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except IncompleteRead as e: + download_status = 'fail' + download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + return download_status, download_message, return_image_name, absolute_path + + def _get_all_items(self, image_objects, main_directory, dir_name, limit, arguments): + items = [] + abs_path = [] + errorCount = 0 + i = 0 + count = 1 + while count < limit + 1 and i < len(image_objects): + if len(image_objects) == 0: + print("no_links") + break + #code added here to attempt to implement offset correctly + #was "count < int(arguments['offset'])" in hardikvasa code, this seems + # to be contrary to the implementation details. 
+ elif arguments['offset'] and count <= int(arguments['offset']): + count += 1 + #page = page[end_content:] + else: + # format the item for readability + object = self.format_object(image_objects[i]) + if arguments['metadata']: + if not arguments["silent_mode"]: + print("\nImage Metadata: " + str(object)) + + # download the images + download_status, download_message, return_image_name, absolute_path = self.download_image( + object['image_link'], object['image_format'], main_directory, dir_name, count, + arguments['print_urls'], arguments['socket_timeout'], arguments['prefix'], arguments['print_size'], + arguments['no_numbering'], arguments['no_download'], arguments['save_source'], + object['image_source'], arguments["silent_mode"], arguments["thumbnail_only"], arguments['format'], + arguments['ignore_urls']) + if not arguments["silent_mode"]: + print(download_message) + if download_status == "success": + + # download image_thumbnails + if arguments['thumbnail'] or arguments["thumbnail_only"]: + download_status, download_message_thumbnail = self.download_image_thumbnail( + object['image_thumbnail_url'], main_directory, dir_name, return_image_name, + arguments['print_urls'], arguments['socket_timeout'], arguments['print_size'], + arguments['no_download'], arguments['save_source'], object['image_source'], + arguments['ignore_urls']) + if not arguments["silent_mode"]: + print(download_message_thumbnail) + + count += 1 + object['image_filename'] = return_image_name + items.append(object) # Append all the links in the list named 'Links' + abs_path.append(absolute_path) + else: + errorCount += 1 + + # delay param + if arguments['delay']: + time.sleep(int(arguments['delay'])) + i += 1 + if count < limit: + print("\n\nUnfortunately all " + str( + limit) + " could not be downloaded because some images were not downloadable. 
" + str( + count - 1) + " is all we got for this search filter!") + return items, errorCount, abs_path + + # Bulk Download + def download(self, arguments): + paths_agg = {} + # for input coming from other python files + if __name__ != "__main__": + # if the calling file contains config_file param + if 'config_file' in arguments: + records = [] + json_file = json.load(open(arguments['config_file'])) + for record in range(0, len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + total_errors = 0 + for rec in records: + paths, errors = self.download_executor(rec) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + total_errors = total_errors + errors + return paths_agg, total_errors + # if the calling file contains params directly + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + # for input coming from CLI + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + + def download_executor(self, arguments): + paths = {} + errorCount = None + for arg in args_list: + if arg not in arguments: + arguments[arg] = None + ######Initialization and Validation of user arguments + if arguments['keywords']: + search_keyword = [str(item) for item in arguments['keywords'].split(',')] + + if arguments['keywords_from_file']: + search_keyword = self.keywords_from_file(arguments['keywords_from_file']) + + # both time and time range should not be allowed in the same query + if arguments['time'] and arguments['time_range']: + raise ValueError( + 'Either time or time range should be used in a query. Both cannot be used at the same time.') + + # both time and time range should not be allowed in the same query + if arguments['size'] and arguments['exact_size']: + raise ValueError( + 'Either "size" or "exact_size" should be used in a query. 
Both cannot be used at the same time.') + + # both image directory and no image directory should not be allowed in the same query + if arguments['image_directory'] and arguments['no_directory']: + raise ValueError('You can either specify image directory or specify no image directory, not both!') + + # Additional words added to keywords + if arguments['suffix_keywords']: + suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] + else: + suffix_keywords = [''] + + # Additional words added to keywords + if arguments['prefix_keywords']: + prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] + else: + prefix_keywords = [''] + + # Setting limit on number of images to be downloaded + if arguments['limit']: + limit = int(arguments['limit']) + else: + limit = 100 + + if arguments['url']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + if arguments['similar_images']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + # If single_image or url argument not present then keywords is mandatory argument + if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \ + arguments['keywords'] is None and arguments['keywords_from_file'] is None: + print('-------------------------------\n' + 'Uh oh! Keywords is a required argument \n\n' + 'Please refer to the documentation on guide to writing queries \n' + 'https://github.com/hardikvasa/google-images-download#examples' + '\n\nexiting!\n' + '-------------------------------') + sys.exit() + + # If this argument is present, set the custom output directory + if arguments['output_directory']: + main_directory = arguments['output_directory'] + else: + main_directory = "downloads" + + # Proxy settings + if arguments['proxy']: + os.environ["http_proxy"] = arguments['proxy'] + os.environ["https_proxy"] = arguments['proxy'] + ######Initialization Complete + total_errors = 0 + for pky in prefix_keywords: # 1.for every prefix keywords + for sky in suffix_keywords: # 2.for every suffix keywords + i = 0 + while i < len(search_keyword): # 3.for every main keyword + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + ( + search_keyword[i]) + (sky) + if not arguments["silent_mode"]: + print(iteration.encode('raw_unicode_escape').decode('utf-8')) + print("Evaluating...") + else: + print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...") + search_term = pky + search_keyword[i] + sky + + if arguments['image_directory']: + dir_name = arguments['image_directory'] + elif arguments['no_directory']: + dir_name = '' + else: + dir_name = search_term + ( + '-' + arguments['color'] if arguments['color'] else '') # sub-directory + + if not arguments["no_download"]: + self.create_directories(main_directory, dir_name, arguments['thumbnail'], + arguments['thumbnail_only']) # create directories in OS + + params = self.build_url_parameters(arguments) # building URL with params + + url = self.build_search_url(search_term, params, arguments['url'], arguments['similar_images'], + arguments['specific_site'], + arguments['safe_search']) # building main search url + + if limit < 101: + images, tabs = self.download_page(url) # download page + else: + images, tabs = self.download_extended_page(url, arguments['chromedriver']) + + if not arguments["silent_mode"]: + if arguments['no_download']: + print("Getting 
URLs without downloading images...") + else: + print("Starting Download...") + items, errorCount, abs_path = self._get_all_items(images, main_directory, dir_name, limit, + arguments) # get all image items and download images + paths[pky + search_keyword[i] + sky] = abs_path + + # dumps into a json file + if arguments['extract_metadata']: + try: + if not os.path.exists("logs"): + os.makedirs("logs") + except OSError as e: + print(e) + json_file = open("logs/" + search_keyword[i] + ".json", "w") + json.dump(items, json_file, indent=4, sort_keys=True) + json_file.close() + + # Related images + if arguments['related_images']: + print("\nGetting list of related keywords...this may take a few moments") + for key, value in tabs.items(): + final_search_term = (search_term + " - " + key) + print("\nNow Downloading - " + final_search_term) + if limit < 101: + images, _ = self.download_page(value) # download page + else: + images, _ = self.download_extended_page(value, arguments['chromedriver']) + self.create_directories(main_directory, final_search_term, arguments['thumbnail'], + arguments['thumbnail_only']) + self._get_all_items(images, main_directory, search_term + " - " + key, limit, arguments) + + i += 1 + total_errors = total_errors + errorCount + if not arguments["silent_mode"]: + print("\nErrors: " + str(errorCount) + "\n") + return paths, total_errors + + +# ------------- Main Program -------------# +def main(): + records = user_input() + total_errors = 0 + t0 = time.time() # start the timer + for arguments in records: + + if arguments['single_image']: # Download Single Image using a URL + response = googleimagesdownload() + response.single_image(arguments['single_image']) + else: # or download multiple images based on keywords/keyphrase search + response = googleimagesdownload() + paths, errors = response.download(arguments) # wrapping response in a variable just for consistency + total_errors = total_errors + errors + + t1 = time.time() # stop the timer + total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + if not arguments["silent_mode"]: + print("\nEverything downloaded!") + print("Total errors: " + str(total_errors)) + print("Total time taken: " + str(total_time) + " Seconds") + + +if __name__ == "__main__": + main() From c17c55d66a49b804bd3b4510efcae8a23233bf0e Mon Sep 17 00:00:00 2001 From: explosion1206 <40578236+estuhr1206@users.noreply.github.com> Date: Tue, 25 May 2021 02:29:51 -0400 Subject: [PATCH 16/31] Delete google_images_download.py just added to wrong directory by accident --- google_images_download.py | 1152 ------------------------------------- 1 file changed, 1152 deletions(-) delete mode 100644 google_images_download.py diff --git a/google_images_download.py b/google_images_download.py deleted file mode 100644 index 51bb251b..00000000 --- a/google_images_download.py +++ /dev/null @@ -1,1152 +0,0 @@ -#!/usr/bin/env python -# In[ ]: -# coding: utf-8 - -###### Searching and Downloading Google Images to the local disk ###### - -# Import Libraries -import sys - -version = (3, 0) -cur_version = sys.version_info -if cur_version >= version: # If the Current Version of Python is 3.0 or above - import urllib.request - from urllib.request import Request, urlopen - from urllib.request import URLError, HTTPError - from urllib.parse import quote - import http.client - from http.client import IncompleteRead, BadStatusLine - - http.client._MAXHEADERS = 1000 -else: # If the Current Version of Python is 2.x - import 
urllib2 - from urllib2 import Request, urlopen - from urllib2 import URLError, HTTPError - from urllib import quote - import httplib - from httplib import IncompleteRead, BadStatusLine - - httplib._MAXHEADERS = 1000 -import time # Importing the time library to check the time of code execution -import os -import argparse -import ssl -import datetime -import json -import re -import codecs -import socket - -args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", - "limit", "format", "color", "color_type", "usage_rights", "size", - "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", - "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", - "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", - "no_numbering", - "offset", "no_download", "save_source", "silent_mode", "ignore_urls"] - - -def user_input(): - config = argparse.ArgumentParser() - config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False) - config_file_check = config.parse_known_args() - object_check = vars(config_file_check[0]) - - if object_check['config_file'] != '': - records = [] - json_file = json.load(open(config_file_check[0].config_file)) - for record in range(0, len(json_file['Records'])): - arguments = {} - for i in args_list: - arguments[i] = None - for key, value in json_file['Records'][record].items(): - arguments[key] = value - records.append(arguments) - records_count = len(records) - else: - # Taking command line arguments from users - parser = argparse.ArgumentParser() - parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) - parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, - required=False) - parser.add_argument('-sk', '--suffix_keywords', - help='comma separated additional words added after to main keyword', type=str, - required=False) - parser.add_argument('-pk', '--prefix_keywords', - help='comma separated additional words added before main keyword', type=str, required=False) - parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) - parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, - choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) - parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) - parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, - required=False) - parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, - required=False) - parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, - required=False) - parser.add_argument('-n', '--no_directory', default=False, - help='download images in the main directory but no sub-directory', action="store_true") - parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, - required=False) - parser.add_argument('-co', '--color', help='filter on color', type=str, required=False, - choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', - 'gray', 'black', 'brown']) - 
parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, - choices=['full-color', 'black-and-white', 'transparent']) - parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, - choices=['labeled-for-reuse-with-modifications', 'labeled-for-reuse', - 'labeled-for-noncommercial-reuse-with-modification', - 'labeled-for-nocommercial-reuse']) - parser.add_argument('-s', '--size', help='image size', type=str, required=False, - choices=['large', 'medium', 'icon', '>400*300', '>640*480', '>800*600', '>1024*768', '>2MP', - '>4MP', '>6MP', '>8MP', '>10MP', '>12MP', '>15MP', '>20MP', '>40MP', '>70MP']) - parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, - required=False) - parser.add_argument('-t', '--type', help='image type', type=str, required=False, - choices=['face', 'photo', 'clipart', 'line-drawing', 'animated']) - parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours', 'past-7-days', 'past-month', 'past-year']) - parser.add_argument('-wr', '--time_range', - help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', - type=str, required=False) - parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, - required=False, - choices=['tall', 'square', 'wide', 'panoramic']) - parser.add_argument('-si', '--similar_images', - help='downloads images very similar to the image URL you provide', type=str, required=False) - parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', - type=str, required=False) - parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", - action="store_true") - parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", - action="store_true") - parser.add_argument('-pp', '--print_paths', default=False, - help="Prints the list of absolute paths of the images", action="store_true") - parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", - action="store_true") - parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", - action="store_true") - parser.add_argument('-st', '--socket_timeout', default=False, - help="Connection timeout waiting for the image to download", type=float) - parser.add_argument('-th', '--thumbnail', default=False, - help="Downloads image thumbnail along with the actual image", action="store_true") - parser.add_argument('-tho', '--thumbnail_only', default=False, - help="Downloads only thumbnail without downloading actual images", action="store_true") - parser.add_argument('-la', '--language', default=False, - help="Defines the language filter. 
The search results are authomatically returned in that language", - type=str, required=False, - choices=['Arabic', 'Chinese (Simplified)', 'Chinese (Traditional)', 'Czech', 'Danish', - 'Dutch', 'English', 'Estonian', 'Finnish', 'French', 'German', 'Greek', 'Hebrew', - 'Hungarian', 'Icelandic', 'Italian', 'Japanese', 'Korean', 'Latvian', 'Lithuanian', - 'Norwegian', 'Portuguese', 'Polish', 'Romanian', 'Russian', 'Spanish', 'Swedish', - 'Turkish']) - parser.add_argument('-pr', '--prefix', default=False, - help="A word that you would want to prefix in front of each image name", type=str, - required=False) - parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) - parser.add_argument('-cd', '--chromedriver', - help='specify the path to chromedriver executable in your local machine', type=str, - required=False) - parser.add_argument('-ri', '--related_images', default=False, - help="Downloads images that are similar to the keyword provided", action="store_true") - parser.add_argument('-sa', '--safe_search', default=False, - help="Turns on the safe search filter while searching for images", action="store_true") - parser.add_argument('-nn', '--no_numbering', default=False, - help="Allows you to exclude the default numbering of images", action="store_true") - parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) - parser.add_argument('-nd', '--no_download', default=False, - help="Prints the URLs of the images and/or thumbnails without downloading them", - action="store_true") - parser.add_argument('-iu', '--ignore_urls', default=False, - help="delimited list input of image urls/keywords to ignore", type=str) - parser.add_argument('-sil', '--silent_mode', default=False, - help="Remains silent. 
Does not print notification messages on the terminal", - action="store_true") - parser.add_argument('-is', '--save_source', - help="creates a text file containing a list of downloaded images along with source page url", - type=str, required=False) - - args = parser.parse_args() - arguments = vars(args) - records = [] - records.append(arguments) - return records - - -class googleimagesdownload: - def __init__(self): - pass - - def _extract_data_pack(self, page): - start_line = page.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 - start_object = page.find('[', start_line + 1) - end_object = page.rfind(']',0,page.find('', start_object + 1))+1 - object_raw = str(page[start_object:end_object]) - return bytes(object_raw, "utf-8").decode("unicode_escape") - - def _extract_data_pack_extended(self, page): - start_line = page.find("AF_initDataCallback({key: 'ds:1'") - 10 - start_object = page.find('[', start_line + 1) - end_object = page.rfind(']',0,page.find('', start_object + 1)) + 1 - return str(page[start_object:end_object]) - - def _extract_data_pack_ajax(self, data): - lines = data.split('\n') - return json.loads(lines[3] + lines[4])[0][2] - - def _image_objects_from_pack(self, data): - image_objects = json.loads(data)[31][0][12][2] - image_objects = [x for x in image_objects if x[0] == 1] - return image_objects - - # Downloading entire Web Document (Raw Page Content) - def download_page(self, url): - version = (3, 0) - cur_version = sys.version_info - headers = {} - headers[ - 'User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36" - if cur_version >= version: # If the Current Version of Python is 3.0 or above - try: - req = urllib.request.Request(url, headers=headers) - resp = urllib.request.urlopen(req) - respData = str(resp.read()) - except: - print("Could not open URL. Please check your internet connection and/or ssl settings \n" - "If you are using proxy, make sure your proxy settings is configured correctly") - sys.exit() - else: # If the Current Version of Python is 2.x - try: - req = urllib2.Request(url, headers=headers) - try: - response = urllib2.urlopen(req) - except URLError: # Handling SSL certificate failed - context = ssl._create_unverified_context() - response = urlopen(req, context=context) - respData = response.read() - except: - print("Could not open URL. Please check your internet connection and/or ssl settings \n" - "If you are using proxy, make sure your proxy settings is configured correctly") - sys.exit() - return "Page Not found" - try: - return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData) - except Exception as e: - print(e) - print('Image objects data unpacking failed. Please leave a comment with the above error at https://github.com/hardikvasa/google-images-download/pull/298') - sys.exit() - - # Download Page for more than 100 images - def download_extended_page(self, url, chromedriver): - from selenium import webdriver - from selenium.webdriver.common.keys import Keys - if sys.version_info[0] < 3: - reload(sys) - sys.setdefaultencoding('utf8') - options = webdriver.ChromeOptions() - options.add_argument('--no-sandbox') - options.add_argument("--headless") - - try: - browser = webdriver.Chrome(chromedriver, chrome_options=options) - except Exception as e: - print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " - "argument to specify the path to the executable.) 
or google chrome browser is not " - "installed on your machine (exception: %s)" % e) - sys.exit() - browser.set_window_size(1024, 768) - - # Open the link - browser.get(url) - browser.execute_script(""" - (function(XHR){ - "use strict"; - var open = XHR.prototype.open; - var send = XHR.prototype.send; - var data = []; - XHR.prototype.open = function(method, url, async, user, pass) { - this._url = url; - open.call(this, method, url, async, user, pass); - } - XHR.prototype.send = function(data) { - var self = this; - var url = this._url; - function stateChanged() { - if (self.readyState == 4) { - console.log("data available for: " + url) - XHR.prototype._data.push(self.response); - } - } - if (url.includes("/batchexecute?")) { - this.addEventListener("readystatechange", stateChanged, false); - } - send.call(this, data); - }; - XHR.prototype._data = []; - })(XMLHttpRequest); - """) - - time.sleep(1) - print("Getting you a lot of images. This may take a few moments...") - - element = browser.find_element_by_tag_name("body") - # Scroll down - for i in range(30): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) - - try: - browser.find_element_by_id("smb").click() - for i in range(50): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) # bot id protection - except: - for i in range(10): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) # bot id protection - - print("Reached end of Page.") - time.sleep(0.5) - - source = browser.page_source # page source - images = self._image_objects_from_pack(self._extract_data_pack_extended(source)) - - ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data") - for chunk in ajax_data: - images += self._image_objects_from_pack(self._extract_data_pack_ajax(chunk)) - - # close the browser - browser.close() - - return images, self.get_all_tabs(source) - - # Correcting the escape characters for python2 - def replace_with_byte(self, match): - return chr(int(match.group(0)[1:], 8)) - - def repair(self, brokenjson): - invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF - return invalid_escape.sub(self.replace_with_byte, brokenjson) - - # Finding 'Next Image' from the given raw page - def get_next_tab(self, s): - start_line = s.find('class="dtviD"') - if start_line == -1: # If no links are found then give an error! 
- end_quote = 0 - link = "no_tabs" - return link, '', end_quote - else: - start_line = s.find('class="dtviD"') - start_content = s.find('href="', start_line + 1) - end_content = s.find('">', start_content + 1) - url_item = "https://www.google.com" + str(s[start_content + 6:end_content]) - url_item = url_item.replace('&', '&') - - start_line_2 = s.find('class="dtviD"') - s = s.replace('&', '&') - start_content_2 = s.find(':', start_line_2 + 1) - end_content_2 = s.find('&usg=', start_content_2 + 1) - url_item_name = str(s[start_content_2 + 1:end_content_2]) - - chars = url_item_name.find(',g_1:') - chars_end = url_item_name.find(":", chars + 6) - if chars_end == -1: - updated_item_name = (url_item_name[chars + 5:]).replace("+", " ") - else: - updated_item_name = (url_item_name[chars + 5:chars_end]).replace("+", " ") - - return url_item, updated_item_name, end_content - - # Getting all links with the help of '_images_get_next_image' - def get_all_tabs(self, page): - tabs = {} - while True: - item, item_name, end_content = self.get_next_tab(page) - if item == "no_tabs": - break - else: - if len(item_name) > 100 or item_name == "background-color": - break - else: - tabs[item_name] = item # Append all the links in the list named 'Links' - time.sleep(0.1) # Timer could be used to slow down the request for image downloads - page = page[end_content:] - return tabs - - # Format the object in readable format - def format_object(self, object): - data = object[1] - main = data[3] - info = data[9] - if info is None: - info = data[11] - formatted_object = {} - try: - formatted_object['image_height'] = main[2] - formatted_object['image_width'] = main[1] - formatted_object['image_link'] = main[0] - formatted_object['image_format'] = main[0][-1 * (len(main[0]) - main[0].rfind(".") - 1):] - formatted_object['image_description'] = info['2003'][3] - formatted_object['image_host'] = info['183836587'][0] - formatted_object['image_source'] = info['2003'][2] - formatted_object['image_thumbnail_url'] = data[2][0] - except Exception as e: - print(e) - return None - return formatted_object - - # function to download single image - def single_image(self, image_url): - main_directory = "downloads" - extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") - url = image_url - try: - os.makedirs(main_directory) - except OSError as e: - if e.errno != 17: - raise - pass - req = Request(url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - - response = urlopen(req, None, 10) - data = response.read() - response.close() - - image_name = str(url[(url.rfind('/')) + 1:]) - if '?' 
in image_name: - image_name = image_name[:image_name.find('?')] - # if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - if any(map(lambda extension: extension in image_name, extensions)): - file_name = main_directory + "/" + image_name - else: - file_name = main_directory + "/" + image_name + ".jpg" - image_name = image_name + ".jpg" - - try: - output_file = open(file_name, 'wb') - output_file.write(data) - output_file.close() - except IOError as e: - raise e - except OSError as e: - raise e - print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) - return - - def similar_images(self, similar_images): - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: # If the Current Version of Python is 3.0 or above - try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images - headers = {} - headers[ - 'User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" - - req1 = urllib.request.Request(searchUrl, headers=headers) - resp1 = urllib.request.urlopen(req1) - content = str(resp1.read()) - l1 = content.find('AMhZZ') - l2 = content.find('&', l1) - urll = content[l1:l2] - - newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" - req2 = urllib.request.Request(newurl, headers=headers) - resp2 = urllib.request.urlopen(req2) - l3 = content.find('/search?sa=X&q=') - l4 = content.find(';', l3 + 19) - urll2 = content[l3 + 19:l4] - return urll2 - except: - return "Cloud not connect to Google Images endpoint" - else: # If the Current Version of Python is 2.x - try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images - headers = {} - headers[ - 'User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" - - req1 = urllib2.Request(searchUrl, headers=headers) - resp1 = urllib2.urlopen(req1) - content = str(resp1.read()) - l1 = content.find('AMhZZ') - l2 = content.find('&', l1) - urll = content[l1:l2] - - newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" - req2 = urllib2.Request(newurl, headers=headers) - resp2 = urllib2.urlopen(req2) - l3 = content.find('/search?sa=X&q=') - l4 = content.find(';', l3 + 19) - urll2 = content[l3 + 19:l4] - return (urll2) - except: - return "Cloud not connect to Google Images endpoint" - - # Building URL parameters - def build_url_parameters(self, arguments): - if arguments['language']: - lang = "&lr=" - lang_param = {"Arabic": "lang_ar", "Chinese (Simplified)": "lang_zh-CN", - "Chinese (Traditional)": "lang_zh-TW", "Czech": "lang_cs", "Danish": "lang_da", - "Dutch": "lang_nl", "English": "lang_en", "Estonian": "lang_et", "Finnish": "lang_fi", - "French": "lang_fr", "German": "lang_de", "Greek": "lang_el", "Hebrew": "lang_iw ", - "Hungarian": "lang_hu", "Icelandic": "lang_is", "Italian": "lang_it", "Japanese": "lang_ja", - "Korean": "lang_ko", "Latvian": "lang_lv", "Lithuanian": "lang_lt", "Norwegian": "lang_no", - "Portuguese": "lang_pt", "Polish": "lang_pl", "Romanian": "lang_ro", "Russian": "lang_ru", - "Spanish": "lang_es", "Swedish": "lang_sv", "Turkish": "lang_tr"} - lang_url = lang + lang_param[arguments['language']] - else: - lang_url = '' - - if arguments['time_range']: - json_acceptable_string = arguments['time_range'].replace("'", "\"") - d 
= json.loads(json_acceptable_string) - time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max'] - else: - time_range = '' - - if arguments['exact_size']: - size_array = [x.strip() for x in arguments['exact_size'].split(',')] - exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1]) - else: - exact_size = '' - - built_url = "&tbs=" - counter = 0 - params = {'color': [arguments['color'], {'red': 'ic:specific,isc:red', 'orange': 'ic:specific,isc:orange', - 'yellow': 'ic:specific,isc:yellow', 'green': 'ic:specific,isc:green', - 'teal': 'ic:specific,isc:teel', 'blue': 'ic:specific,isc:blue', - 'purple': 'ic:specific,isc:purple', 'pink': 'ic:specific,isc:pink', - 'white': 'ic:specific,isc:white', 'gray': 'ic:specific,isc:gray', - 'black': 'ic:specific,isc:black', 'brown': 'ic:specific,isc:brown'}], - 'color_type': [arguments['color_type'], - {'full-color': 'ic:color', 'black-and-white': 'ic:gray', 'transparent': 'ic:trans'}], - 'usage_rights': [arguments['usage_rights'], - {'labeled-for-reuse-with-modifications': 'sur:fmc', 'labeled-for-reuse': 'sur:fc', - 'labeled-for-noncommercial-reuse-with-modification': 'sur:fm', - 'labeled-for-nocommercial-reuse': 'sur:f'}], - 'size': [arguments['size'], - {'large': 'isz:l', 'medium': 'isz:m', 'icon': 'isz:i', '>400*300': 'isz:lt,islt:qsvga', - '>640*480': 'isz:lt,islt:vga', '>800*600': 'isz:lt,islt:svga', - '>1024*768': 'visz:lt,islt:xga', '>2MP': 'isz:lt,islt:2mp', '>4MP': 'isz:lt,islt:4mp', - '>6MP': 'isz:lt,islt:6mp', '>8MP': 'isz:lt,islt:8mp', '>10MP': 'isz:lt,islt:10mp', - '>12MP': 'isz:lt,islt:12mp', '>15MP': 'isz:lt,islt:15mp', '>20MP': 'isz:lt,islt:20mp', - '>40MP': 'isz:lt,islt:40mp', '>70MP': 'isz:lt,islt:70mp'}], - 'type': [arguments['type'], {'face': 'itp:face', 'photo': 'itp:photo', 'clipart': 'itp:clipart', - 'line-drawing': 'itp:lineart', 'animated': 'itp:animated'}], - 'time': [arguments['time'], {'past-24-hours': 'qdr:d', 'past-7-days': 'qdr:w', 'past-month': 'qdr:m', - 'past-year': 'qdr:y'}], - 'aspect_ratio': [arguments['aspect_ratio'], - {'tall': 'iar:t', 'square': 'iar:s', 'wide': 'iar:w', 'panoramic': 'iar:xw'}], - 'format': [arguments['format'], - {'jpg': 'ift:jpg', 'gif': 'ift:gif', 'png': 'ift:png', 'bmp': 'ift:bmp', 'svg': 'ift:svg', - 'webp': 'webp', 'ico': 'ift:ico', 'raw': 'ift:craw'}]} - for key, value in params.items(): - if value[0] is not None: - ext_param = value[1][value[0]] - # counter will tell if it is first param added or not - if counter == 0: - # add it to the built url - built_url = built_url + ext_param - counter += 1 - else: - built_url = built_url + ',' + ext_param - counter += 1 - built_url = lang_url + built_url + exact_size + time_range - return built_url - - # building main search URL - def build_search_url(self, search_term, params, url, similar_images, specific_site, safe_search): - # check safe_search - safe_search_string = "&safe=active" - # check the args and choose the URL - if url: - url = url - elif similar_images: - print(similar_images) - keywordem = self.similar_images(similar_images) - url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - elif specific_site: - url = 'https://www.google.com/search?q=' + quote( - search_term.encode( - 'utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - else: - url = 'https://www.google.com/search?q=' + 
quote( - search_term.encode( - 'utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - - # safe search check - if safe_search: - url = url + safe_search_string - - return url - - # measures the file size - def file_size(self, file_path): - if os.path.isfile(file_path): - file_info = os.stat(file_path) - size = file_info.st_size - for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: - if size < 1024.0: - return "%3.1f %s" % (size, x) - size /= 1024.0 - return size - - # keywords from file - def keywords_from_file(self, file_name): - search_keyword = [] - with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: - if '.csv' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - search_keyword.append(line.replace('\n', '').replace('\r', '')) - elif '.txt' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - search_keyword.append(line.replace('\n', '').replace('\r', '')) - else: - print("Invalid file type: Valid file types are either .txt or .csv \n" - "exiting...") - sys.exit() - return search_keyword - - # make directories - def create_directories(self, main_directory, dir_name, thumbnail, thumbnail_only): - dir_name_thumbnail = dir_name + " - thumbnail" - # make a search keyword directory - try: - if not os.path.exists(main_directory): - os.makedirs(main_directory) - time.sleep(0.15) - path = (dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - if thumbnail or thumbnail_only: - sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) - if not os.path.exists(sub_directory_thumbnail): - os.makedirs(sub_directory_thumbnail) - else: - path = (dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - if thumbnail or thumbnail_only: - sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) - if not os.path.exists(sub_directory_thumbnail): - os.makedirs(sub_directory_thumbnail) - except OSError as e: - if e.errno != 17: - raise - pass - return - - # Download Image thumbnails - def download_image_thumbnail(self, image_url, main_directory, dir_name, return_image_name, print_urls, - socket_timeout, print_size, no_download, save_source, img_src, ignore_urls): - if print_urls or no_download: - print("Image URL: " + image_url) - if no_download: - return "success", "Printed url without downloading" - try: - req = Request(image_url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - try: - # timeout time to download an image - if socket_timeout: - timeout = float(socket_timeout) - else: - timeout = 10 - - response = urlopen(req, None, timeout) - data = response.read() - response.close() - - path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name - - try: - output_file = open(path, 'wb') - output_file.write(data) - output_file.close() - if save_source: - list_path = main_directory + "/" + save_source + ".txt" - list_file = open(list_path, 'a') - list_file.write(path + '\t' + img_src + '\n') - list_file.close() - except OSError as e: - download_status = 'fail' - download_message = "OSError on an image...trying next one..." + " Error: " + str(e) - except IOError as e: - download_status = 'fail' - download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) - - download_status = 'success' - download_message = "Completed Image Thumbnail ====> " + return_image_name - - # image size parameter - if print_size: - print("Image Size: " + str(self.file_size(path))) - - except UnicodeEncodeError as e: - download_status = 'fail' - download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) - - except HTTPError as e: # If there is any HTTPError - download_status = 'fail' - download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) - - except URLError as e: - download_status = 'fail' - download_message = "URLError on an image...trying next one..." + " Error: " + str(e) - - except ssl.CertificateError as e: - download_status = 'fail' - download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) - - except IOError as e: # If there is any IOError - download_status = 'fail' - download_message = "IOError on an image...trying next one..." + " Error: " + str(e) - return download_status, download_message - - # Download Images - def download_image(self, image_url, image_format, main_directory, dir_name, count, print_urls, socket_timeout, - prefix, print_size, no_numbering, no_download, save_source, img_src, silent_mode, thumbnail_only, - format, ignore_urls): - if not silent_mode: - if print_urls or no_download: - print("Image URL: " + image_url) - if ignore_urls: - if any(url in image_url for url in ignore_urls.split(',')): - return "fail", "Image ignored due to 'ignore url' parameter", None, image_url - if thumbnail_only: - return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url - if no_download: - return "success", "Printed url without downloading", None, image_url - try: - req = Request(image_url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - try: - # timeout time to download an image - if socket_timeout: - timeout = float(socket_timeout) - else: - timeout = 10 - - response = urlopen(req, None, timeout) - data = response.read() - info = response.info() - response.close() - - qmark = image_url.rfind('?') - if qmark == -1: - qmark = len(image_url) - slash = image_url.rfind('/', 0, qmark) + 1 - image_name = str(image_url[slash:qmark]).lower() - - type = info.get_content_type() - if type == "image/jpeg" or type == "image/jpg": - if not image_name.endswith(".jpg") and not image_name.endswith(".jpeg"): - image_name += ".jpg" - elif type == "image/png": - if not image_name.endswith(".png"): - image_name += ".png" - elif type == "image/webp": - if not image_name.endswith(".webp"): - image_name += ".webp" - elif type == "image/gif": - if not image_name.endswith(".gif"): - image_name += ".gif" - elif type == "image/bmp" or type == "image/x-windows-bmp": - if not image_name.endswith(".bmp"): - image_name += ".bmp" - elif type == "image/x-icon" or type == "image/vnd.microsoft.icon": - if not image_name.endswith(".ico"): - image_name += ".ico" - elif type == "image/svg+xml": - if not image_name.endswith(".svg"): - image_name += ".svg" - else: - download_status = 'fail' - download_message = "Invalid image format '" + type + "'. Skipping..." 
- return_image_name = '' - absolute_path = '' - return download_status, download_message, return_image_name, absolute_path - - # prefix name in image - if prefix: - prefix = prefix + " " - else: - prefix = '' - - if no_numbering: - path = main_directory + "/" + dir_name + "/" + prefix + image_name - else: - path = main_directory + "/" + dir_name + "/" + prefix + str(count) + "." + image_name - - try: - output_file = open(path, 'wb') - output_file.write(data) - output_file.close() - if save_source: - list_path = main_directory + "/" + save_source + ".txt" - list_file = open(list_path, 'a') - list_file.write(path + '\t' + img_src + '\n') - list_file.close() - absolute_path = os.path.abspath(path) - except OSError as e: - download_status = 'fail' - download_message = "OSError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - # return image name back to calling method to use it for thumbnail downloads - download_status = 'success' - download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name - return_image_name = prefix + str(count) + "." + image_name - - # image size parameter - if not silent_mode: - if print_size: - print("Image Size: " + str(self.file_size(path))) - - except UnicodeEncodeError as e: - download_status = 'fail' - download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except URLError as e: - download_status = 'fail' - download_message = "URLError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except BadStatusLine as e: - download_status = 'fail' - download_message = "BadStatusLine on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except HTTPError as e: # If there is any HTTPError - download_status = 'fail' - download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except URLError as e: - download_status = 'fail' - download_message = "URLError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except ssl.CertificateError as e: - download_status = 'fail' - download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except IOError as e: # If there is any IOError - download_status = 'fail' - download_message = "IOError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - except IncompleteRead as e: - download_status = 'fail' - download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - absolute_path = '' - - return download_status, download_message, return_image_name, absolute_path - - def _get_all_items(self, image_objects, main_directory, dir_name, limit, arguments): - items = [] - abs_path = [] - errorCount = 0 - i = 0 - count = 1 - while count < limit + 1 and i < len(image_objects): - if len(image_objects) == 0: - print("no_links") - break - #code added here to attempt to implement offset correctly - #was "count < int(arguments['offset'])" in hardikvasa code, this seems - # to be contrary to the implementation details. 
- elif arguments['offset'] and count <= int(arguments['offset']): - count += 1 - #page = page[end_content:] - else: - # format the item for readability - object = self.format_object(image_objects[i]) - if arguments['metadata']: - if not arguments["silent_mode"]: - print("\nImage Metadata: " + str(object)) - - # download the images - download_status, download_message, return_image_name, absolute_path = self.download_image( - object['image_link'], object['image_format'], main_directory, dir_name, count, - arguments['print_urls'], arguments['socket_timeout'], arguments['prefix'], arguments['print_size'], - arguments['no_numbering'], arguments['no_download'], arguments['save_source'], - object['image_source'], arguments["silent_mode"], arguments["thumbnail_only"], arguments['format'], - arguments['ignore_urls']) - if not arguments["silent_mode"]: - print(download_message) - if download_status == "success": - - # download image_thumbnails - if arguments['thumbnail'] or arguments["thumbnail_only"]: - download_status, download_message_thumbnail = self.download_image_thumbnail( - object['image_thumbnail_url'], main_directory, dir_name, return_image_name, - arguments['print_urls'], arguments['socket_timeout'], arguments['print_size'], - arguments['no_download'], arguments['save_source'], object['image_source'], - arguments['ignore_urls']) - if not arguments["silent_mode"]: - print(download_message_thumbnail) - - count += 1 - object['image_filename'] = return_image_name - items.append(object) # Append all the links in the list named 'Links' - abs_path.append(absolute_path) - else: - errorCount += 1 - - # delay param - if arguments['delay']: - time.sleep(int(arguments['delay'])) - i += 1 - if count < limit: - print("\n\nUnfortunately all " + str( - limit) + " could not be downloaded because some images were not downloadable. 
" + str( - count - 1) + " is all we got for this search filter!") - return items, errorCount, abs_path - - # Bulk Download - def download(self, arguments): - paths_agg = {} - # for input coming from other python files - if __name__ != "__main__": - # if the calling file contains config_file param - if 'config_file' in arguments: - records = [] - json_file = json.load(open(arguments['config_file'])) - for record in range(0, len(json_file['Records'])): - arguments = {} - for i in args_list: - arguments[i] = None - for key, value in json_file['Records'][record].items(): - arguments[key] = value - records.append(arguments) - total_errors = 0 - for rec in records: - paths, errors = self.download_executor(rec) - for i in paths: - paths_agg[i] = paths[i] - if not arguments["silent_mode"]: - if arguments['print_paths']: - print(paths.encode('raw_unicode_escape').decode('utf-8')) - total_errors = total_errors + errors - return paths_agg, total_errors - # if the calling file contains params directly - else: - paths, errors = self.download_executor(arguments) - for i in paths: - paths_agg[i] = paths[i] - if not arguments["silent_mode"]: - if arguments['print_paths']: - print(paths.encode('raw_unicode_escape').decode('utf-8')) - return paths_agg, errors - # for input coming from CLI - else: - paths, errors = self.download_executor(arguments) - for i in paths: - paths_agg[i] = paths[i] - if not arguments["silent_mode"]: - if arguments['print_paths']: - print(paths.encode('raw_unicode_escape').decode('utf-8')) - return paths_agg, errors - - def download_executor(self, arguments): - paths = {} - errorCount = None - for arg in args_list: - if arg not in arguments: - arguments[arg] = None - ######Initialization and Validation of user arguments - if arguments['keywords']: - search_keyword = [str(item) for item in arguments['keywords'].split(',')] - - if arguments['keywords_from_file']: - search_keyword = self.keywords_from_file(arguments['keywords_from_file']) - - # both time and time range should not be allowed in the same query - if arguments['time'] and arguments['time_range']: - raise ValueError( - 'Either time or time range should be used in a query. Both cannot be used at the same time.') - - # both time and time range should not be allowed in the same query - if arguments['size'] and arguments['exact_size']: - raise ValueError( - 'Either "size" or "exact_size" should be used in a query. 
Both cannot be used at the same time.') - - # both image directory and no image directory should not be allowed in the same query - if arguments['image_directory'] and arguments['no_directory']: - raise ValueError('You can either specify image directory or specify no image directory, not both!') - - # Additional words added to keywords - if arguments['suffix_keywords']: - suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] - else: - suffix_keywords = [''] - - # Additional words added to keywords - if arguments['prefix_keywords']: - prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] - else: - prefix_keywords = [''] - - # Setting limit on number of images to be downloaded - if arguments['limit']: - limit = int(arguments['limit']) - else: - limit = 100 - - if arguments['url']: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] - - if arguments['similar_images']: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] - - # If single_image or url argument not present then keywords is mandatory argument - if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \ - arguments['keywords'] is None and arguments['keywords_from_file'] is None: - print('-------------------------------\n' - 'Uh oh! Keywords is a required argument \n\n' - 'Please refer to the documentation on guide to writing queries \n' - 'https://github.com/hardikvasa/google-images-download#examples' - '\n\nexiting!\n' - '-------------------------------') - sys.exit() - - # If this argument is present, set the custom output directory - if arguments['output_directory']: - main_directory = arguments['output_directory'] - else: - main_directory = "downloads" - - # Proxy settings - if arguments['proxy']: - os.environ["http_proxy"] = arguments['proxy'] - os.environ["https_proxy"] = arguments['proxy'] - ######Initialization Complete - total_errors = 0 - for pky in prefix_keywords: # 1.for every prefix keywords - for sky in suffix_keywords: # 2.for every suffix keywords - i = 0 - while i < len(search_keyword): # 3.for every main keyword - iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + ( - search_keyword[i]) + (sky) - if not arguments["silent_mode"]: - print(iteration.encode('raw_unicode_escape').decode('utf-8')) - print("Evaluating...") - else: - print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...") - search_term = pky + search_keyword[i] + sky - - if arguments['image_directory']: - dir_name = arguments['image_directory'] - elif arguments['no_directory']: - dir_name = '' - else: - dir_name = search_term + ( - '-' + arguments['color'] if arguments['color'] else '') # sub-directory - - if not arguments["no_download"]: - self.create_directories(main_directory, dir_name, arguments['thumbnail'], - arguments['thumbnail_only']) # create directories in OS - - params = self.build_url_parameters(arguments) # building URL with params - - url = self.build_search_url(search_term, params, arguments['url'], arguments['similar_images'], - arguments['specific_site'], - arguments['safe_search']) # building main search url - - if limit < 101: - images, tabs = self.download_page(url) # download page - else: - images, tabs = self.download_extended_page(url, arguments['chromedriver']) - - if not arguments["silent_mode"]: - if arguments['no_download']: - print("Getting 
URLs without downloading images...") - else: - print("Starting Download...") - items, errorCount, abs_path = self._get_all_items(images, main_directory, dir_name, limit, - arguments) # get all image items and download images - paths[pky + search_keyword[i] + sky] = abs_path - - # dumps into a json file - if arguments['extract_metadata']: - try: - if not os.path.exists("logs"): - os.makedirs("logs") - except OSError as e: - print(e) - json_file = open("logs/" + search_keyword[i] + ".json", "w") - json.dump(items, json_file, indent=4, sort_keys=True) - json_file.close() - - # Related images - if arguments['related_images']: - print("\nGetting list of related keywords...this may take a few moments") - for key, value in tabs.items(): - final_search_term = (search_term + " - " + key) - print("\nNow Downloading - " + final_search_term) - if limit < 101: - images, _ = self.download_page(value) # download page - else: - images, _ = self.download_extended_page(value, arguments['chromedriver']) - self.create_directories(main_directory, final_search_term, arguments['thumbnail'], - arguments['thumbnail_only']) - self._get_all_items(images, main_directory, search_term + " - " + key, limit, arguments) - - i += 1 - total_errors = total_errors + errorCount - if not arguments["silent_mode"]: - print("\nErrors: " + str(errorCount) + "\n") - return paths, total_errors - - -# ------------- Main Program -------------# -def main(): - records = user_input() - total_errors = 0 - t0 = time.time() # start the timer - for arguments in records: - - if arguments['single_image']: # Download Single Image using a URL - response = googleimagesdownload() - response.single_image(arguments['single_image']) - else: # or download multiple images based on keywords/keyphrase search - response = googleimagesdownload() - paths, errors = response.download(arguments) # wrapping response in a variable just for consistency - total_errors = total_errors + errors - - t1 = time.time() # stop the timer - total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images - if not arguments["silent_mode"]: - print("\nEverything downloaded!") - print("Total errors: " + str(total_errors)) - print("Total time taken: " + str(total_time) + " Seconds") - - -if __name__ == "__main__": - main() From 4c5e6a4d52c9c6082ae00e9eb4727e0e8d3a855c Mon Sep 17 00:00:00 2001 From: explosion1206 <40578236+estuhr1206@users.noreply.github.com> Date: Tue, 25 May 2021 02:30:26 -0400 Subject: [PATCH 17/31] Add files via upload --- google_images_download/google_images_download.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 49dcee77..51bb251b 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -876,6 +876,12 @@ def _get_all_items(self, image_objects, main_directory, dir_name, limit, argumen if len(image_objects) == 0: print("no_links") break + #code added here to attempt to implement offset correctly + #was "count < int(arguments['offset'])" in hardikvasa code, this seems + # to be contrary to the implementation details. 
+            elif arguments['offset'] and count <= int(arguments['offset']):
+                count += 1
+                #page = page[end_content:]
             else:
                 # format the item for readability
                 object = self.format_object(image_objects[i])

From 2f9f80193f4bb7218090a94f3616eadb624bb40b Mon Sep 17 00:00:00 2001
From: Nicolas Grosjean
Date: Wed, 16 Jun 2021 16:06:15 +0200
Subject: [PATCH 18/31] Get more than 400 images

Fix clicking on the "Show more results" button with Selenium.
- The button no longer has the "smb" id
- We need to scroll down further before clicking
---
 google_images_download/google_images_download.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 51bb251b..1c5e47d6 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -292,12 +292,12 @@ def download_extended_page(self, url, chromedriver):
 
         element = browser.find_element_by_tag_name("body")
         # Scroll down
-        for i in range(30):
+        for i in range(50):
             element.send_keys(Keys.PAGE_DOWN)
             time.sleep(0.3)
 
         try:
-            browser.find_element_by_id("smb").click()
+            browser.find_element_by_xpath('//input[@value="Show more results"]').click()
             for i in range(50):
                 element.send_keys(Keys.PAGE_DOWN)
                 time.sleep(0.3)  # bot id protection

From df2e289aa2e1c0273ebcd67300ecdcf50f5dc90e Mon Sep 17 00:00:00 2001
From: Matthew LeHew
Date: Wed, 30 Jun 2021 10:47:24 -0400
Subject: [PATCH 19/31] Fix JSONDecodeError: Extra Data

This may have been caused by Google changing their Ajax response. Looking at
the response, lines[4] only contained a single number and not any JSON.
Removing it and simply pulling from lines[3] seems to fix the issue. The
problem only manifested when downloading more than 100 images, which required
launching ChromeDriver.
---
 google_images_download/google_images_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 51bb251b..bb941bf2 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -193,7 +193,7 @@ def _extract_data_pack_extended(self, page):
 
     def _extract_data_pack_ajax(self, data):
         lines = data.split('\n')
-        return json.loads(lines[3] + lines[4])[0][2]
+        return json.loads(lines[3])[0][2]
 
     def _image_objects_from_pack(self, data):
         image_objects = json.loads(data)[31][0][12][2]

From a8e28e265b83a63c5c8294bacd044251a9aa4d03 Mon Sep 17 00:00:00 2001
From: Nicolas Grosjean
Date: Wed, 25 Aug 2021 11:52:28 +0200
Subject: [PATCH 20/31] Manage API change

We extracted images from json.loads(data)[31][0]... because json.loads(data)[31]
used to be a list holding a single value. Now json.loads(data)[31] is a list of
two values and we want the last one. Replacing the index 0 with -1 handles the
new case, and also the old one should Google revert this change.
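
For reference, the resulting helper after this change (the magic indices come
from Google's packed JSON and may shift again):

    def _image_objects_from_pack(self, data):
        # Index 31 holds a wrapper list; [-1] takes its last entry, which
        # works whether it has one element (old layout) or two (new layout).
        image_objects = json.loads(data)[31][-1][12][2]
        # Keep only real image records; x[0] == 1 filters out null entries.
        image_objects = [x for x in image_objects if x[0] == 1]
        return image_objects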
---
 google_images_download/google_images_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 51bb251b..0cc3057d 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -196,7 +196,7 @@ def _extract_data_pack_ajax(self, data):
         return json.loads(lines[3] + lines[4])[0][2]
 
     def _image_objects_from_pack(self, data):
-        image_objects = json.loads(data)[31][0][12][2]
+        image_objects = json.loads(data)[31][-1][12][2]
         image_objects = [x for x in image_objects if x[0] == 1]
         return image_objects
 

From 375b6bb6141a658777f87b7bf9f6a8444e07305e Mon Sep 17 00:00:00 2001
From: Nicolas Grosjean
Date: Mon, 20 Sep 2021 17:18:37 +0200
Subject: [PATCH 21/31] Fix time_range argument

The time range feature has changed; I used this tweet thread to fix it:
https://twitter.com/i/events/1174066444029419520. We could rework the
time_range format to avoid changing the "API".
---
 .../google_images_download.py | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 51bb251b..ad8443d1 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -108,7 +108,7 @@ def user_input():
         parser.add_argument('-w', '--time', help='image age', type=str, required=False,
                             choices=['past-24-hours', 'past-7-days', 'past-month', 'past-year'])
         parser.add_argument('-wr', '--time_range',
-                            help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}',
+                            help='time range for the age of the image. should be in the format {"time_min":"YYYY-MM-DD","time_max":"YYYY-MM-DD"}',
                             type=str, required=False)
         parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str,
                             required=False,
@@ -501,13 +501,6 @@ def build_url_parameters(self, arguments):
         else:
             lang_url = ''
 
-        if arguments['time_range']:
-            json_acceptable_string = arguments['time_range'].replace("'", "\"")
-            d = json.loads(json_acceptable_string)
-            time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max']
-        else:
-            time_range = ''
-
         if arguments['exact_size']:
             size_array = [x.strip() for x in arguments['exact_size'].split(',')]
             exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1])
@@ -555,7 +548,7 @@ def build_url_parameters(self, arguments):
             else:
                 built_url = built_url + ',' + ext_param
                 counter += 1
-        built_url = lang_url + built_url + exact_size + time_range
+        built_url = lang_url + built_url + exact_size
         return built_url
 
     # building main search URL
@@ -1046,6 +1039,14 @@ def download_executor(self, arguments):
         if arguments['proxy']:
             os.environ["http_proxy"] = arguments['proxy']
             os.environ["https_proxy"] = arguments['proxy']
+
+        # Add time range to keywords if asked
+        time_range = ''
+        if arguments['time_range']:
+            json_acceptable_string = arguments['time_range'].replace("'", "\"")
+            d = json.loads(json_acceptable_string)
+            time_range = ' after:' + d['time_min'] + ' before:' + d['time_max']
+
 ######Initialization Complete
         total_errors = 0
         for pky in prefix_keywords:  # 1.for every prefix keywords
@@ -1059,7 +1060,7 @@ def download_executor(self, arguments):
                     print("Evaluating...")
                 else:
                     print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...")
-                search_term = pky + search_keyword[i] + sky
+                search_term = pky + search_keyword[i] + sky + time_range
 
                 if arguments['image_directory']:
                     dir_name = arguments['image_directory']

From a0c18fd9386e6f256cc8bbbd3abff523138aee32 Mon Sep 17 00:00:00 2001
From: Nicolas Grosjean
Date: Wed, 22 Sep 2021 16:39:06 +0200
Subject: [PATCH 22/31] Remove time range from directory names

It is not very useful to have the time range expression in the image
directory names.
---
 google_images_download/google_images_download.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index ad8443d1..d9e2ca7b 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -1060,7 +1060,7 @@ def download_executor(self, arguments):
                     print("Evaluating...")
                 else:
                     print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...")
-                search_term = pky + search_keyword[i] + sky + time_range
+                search_term = pky + search_keyword[i] + sky
 
                 if arguments['image_directory']:
                     dir_name = arguments['image_directory']
@@ -1076,6 +1076,7 @@ def download_executor(self, arguments):
 
                 params = self.build_url_parameters(arguments)  # building URL with params
 
+                search_term += time_range
                 url = self.build_search_url(search_term, params, arguments['url'], arguments['similar_images'],
                                             arguments['specific_site'],
                                             arguments['safe_search'])  # building main search url

From c773e1c7d12f30c24e793e1963035386764a6248 Mon Sep 17 00:00:00 2001
From: Joe Clinton
Date: Sun, 26 Sep 2021 15:37:58 +0200
Subject: [PATCH 23/31] Fix "None type error" by changing location of
 image_host string in info object.
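
A more defensive variant of the lookup is sketched below. This is an
illustration only, not what the patch applies; it assumes 'info' stays a
dict of stringly-keyed records, as in format_object today:

    # Prefer the new location of the host string; fall back to the old
    # key in case Google reverts the layout.
    try:
        formatted_object['image_host'] = info['2003'][17]
    except (KeyError, IndexError, TypeError):
        formatted_object['image_host'] = info.get('183836587', [''])[0]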
---
 google_images_download/google_images_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index e5769978..bbfbd47a 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -388,7 +388,7 @@ def format_object(self, object):
         formatted_object['image_link'] = main[0]
         formatted_object['image_format'] = main[0][-1 * (len(main[0]) - main[0].rfind(".") - 1):]
         formatted_object['image_description'] = info['2003'][3]
-        formatted_object['image_host'] = info['183836587'][0]
+        formatted_object['image_host'] = info['2003'][17]
         formatted_object['image_source'] = info['2003'][2]
         formatted_object['image_thumbnail_url'] = data[2][0]
     except Exception as e:

From 36e5c06681a7b2ae751d288c7fc1f26529846a89 Mon Sep 17 00:00:00 2001
From: Nicolas Grosjean
Date: Wed, 23 Feb 2022 09:12:10 +0100
Subject: [PATCH 24/31] Fix exact_size parameter #11

Update the URL building to the new way of getting the exact image size,
thanks to this article:
https://www.labnol.org/internet/google-image-size-search/26902/
---
 google_images_download/google_images_download.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index bbfbd47a..2920c813 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -501,11 +501,6 @@ def build_url_parameters(self, arguments):
         else:
             lang_url = ''
 
-        if arguments['exact_size']:
-            size_array = [x.strip() for x in arguments['exact_size'].split(',')]
-            exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1])
-        else:
-            exact_size = ''
 
         built_url = "&tbs="
         counter = 0
@@ -548,7 +543,7 @@ def build_url_parameters(self, arguments):
             else:
                 built_url = built_url + ',' + ext_param
                 counter += 1
-        built_url = lang_url + built_url + exact_size
+        built_url = lang_url + built_url
         return built_url
 
     # building main search URL
@@ -1047,6 +1042,11 @@ def download_executor(self, arguments):
             d = json.loads(json_acceptable_string)
             time_range = ' after:' + d['time_min'] + ' before:' + d['time_max']
 
+        exact_size = ''
+        if arguments['exact_size']:
+            size_array = [x.strip() for x in arguments['exact_size'].split(',')]
+            exact_size = " imagesize:" + str(size_array[0]) + "x" + str(size_array[1])
+
 ######Initialization Complete
         total_errors = 0
         for pky in prefix_keywords:  # 1.for every prefix keywords
@@ -1076,7 +1076,7 @@ def download_executor(self, arguments):
 
                 params = self.build_url_parameters(arguments)  # building URL with params
 
-                search_term += time_range
+                search_term += time_range + exact_size
                 url = self.build_search_url(search_term, params, arguments['url'], arguments['similar_images'],
                                             arguments['specific_site'],
                                             arguments['safe_search'])  # building main search url

From cf190d8650ba79e63a44f9d1cdf386da1b023258 Mon Sep 17 00:00:00 2001
From: Lex Vorona
Date: Fri, 5 Aug 2022 13:11:58 -0700
Subject: [PATCH 25/31] Support Firefox

---
 .../google_images_download.py | 28 +++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 2920c813..2fae83f4 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -42,7 +42,7 @@
              "exact_size",
"aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", + "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "browser", "related_images", "safe_search", "no_numbering", "offset", "no_download", "save_source", "silent_mode", "ignore_urls"] @@ -148,6 +148,9 @@ def user_input(): parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) + parser.add_argument('-wb', '--browser', + help='Specify which driver to use', type=str, + required=False) parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") parser.add_argument('-sa', '--safe_search', default=False, @@ -238,7 +241,7 @@ def download_page(self, url): sys.exit() # Download Page for more than 100 images - def download_extended_page(self, url, chromedriver): + def download_extended_page(self, url, chromedriver, browser): from selenium import webdriver from selenium.webdriver.common.keys import Keys if sys.version_info[0] < 3: @@ -248,13 +251,16 @@ def download_extended_page(self, url, chromedriver): options.add_argument('--no-sandbox') options.add_argument("--headless") - try: - browser = webdriver.Chrome(chromedriver, chrome_options=options) - except Exception as e: - print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " - "argument to specify the path to the executable.) or google chrome browser is not " - "installed on your machine (exception: %s)" % e) - sys.exit() + if browser == 'Firefox': + browser = webdriver.Firefox() + else: + try: + browser = webdriver.Chrome(chromedriver, chrome_options=options) + except Exception as e: + print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " + "argument to specify the path to the executable.) 
---
 .../google_images_download.py        | 13 +++++++++++--
 tests/test_google_images_download.py | 19 +++++++++++--------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 2fae83f4..a5b08eb6 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -6,6 +6,7 @@
 # Import Libraries
 import sys
+import selenium.common.exceptions
 
 version = (3, 0)
 cur_version = sys.version_info
@@ -294,6 +295,14 @@ def download_extended_page(self, url, chromedriver, browser):
                 """)
             time.sleep(1)
+
+            # Bypass "Before you continue" if it appears
+            try:
+                browser.find_element_by_css_selector("[aria-label='Accept all']").click()
+                time.sleep(1)
+            except selenium.common.exceptions.NoSuchElementException:
+                pass
+
         print("Getting you a lot of images. This may take a few moments...")
 
         element = browser.find_element_by_tag_name("body")
@@ -318,8 +327,8 @@ def download_extended_page(self, url, chromedriver, browser):
         source = browser.page_source  # page source
         images = self._image_objects_from_pack(self._extract_data_pack_extended(source))
 
-        ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data")
-        for chunk in ajax_data:
+        ajax_data = browser.execute_script("return XMLHttpRequest.prototype._data")  # I think this is broken
+        for chunk in ajax_data if ajax_data else []:
             images += self._image_objects_from_pack(self._extract_data_pack_ajax(chunk))
 
         # close the browser
diff --git a/tests/test_google_images_download.py b/tests/test_google_images_download.py
index ec62afd0..d16a76bc 100644
--- a/tests/test_google_images_download.py
+++ b/tests/test_google_images_download.py
@@ -15,26 +15,27 @@ def silent_remove_of_file(file):
 
 def test_download_images_to_default_location():
     start_time = time.time()
-    argumnets = {
+    arguments = {
         "keywords": "Polar bears",
-        "limit": 5,
-        "print_urls": False
+        "limit":101,
+        "print_urls": False,
+        "chromedriver": 'C:/Program Files (x86)/chromedriver/chromedriver.exe'
     }
     try:
-        temp = argumnets['output_folder']
+        temp = arguments['output_folder']
     except KeyError:
         pass
     else:
         assert False, "This test checks download to default location yet an output folder was provided"
-    output_folder_path = os.path.join(os.path.realpath('.'), 'downloads', '{}'.format(argumnets['keywords']))
+    output_folder_path = os.path.join(os.path.realpath('.'), 'downloads', '{}'.format(arguments['keywords']))
     if os.path.exists(output_folder_path):
         start_amount_of_files_in_output_folder = len([name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getctime(os.path.join(output_folder_path, name)) < start_time])
     else:
         start_amount_of_files_in_output_folder = 0
     response = google_images_download.googleimagesdownload()
-    response.download(argumnets)
+    response.download(arguments)
     files_modified_after_test_started = [name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getmtime(os.path.join(output_folder_path, name)) > start_time]
     end_amount_of_files_in_output_folder = len(files_modified_after_test_started)
     print(f"Files downloaded by test {__name__}:")
@@ -43,11 +44,13 @@ def test_download_images_to_default_location():
 
     # assert end_amount_of_files_in_output_folder - start_amount_of_files_in_output_folder == argumnets['limit']
-    assert end_amount_of_files_in_output_folder == argumnets['limit']
+    assert end_amount_of_files_in_output_folder == arguments['limit']
 
     print(f"Cleaning up all files downloaded by test {__name__}...")
     for file in files_modified_after_test_started:
         if silent_remove_of_file(os.path.join(output_folder_path, file)):
             print(f"Deleted {os.path.join(output_folder_path, file)}")
         else:
-            print(f"Failed to delete {os.path.join(output_folder_path, file)}")
\ No newline at end of file
+            print(f"Failed to delete {os.path.join(output_folder_path, file)}")
+
+test_download_images_to_default_location()
\ No newline at end of file

From dffca0899d391a4673181fce29284e4f65daca9a Mon Sep 17 00:00:00 2001
From: Ellis Brown
Date: Fri, 23 Sep 2022 23:31:33 +0000
Subject: [PATCH 27/31] fix breaking change due to google's response format
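
The new pack layout nests the image grid several levels deep and wraps
each image object in a single-entry dict. A more defensive descent
would degrade to an empty result instead of raising (a sketch; the
wrapper is illustrative and the index path is the one this patch
hard-codes):

    def _grid_or_empty(image_data):
        # Return the image grid, or [] when Google has shuffled the
        # structure again and the hard-coded path no longer applies.
        try:
            return image_data[56][-1][0][-1][-1][0]
        except (IndexError, KeyError, TypeError):
            return []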
---
 google_images_download/google_images_download.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index a5b08eb6..5985f0e9 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -199,9 +199,18 @@ def _extract_data_pack_ajax(self, data):
         lines = data.split('\n')
         return json.loads(lines[3])[0][2]
 
-    def _image_objects_from_pack(self, data):
-        image_objects = json.loads(data)[31][-1][12][2]
-        image_objects = [x for x in image_objects if x[0] == 1]
+    @staticmethod
+    def _image_objects_from_pack(data):
+        image_data = json.loads(data)
+        # NOTE: google sometimes changes their format, breaking this. set a breakpoint here to find the correct index
+        idx = 56
+        grid = image_data[idx][-1][0][-1][-1][0]
+        image_objects = []
+        for item in grid:
+            obj = list(item[0][0].values())[0]
+            # ads and carousels will be empty
+            if obj:
+                image_objects.append(obj)
         return image_objects
 
     # Downloading entire Web Document (Raw Page Content)

From 3f58a9a99589e17ee5ce718a27cbd56441a1fd49 Mon Sep 17 00:00:00 2001
From: Ellis Brown
Date: Fri, 23 Sep 2022 23:37:03 +0000
Subject: [PATCH 28/31] update error message to point to this PR
---
 google_images_download/google_images_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 5985f0e9..4f1e31d9 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -247,7 +247,7 @@ def download_page(self, url):
             return self._image_objects_from_pack(self._extract_data_pack(respData)), self.get_all_tabs(respData)
         except Exception as e:
             print(e)
-            print('Image objects data unpacking failed. Please leave a comment with the above error at https://github.com/hardikvasa/google-images-download/pull/298')
+            print('Image objects data unpacking failed. Please leave a comment with the above error at https://github.com/Joeclinton1/google-images-download/pull/26')
             sys.exit()
 
     # Download Page for more than 100 images

From 219b850f41de9cdc71531fbbf4cbed2ae5eaf3bf Mon Sep 17 00:00:00 2001
From: Ellis Brown
Date: Sat, 24 Sep 2022 01:32:50 +0000
Subject: [PATCH 29/31] fix chromium downloads
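
With the test parameterized through argparse it can be invoked
directly from the command line, for example (keyword, limit, and
driver path are illustrative):

    python tests/test_google_images_download.py -k "Polar bears" -l 5 \
        -c /usr/local/bin/chromedriver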
---
 .../google_images_download.py        |  5 +++--
 tests/test_google_images_download.py | 20 +++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 4f1e31d9..c5d82f24 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -209,8 +209,9 @@ def _image_objects_from_pack(data):
         for item in grid:
             obj = list(item[0][0].values())[0]
             # ads and carousels will be empty
-            if obj:
-                image_objects.append(obj)
+            if not obj or not obj[1]:
+                continue
+            image_objects.append(obj)
         return image_objects
 
     # Downloading entire Web Document (Raw Page Content)
diff --git a/tests/test_google_images_download.py b/tests/test_google_images_download.py
index d16a76bc..e9089945 100644
--- a/tests/test_google_images_download.py
+++ b/tests/test_google_images_download.py
@@ -1,3 +1,4 @@
+import argparse
 from google_images_download import google_images_download
 import os, errno
 import time
@@ -13,14 +14,8 @@ def silent_remove_of_file(file):
     return True
 
-def test_download_images_to_default_location():
+def test_download_images_to_default_location(arguments: dict):
     start_time = time.time()
-    arguments = {
-        "keywords": "Polar bears",
-        "limit":101,
-        "print_urls": False,
-        "chromedriver": 'C:/Program Files (x86)/chromedriver/chromedriver.exe'
-    }
     try:
         temp = arguments['output_folder']
     except KeyError:
         pass
@@ -53,4 +48,13 @@ def test_download_images_to_default_location():
         else:
             print(f"Failed to delete {os.path.join(output_folder_path, file)}")
 
-test_download_images_to_default_location()
\ No newline at end of file
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-k', '--keywords', type=str, help='delimited list input', default="Polar bears")
+    parser.add_argument('-l', '--limit', type=int, help='delimited list input', default=101)
+    parser.add_argument('-u', '--print_urls', action='store_true', help='print the URLs of the images')
+    parser.add_argument('-c', '--chromedriver', type=str, help='path to chromedriver executable in your local machine', default='C:/Program Files (x86)/chromedriver/chromedriver.exe')
+    args = parser.parse_args()
+    print(f"testing with args: {args}")
+
+    test_download_images_to_default_location(vars(args))

From 1421a434c0557e7a71e87732c913b9c8d11a92b7 Mon Sep 17 00:00:00 2001
From: Ellis Brown
Date: Mon, 26 Sep 2022 21:42:39 +0000
Subject: [PATCH 30/31] fix again after new update 9/26
---
 google_images_download/google_images_download.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index c5d82f24..247c8b71 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -203,11 +203,9 @@ def _extract_data_pack_ajax(self, data):
     def _image_objects_from_pack(data):
         image_data = json.loads(data)
         # NOTE: google sometimes changes their format, breaking this. set a breakpoint here to find the correct index
-        idx = 56
-        grid = image_data[idx][-1][0][-1][-1][0]
+        grid = image_data[31][0][12][2]
         image_objects = []
-        for item in grid:
-            obj = list(item[0][0].values())[0]
+        for obj in grid:
             # ads and carousels will be empty
             if not obj or not obj[1]:
                 continue

From 2e117f3043aa404630d4d4129e22066697f8814c Mon Sep 17 00:00:00 2001
From: Ellis Brown
Date: Fri, 30 Sep 2022 05:11:40 +0000
Subject: [PATCH 31/31] revert rollback from 9/26
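
Google has alternated between (at least) these two pack layouts over
the course of this series, so a tolerant parser could try both in
order (a rough sketch; _grid_candidates is a hypothetical helper, and
the unwrap step has to match the layout because the item shapes
differ):

    def _grid_candidates(image_data):
        # Newest layout first: grid items are single-entry dicts whose
        # value holds the image object.
        try:
            grid = image_data[56][-1][0][-1][-1][0]
            return [list(item[0][0].values())[0] for item in grid]
        except (IndexError, KeyError, TypeError):
            pass
        # Older layout: grid entries are the image objects themselves.
        try:
            return image_data[31][0][12][2]
        except (IndexError, KeyError, TypeError):
            return []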
---
 google_images_download/google_images_download.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 247c8b71..eac2a5bf 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -203,9 +203,10 @@ def _extract_data_pack_ajax(self, data):
     def _image_objects_from_pack(data):
         image_data = json.loads(data)
         # NOTE: google sometimes changes their format, breaking this. set a breakpoint here to find the correct index
-        grid = image_data[31][0][12][2]
+        grid = image_data[56][-1][0][-1][-1][0]
         image_objects = []
-        for obj in grid:
+        for item in grid:
+            obj = list(item[0][0].values())[0]
             # ads and carousels will be empty
             if not obj or not obj[1]:
                 continue