From f3a131607cfd28789f7818931c6d8fb22d72cc34 Mon Sep 17 00:00:00 2001
From: Adek Maulana
Date: Sat, 22 Feb 2020 18:27:06 +0700
Subject: [PATCH] scrapers: .img: fall back to temporary solution

Unfortunately, it appears the Google Images page format has changed;
this is a temporary solution taken from
"https://github.com/hardikvasa/google-images-download/pull/298"

Change-Id: Iadcfa995e6b7c6229505ec0872810876575d738e
---
 userbot/google_images_download.py | 1018 +++++++++++++++++++++++++++++
 userbot/modules/scrapers.py       |    4 +-
 2 files changed, 1020 insertions(+), 2 deletions(-)
 create mode 100644 userbot/google_images_download.py

diff --git a/userbot/google_images_download.py b/userbot/google_images_download.py
new file mode 100644
index 00000000..4eebe057
--- /dev/null
+++ b/userbot/google_images_download.py
@@ -0,0 +1,1018 @@
+#!/usr/bin/env python
+# In[ ]:
+# coding: utf-8
+
+###### Searching and Downloading Google Images to the local disk ######
+
+# Import Libraries
+import sys
+import ast
+version = (3, 0)
+cur_version = sys.version_info
+if cur_version >= version: # If the Current Version of Python is 3.0 or above
+    import urllib.request
+    from urllib.request import Request, urlopen
+    from urllib.request import URLError, HTTPError
+    from urllib.parse import quote
+    import http.client
+    from http.client import IncompleteRead, BadStatusLine
+    http.client._MAXHEADERS = 1000
+else: # If the Current Version of Python is 2.x
+    import urllib2
+    from urllib2 import Request, urlopen
+    from urllib2 import URLError, HTTPError
+    from urllib import quote
+    import httplib
+    from httplib import IncompleteRead, BadStatusLine
+    httplib._MAXHEADERS = 1000
+import time # Importing the time library to check the time of code execution
+import os
+import argparse
+import ssl
+import datetime
+import json
+import re
+import codecs
+import socket
+
+args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords",
+             "limit", "format", "color", "color_type", "usage_rights", "size",
+             "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image",
+             "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site",
+             "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout",
+             "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering",
+             "offset", "no_download","save_source","silent_mode","ignore_urls"]
+
+
+def user_input():
+    config = argparse.ArgumentParser()
+    config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False)
+    config_file_check = config.parse_known_args()
+    object_check = vars(config_file_check[0])
+
+    if object_check['config_file'] != '':
+        records = []
+        json_file = json.load(open(config_file_check[0].config_file))
+        for record in range(0,len(json_file['Records'])):
+            arguments = {}
+            for i in args_list:
+                arguments[i] = None
+            for key, value in json_file['Records'][record].items():
+                arguments[key] = value
+            records.append(arguments)
+        records_count = len(records)
+    else:
+        # Taking command line arguments from users
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False)
+        parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False)
+        parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False)
+        parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', type=str, required=False)
+        parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False)
+        parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False,
+                            choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico'])
+        parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False)
+        parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False)
+        parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, required=False)
+        parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, required=False)
+        parser.add_argument('-n', '--no_directory', default=False, help='download images in the main directory but no sub-directory', action="store_true")
+        parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, required=False)
+        parser.add_argument('-co', '--color', help='filter on color', type=str, required=False,
+                            choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'])
+        parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False,
+                            choices=['full-color', 'black-and-white', 'transparent'])
+        parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False,
+                            choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse'])
+        parser.add_argument('-s', '--size', help='image size', type=str, required=False,
+                            choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP'])
+        parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, required=False)
+        parser.add_argument('-t', '--type', help='image type', type=str, required=False,
+                            choices=['face','photo','clipart','line-drawing','animated'])
+        parser.add_argument('-w', '--time', help='image age', type=str, required=False,
+                            choices=['past-24-hours','past-7-days','past-month','past-year'])
+        parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) + parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, + choices=['tall', 'square', 'wide', 'panoramic']) + parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) + parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) + parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") + parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") + parser.add_argument('-pp', '--print_paths', default=False, help="Prints the list of absolute paths of the images",action="store_true") + parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") + parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") + parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) + parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-tho', '--thumbnail_only', default=False, help="Downloads only thumbnail without downloading actual images", action="store_true") + parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, + choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) + parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) + parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) + parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) + parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") + parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true") + parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") + parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) + parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") + parser.add_argument('-iu', '--ignore_urls', default=False, help="delimited list input of image urls/keywords to ignore", type=str) + parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. 
Does not print notification messages on the terminal", action="store_true") + parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False) + + args = parser.parse_args() + arguments = vars(args) + records = [] + records.append(arguments) + return records + + +class googleimagesdownload: + def __init__(self): + pass + + # Downloading entire Web Document (Raw Page Content) + def download_page(self,url): + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + req = urllib.request.Request(url, headers=headers) + resp = urllib.request.urlopen(req) + respData = str(resp.read()) + return respData + except Exception as e: + print("Could not open URL. Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() + else: # If the Current Version of Python is 2.x + try: + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + req = urllib2.Request(url, headers=headers) + try: + response = urllib2.urlopen(req) + except URLError: # Handling SSL certificate failed + context = ssl._create_unverified_context() + response = urlopen(req, context=context) + page = response.read() + return page + except: + print("Could not open URL. Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() + return "Page Not found" + + + # Download Page for more than 100 images + def download_extended_page(self,url,chromedriver): + from selenium import webdriver + from selenium.webdriver.common.keys import Keys + if sys.version_info[0] < 3: + reload(sys) + sys.setdefaultencoding('utf8') + options = webdriver.ChromeOptions() + options.add_argument('--no-sandbox') + options.add_argument("--headless") + + try: + browser = webdriver.Chrome(chromedriver, chrome_options=options) + except Exception as e: + print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " + "argument to specify the path to the executable.) or google chrome browser is not " + "installed on your machine (exception: %s)" % e) + sys.exit() + browser.set_window_size(1024, 768) + + # Open the link + browser.get(url) + time.sleep(1) + print("Getting you a lot of images. 
This may take a few moments...") + + element = browser.find_element_by_tag_name("body") + # Scroll down + for i in range(30): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) + + try: + browser.find_element_by_id("smb").click() + for i in range(50): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + except: + for i in range(10): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + + print("Reached end of Page.") + time.sleep(0.5) + + source = browser.page_source #page source + #close the browser + browser.close() + + return source + + + #Correcting the escape characters for python2 + def replace_with_byte(self,match): + return chr(int(match.group(0)[1:], 8)) + + def repair(self,brokenjson): + invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF + return invalid_escape.sub(self.replace_with_byte, brokenjson) + + + # Finding 'Next Image' from the given raw page + def get_next_tab(self,s): + start_line = s.find('class="dtviD"') + if start_line == -1: # If no links are found then give an error! + end_quote = 0 + link = "no_tabs" + return link,'',end_quote + else: + start_line = s.find('class="dtviD"') + start_content = s.find('href="', start_line + 1) + end_content = s.find('">', start_content + 1) + url_item = "https://www.google.com" + str(s[start_content + 6:end_content]) + url_item = url_item.replace('&', '&') + + start_line_2 = s.find('class="dtviD"') + s = s.replace('&', '&') + start_content_2 = s.find(':', start_line_2 + 1) + end_content_2 = s.find('&usg=', start_content_2 + 1) + url_item_name = str(s[start_content_2 + 1:end_content_2]) + + chars = url_item_name.find(',g_1:') + chars_end = url_item_name.find(":", chars + 6) + if chars_end == -1: + updated_item_name = (url_item_name[chars + 5:]).replace("+", " ") + else: + updated_item_name = (url_item_name[chars+5:chars_end]).replace("+", " ") + + return url_item, updated_item_name, end_content + + + # Getting all links with the help of '_images_get_next_image' + def get_all_tabs(self,page): + tabs = {} + while True: + item,item_name,end_content = self.get_next_tab(page) + if item == "no_tabs": + break + else: + if len(item_name) > 100 or item_name == "background-color": + break + else: + tabs[item_name] = item # Append all the links in the list named 'Links' + time.sleep(0.1) # Timer could be used to slow down the request for image downloads + page = page[end_content:] + return tabs + + + #Format the object in readable format + def format_object(self,object): + data = object[1] + main = data[3] + info = data[9] + formatted_object = {} + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] + return formatted_object + + + #function to download single image + def single_image(self,image_url): + main_directory = "downloads" + extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") + url = image_url + try: + os.makedirs(main_directory) + except OSError as e: + if e.errno != 17: + raise + pass + req = Request(url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + + response = 
urlopen(req, None, 10) + data = response.read() + response.close() + + image_name = str(url[(url.rfind('/')) + 1:]) + if '?' in image_name: + image_name = image_name[:image_name.find('?')] + # if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + if any(map(lambda extension: extension in image_name, extensions)): + file_name = main_directory + "/" + image_name + else: + file_name = main_directory + "/" + image_name + ".jpg" + image_name = image_name + ".jpg" + + try: + output_file = open(file_name, 'wb') + output_file.write(data) + output_file.close() + except IOError as e: + raise e + except OSError as e: + raise e + print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) + return + + def similar_images(self,similar_images): + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + + req1 = urllib.request.Request(searchUrl, headers=headers) + resp1 = urllib.request.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + req2 = urllib.request.Request(newurl, headers=headers) + resp2 = urllib.request.urlopen(req2) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return urll2 + except: + return "Cloud not connect to Google Images endpoint" + else: # If the Current Version of Python is 2.x + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + + req1 = urllib2.Request(searchUrl, headers=headers) + resp1 = urllib2.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + req2 = urllib2.Request(newurl, headers=headers) + resp2 = urllib2.urlopen(req2) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return(urll2) + except: + return "Cloud not connect to Google Images endpoint" + + #Building URL parameters + def build_url_parameters(self,arguments): + if arguments['language']: + lang = "&lr=" + lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} + lang_url = lang+lang_param[arguments['language']] + else: + lang_url = '' + + if arguments['time_range']: + json_acceptable_string = 
arguments['time_range'].replace("'", "\"") + d = json.loads(json_acceptable_string) + time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max'] + else: + time_range = '' + + if arguments['exact_size']: + size_array = [x.strip() for x in arguments['exact_size'].split(',')] + exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1]) + else: + exact_size = '' + + built_url = "&tbs=" + counter = 0 + params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], + 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], + 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], + 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], + 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], + 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m','past-year':'qdr:y'}], + 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], + 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico','raw':'ift:craw'}]} + for key, value in params.items(): + if value[0] is not None: + ext_param = value[1][value[0]] + # counter will tell if it is first param added or not + if counter == 0: + # add it to the built url + built_url = built_url + ext_param + counter += 1 + else: + built_url = built_url + ',' + ext_param + counter += 1 + built_url = lang_url+built_url+exact_size+time_range + return built_url + + + #building main search URL + def build_search_url(self,search_term,params,url,similar_images,specific_site,safe_search): + #check safe_search + safe_search_string = "&safe=active" + # check the args and choose the URL + if url: + url = url + elif similar_images: + print(similar_images) + keywordem = self.similar_images(similar_images) + url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + elif specific_site: + url = 'https://www.google.com/search?q=' + quote( + search_term.encode('utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + else: + url = 'https://www.google.com/search?q=' + quote( + search_term.encode('utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + 
'&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + + #safe search check + if safe_search: + url = url + safe_search_string + + return url + + + #measures the file size + def file_size(self,file_path): + if os.path.isfile(file_path): + file_info = os.stat(file_path) + size = file_info.st_size + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if size < 1024.0: + return "%3.1f %s" % (size, x) + size /= 1024.0 + return size + + #keywords from file + def keywords_from_file(self,file_name): + search_keyword = [] + with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: + if '.csv' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + elif '.txt' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + else: + print("Invalid file type: Valid file types are either .txt or .csv \n" + "exiting...") + sys.exit() + return search_keyword + + # make directories + def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): + dir_name_thumbnail = dir_name + " - thumbnail" + # make a search keyword directory + try: + if not os.path.exists(main_directory): + os.makedirs(main_directory) + time.sleep(0.15) + path = (dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail or thumbnail_only: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + else: + path = (dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail or thumbnail_only: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + except OSError as e: + if e.errno != 17: + raise + pass + return + + + # Download Image thumbnails + def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls): + if print_urls or no_download: + print("Image URL: " + image_url) + if no_download: + return "success","Printed url without downloading" + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + + response = urlopen(req, None, timeout) + data = response.read() + response.close() + + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path,'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) + + download_status = 'success' + download_message = "Completed Image Thumbnail ====> " + return_image_name + + # image size parameter + if print_size: + print("Image Size: " + str(self.file_size(path))) + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return download_status, download_message + + + # Download Images + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls): + if not silent_mode: + if print_urls or no_download: + print("Image URL: " + image_url) + if ignore_urls: + if any(url in image_url for url in ignore_urls.split(',')): + return "fail", "Image ignored due to 'ignore url' parameter", None, image_url + if thumbnail_only: + return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url + if no_download: + return "success","Printed url without downloading",None,image_url + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + + response = urlopen(req, None, timeout) + data = response.read() + response.close() + + extensions = [".jpg", ".jpeg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico"] + # keep everything after the last '/' + image_name = str(image_url[(image_url.rfind('/')) + 1:]) + if format: + if not image_format or image_format != format: + download_status = 'fail' + download_message = "Wrong image format returned. Skipping..." + return_image_name = '' + absolute_path = '' + return download_status, download_message, return_image_name, absolute_path + + if image_format == "" or not image_format or "." + image_format not in extensions: + download_status = 'fail' + download_message = "Invalid or missing image format. Skipping..." + return_image_name = '' + absolute_path = '' + return download_status, download_message, return_image_name, absolute_path + elif image_name.lower().find("." + image_format) < 0: + image_name = image_name + "." + image_format + else: + image_name = image_name[:image_name.lower().find("." + image_format) + (len(image_format) + 1)] + + # prefix name in image + if prefix: + prefix = prefix + " " + else: + prefix = '' + + if no_numbering: + path = main_directory + "/" + dir_name + "/" + prefix + image_name + else: + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + "." 
+ image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path,'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() + absolute_path = os.path.abspath(path) + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + #return image name back to calling method to use it for thumbnail downloads + download_status = 'success' + download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name + return_image_name = prefix + str(count) + "." + image_name + + # image size parameter + if not silent_mode: + if print_size: + print("Image Size: " + str(self.file_size(path))) + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except BadStatusLine as e: + download_status = 'fail' + download_message = "BadStatusLine on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + except IncompleteRead as e: + download_status = 'fail' + download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' + + return download_status,download_message,return_image_name,absolute_path + + + # Finding 'Next Image' from the given raw page + def _get_next_item(self,s): + start_line = s.find('rg_meta notranslate') + if start_line == -1: # If no links are found then give an error! 
+ end_quote = 0 + link = "no_links" + return link, end_quote + else: + start_line = s.find('class="rg_meta notranslate">') + start_object = s.find('{', start_line + 1) + end_object = s.find('', start_object + 1) + object_raw = str(s[start_object:end_object]) + #remove escape characters based on python version + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: #python3 + try: + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + final_object = json.loads(object_decode) + except: + final_object = "" + else: #python2 + try: + final_object = (json.loads(self.repair(object_raw))) + except: + final_object = "" + return final_object, end_object + + + # Getting all links with the help of '_images_get_next_image' + def _get_image_objects(self,s): + start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_raw = str(s[start_object:end_object]) + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + image_objects = json.loads(object_decode)[31][0][12][2] + return image_objects + + def _get_all_items(self,page,main_directory,dir_name,limit,arguments): + items = [] + abs_path = [] + errorCount = 0 + i = 0 + count = 1 + image_objects = self._get_image_objects(page) + while count < limit+1: + if len(image_objects) == 0: + print("no_links") + break + else: + #format the item for readability + object = self.format_object(image_objects[i]) + if arguments['metadata']: + if not arguments["silent_mode"]: + print("\nImage Metadata: " + str(object)) + + #download the images + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format'],arguments['ignore_urls']) + if not arguments["silent_mode"]: + print(download_message) + if download_status == "success": + + # download image_thumbnails + if arguments['thumbnail'] or arguments["thumbnail_only"]: + download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments['ignore_urls']) + if not arguments["silent_mode"]: + print(download_message_thumbnail) + + count += 1 + object['image_filename'] = return_image_name + items.append(object) # Append all the links in the list named 'Links' + abs_path.append(absolute_path) + else: + errorCount += 1 + + #delay param + if arguments['delay']: + time.sleep(int(arguments['delay'])) + i += 1 + if count < limit: + print("\n\nUnfortunately all " + str( + limit) + " could not be downloaded because some images were not downloadable. 
" + str( + count-1) + " is all we got for this search filter!") + return items,errorCount,abs_path + + + # Bulk Download + def download(self,arguments): + paths_agg = {} + # for input coming from other python files + if __name__ != "__main__": + # if the calling file contains config_file param + if 'config_file' in arguments: + records = [] + json_file = json.load(open(arguments['config_file'])) + for record in range(0, len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + total_errors = 0 + for rec in records: + paths, errors = self.download_executor(rec) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + total_errors = total_errors + errors + return paths_agg,total_errors + # if the calling file contains params directly + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + # for input coming from CLI + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + + def download_executor(self,arguments): + paths = {} + errorCount = None + for arg in args_list: + if arg not in arguments: + arguments[arg] = None + ######Initialization and Validation of user arguments + if arguments['keywords']: + search_keyword = [str(item) for item in arguments['keywords'].split(',')] + + if arguments['keywords_from_file']: + search_keyword = self.keywords_from_file(arguments['keywords_from_file']) + + # both time and time range should not be allowed in the same query + if arguments['time'] and arguments['time_range']: + raise ValueError('Either time or time range should be used in a query. Both cannot be used at the same time.') + + # both time and time range should not be allowed in the same query + if arguments['size'] and arguments['exact_size']: + raise ValueError('Either "size" or "exact_size" should be used in a query. 
Both cannot be used at the same time.') + + # both image directory and no image directory should not be allowed in the same query + if arguments['image_directory'] and arguments['no_directory']: + raise ValueError('You can either specify image directory or specify no image directory, not both!') + + # Additional words added to keywords + if arguments['suffix_keywords']: + suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] + else: + suffix_keywords = [''] + + # Additional words added to keywords + if arguments['prefix_keywords']: + prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] + else: + prefix_keywords = [''] + + # Setting limit on number of images to be downloaded + if arguments['limit']: + limit = int(arguments['limit']) + else: + limit = 100 + + if arguments['url']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + if arguments['similar_images']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + # If single_image or url argument not present then keywords is mandatory argument + if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \ + arguments['keywords'] is None and arguments['keywords_from_file'] is None: + print('-------------------------------\n' + 'Uh oh! Keywords is a required argument \n\n' + 'Please refer to the documentation on guide to writing queries \n' + 'https://github.com/hardikvasa/google-images-download#examples' + '\n\nexiting!\n' + '-------------------------------') + sys.exit() + + # If this argument is present, set the custom output directory + if arguments['output_directory']: + main_directory = arguments['output_directory'] + else: + main_directory = "downloads" + + # Proxy settings + if arguments['proxy']: + os.environ["http_proxy"] = arguments['proxy'] + os.environ["https_proxy"] = arguments['proxy'] + ######Initialization Complete + total_errors = 0 + for pky in prefix_keywords: # 1.for every prefix keywords + for sky in suffix_keywords: # 2.for every suffix keywords + i = 0 + while i < len(search_keyword): # 3.for every main keyword + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + (search_keyword[i]) + (sky) + if not arguments["silent_mode"]: + print(iteration.encode('raw_unicode_escape').decode('utf-8')) + print("Evaluating...") + else: + print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...") + search_term = pky + search_keyword[i] + sky + + if arguments['image_directory']: + dir_name = arguments['image_directory'] + elif arguments['no_directory']: + dir_name = '' + else: + dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory + + if not arguments["no_download"]: + self.create_directories(main_directory,dir_name,arguments['thumbnail'],arguments['thumbnail_only']) #create directories in OS + + params = self.build_url_parameters(arguments) #building URL with params + + url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site'],arguments['safe_search']) #building main search url + + if limit < 101: + raw_html = self.download_page(url) # download page + else: + raw_html = self.download_extended_page(url,arguments['chromedriver']) + + if not arguments["silent_mode"]: + if arguments['no_download']: + print("Getting URLs without downloading 
images...") + else: + print("Starting Download...") + items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images + paths[pky + search_keyword[i] + sky] = abs_path + + #dumps into a json file + if arguments['extract_metadata']: + try: + if not os.path.exists("logs"): + os.makedirs("logs") + except OSError as e: + print(e) + json_file = open("logs/"+search_keyword[i]+".json", "w") + json.dump(items, json_file, indent=4, sort_keys=True) + json_file.close() + + #Related images + if arguments['related_images']: + print("\nGetting list of related keywords...this may take a few moments") + tabs = self.get_all_tabs(raw_html) + for key, value in tabs.items(): + final_search_term = (search_term + " - " + key) + print("\nNow Downloading - " + final_search_term) + if limit < 101: + new_raw_html = self.download_page(value) # download page + else: + new_raw_html = self.download_extended_page(value,arguments['chromedriver']) + self.create_directories(main_directory, final_search_term,arguments['thumbnail'],arguments['thumbnail_only']) + self._get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit,arguments) + + i += 1 + total_errors = total_errors + errorCount + if not arguments["silent_mode"]: + print("\nErrors: " + str(errorCount) + "\n") + return paths, total_errors + +#------------- Main Program -------------# +def main(): + records = user_input() + total_errors = 0 + t0 = time.time() # start the timer + for arguments in records: + + if arguments['single_image']: # Download Single Image using a URL + response = googleimagesdownload() + response.single_image(arguments['single_image']) + else: # or download multiple images based on keywords/keyphrase search + response = googleimagesdownload() + paths,errors = response.download(arguments) #wrapping response in a variable just for consistency + total_errors = total_errors + errors + + t1 = time.time() # stop the timer + total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + if not arguments["silent_mode"]: + print("\nEverything downloaded!") + print("Total errors: " + str(total_errors)) + print("Total time taken: " + str(total_time) + " Seconds") + +if __name__ == "__main__": + main() + +# In[ ]: diff --git a/userbot/modules/scrapers.py b/userbot/modules/scrapers.py index 840ef60a..cc97da23 100644 --- a/userbot/modules/scrapers.py +++ b/userbot/modules/scrapers.py @@ -24,7 +24,6 @@ from urbandict import define from requests import get from search_engine_parser import GoogleSearch -from google_images_download import google_images_download from googleapiclient.discovery import build from googleapiclient.errors import HttpError from googletrans import LANGUAGES, Translator @@ -41,6 +40,7 @@ from userbot.events import register from telethon.tl.types import DocumentAttributeAudio from userbot.modules.upload_download import progress, humanbytes, time_formatter +from userbot.google_images_download import googleimagesdownload CARBONLANG = "auto" TTS_LANG = "en" @@ -132,7 +132,7 @@ async def img_sampler(event): query = query.replace("lim=" + lim[0], "") except IndexError: lim = 3 - response = google_images_download.googleimagesdownload() + response = googleimagesdownload() # creating list of arguments arguments = {