From 8decf8dc460eba5e836904fffcfb32e569545a80 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:34:27 -0500 Subject: [PATCH 1/6] Update CDP Mode --- seleniumbase/core/browser_launcher.py | 25 ++++++++++++++----- seleniumbase/core/sb_cdp.py | 8 ++++++ seleniumbase/undetected/cdp_driver/browser.py | 17 ++++++------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index 5283b6dc75f..2240410e938 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -612,6 +612,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp.save_cookies = CDPM.save_cookies cdp.load_cookies = CDPM.load_cookies cdp.clear_cookies = CDPM.clear_cookies + cdp.sleep = CDPM.sleep cdp.bring_active_window_to_front = CDPM.bring_active_window_to_front cdp.bring_to_front = CDPM.bring_active_window_to_front cdp.get_active_element = CDPM.get_active_element @@ -684,6 +685,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp.select_if_unselected = CDPM.select_if_unselected cdp.unselect_if_selected = CDPM.unselect_if_selected cdp.is_checked = CDPM.is_checked + cdp.is_selected = CDPM.is_selected cdp.is_element_present = CDPM.is_element_present cdp.is_element_visible = CDPM.is_element_visible cdp.wait_for_element_visible = CDPM.wait_for_element_visible @@ -699,6 +701,8 @@ def uc_open_with_cdp_mode(driver, url=None): cdp.assert_url_contains = CDPM.assert_url_contains cdp.assert_text = CDPM.assert_text cdp.assert_exact_text = CDPM.assert_exact_text + cdp.assert_true = CDPM.assert_true + cdp.assert_false = CDPM.assert_false cdp.scroll_into_view = CDPM.scroll_into_view cdp.scroll_to_y = CDPM.scroll_to_y cdp.scroll_to_top = CDPM.scroll_to_top @@ -1167,7 +1171,12 @@ def _uc_gui_click_captcha( frame = "%s div" % frame elif ( driver.is_element_present('[name*="cf-turnstile-"]') - and driver.is_element_present('[class*=spacer] + div div') + and 
driver.is_element_present("#challenge-form div > div") + ): + frame = "#challenge-form div > div" + elif ( + driver.is_element_present('[name*="cf-turnstile-"]') + and driver.is_element_present("[class*=spacer] + div div") ): frame = '[class*=spacer] + div div' elif ( @@ -1240,8 +1249,8 @@ def _uc_gui_click_captcha( return try: if ctype == "g_rc" and not driver.is_connected(): - x = (i_x + 32) * width_ratio - y = (i_y + 34) * width_ratio + x = (i_x + 29) * width_ratio + y = (i_y + 35) * width_ratio elif visible_iframe: selector = "span" if ctype == "g_rc": @@ -1256,8 +1265,8 @@ def _uc_gui_click_captcha( y = i_y + element.rect["y"] + (element.rect["height"] / 2.0) y += 0.5 else: - x = (i_x + 34) * width_ratio - y = (i_y + 34) * width_ratio + x = (i_x + 32) * width_ratio + y = (i_y + 32) * width_ratio if driver.is_connected(): driver.switch_to.default_content() except Exception: @@ -1497,6 +1506,7 @@ def _uc_gui_handle_captcha_(driver, frame="iframe", ctype=None): tab_count += 1 time.sleep(0.027) active_element_css = js_utils.get_active_element_css(driver) + print(active_element_css) if ( active_element_css.startswith(selector) or active_element_css.endswith(" > div" * 2) @@ -1514,7 +1524,10 @@ def _uc_gui_handle_captcha_(driver, frame="iframe", ctype=None): except Exception: return if ( - driver.is_element_present(".footer .clearfix .ray-id") + ( + driver.is_element_present(".footer .clearfix .ray-id") + or driver.is_element_present("script[data-cf-beacon]") + ) and hasattr(sb_config, "_saved_cf_tab_count") and sb_config._saved_cf_tab_count ): diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index ace9190bbe7..17c1683d4f7 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -1681,6 +1681,14 @@ def assert_exact_text( % (text, element.text_all, selector) ) + def assert_true(self, expression): + if not expression: + raise AssertionError("%s is not true") + + def assert_false(self, expression): + if expression: + raise 
AssertionError("%s is not false") + def scroll_into_view(self, selector): self.find_element(selector).scroll_into_view() self.loop.run_until_complete(self.page.wait()) diff --git a/seleniumbase/undetected/cdp_driver/browser.py b/seleniumbase/undetected/cdp_driver/browser.py index 882de93edca..cadb8fa73bf 100644 --- a/seleniumbase/undetected/cdp_driver/browser.py +++ b/seleniumbase/undetected/cdp_driver/browser.py @@ -6,8 +6,9 @@ import json import logging import os -import pickle import pathlib +import pickle +import re import shutil import urllib.parse import urllib.request @@ -644,7 +645,7 @@ async def get_all( """ connection = None for _tab in self._browser.tabs: - if _tab.closed: + if hasattr(_tab, "closed") and _tab.closed: continue connection = _tab break @@ -674,7 +675,7 @@ async def set_all(self, cookies: List[cdp.network.CookieParam]): """ connection = None for _tab in self._browser.tabs: - if _tab.closed: + if hasattr(_tab, "closed") and _tab.closed: continue connection = _tab break @@ -698,13 +699,11 @@ async def save(self, file: PathLike = ".session.dat", pattern: str = ".*"): - Contain "nowsecure" :type pattern: str """ - import re - pattern = re.compile(pattern) save_path = pathlib.Path(file).resolve() connection = None for _tab in self._browser.tabs: - if _tab.closed: + if hasattr(_tab, "closed") and _tab.closed: continue connection = _tab break @@ -746,15 +745,13 @@ async def load(self, file: PathLike = ".session.dat", pattern: str = ".*"): - Contain "nowsecure" :type pattern: str """ - import re - pattern = re.compile(pattern) save_path = pathlib.Path(file).resolve() cookies = pickle.load(save_path.open("r+b")) included_cookies = [] connection = None for _tab in self._browser.tabs: - if _tab.closed: + if hasattr(_tab, "closed") and _tab.closed: continue connection = _tab break @@ -779,7 +776,7 @@ async def clear(self): """ connection = None for _tab in self._browser.tabs: - if _tab.closed: + if hasattr(_tab, "closed") and _tab.closed: continue 
connection = _tab break From 68a51565f4ac884352b710c1222facfc5a743d42 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:35:23 -0500 Subject: [PATCH 2/6] Add a new CDP Mode example --- examples/cdp_mode/raw_demo_site.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 examples/cdp_mode/raw_demo_site.py diff --git a/examples/cdp_mode/raw_demo_site.py b/examples/cdp_mode/raw_demo_site.py new file mode 100644 index 00000000000..2c0dda51404 --- /dev/null +++ b/examples/cdp_mode/raw_demo_site.py @@ -0,0 +1,72 @@ +"""Example of using various CDP Mode commands""" +from seleniumbase import SB + +with SB(uc=True, test=True) as sb: + url = "https://seleniumbase.io/demo_page" + sb.activate_cdp_mode(url) + + # Assert various things + sb.cdp.assert_title("Web Testing Page") + sb.cdp.assert_element("tbody#tbodyId") + sb.cdp.assert_text("Demo Page", "h1") + + # Type text into various text fields and then assert + sb.cdp.type("#myTextInput", "This is Automated") + sb.cdp.type("textarea.area1", "Testing Time!\n") + sb.cdp.type('[name="preText2"]', "Typing Text!") + sb.cdp.assert_text("This is Automated", "#myTextInput") + sb.cdp.assert_text("Testing Time!\n", "textarea.area1") + sb.cdp.assert_text("Typing Text!", '[name="preText2"]') + + # Hover & click a dropdown element and assert results + sb.cdp.assert_text("Automation Practice", "h3") + sb.cdp.gui_hover_and_click("#myDropdown", "#dropOption2") + sb.cdp.assert_text("Link Two Selected", "h3") + + # Click a button and then verify the expected results + sb.cdp.assert_text("This Text is Green", "#pText") + sb.cdp.click('button:contains("Click Me")') + sb.cdp.assert_text("This Text is Purple", "#pText") + + # Verify that a slider control updates a progress bar + sb.cdp.assert_element('progress[value="50"]') + sb.cdp.set_value("input#mySlider", "100") + sb.cdp.assert_element('progress[value="100"]') + + # Verify that a "select" option updates a meter bar + 
sb.cdp.assert_element('meter[value="0.25"]') + sb.cdp.select_option_by_text("#mySelect", "Set to 75%") + sb.cdp.assert_element('meter[value="0.75"]') + + # Verify that clicking a radio button selects it + sb.cdp.assert_false(sb.cdp.is_selected("#radioButton2")) + sb.cdp.click("#radioButton2") + sb.cdp.assert_true(sb.cdp.is_selected("#radioButton2")) + + # Verify that clicking a checkbox makes it selected + sb.cdp.assert_element_not_visible("img#logo") + sb.cdp.assert_false(sb.cdp.is_selected("#checkBox1")) + sb.cdp.click("#checkBox1") + sb.cdp.assert_true(sb.cdp.is_selected("#checkBox1")) + sb.cdp.assert_element("img#logo") + + # Verify clicking on multiple elements with one call + sb.cdp.assert_false(sb.cdp.is_selected("#checkBox2")) + sb.cdp.assert_false(sb.cdp.is_selected("#checkBox3")) + sb.cdp.assert_false(sb.cdp.is_selected("#checkBox4")) + sb.cdp.click_visible_elements("input.checkBoxClassB") + sb.cdp.assert_true(sb.cdp.is_selected("#checkBox2")) + sb.cdp.assert_true(sb.cdp.is_selected("#checkBox3")) + sb.cdp.assert_true(sb.cdp.is_selected("#checkBox4")) + + # Verify Drag and Drop + sb.cdp.assert_element_not_visible("div#drop2 img#logo") + sb.cdp.gui_drag_and_drop("img#logo", "div#drop2") + sb.cdp.assert_element("div#drop2 img#logo") + + # Click inside an iframe and test highlighting + sb.cdp.flash("iframe#myFrame3") + sb.cdp.sleep(1) + sb.cdp.nested_click("iframe#myFrame3", ".fBox") + sb.cdp.sleep(0.5) + sb.cdp.highlight("iframe#myFrame3") From b80af58e29cf1481d18200a24e6b3b2eb8a954f2 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:36:17 -0500 Subject: [PATCH 3/6] Update presentations --- examples/presenter/uc_presentation.py | 1 + examples/presenter/uc_presentation_3.py | 1 + examples/presenter/uc_presentation_4.py | 1070 +++++++++++++++++++++++ 3 files changed, 1072 insertions(+) create mode 100644 examples/presenter/uc_presentation_4.py diff --git a/examples/presenter/uc_presentation.py b/examples/presenter/uc_presentation.py index 
c52dc7ad328..554cefcfa90 100644 --- a/examples/presenter/uc_presentation.py +++ b/examples/presenter/uc_presentation.py @@ -1,3 +1,4 @@ +# https://www.youtube.com/watch?v=5dMFI3e85ig import os import subprocess from contextlib import suppress diff --git a/examples/presenter/uc_presentation_3.py b/examples/presenter/uc_presentation_3.py index 1b66fef2090..ac9f6b3e610 100644 --- a/examples/presenter/uc_presentation_3.py +++ b/examples/presenter/uc_presentation_3.py @@ -1,3 +1,4 @@ +# https://www.youtube.com/watch?v=-EpZlhGWo9k import sys from contextlib import suppress from seleniumbase import BaseCase diff --git a/examples/presenter/uc_presentation_4.py b/examples/presenter/uc_presentation_4.py new file mode 100644 index 00000000000..b5700836cb4 --- /dev/null +++ b/examples/presenter/uc_presentation_4.py @@ -0,0 +1,1070 @@ +# https://www.youtube.com/watch?v=Mr90iQmNsKM +from contextlib import suppress +from seleniumbase import BaseCase +from seleniumbase import SB +BaseCase.main(__name__, __file__) + + +class UCPresentationClass(BaseCase): + def test_presentation_4(self): + self.open("data:,") + self.set_window_position(4, 40) + self._output_file_saves = False + self.create_presentation(theme="serif", transition="fade") + self.add_slide( + '' + ) + self.add_slide( + "

This continues my Undetectable Automation series:

" + '' + ) + self.begin_presentation(filename="uc_presentation.html") + + with suppress(Exception): + self.open("https://www.bostoncodecamp.com/CC37/info") + self.create_tour(theme="hopscotch") + self.add_tour_step( + "

Good Afternoon and Welcome!

", 'h1.wow' + ) + self.add_tour_step( + "

PSA: Visit our sponsors later.

", + '[href*="/Sponsors"]', + ) + self.add_tour_step( + "

Let's check out the schedule...

", + '[href*="/Schedule/SessionGrid"]' + ) + self.play_tour() + + with suppress(Exception): + self.open( + "https://www.bostoncodecamp.com/CC37/Schedule/SessionGrid" + ) + self.highlight("h2", loops=8) + if self.is_element_visible('[data-sessionid="765448"]'): + self.highlight('div[data-sessionid="765448"]', loops=10) + self.create_tour(theme="driverjs") + self.add_tour_step( + "

Here we are

", '[data-sessionid="765448"]' + ) + self.play_tour() + self.click('a[onclick*="765448"]') + self.create_tour(theme="hopscotch") + self.add_tour_step( + "

What to expect

", + "div.sz-modal-session", + alignment="left", + ) + self.play_tour() + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Last time...

" + ) + self.add_slide( + "

Last time...

" + '' + ) + self.add_slide( + "

This time...

" + '' + ) + self.add_slide( + "Note: There are different kinds of reCAPTCHA,
" + "and not all of them are created equal.
" + '' + ) + self.add_slide( + "

This is what happens when you fail reCAPTCHA:

" + '' + ) + self.add_slide( + "

This is what happens when you fail hCAPTCHA:

" + '' + ) + self.add_slide( + "

If you like puppies, hCAPTCHA has you covered:

" + '' + ) + self.add_slide( + "

This is what happens when some anti-bots detect you:

" + '' + ) + self.add_slide( + "

And this is what happens when Gandalf blocks you:

" + '' + ) + self.add_slide( + "

No joke... There's a Hobbit CAPTCHA

" + '' + ) + self.add_slide( + "

Important Notice:

" + "(Know the laws and legal implications!)" + '' + ) + self.add_slide( + "

🔹 By the end of this presentation... 🔹



" + "✅ You'll learn which anti-bot systems work,
" + "and which ones don't. (Hint: Most don't work.)

" + "✅ There will be multiple live demos." + "

" + "✅ You'll learn how to bypass weak defenses." + "

" + "✅ You'll learn powerful web-scraping techniques." + "

" + ) + self.add_slide( + "

But first, a little about me...

" + '' + ) + self.add_slide( + "

About me: (Michael Mintz)

\n" + "
    \n" + "
  • I created the SeleniumBase framework." + "
  • \n" + "
  • I lead the Automation Team at iboss." + "
  • \n" + "
", + image="https://seleniumbase.io/other/iboss_booth.png", + ) + self.add_slide( + "

In my spare time,

" + "

I may be found...

" + ) + self.add_slide( + "

Spending time with entrepreneurs...

" + '' + ) + self.add_slide( + "

Spending time with celebrities...

" + '' + ) + self.add_slide( + "

Spending time with politicians...

" + '' + ) + self.add_slide( + "

Spending time with philanthropists...

" + '' + ) + self.add_slide( + "

Speaking at conferences...

" + '' + ) + self.add_slide( + "

Attending conferences as a guest...

" + '' + ) + self.add_slide( + "

Jet-skiing in Key West...

" + '' + ) + self.add_slide( + "

And working on SeleniumBase...

" + '' + ) + self.add_slide( + "

Enough about me...


" + "

Let's begin the presentation!



" + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + '' + "
" + ) + self.add_slide( + '' + ) + self.add_slide( + "Playwright using CDP" + '' + ) + self.add_slide( + "Selenium using CDP" + '' + ) + self.add_slide( + "

Microsoft still supports Selenium,
" + "even though they have Playwright.

" + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "As a birthday gift, BrightData invested a lot of money into " + "Selenium (making them an official sponsor)." + '' + ) + self.add_slide( + "That's great news for the Selenium community!" + '' + ) + self.add_slide( + "Now, let's get back to CDP..." + '' + ) + self.add_slide( + "There are lots of GitHub repos using CDP.
" + "(This repo tracks some of them)" + '' + ) + self.add_slide( + "The first major Python implementation of CDP:" + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "PyCDP was the key ingredient to stealthy automation." + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "In addition to using CDP for controlling Chrome in a" + " stealthy way, you can also achieve stealth by using" + " tools that can control the mouse and keyboard.

" + "PyAutoGUI is one such tool:" + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "PyAutoGUI requires a headed browser to work.

" + " Since most Linux machines have headless displays that" + " don't support headed browsers, an external tool called" + " Xvfb must be used in order to simulate a headed browser" + " in a headless Linux environment..." + ) + self.add_slide( + '' + ) + self.add_slide( + "

To have a completely stealthy framework" + " for clicking CAPTCHAs & bypassing anti-bot systems," + " you need:


" + "

    \n" + '
  • A framework that uses a "regular" browser
    ' + '(to hide evidence of automation activity)' + '
    ' + "

  • \n" + "
  • CDP capabilities for performing stealthy actions" + "

  • \n" + "
  • PyAutoGUI for performing tricky actions
    " + "(eg. clicking Shadow-root CAPTCHAs)
    " + "

  • \n" + "
  • Xvfb integration for headless Linux systems" + "
  • \n

\n" + ) + self.add_slide( + "SeleniumBase CDP Mode simplifies all that for you:

" + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "List of sites with their invisible anti-bot services:" + '' + ) + self.add_slide( + "

Let's get started with live demos of bypassing" + " physical CAPTCHAs:

" + ) + self.begin_presentation(filename="uc_presentation.html") + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up first...




" + "planetminecraft.com/account/sign_in/" + "





" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "www.planetminecraft.com/account/sign_in/" + sb.activate_cdp_mode(url) + sb.sleep(2) + sb.cdp.gui_click_element("#turnstile-widget div") + sb.sleep(2) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "cloudflare.com/login" + "





" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://www.cloudflare.com/login" + sb.activate_cdp_mode(url) + sb.sleep(3) + sb.uc_gui_click_captcha() + sb.sleep(2.5) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "gitlab.com/users/sign_in" + "





" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://gitlab.com/users/sign_in" + sb.activate_cdp_mode(url) + sb.sleep(2) + sb.uc_gui_click_captcha() + sb.assert_text("Username", '[for="user_login"]', timeout=3) + sb.assert_element('label[for="user_login"]') + sb.highlight('button:contains("Sign in")') + sb.highlight('h1:contains("GitLab.com")') + sb.post_message("SeleniumBase wasn't detected", duration=8) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

The code for the previous live demo:

" + "

", + code=( + "from seleniumbase import SB\n\n" + "with SB(uc=True) as sb:\n" + ' url = "https://gitlab.com/users/sign_in"\n' + " sb.activate_cdp_mode(url)\n" + " sb.sleep(2)\n" + " sb.uc_gui_click_captcha()\n\n" + " ...\n\n\n\n\n" + ), + ) + self.add_slide( + "

The code for the previous live demo:

" + "

", + code=( + "from seleniumbase import SB\n\n" + "with SB(uc=True) as sb:\n" + ' url = "https://gitlab.com/users/sign_in"\n' + " sb.activate_cdp_mode(url)\n" + " sb.sleep(2)\n" + " sb.uc_gui_click_captcha()\n\n" + ' sb.assert_text("Username", \'[for="user_login"]\',' + ' timeout=3)\n' + ' sb.assert_element(\'[for="user_login"]\')\n' + ' sb.set_messenger_theme(location="bottom_center")' + '\n' + ' sb.post_message("SeleniumBase wasn\'t detected!")' + '\n' + ), + ) + self.add_slide( + "

Up next...




" + "bing.com/turing/captcha/challenge" + "





" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://www.bing.com/turing/captcha/challenge" + sb.activate_cdp_mode(url) + sb.sleep(1) + sb.uc_gui_click_captcha() + sb.sleep(2.5) + + self.create_presentation(theme="serif", transition="none") + self.add_slide("

Having fun yet?!?

") + self.add_slide( + "

If you're not yet concerned about online security,
" + " then you probably need to see more live demos...

") + self.add_slide( + "

Time for live demos of bypassing
" + "some invisible anti-bot services:

" + ) + self.add_slide( + "

Up next...




" + "pokemon.com/us" + "


" + "(Protected by Imperva / Incapsula)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.pokemon.com/us" + sb.activate_cdp_mode(url) + sb.sleep(3.2) + sb.cdp.click("button#onetrust-accept-btn-handler") + sb.sleep(1.2) + sb.cdp.click("a span.icon_pokeball") + sb.sleep(2.5) + sb.cdp.click('b:contains("Show Advanced Search")') + sb.sleep(2.5) + sb.cdp.click('span[data-type="type"][data-value="electric"]') + sb.sleep(0.5) + sb.scroll_into_view("a#advSearch") + sb.sleep(0.5) + sb.cdp.mouse_click("a#advSearch") + sb.sleep(1.2) + sb.cdp.click('img[src*="img/pokedex/detail/025.png"]') + sb.cdp.assert_text("Pikachu", 'div[class*="title"]') + sb.cdp.assert_element('img[alt="Pikachu"]') + sb.cdp.scroll_into_view("div.pokemon-ability-info") + sb.sleep(1.2) + sb.cdp.flash('div[class*="title"]') + sb.cdp.flash('img[alt="Pikachu"]') + sb.cdp.flash("div.pokemon-ability-info") + name = sb.cdp.get_text("label.styled-select") + info = sb.cdp.get_text("div.version-descriptions p.active") + print("\n\n*** %s: ***\n* %s" % (name, info)) + sb.sleep(2) + sb.cdp.highlight_overlay("div.pokemon-ability-info") + sb.sleep(2) + sb.cdp.click('a[href="https://www.pokemon.com/us/play-pokemon/"]') + sb.sleep(0.6) + sb.cdp.click('h3:contains("Find an Event")') + location = "Concord, MA, USA" + sb.cdp.type('input[data-testid="location-search"]', location) + sb.sleep(1.5) + sb.cdp.click( + "div.autocomplete-dropdown-container div.suggestion-item" + ) + sb.sleep(0.6) + sb.cdp.click('img[alt="search-icon"]') + sb.sleep(2) + events = sb.cdp.select_all('div[data-testid="event-name"]') + print("\n*** Pokemon events near %s: ***" % location) + for event in events: + print("* " + event.text) + sb.sleep(2) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "walmart.com" + "


" + "(Protected by Akamai + PerimeterX)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.walmart.com/" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.mouse_click('input[aria-label="Search"]') + sb.sleep(1.2) + search = "Settlers of Catan Board Game" + required_text = "Catan" + sb.cdp.press_keys('input[aria-label="Search"]', search + "\n") + sb.sleep(3.8) + print('\n\n*** Walmart Search for "%s":' % search) + print(' (Results must contain "%s".)' % required_text) + unique_item_text = [] + items = sb.cdp.find_elements('div[data-testid="list-view"]') + for item in items: + if required_text in item.text: + description = item.querySelector( + '[data-automation-id="product-price"] + span' + ) + if ( + description + and description.text not in unique_item_text + ): + unique_item_text.append(description.text) + print("* " + description.text) + price = item.querySelector( + '[data-automation-id="product-price"]' + ) + if price: + price_text = price.text + price_text = ( + price_text.split("current price Now ")[-1] + ) + price_text = price_text.split("current price ")[-1] + price_text = price_text.split(" ")[0] + print(" (" + price_text + ")") + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "albertsons.com/recipes/" + "


" + "(Protected by Imperva / Incapsula)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://www.albertsons.com/recipes/" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.remove_element("div > div > article") + sb.cdp.scroll_into_view('input[type="search"]') + sb.cdp.click_if_visible("button.banner-close-button") + sb.cdp.click("input#search-suggestion-input") + sb.sleep(0.2) + search = "Avocado Smoked Salmon" + required_text = "Salmon" + sb.cdp.press_keys("input#search-suggestion-input", search) + sb.sleep(0.8) + sb.cdp.click("#suggestion-0 a span") + sb.sleep(3.2) + sb.cdp.click_if_visible("button.banner-close-button") + sb.sleep(1.2) + print('\n\n*** Albertsons Search for "%s":' % search) + print(' (Results must contain "%s".)' % required_text) + unique_item_text = [] + item_selector = 'a[href*="/meal-plans-recipes/shop/"]' + info_selector = 'span[data-test-id*="recipe-thumb-title"]' + items = sb.cdp.find_elements( + "%s %s" % (item_selector, info_selector) + ) + for item in items: + sb.sleep(0.03) + item.scroll_into_view() + sb.sleep(0.025) + if required_text in item.text: + item.flash(color="44CC88") + sb.sleep(0.025) + if item.text not in unique_item_text: + unique_item_text.append(item.text) + print("* " + item.text) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "easyjet.com/en/" + "


" + "(Protected by Akamai)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.easyjet.com/en/" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.click_if_visible("button#ensCloseBanner") + sb.sleep(1.2) + sb.cdp.click('input[name="from"]') + sb.sleep(1.2) + sb.cdp.type('input[name="from"]', "London") + sb.sleep(0.6) + sb.cdp.click_if_visible("button#ensCloseBanner") + sb.sleep(0.6) + sb.cdp.click('span[data-testid="airport-name"]') + sb.sleep(1.2) + sb.cdp.type('input[name="to"]', "Venice") + sb.sleep(1.2) + sb.cdp.click('span[data-testid="airport-name"]') + sb.sleep(1.2) + sb.cdp.click('input[name="when"]') + sb.sleep(1.2) + sb.cdp.click( + '[data-testid="month"]:last-of-type' + ' [aria-disabled="false"]' + ) + sb.sleep(1.2) + sb.cdp.click( + '[data-testid="month"]:last-of-type' + ' [aria-disabled="false"]' + ) + sb.sleep(1.2) + sb.cdp.click('button[data-testid="submit"]') + sb.sleep(3.5) + sb.connect() + sb.sleep(2.5) + for window in sb.driver.window_handles: + sb.switch_to_window(window) + if "/buy/flights" in sb.get_current_url(): + break + sb.click_if_visible("button#ensCloseBanner") + days = sb.find_elements('div[class*="FlightGridLayout_column"]') + for day in days: + if not day.text.strip(): + continue + print( + "\n\n**** " + " ".join(day.text.split("\n")[0:2]) + " ****" + ) + fares = day.find_elements( + "css selector", 'button[class*="flightDet"]' + ) + if not fares: + print("No flights today!") + for fare in fares: + info = fare.text + info = info.replace("LOWEST FARE\n", "") + info = info.replace("\n", " ") + print(info) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "hyatt.com" + "


" + "(Protected by Kasada)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.hyatt.com/" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.click_if_visible('button[aria-label="Close"]') + sb.sleep(1) + sb.cdp.click('span:contains("Explore")') + sb.sleep(1) + sb.cdp.click('a:contains("Hotels & Resorts")') + sb.sleep(3) + location = "Anaheim, CA, USA" + sb.cdp.press_keys("input#searchbox", location) + sb.sleep(2) + sb.cdp.click("div#suggestion-list ul li a") + sb.sleep(1) + sb.cdp.click('div.hotel-card-footer button') + sb.sleep(1) + sb.cdp.click('button[data-locator="find-hotels"]') + sb.sleep(5) + hotel_names = sb.cdp.select_all( + 'div[data-booking-status="BOOKABLE"]' + ' [class*="HotelCard_header"]' + ) + hotel_prices = sb.cdp.select_all( + 'div[data-booking-status="BOOKABLE"] div.rate' + ) + sb.assert_true(len(hotel_names) == len(hotel_prices)) + print("\n\nHyatt Hotels in %s:" % location) + print("(" + sb.cdp.get_text("ul.b-color_text-white") + ")") + if len(hotel_names) == 0: + print("No availability over the selected dates!") + for i, hotel in enumerate(hotel_names): + with suppress(Exception): + print( + "* %s: %s => %s" + % (i + 1, hotel.text, hotel_prices[i].text) + ) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "bestwestern.com/en_US.html" + "


" + "(Protected by DataDome)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.bestwestern.com/en_US.html" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.click_if_visible("div.onetrust-close-btn-handler") + sb.sleep(1) + sb.cdp.click("input#destination-input") + sb.sleep(2) + location = "Palm Springs, CA, USA" + sb.cdp.press_keys("input#destination-input", location) + sb.sleep(1) + sb.cdp.click("ul#google-suggestions li") + sb.sleep(1) + sb.cdp.click("button#btn-modify-stay-update") + sb.sleep(4) + sb.cdp.click("label#available-label") + sb.sleep(2.5) + print("\n\nBest Western Hotels in %s:" % location) + summary_details = sb.cdp.get_text("#summary-details-column") + dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip() + print("(Dates: %s)" % dates) + flip_cards = sb.cdp.select_all(".flipCard") + for i, flip_card in enumerate(flip_cards): + hotel = flip_card.query_selector(".hotelName") + price = flip_card.query_selector(".priceSection") + if hotel and price: + print("* %s: %s => %s" % ( + i + 1, hotel.text.strip(), price.text.strip()) + ) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "priceline.com" + "


" + "(Protected by DataDome)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + window_handle = sb.driver.current_window_handle + url = "https://www.priceline.com" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.click('input[name="endLocation"]') + sb.sleep(1) + location = "Portland, OR, USA" + selection = "Oregon, United States" # (Dropdown option) + sb.cdp.press_keys('input[name="endLocation"]', location) + sb.sleep(1) + sb.click_if_visible('input[name="endLocation"]') + sb.sleep(0.5) + sb.cdp.click(selection) + sb.sleep(1.5) + sb.cdp.click('button[aria-label="Dismiss calendar"]') + sb.sleep(4.5) + sb.connect() + if len(sb.driver.window_handles) > 1: + sb.switch_to_window(window_handle) + sb.driver.close() + sb.sleep(0.2) + sb.switch_to_newest_window() + sb.sleep(0.6) + sb.sleep(0.8) + for y in range(1, 9): + sb.scroll_to_y(y * 400) + sb.sleep(1.25) + hotel_names = sb.find_elements( + 'a[data-autobot-element-id*="HOTEL_NAME"]' + ) + hotel_prices = sb.find_elements('span[font-size="4,,,5"]') + print("\n\nPriceline Hotels in %s:" % location) + print(sb.get_text('[data-testid="POPOVER-DATE-PICKER"]')) + if len(hotel_names) == 0: + print("No availability over the selected dates!") + count = 0 + for i, hotel in enumerate(hotel_names): + if hotel_prices[i] and hotel_prices[i].text: + count += 1 + hotel_price = "$" + hotel_prices[i].text + print("* %s: %s => %s" % (count, hotel.text, hotel_price)) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "

Up next...




" + "nike.com" + "


" + "(Protected by Shape Security)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en", ad_block=True) as sb: + url = "https://www.nike.com/" + sb.activate_cdp_mode(url) + sb.sleep(2.5) + sb.cdp.gui_click_element('div[data-testid="user-tools-container"]') + sb.sleep(1.5) + search = "Nike Air Force 1" + sb.cdp.press_keys('input[type="search"]', search) + sb.sleep(4) + elements = sb.cdp.select_all( + 'ul[data-testid*="products"] figure .details' + ) + if elements: + print('\n\n**** Found results for "%s": ****' % search) + for element in elements: + print("* " + element.text) + sb.sleep(2) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

Up next...




" + "nordstrom.com" + "


" + "(Protected by Shape Security)" + "


" + ) + self.begin_presentation(filename="uc_presentation.html") + + with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://www.nordstrom.com/" + sb.activate_cdp_mode(url) + sb.sleep(2.2) + sb.cdp.click("input#keyword-search-input") + sb.sleep(0.8) + search = "cocktail dresses for women teal" + sb.cdp.press_keys("input#keyword-search-input", search + "\n") + sb.sleep(2.2) + for i in range(16): + sb.cdp.scroll_down(16) + sb.sleep(0.16) + print('\n\n*** Nordstrom Search for "%s":' % search) + unique_item_text = [] + items = sb.cdp.find_elements("article") + for item in items: + description = item.querySelector("article h3") + if description and description.text not in unique_item_text: + unique_item_text.append(description.text) + price_text = "" + price = item.querySelector( + 'div div span[aria-hidden="true"]' + ) + if price: + price_text = price.text + print("* %s (%s)" % (description.text, price_text)) + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

CDP is powerful, as you can see.

" + "

(Especially when used for stealth!)

" + '' + ) + self.add_slide( + "

Out of the following 9 anti-bot defense systems...

" + '' + ) + self.add_slide( + "

These are weak: (Can't detect stealthy CDP)

" + '' + ) + self.add_slide( + "

And these are strong: (CDP is detected)

" + '' + ) + self.add_slide( + "

What is Microsoft's stance
on stealthy CDP?
" + "



Officially...

" + ) + self.add_slide( + '' + ) + self.add_slide( + "

What is Microsoft's stance
on stealthy CDP?

" + "

Unofficially...

" + ) + self.add_slide( + "

There are external repos
using Playwright for stealth." + "



" + "And Microsoft employees are
endorsing them via GitHub Stars." + "

" + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "And stealthy CDP works well in GitHub Actions." + '' + ) + self.add_slide( + "Why does stealthy CDP work in GitHub Actions,
" + "but not in other kinds of services like AWS?" + ) + self.add_slide( + "

Answer:

" + 'GitHub Actions runs in a
' + '"residential IP address" space!' + "

" + ) + self.add_slide( + '' + ) + self.add_slide( + "

People can use residential proxies
" + "to get a residential IP address.

" + ) + self.add_slide( + "

Legal info:

" + '' + ) + self.add_slide("

To summarize that...

") + self.add_slide( + "

Scraping public data is probably legal.
" + "(Think Light Side)

" + '' + ) + self.add_slide( + "

Scraping private data is probably NOT legal.
" + "(Think Dark Side)

" + '' + ) + self.add_slide( + "

If you break local and/or international laws,
" + "then bounty hunters may come after you.

" + '' + ) + self.add_slide( + "

Let's get back to SeleniumBase

" + '' + ) + self.add_slide( + "

SeleniumBase includes a special Chrome extension:

" + '

The "Recorder"

' + "

(You can generate complete scripts with it.)

" + "

sbase recorder --uc

" + '' + ) + self.add_slide( + '' + ) + self.begin_presentation(filename="uc_presentation.html") + + # import sys + # from seleniumbase.console_scripts import sb_recorder + # sys.argv.append("--uc") + # sb_recorder.main() + + self.create_presentation(theme="serif", transition="none") + self.add_slide( + "

How does one make an automation Recorder?

" + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + '' + ) + self.add_slide( + "And that's the secret to building a test recorder!" + '' + ) + self.add_slide( + "Also note that there are more stealth CDP repos
" + "other than the ones that you have already seen." + '' + ) + self.add_slide( + "

👤 Field trip to the CDP Mode help docs 👤

" + "

" + "

Let's take a look at the CDP Mode docs
" + "from the SeleniumBase GitHub repo...

" + '' + '' + ) + self.add_slide( + "

Questions?

" + "https://github.com/seleniumbase/SeleniumBase/discussions" + "

" + "

📌 Found a bug? 🐞

" + "https://github.com/seleniumbase/SeleniumBase/issues" + "
" + ) + self.add_slide( + "

📊 Final remarks 📣



" + "

🛠️ SeleniumBase gives you 🛠️
" + "the tools you need to succeed!" + "


" + "And tools to build lots of bots..." + "


" + ) + self.add_slide( + "
🏁 The End 🏁
" + '' + ) + self.begin_presentation(filename="uc_presentation.html") From 47c2379136410f372a165dc1c1cf08383ddffd76 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:36:45 -0500 Subject: [PATCH 4/6] Update the documentation --- examples/cdp_mode/ReadMe.md | 11 ++++++++++- help_docs/uc_mode.md | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index d039f40ed15..aa08a70d7fb 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -2,7 +2,14 @@ ## [](https://github.com/seleniumbase/SeleniumBase/) CDP Mode 🐙 -🐙 SeleniumBase CDP Mode (Chrome Devtools Protocol Mode) is a special mode inside of SeleniumBase UC Mode that lets bots appear human while controlling the browser with the CDP-Driver. Although regular UC Mode can't perform WebDriver actions while the driver is disconnected from the browser, the CDP-Driver can still perform actions while maintaining its cover. (For Python 3.11 or newer!) +🐙 SeleniumBase CDP Mode (Chrome Devtools Protocol Mode) is a special mode inside of SeleniumBase UC Mode that lets bots appear human while controlling the browser with the CDP-Driver. Although regular UC Mode can't perform WebDriver actions while the driver is disconnected from the browser, the CDP-Driver can still perform actions while maintaining its cover. + +-------- + + +

(Watch the CDP Mode tutorial on YouTube! ▶️)

+ +-------- 👤 UC Mode avoids bot-detection by first disconnecting WebDriver from the browser at strategic times, calling special PyAutoGUI methods to bypass CAPTCHAs (as needed), and finally reconnecting the driver afterwards so that WebDriver actions can be performed again. Although this approach works for bypassing simple CAPTCHAs, more flexibility is needed for bypassing bot-detection on websites with advanced protection. (That's where CDP Mode comes in.) @@ -410,6 +417,8 @@ sb.cdp.assert_url(url) sb.cdp.assert_url_contains(substring) sb.cdp.assert_text(text, selector="html") sb.cdp.assert_exact_text(text, selector="html") +sb.cdp.assert_true(expression) +sb.cdp.assert_false(expression) sb.cdp.scroll_into_view(selector) sb.cdp.scroll_to_y(y) sb.cdp.scroll_to_top() diff --git a/help_docs/uc_mode.md b/help_docs/uc_mode.md index 60cad7c8d23..02921e894ea 100644 --- a/help_docs/uc_mode.md +++ b/help_docs/uc_mode.md @@ -23,6 +23,11 @@ ---- + +

(Watch the 4th UC Mode tutorial on YouTube! ▶️)

+ +---- + 👤 UC Mode is based on [undetected-chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver). UC Mode includes multiple updates, fixes, and improvements, such as: * Automatically changing user-agents to prevent detection. From 6311726d9f38ae1cb16133a77690f48e346f2ba6 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:37:14 -0500 Subject: [PATCH 5/6] Refresh optional dependencies --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0f6788c33cf..0e72782b069 100755 --- a/setup.py +++ b/setup.py @@ -254,7 +254,7 @@ "pdfminer": [ 'pdfminer.six==20240706', 'cryptography==39.0.2;python_version<"3.9"', - 'cryptography==43.0.3;python_version>="3.9"', + 'cryptography==44.0.0;python_version>="3.9"', 'cffi==1.17.1', "pycparser==2.22", ], From 266fab820fdd539fc975169639ff0919b286e219 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 27 Nov 2024 19:37:37 -0500 Subject: [PATCH 6/6] Version 4.33.1 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 425f1365c70..ca10f028ed2 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.33.0" +__version__ = "4.33.1"