别着急,坐和放宽
使用社交账号登录
python -m venv .venv
source .venv/bin/activate
pip install patchright
pip install playwright_captcha
# Or install with uv
# uv init demo
# uv add playwright_captcha
# uv add patchright
# Install a browser. You can also skip Playwright's bundled browsers and use a system browser instead by connecting over the CDP protocol.
# (.venv) D:\Dev\Blog-demo\playwright>patchright install -h
# Usage: playwright install [options] [browser...]
# ensure browsers necessary for this version of Playwright are installed
# Options:
# --with-deps install system dependencies for browsers
# --dry-run do not execute installation, only print information
# --list prints list of browsers from all playwright installations
# --force force reinstall of stable browser channels
# --only-shell only install headless shell when installing chromium
# --no-shell do not install chromium headless shell
# -h, --help display help for command
# Examples:
# - $ install
# Install default browsers.
# - $ install chrome firefox
# Install custom browsers, supports chromium, chromium-headless-shell, chromium-tip-of-tree-headless-shell, chrome, chrome-beta, msedge, msedge-beta, msedge-dev, bidi-chromium, firefox, webkit, webkit-wsl.
# Installing a single browser is enough
patchright install chromium
from pathlib import Path
from patchright.async_api import BrowserContext, async_playwright, Page
from typing import Union
from playwright_captcha import ClickSolver, CaptchaType, FrameworkType
class Browser:
    """Async helper that owns one persistent Chromium context driven by patchright.

    Call ``start()`` to launch the browser, use ``context`` to open pages, and
    call ``stop()`` to release both the browser context and the driver.
    """

    def __init__(self):
        # None until start() succeeds; reset to None again by stop().
        self._context: Union[BrowserContext, None] = None
        # Driver handle returned by async_playwright().start(); kept so that
        # stop() can shut the driver down instead of leaking it.
        self._playwright = None

    @property
    def context(self) -> BrowserContext:
        """Return the active browser context.

        Raises:
            RuntimeError: If no context has been created yet.
        """
        if self._context is None:
            raise RuntimeError("Browser context is not initialized.")
        return self._context

    async def start(self, user_data_dir: Path | str = Path.home() / ".my_browser_data"):
        """Launch Chromium with a persistent profile and store its context.

        Args:
            user_data_dir: Profile directory used to persist cookies, cache and
                other browser state between runs.

        Raises:
            Exception: Any error raised while starting the driver or launching
                the browser is propagated unchanged.
        """
        pw = await async_playwright().start()
        try:
            self._context = await pw.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,  # profile directory that persists cookies, cache, etc.
                headless=False,  # run with a visible window; headless mode is easier for some anti-bot checks to detect
                chromium_sandbox=False,  # disable the Chromium sandbox to reduce restrictions in environments such as Docker
                ignore_default_args=["--enable-automation"],  # drop the default automation flag so sites cannot detect it
                viewport={"width": 1920, "height": 1080},  # viewport width and height
                has_touch=False,  # do not emulate touch support
                is_mobile=False,  # do not emulate a mobile device
                handle_sighup=False,  # do not close the browser on SIGHUP
                handle_sigterm=False,  # do not close the browser on SIGTERM
                handle_sigint=False,  # do not close the browser on SIGINT (e.g. Ctrl+C)
                timezone_id="Asia/Shanghai",  # browser time zone
            )
        except Exception:
            # The driver is already running at this point; stop it so a failed
            # launch does not leave it behind.
            await pw.stop()
            raise
        self._playwright = pw

    async def stop(self):
        """Close the browser context (if any) and shut down the driver."""
        try:
            if self._context:
                await self._context.close()
        finally:
            # Always clear state and stop the driver, even if close() failed.
            self._context = None
            if self._playwright is not None:
                pw, self._playwright = self._playwright, None
                await pw.stop()

    @classmethod
    async def handle_patch(
        cls, page: Page, *, max_attempts: int = 5, attempt_delay: int = 8
    ):
        """Create a ClickSolver bound to ``page`` and prepare it.

        Args:
            page: Page the solver will operate on.
            max_attempts: Passed to ClickSolver as ``max_attempts``.
            attempt_delay: Passed to ClickSolver as ``attempt_delay``.

        Returns:
            The prepared ClickSolver instance.

        Raises:
            Exception: Any error raised by ``solver.prepare()`` is propagated.
        """
        solver = ClickSolver(
            framework=FrameworkType.PLAYWRIGHT,
            page=page,
            max_attempts=max_attempts,
            attempt_delay=attempt_delay,
        )
        await solver.prepare()
        return solver

    @classmethod
    async def handle_turnstile(cls, page: Page, solver: ClickSolver):
        """Try to solve a Cloudflare Turnstile challenge on ``page``.

        This is best-effort: a failure (for example when no captcha is detected)
        is logged at debug level and otherwise ignored.

        Args:
            page: Page used as the captcha container.
            solver: A prepared ClickSolver, e.g. from ``handle_patch``.
        """
        import logging

        try:
            await solver.solve_captcha(
                captcha_container=page, captcha_type=CaptchaType.CLOUDFLARE_TURNSTILE
            )
        except Exception:
            # Deliberately non-fatal, but keep a trace instead of dropping the
            # error silently.
            logging.getLogger(__name__).debug(
                "Turnstile solving skipped or failed", exc_info=True
            )
CDP 模式连接:
async def launch_by_cdp(self, cdp_endpoint: str = "http://127.0.0.1:9222"):
    r"""Attach to an already-running Chromium-based browser over CDP.

    The browser must be started beforehand with remote debugging enabled.
    Command-line examples:

    macOS:
        /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir="/path/to/your/custom/profile"
    Windows:
        "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\path\to\your\custom\profile"
    Linux:
        /usr/bin/google-chrome-stable --remote-debugging-port=9222 --user-data-dir="/path/to/your/custom/profile"

    Args:
        cdp_endpoint: HTTP endpoint of the browser's remote-debugging port.
            Defaults to "http://127.0.0.1:9222".

    Raises:
        Exception: Any error raised while starting the driver or connecting
            to the endpoint is propagated unchanged.
    """
    pw = await async_playwright().start()
    try:
        browser = await pw.chromium.connect_over_cdp(cdp_endpoint)
        contexts = browser.contexts
        # Use the first existing context; create one if the browser exposes
        # none, rather than failing with IndexError.
        self._context = contexts[0] if contexts else await browser.new_context()
    except Exception:
        # Do not leave the driver running when the connection failed.
        await pw.stop()
        raise
    # Keep a handle on the driver so it can be shut down later.
    self._playwright = pw
import asyncio
from browser import Browser
async def main():
    """Open a page in a persistent profile and attempt to clear a Turnstile check."""
    browser = None
    try:
        browser = Browser()
        await browser.start("./chrome_data")
        tab = await browser.context.new_page()

        # Key step: prepare the captcha solver on the page before navigating.
        captcha_solver = await Browser.handle_patch(tab)

        await tab.goto("https://www.example.com", wait_until="load")
        # Give the page a moment to finish loading.
        await asyncio.sleep(2)

        # Handle the captcha.
        await Browser.handle_turnstile(tab, captcha_solver)
    except Exception as err:
        print(err)
    finally:
        # Shut the browser down only if it was actually constructed.
        if browser is not None:
            await browser.stop()


# Run the demo only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
在做自动化测试或者需要与 Chrome 扩展交互时,老办法是直接打开 chrome-extension://nkbihfbeogaeaoehlefnkodbefgpgknn/home.html#unlock 页面,但这种方式并不好用:弹窗界面与已打开的页面有时会相互干扰,导致自动化程序运行异常。我们可以通过下面的方式监测任何新页面的打开:
你只需要在期望打开新页面之后调用它,它就会返回新页面的 Page 对象;如果超出超时时间仍没有任何页面打开,则会抛出 TimeoutError。注意,这里使用了 @asynccontextmanager 装饰器,你需要使用 async with 语句调用,或者你可以修改这个函数。
通过以上代码,我们实现了在 Playwright 中自动处理验证码的功能。借助 playwright_captcha 提供的 ClickSolver 类,我们可以轻松地处理各种验证码,包括 Google reCAPTCHA 和 Cloudflare Turnstile。同时,我们还提供了 Browser 类来简化 Playwright 的使用,使得代码更加简洁和易读。
这个 Demo 只是演示如何处理 Cloudflare Turnstile 验证码;如果需要处理 Google reCAPTCHA 验证码,需要把 captcha_type=CaptchaType.CLOUDFLARE_TURNSTILE 替换为对应的 reCAPTCHA 类型。
@asynccontextmanager
async def handle_expect_page(self, timeout: int = 3000):
    """Wait for a new page to open in the context and yield it.

    Must be used with ``async with``. The yielded page is closed when the
    ``async with`` block exits, whether or not the block raised.

    NOTE(review): the wait begins when this context manager is entered, so a
    page that opened before that moment may not be observed — confirm how
    callers time the action that opens the page.

    Args:
        timeout: Passed to ``expect_page`` as ``timeout`` (presumably
            milliseconds — confirm against the Playwright docs).

    Yields:
        The newly opened page.

    Raises:
        Exception: Any error raised while waiting (e.g. a timeout) or by the
            caller's block is propagated unchanged.
    """
    page: Union[Page, None] = None
    try:
        async with self.context.expect_page(timeout=timeout) as page_info:
            page = await page_info.value
        yield page
    finally:
        # Close the page only if one was actually captured.
        if page is not None:
            await page.close()