|
| 1 | +# Scrappey - Official Python Wrapper |
| 2 | + |
| 3 | +[![PyPI version](https://badge.fury.io/py/scrappey.svg)](https://badge.fury.io/py/scrappey)
| 4 | +[![Python 3.8+](https://img.shields.io/badge/python-3.8%2B-blue.svg)](https://www.python.org/downloads/)
| 5 | +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
| 6 | + |
| 7 | +Official Python wrapper for [Scrappey.com](https://scrappey.com) - Web scraping API with automatic Cloudflare bypass, antibot solving, captcha solving, and browser automation. |
| 8 | + |
| 9 | +## Features |
| 10 | + |
| 11 | +- **Cloudflare Bypass** - Automatically bypass Cloudflare protection |
| 12 | +- **Antibot Solving** - Handle Datadome, PerimeterX, Kasada, Akamai, and more |
| 13 | +- **Captcha Solving** - Automatic solving for reCAPTCHA, hCaptcha, Turnstile |
| 14 | +- **Browser Automation** - Full browser control with actions like click, type, scroll |
| 15 | +- **Session Management** - Maintain cookies and state across requests |
| 16 | +- **Proxy Support** - Built-in proxy rotation with country selection |
| 17 | +- **Async Support** - Both sync and async clients included |
| 18 | +- **Type Hints** - Full type annotations for IDE support and AI assistants |
| 19 | + |
| 20 | +## Installation |
| 21 | + |
| 22 | +```bash |
| 23 | +pip install scrappey |
| 24 | +``` |
| 25 | + |
| 26 | +## Quick Start |
| 27 | + |
| 28 | +```python |
| 29 | +from scrappey import Scrappey |
| 30 | + |
| 31 | +# Initialize with your API key |
| 32 | +scrappey = Scrappey(api_key="YOUR_API_KEY") |
| 33 | + |
| 34 | +# Simple GET request |
| 35 | +result = scrappey.get(url="https://example.com") |
| 36 | +print(result["solution"]["response"]) |
| 37 | + |
| 38 | +# Don't forget to close the client when done |
| 39 | +scrappey.close() |
| 40 | +``` |
| 41 | + |
| 42 | +Or use as a context manager: |
| 43 | + |
| 44 | +```python |
| 45 | +from scrappey import Scrappey |
| 46 | + |
| 47 | +with Scrappey(api_key="YOUR_API_KEY") as scrappey: |
| 48 | + result = scrappey.get(url="https://example.com") |
| 49 | + print(result["solution"]["statusCode"]) |
| 50 | +``` |
| 51 | + |
| 52 | +## Async Usage |
| 53 | + |
| 54 | +```python |
| 55 | +import asyncio |
| 56 | +from scrappey import AsyncScrappey |
| 57 | + |
| 58 | +async def main(): |
| 59 | + async with AsyncScrappey(api_key="YOUR_API_KEY") as scrappey: |
| 60 | + # Parallel requests |
| 61 | + urls = ["https://example1.com", "https://example2.com"] |
| 62 | + results = await asyncio.gather(*[ |
| 63 | + scrappey.get(url=url) for url in urls |
| 64 | + ]) |
| 65 | + for result in results: |
| 66 | + print(result["solution"]["statusCode"]) |
| 67 | + |
| 68 | +asyncio.run(main()) |
| 69 | +``` |
| 70 | + |
| 71 | +## Examples |
| 72 | + |
| 73 | +### Cloudflare Bypass |
| 74 | + |
| 75 | +```python |
| 76 | +result = scrappey.get( |
| 77 | + url="https://protected-site.com", |
| 78 | + cloudflareBypass=True, |
| 79 | + premiumProxy=True, |
| 80 | + proxyCountry="UnitedStates", |
| 81 | +) |
| 82 | + |
| 83 | +if result["data"] == "success": |
| 84 | + print("Successfully bypassed Cloudflare!") |
| 85 | + print(result["solution"]["response"]) |
| 86 | +``` |
| 87 | + |
| 88 | +### Session Management |
| 89 | + |
| 90 | +Sessions persist cookies and browser state across requests: |
| 91 | + |
| 92 | +```python |
| 93 | +# Create a session |
| 94 | +session = scrappey.create_session(proxyCountry="UnitedStates") |
| 95 | +session_id = session["session"] |
| 96 | + |
| 97 | +try: |
| 98 | + # All requests with this session share cookies |
| 99 | + scrappey.get(url="https://example.com/login", session=session_id) |
| 100 | + scrappey.get(url="https://example.com/dashboard", session=session_id) |
| 101 | +finally: |
| 102 | + # Clean up when done |
| 103 | + scrappey.destroy_session(session_id) |
| 104 | +``` |
| 105 | + |
| 106 | +### Browser Automation |
| 107 | + |
| 108 | +```python |
| 109 | +result = scrappey.browser_action( |
| 110 | + url="https://example.com/login", |
| 111 | + actions=[ |
| 112 | + {"type": "wait_for_selector", "cssSelector": "#login-form"}, |
| 113 | + {"type": "type", "cssSelector": "#email", "text": "user@example.com"}, |
| 114 | + {"type": "type", "cssSelector": "#password", "text": "password123"}, |
| 115 | + {"type": "click", "cssSelector": "#submit-btn", "waitForSelector": ".dashboard"}, |
| 116 | + {"type": "execute_js", "code": "document.querySelector('.user-name').innerText"}, |
| 117 | + ], |
| 118 | +) |
| 119 | + |
| 120 | +# Get JavaScript return values |
| 121 | +print(result["solution"]["javascriptReturn"]) |
| 122 | +``` |
| 123 | + |
| 124 | +### POST Requests |
| 125 | + |
| 126 | +```python |
| 127 | +# Form data |
| 128 | +result = scrappey.post( |
| 129 | + url="https://httpbin.org/post", |
| 130 | + postData="username=user&password=pass", |
| 131 | +) |
| 132 | + |
| 133 | +# JSON data |
| 134 | +result = scrappey.post( |
| 135 | + url="https://api.example.com/data", |
| 136 | + postData={"key": "value"}, |
| 137 | + customHeaders={"Content-Type": "application/json"}, |
| 138 | +) |
| 139 | +``` |
| 140 | + |
| 141 | +### Automatic Captcha Solving |
| 142 | + |
| 143 | +```python |
| 144 | +result = scrappey.get( |
| 145 | + url="https://site-with-captcha.com", |
| 146 | + automaticallySolveCaptchas=True, |
| 147 | + alwaysLoad=["recaptcha", "hcaptcha", "turnstile"], |
| 148 | +) |
| 149 | +``` |
| 150 | + |
| 151 | +### Screenshot Capture |
| 152 | + |
| 153 | +```python |
| 154 | +result = scrappey.screenshot( |
| 155 | + url="https://example.com", |
| 156 | + width=1920, |
| 157 | + height=1080, |
| 158 | +) |
| 159 | + |
| 160 | +# Save screenshot |
| 161 | +import base64 |
| 162 | +with open("screenshot.png", "wb") as f: |
| 163 | + f.write(base64.b64decode(result["solution"]["screenshot"])) |
| 164 | +``` |
| 165 | + |
| 166 | +### Using Request Builder Output |
| 167 | + |
| 168 | +Copy directly from the [Request Builder](https://app.scrappey.com/#/builder): |
| 169 | + |
| 170 | +```python |
| 171 | +result = scrappey.request({ |
| 172 | + "cmd": "request.get", |
| 173 | + "url": "https://example.com", |
| 174 | + "browserActions": [ |
| 175 | + {"type": "wait", "wait": 2000}, |
| 176 | + {"type": "scroll", "cssSelector": "footer"} |
| 177 | + ], |
| 178 | + "screenshot": True |
| 179 | +}) |
| 180 | +``` |
| 181 | + |
| 182 | +## API Reference |
| 183 | + |
| 184 | +### Scrappey Client |
| 185 | + |
| 186 | +```python |
| 187 | +Scrappey( |
| 188 | + api_key: str, # Your API key (required) |
| 189 | + base_url: str = "...", # API URL (optional) |
| 190 | + timeout: float = 300, # Request timeout in seconds |
| 191 | +) |
| 192 | +``` |
| 193 | + |
| 194 | +### Methods |
| 195 | + |
| 196 | +| Method | Description | |
| 197 | +|--------|-------------| |
| 198 | +| `get(url, **options)` | Perform GET request | |
| 199 | +| `post(url, postData, **options)` | Perform POST request | |
| 200 | +| `put(url, postData, **options)` | Perform PUT request | |
| 201 | +| `delete(url, **options)` | Perform DELETE request | |
| 202 | +| `patch(url, postData, **options)` | Perform PATCH request | |
| 203 | +| `request(options)` | Send request with full options dict | |
| 204 | +| `create_session(**options)` | Create a new session | |
| 205 | +| `destroy_session(session)` | Destroy a session | |
| 206 | +| `list_sessions()` | List all active sessions | |
| 207 | +| `is_session_active(session)` | Check if session is active | |
| 208 | +| `browser_action(url, actions, **options)` | Execute browser actions | |
| 209 | +| `screenshot(url, **options)` | Capture screenshot | |
| 210 | + |
| 211 | +### Common Options |
| 212 | + |
| 213 | +| Option | Type | Description | |
| 214 | +|--------|------|-------------| |
| 215 | +| `session` | str | Session ID for state persistence | |
| 216 | +| `proxy` | str | Custom proxy (http://user:pass@ip:port) | |
| 217 | +| `proxyCountry` | str | Proxy country (e.g., "UnitedStates") | |
| 218 | +| `premiumProxy` | bool | Use premium residential proxies | |
| 219 | +| `mobileProxy` | bool | Use mobile carrier proxies | |
| 220 | +| `cloudflareBypass` | bool | Enable Cloudflare bypass | |
| 221 | +| `datadomeBypass` | bool | Enable Datadome bypass | |
| 222 | +| `kasadaBypass` | bool | Enable Kasada bypass | |
| 223 | +| `automaticallySolveCaptchas` | bool | Auto-solve captchas | |
| 224 | +| `browserActions` | list | Browser automation actions | |
| 225 | +| `screenshot` | bool | Capture screenshot | |
| 226 | +| `cssSelector` | str | Extract content by CSS selector | |
| 227 | +| `customHeaders` | dict | Custom HTTP headers | |
| 228 | + |
| 229 | +### Response Structure |
| 230 | + |
| 231 | +```python |
| 232 | +{ |
| 233 | + "solution": { |
| 234 | + "verified": True, |
| 235 | + "response": "<html>...</html>", |
| 236 | + "statusCode": 200, |
| 237 | + "currentUrl": "https://example.com", |
| 238 | + "cookies": [...], |
| 239 | + "cookieString": "session=abc; token=xyz", |
| 240 | + "userAgent": "Mozilla/5.0...", |
| 241 | + "screenshot": "base64...", |
| 242 | + "javascriptReturn": [...], |
| 243 | + }, |
| 244 | + "timeElapsed": 1234, |
| 245 | + "data": "success", # or "error" |
| 246 | + "session": "session-id", |
| 247 | + "error": "error message if failed" |
| 248 | +} |
| 249 | +``` |
| 250 | + |
| 251 | +## Multi-Language Examples |
| 252 | + |
| 253 | +Examples are provided for: |
| 254 | + |
| 255 | +- **Python** - `examples/python/` |
| 256 | +- **Node.js** - `examples/nodejs/` |
| 257 | +- **TypeScript** - `examples/typescript/` |
| 258 | +- **Go** - `examples/go/` |
| 259 | +- **Java** - `examples/java/` |
| 260 | +- **C#** - `examples/csharp/` |
| 261 | +- **PHP** - `examples/php/` |
| 262 | +- **Ruby** - `examples/ruby/` |
| 263 | +- **Rust** - `examples/rust/` |
| 264 | +- **Kotlin** - `examples/kotlin/` |
| 265 | +- **cURL** - `examples/curl/` |
| 266 | + |
| 267 | +## Error Handling |
| 268 | + |
| 269 | +The API returns errors in the response body. Check the `data` field: |
| 270 | + |
| 271 | +```python |
| 272 | +result = scrappey.get(url="https://example.com") |
| 273 | + |
| 274 | +if result["data"] == "success": |
| 275 | + html = result["solution"]["response"] |
| 276 | +else: |
| 277 | + error = result.get("error", "Unknown error") |
| 278 | + print(f"Request failed: {error}") |
| 279 | +``` |
| 280 | + |
| 281 | +Client-side errors raise exceptions: |
| 282 | + |
| 283 | +```python |
| 284 | +from scrappey import ( |
| 285 | + ScrappeyError, |
| 286 | + ScrappeyConnectionError, |
| 287 | + ScrappeyTimeoutError, |
| 288 | + ScrappeyAuthenticationError, |
| 289 | +) |
| 290 | + |
| 291 | +try: |
| 292 | + result = scrappey.get(url="https://example.com") |
| 293 | +except ScrappeyConnectionError: |
| 294 | + print("Could not connect to API") |
| 295 | +except ScrappeyTimeoutError: |
| 296 | + print("Request timed out") |
| 297 | +except ScrappeyAuthenticationError: |
| 298 | + print("Invalid API key") |
| 299 | +except ScrappeyError as e: |
| 300 | + print(f"API error: {e}") |
| 301 | +``` |
| 302 | + |
| 303 | +## Links |
| 304 | + |
| 305 | +- **Website**: https://scrappey.com |
| 306 | +- **Documentation**: https://wiki.scrappey.com/getting-started |
| 307 | +- **Request Builder**: https://app.scrappey.com/#/builder |
| 308 | +- **API Reference**: https://wiki.scrappey.com/api-reference |
| 309 | +- **GitHub**: https://github.com/pim97/scrappey-wrapper-python |
| 310 | + |
| 311 | +## License |
| 312 | + |
| 313 | +MIT License - see [LICENSE](LICENSE) for details. |
| 314 | + |
| 315 | +## Disclaimer |
| 316 | + |
| 317 | +Please ensure that your web scraping activities comply with the website's terms of service and legal regulations. Scrappey is not responsible for any misuse or unethical use of the library. Use responsibly and respect website policies. |