Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions components/scrape_autopilot/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Overview

Scrape Autopilot provides cost-efficient web scraping for public URLs with Markdown, HTML, and plain text outputs. Use it to extract clean page content from websites and feed the results into Pipedream workflows, AI steps, databases, alerts, and reporting pipelines.

Authenticate using the API key available from your Scrape Autopilot dashboard: https://www.scrappilot.com/dashboard.

# Example Use Cases

- **AI-ready content extraction**: Scrape a URL as Markdown, then send the clean content to an AI step for summarization, classification, or entity extraction.
- **Batch website monitoring**: Scrape a short list of URLs on a schedule and compare the returned text or Markdown against previous runs.
- **Lead and research workflows**: Extract readable page content from company websites, product pages, or public articles, then store structured results in Airtable, Google Sheets, or a database.
25 changes: 25 additions & 0 deletions components/scrape_autopilot/actions/get-balance/get-balance.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import scrapeAutopilot from "../../scrape_autopilot.app.mjs";

/**
 * Pipedream action that reports the credit balance of the connected
 * Scrape Autopilot account.
 */
export default {
  key: "scrape_autopilot-get-balance",
  name: "Get Balance",
  description: "Check your Scrape Autopilot credit balance to keep cost-efficient scraping workflows under control. [See the documentation](https://www.scrappilot.com/docs)",
  version: "0.0.1",
  type: "action",
  annotations: {
    destructiveHint: false,
    openWorldHint: true,
    readOnlyHint: true,
  },
  props: {
    scrapeAutopilot,
  },
  /**
   * Requests the balance through the app's `getBalance` method, exports a
   * one-line summary, and returns the response unchanged.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {*} ctx.$ - Pipedream execution context (used for the request and `$.export`).
   * @returns {Promise<object>} The response returned by `getBalance`.
   */
  async run({ $ }) {
    const balance = await this.scrapeAutopilot.getBalance({
      $,
    });

    // The summary reads `credits` from the response; presumably the API
    // returns it as a top-level field — confirm against the API docs.
    const { credits } = balance;
    $.export("$summary", `Credit balance: ${credits}`);

    return balance;
  },
};
45 changes: 45 additions & 0 deletions components/scrape_autopilot/actions/scrape-url/scrape-url.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import scrapeAutopilot from "../../scrape_autopilot.app.mjs";

/**
 * Pipedream action that scrapes a single URL through the connected
 * Scrape Autopilot account.
 */
export default {
  key: "scrape_autopilot-scrape-url",
  name: "Scrape URL",
  description: "Cost-efficiently scrape one public URL and return Markdown, HTML, or text. [See the documentation](https://www.scrappilot.com/docs)",
  version: "0.0.1",
  type: "action",
  annotations: {
    destructiveHint: false,
    openWorldHint: true,
    readOnlyHint: false,
  },
  props: {
    scrapeAutopilot,
    url: {
      type: "string",
      label: "URL",
      description: "The fully qualified public URL to scrape.",
    },
    format: {
      propDefinition: [
        scrapeAutopilot,
        "format",
      ],
    },
    js: {
      propDefinition: [
        scrapeAutopilot,
        "js",
      ],
    },
  },
  /**
   * Forwards the configured URL, output format, and JavaScript-rendering flag
   * to the app's `scrapeUrl` method, exports a summary naming the URL, and
   * returns the response unchanged.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {*} ctx.$ - Pipedream execution context (used for the request and `$.export`).
   * @returns {Promise<object>} The response returned by `scrapeUrl`.
   */
  async run({ $ }) {
    const {
      url,
      format,
      js,
    } = this;

    const result = await this.scrapeAutopilot.scrapeUrl({
      $,
      url,
      format,
      js,
    });

    $.export("$summary", `Scraped ${url}`);
    return result;
  },
};
65 changes: 65 additions & 0 deletions components/scrape_autopilot/actions/scrape-urls/scrape-urls.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { ConfigurationError } from "@pipedream/platform";
import scrapeAutopilot from "../../scrape_autopilot.app.mjs";

// Upper bound on URLs per batch request, enforced before calling the API.
const MAX_URLS = 10;

/**
 * Normalizes the raw `urls` prop value into a list of trimmed, non-empty
 * URL strings.
 *
 * Accepts an array of strings. A plain string is also tolerated: it is parsed
 * as a JSON array when possible and otherwise treated as a single URL, so a
 * value supplied through an expression does not crash with a `TypeError`.
 *
 * @param {string[]|string|null|undefined} value - Raw prop value.
 * @returns {string[]} Trimmed URLs with blank entries removed.
 * @throws {ConfigurationError} If the value is not a list, or an entry is not a string.
 */
function normalizeUrls(value) {
  if (value == null) {
    return [];
  }

  let list = value;
  if (typeof value === "string") {
    try {
      const parsed = JSON.parse(value);
      list = Array.isArray(parsed)
        ? parsed
        : [
          value,
        ];
    } catch {
      // Not JSON: treat the whole string as one URL.
      list = [
        value,
      ];
    }
  }

  if (!Array.isArray(list)) {
    throw new ConfigurationError("URLs must be provided as a list of strings.");
  }

  return list
    .map((url) => {
      if (typeof url !== "string") {
        throw new ConfigurationError("Each URL must be a string.");
      }
      return url.trim();
    })
    .filter(Boolean);
}

/**
 * Pipedream action that scrapes a small batch of URLs in a single request
 * through the connected Scrape Autopilot account.
 */
export default {
  name: "Scrape URLs",
  description: "Cost-efficiently scrape up to 10 public URLs and return one result per URL. [See the documentation](https://www.scrappilot.com/docs)",
  key: "scrape_autopilot-scrape-urls",
  version: "0.0.1",
  annotations: {
    destructiveHint: false,
    openWorldHint: true,
    readOnlyHint: false,
  },
  type: "action",
  props: {
    scrapeAutopilot,
    urls: {
      type: "string[]",
      label: "URLs",
      description: "Public URLs to scrape. Maximum 10.",
    },
    format: {
      propDefinition: [
        scrapeAutopilot,
        "format",
      ],
    },
    js: {
      propDefinition: [
        scrapeAutopilot,
        "js",
      ],
    },
  },
  /**
   * Validates the URL list, sends it to the app's `scrapeUrls` method together
   * with the output format and JavaScript-rendering flag, exports a summary
   * with the number of URLs, and returns the response unchanged.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {*} ctx.$ - Pipedream execution context (used for the request and `$.export`).
   * @returns {Promise<object>} The response returned by `scrapeUrls`.
   * @throws {ConfigurationError} If no usable URL is provided, the list is
   *   malformed, or it exceeds `MAX_URLS` entries.
   */
  async run({ $ }) {
    const urls = normalizeUrls(this.urls);

    if (!urls.length) {
      throw new ConfigurationError("Provide at least one URL.");
    }

    if (urls.length > MAX_URLS) {
      throw new ConfigurationError(
        `Scrape Autopilot batch scraping is limited to ${MAX_URLS} URLs.`,
      );
    }

    const data = await this.scrapeAutopilot.scrapeUrls({
      $,
      urls,
      format: this.format,
      js: this.js,
    });

    $.export(
      "$summary",
      `Scraped ${urls.length} URL${urls.length === 1
        ? ""
        : "s"}`,
    );
    return data;
  },
};
20 changes: 20 additions & 0 deletions components/scrape_autopilot/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"name": "@pipedream/scrape_autopilot",
"version": "0.0.1",
"description": "Pipedream Scrape Autopilot Components",
"main": "scrape_autopilot.app.mjs",
"keywords": [
"pipedream",
"scrape_autopilot",
"web-scraping",
"markdown"
],
"homepage": "https://pipedream.com/apps/scrape_autopilot",
"author": "Pipedream <support@pipedream.com> (https://pipedream.com/)",
"publishConfig": {
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.1.1"
}
}
163 changes: 163 additions & 0 deletions components/scrape_autopilot/scrape_autopilot.app.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import { axios } from "@pipedream/platform";

/**
 * Options for the "Output Format" prop: each entry pairs the label shown to
 * the user with the value sent to the API.
 */
const FORMATS = [
  [
    "Markdown",
    "md",
  ],
  [
    "HTML",
    "html",
  ],
  [
    "Text",
    "text",
  ],
].map(([
  label,
  value,
]) => ({
  label,
  value,
}));

/**
 * Scrape Autopilot app definition: shared prop definitions plus the HTTP
 * helpers used by the actions in this component.
 */
export default {
  type: "app",
  app: "scrape_autopilot",
  propDefinitions: {
    // NOTE(review): the actions shown alongside this file pass the app object
    // itself as a prop and only reference the "format" and "js" definitions,
    // so this entry looks unused — confirm before removing.
    scrapeAutopilot: {
      type: "app",
      app: "scrape_autopilot",
      label: "Scrape Autopilot",
      description: "Connect your Scrape Autopilot account.",
    },
    format: {
      type: "string",
      label: "Output Format",
      description: "The response format to return.",
      options: FORMATS,
      optional: true,
      default: "md",
    },
    js: {
      type: "boolean",
      label: "Enable JavaScript Rendering",
      description: "Use JavaScript rendering for dynamic pages. This consumes more credits.",
      optional: true,
      default: false,
    },
  },
  methods: {
    /**
     * Returns the Scrape Autopilot API base URL.
     *
     * @returns {string} Base URL for Scrape Autopilot API requests.
     */
    _baseUrl() {
      return "https://www.scrappilot.com";
    },
    /**
     * Builds authorization headers for Scrape Autopilot API requests.
     *
     * @returns {object} Headers containing the connected account API key.
     */
    _authHeaders() {
      // NOTE(review): the key is sent as-is, with no scheme prefix such as
      // "Bearer" — confirm this matches the API's expected header format.
      return {
        Authorization: this.$auth.api_key,
      };
    },
    /**
     * Makes an authenticated Scrape Autopilot API request.
     *
     * Any extra options (method, data, params, ...) are passed through to
     * axios; `baseURL`, `url`, and `headers` are always set here, and
     * caller-supplied headers override the auth headers on key collisions.
     *
     * @param {object} opts - Request options.
     * @param {*} opts.$ - Pipedream execution context.
     * @param {string} opts.path - API path beginning with `/`.
     * @param {object} [opts.headers] - Additional request headers.
     * @returns {Promise<object>} Parsed API response body.
     */
    async _makeRequest({
      $,
      path,
      headers,
      ...args
    }) {
      return axios($, {
        ...args,
        baseURL: this._baseUrl(),
        url: path,
        headers: {
          ...this._authHeaders(),
          ...headers,
        },
      });
    },
    /**
     * Sends a JSON POST to the scrape endpoint. Shared by `scrapeUrl` and
     * `scrapeUrls`, which differ only in the request body.
     *
     * @param {object} opts - Request options.
     * @param {*} opts.$ - Pipedream execution context.
     * @param {object} opts.data - JSON request body.
     * @returns {Promise<object>} Parsed API response body.
     */
    async _postScrape({
      $,
      data,
    }) {
      return this._makeRequest({
        $,
        method: "POST",
        path: "/api/scrape",
        headers: {
          "Content-Type": "application/json",
        },
        data,
      });
    },
    /**
     * Scrapes one public URL.
     *
     * @param {object} opts - Scrape request options.
     * @param {*} opts.$ - Pipedream execution context.
     * @param {string} opts.url - Fully qualified public URL to scrape.
     * @param {string} [opts.format] - Output format: `md`, `html`, or `text`.
     * @param {boolean} [opts.js] - Whether to enable JavaScript rendering.
     * @returns {Promise<object>} Scrape result.
     */
    async scrapeUrl({
      $,
      url,
      format,
      js,
    }) {
      return this._postScrape({
        $,
        data: {
          url,
          format,
          js,
        },
      });
    },
    /**
     * Scrapes multiple public URLs.
     *
     * @param {object} opts - Batch scrape request options.
     * @param {*} opts.$ - Pipedream execution context.
     * @param {string[]} opts.urls - Fully qualified public URLs to scrape.
     * @param {string} [opts.format] - Output format: `md`, `html`, or `text`.
     * @param {boolean} [opts.js] - Whether to enable JavaScript rendering.
     * @returns {Promise<object>} Batch scrape result.
     */
    async scrapeUrls({
      $,
      urls,
      format,
      js,
    }) {
      return this._postScrape({
        $,
        data: {
          urls,
          format,
          js,
        },
      });
    },
    /**
     * Gets the remaining credit balance.
     *
     * @param {object} opts - Balance request options.
     * @param {*} opts.$ - Pipedream execution context.
     * @returns {Promise<object>} Account status and credit balance.
     */
    async getBalance({ $ }) {
      return this._makeRequest({
        $,
        method: "GET",
        path: "/api/status",
      });
    },
  },
};