Welcome to the official documentation for Universal Scraper Enterprise, our premium solution for dynamic web scraping. Designed for enterprise use, the product combines advanced stealth techniques for bypassing modern web security, scriptable browser instructions, and client integrations in multiple languages.
Universal Scraper Enterprise provides your organization with fine-grained control over web data extraction. Our solution handles JavaScript rendering, Cloudflare challenges, captcha solving via CapSolver, proxy routing with country selection, custom headers, and persistent browser sessions.

The service exposes the following endpoints:
URL: /
Method: GET
Description: Serves this documentation page.
URL: /v1
Methods: GET, POST
Description: Initiates a scraping job and returns a jobId along with a status URL.
URL: /status/:jobId
Method: GET
Description: Retrieves the current status and results of your scraping job.
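To illustrate the job lifecycle, the responses below are a sketch based only on the fields the PHP helper later in this document relies on (jobId, status, result, error); the exact response layout, the name of the status-URL field, and all values shown are assumptions.

Creating a job (POST /v1):
{ "jobId": "abc123", "status_url": "https://advanced-scraper.com/status/abc123" }

Polling (GET /status/abc123) while the job is running:
{ "status": "processing" }

Polling after the job has finished:
{ "status": "completed", "result": "<scraped page content>" }

If the job does not complete, the status response carries an error field instead of a result.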
The /v1 endpoint accepts the following parameters:

url: The target page to scrape (e.g. "https://example.com").

instructions: A JSON array of browser actions to execute on the page, for example:
[
  { "wait_for": "#login" },
  { "fill": ["#username", "user123"] },
  { "fill": ["#password", "secret"] },
  { "click": "#submit" }
]

headers: Custom request headers as a JSON object (e.g. {"User-Agent": "MyAgent"}).

block_resources: Resource types to block while rendering (e.g. ["image", "stylesheet"]).

Session, proxy, and rendering options are described below; the client helpers at the end of this page show the full parameter set.
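For orientation, here is an illustrative /v1 parameter set, form-encoded the way the PHP helper below sends it. The values are placeholders, and a real request typically sets only the fields it needs:

url=https://example.com
instructions=[{"wait_for": "#main-content"}]
useProxy=false
proxies=[]
customHeaders=true
headers={"User-Agent": "MyAgent"}
requestMethod=GET
postData=false
block_resources=["image", "stylesheet"]
js_render=true
json_response=false
wait_param=0
wait_for_param=false
session_id=null
delete_session_id=false
country_code=any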
The instructions parameter defines the actions executed within the browser. Supported instructions are listed below; a combined example follows the list.

wait_for: Wait for a specific element. Example: { "wait_for": "#main-content" }

wait: Pause execution for a defined time in milliseconds. Example: { "wait": 3000 }

wait_event: Wait for a page event such as networkidle or load. Example: { "wait_event": "networkidle" }

navigate: Navigate to a URL or relative path. Example: { "navigate": "/checkout" }

click: Simulate a mouse click. Example: { "click": "#login-button" }

wait_for_and_click: Wait for an element, then click it. Example: { "wait_for_and_click": "#submit-button" }

fill: Fill an input field with text. Example: { "fill": ["#username", "user123"] }

check: Ensure a checkbox is checked. Example: { "check": "#terms" }

uncheck: Ensure a checkbox is unchecked. Example: { "uncheck": "#subscribe" }

select_option: Choose an option from a dropdown. Example: { "select_option": ["#country", "US"] }

scroll_x: Scroll horizontally. Example: { "scroll_x": 200 }

scroll_y: Scroll vertically. Example: { "scroll_y": 400 }

evaluate: Execute custom JavaScript code. Example: { "evaluate": "document.body.style.backgroundColor = 'black'" }

solve_cloudflare: Bypass Cloudflare challenges. Example: { "solve_cloudflare": true }

solve_captcha: Solve captchas via CapSolver. Example: { "solve_captcha": true }

frame_click: Click an element within an iframe. Example: { "frame_click": ["iframe#loginFrame", "#loginButton"] }

frame_wait_for: Wait for an element inside an iframe. Example: { "frame_wait_for": ["iframe#loginFrame", "#username"] }

frame_fill: Fill an input field within an iframe. Example: { "frame_fill": ["iframe#loginFrame", "#username", "testuser"] }

frame_check: Check a checkbox inside an iframe. Example: { "frame_check": ["iframe#loginFrame", "#rememberMe"] }

frame_uncheck: Uncheck a checkbox inside an iframe. Example: { "frame_uncheck": ["iframe#loginFrame", "#newsletter"] }

frame_select_option: Select an option inside an iframe. Example: { "frame_select_option": ["iframe#loginFrame", "#country", "US"] }

frame_evaluate: Execute JavaScript within an iframe. Example: { "frame_evaluate": ["iframe#loginFrame", "document.body.style.backgroundColor = 'blue'"] }
Persistent browser sessions are maintained for 30 minutes. Provide a session_id to reuse an instance; use delete_session_id to terminate it. Proxies can be configured via the proxies parameter and refined with the country_code parameter.
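As a sketch of how these options fit together, the calls below use the PHP scrape() helper defined in the next section with PHP 8 named arguments; the session id, proxy URL, and target URLs are hypothetical, and the proxy string format is an assumption:

// First request: create a reusable session routed through a US proxy
$first = scrape(
  targetUrl: "https://example.com/account",
  proxies: ["http://user:pass@proxy.example.net:8080"], // hypothetical proxy entry
  country_code: "us",
  session_id: "shop-session-1"
);

// A follow-up request within 30 minutes reuses the same browser instance
// and then tears the session down
$second = scrape(
  targetUrl: "https://example.com/account/orders",
  session_id: "shop-session-1",
  delete_session_id: true
);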
The following helper functions demonstrate how to integrate with Universal Scraper Enterprise in PHP, Python, and Node.js. For each language, separate examples are provided for GET and POST requests.
<?php
function scrape(
$targetUrl,
$instructions = [],
$proxies = [],
$headers = null,
$httpMethod = 'GET',
$postData = false,
$block_resources = [],
$js_render = false,
$json_response = false,
$wait_param = 0,
$wait_for_param = false,
$session_id = null,
$delete_session_id = false,
$country_code = "any",
$waitSeconds = 60,
$maxAttempts = 20
) {
if (empty($targetUrl) || !filter_var($targetUrl, FILTER_VALIDATE_URL)) {
throw new Exception("Parameter 'targetUrl' must be a valid URL.");
}
if (!is_array($instructions)) {
throw new Exception("Parameter 'instructions' must be an array.");
}
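// Endpoints for creating a scraping job and polling its status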
$baseUrl = "https://advanced-scraper.com/v1/";
$statusBaseUrl = "https://advanced-scraper.com/status/";
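// Map the helper's arguments onto the request parameters expected by /v1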
$params = [
"url" => $targetUrl,
"instructions" => json_encode($instructions),
"useProxy" => (!empty($proxies)) ? "true" : "false",
"proxies" => json_encode($proxies),
"customHeaders" => ($headers !== null) ? "true" : "false",
"headers" => ($headers !== null) ? json_encode($headers) : "null",
"requestMethod" => strtoupper($httpMethod),
"postData" => $postData ?? "false",
"block_resources" => json_encode($block_resources),
"js_render" => $js_render ? "true" : "false",
"json_response" => $json_response ? "true" : "false",
"wait_param" => $wait_param,
"wait_for_param" => $wait_for_param ? "true" : "false",
"session_id" => ($session_id !== null) ? $session_id : "null",
"delete_session_id" => $delete_session_id ? "true" : "false",
"country_code" => $country_code
];
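// Encode the parameters and submit the job as a form-encoded POST to /v1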
$queryString = http_build_query($params);
$requestUrl = $baseUrl . "?" . $queryString;
$ch = curl_init($requestUrl);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POSTFIELDS, $queryString);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: application/x-www-form-urlencoded"));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
$response = curl_exec($ch);
if (curl_errno($ch)) {
$err = curl_error($ch);
curl_close($ch);
throw new Exception('Job creation error: ' . $err);
}
curl_close($ch);
$data = json_decode($response, true);
if (!$data || !isset($data['jobId'])) {
throw new Exception("Job creation failed. Response: " . $response);
}
$jobId = $data['jobId'];
$statusUrl = $statusBaseUrl . $jobId;
$attempts = 0;
$statusData = null;
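// Poll the status endpoint until the job is no longer 'processing' or the attempt limit is reached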
do {
sleep($waitSeconds);
$chStatus = curl_init($statusUrl);
curl_setopt($chStatus, CURLOPT_RETURNTRANSFER, true);
curl_setopt($chStatus, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($chStatus, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($chStatus, CURLOPT_TIMEOUT, 30);
$statusResponse = curl_exec($chStatus);
if (curl_errno($chStatus)) {
$errStatus = curl_error($chStatus);
curl_close($chStatus);
throw new Exception("Failed to fetch job status: " . $errStatus);
}
curl_close($chStatus);
$statusData = json_decode($statusResponse, true);
if (!$statusData || !isset($statusData['status'])) {
throw new Exception("Invalid status response: " . $statusResponse);
}
$attempts++;
// Abort only if the attempt limit is reached while the job is still processing
if ($attempts >= $maxAttempts && $statusData['status'] === 'processing') {
throw new Exception("Maximum polling attempts reached.");
}
} while ($statusData['status'] === 'processing');
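// A finished job carries either a result or an error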
if ($statusData['status'] === 'completed') {
return $statusData['result'];
} else {
throw new Exception("Job failed: " . ($statusData['error'] ?? "Unknown error."));
}
}
?>
Usage Example (GET):
// Scrape a page via GET
$result = scrape(
"https://example.com",
[ { "wait_for": "#main-content" } ]
);
Usage Example (POST):
// Submit data via POST
$result = scrape(
"https://example.com/api/submit",
[],
[],
[ "Content-Type" => "application/json" ],
"POST",
'{"name":"John Doe","email":"john@example.com"}'
);
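In both cases scrape() blocks while it polls the status endpoint, returns the result field of the completed job, and throws an Exception if the job fails or the polling limit is reached.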