Playground Sign in Start free
Ujeebu SDKs

Python SDK

The official Ujeebu SDK for Python. Built with the popular requests library, it provides a Pythonic interface perfect for data science, web scraping, and automation.

Installation

pip install ujeebu-python
poetry add ujeebu-python
pipenv install ujeebu-python

Requirements:

  • Python 3.7 or higher
  • requests library (automatically installed)

Quick Start

from ujeebu_python import UjeebuClient

# Initialize with your API key
client = UjeebuClient('your-api-key')

# Scrape a website
response = client.scrape(
    'https://example.com',
    params={'js': True, 'response_type': 'html'}
)

print(response.text)

Authentication

The SDK requires an API key for authentication. Get yours from the Ujeebu Dashboard.

# Option 1: read the API key directly from an environment variable.
# os.environ[...] raises KeyError if UJEEBU_API_KEY is not set.
import os
from ujeebu_python import UjeebuClient

client = UjeebuClient(os.environ['UJEEBU_API_KEY'])

# Option 2: keep the key in a .env file and load it with python-dotenv.
from dotenv import load_dotenv
import os
from ujeebu_python import UjeebuClient

# Load the variables defined in the .env file into the environment.
load_dotenv()
# os.getenv returns None (instead of raising) when the variable is missing.
client = UjeebuClient(os.getenv('UJEEBU_API_KEY'))

WARNING — Security

Never hardcode API keys in your source code or commit them to version control.

Core Methods

scrape()

Scrape web pages with various rendering and extraction options.

response = client.scrape('https://example.com')
print(response.text)
response = client.scrape(
    'https://example.com',
    params={
        'js': True,
        'js_timeout': 5000,
        'wait_for': '.dynamic-content'
    }
)

print(response.text)
response = client.scrape(
    'https://example.com',
    params={
        'extract_rules': {
            'title': 'h1',
            'articles': {
                'selector': '.article',
                'type': 'list',
                'data': {
                    'headline': 'h2',
                    'author': '.author'
                }
            }
        }
    }
)

data = response.json()
print(data['result'])

extract()

Extract clean article content from web pages.

response = client.extract('https://example.com/article')

data = response.json()
print(data['article']['title'])
print(data['article']['author'])
print(data['article']['text'])
print(data['article']['pub_date'])
response = client.extract(
    'https://example.com/article',
    params={
        'strip_tags': 'script,style,nav',
        'images': True
    }
)

serp()

Get structured search engine results.

response = client.serp(params={
    'search': 'artificial intelligence',
    'search_type': 'search',
    'lang': 'en',
    'results_count': 20
})

data = response.json()
print(data['organic_results'])
print(data['knowledge_graph'])
response = client.serp(params={
    'search': 'latest technology news',
    'search_type': 'news',
    'lang': 'en',
    'results_count': 10
})

data = response.json()
for article in data['news']:
    print(article['title'])
response = client.serp(params={
    'search': 'beautiful landscapes',
    'search_type': 'images',
    'results_count': 50
})

data = response.json()
for image in data['images']:
    print(image['link'])

preview()

Generate preview cards for URLs (similar to social media link previews).

response = client.preview('https://example.com/article')

data = response.json()
print(data['title'])
print(data['description'])
print(data['image'])
print(data['author'])
print(data['site_name'])

ai_scrape()

Extract structured data using AI-powered natural language prompts.

response = client.ai_scrape(
    'https://example.com/product',
    'Extract the product name, price, and rating'
)

data = response.json()
print(data['data'])
response = client.ai_scrape(
    'https://example.com/product',
    'Extract product details',
    params={
        'schema': {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'price': {'type': 'number'},
                'rating': {'type': 'number'}
            },
            'required': ['name', 'price']
        }
    }
)

data = response.json()
print(data['data'])

auto_extract()

Automatically extract structured data from any web page without writing prompts or selectors.

response = client.auto_extract('https://example.com/article')

data = response.json()
print(f"Page type: {data['page_type']}")
print(data['data'])
response = client.auto_extract(
    'https://example.com/article',
    params={'html': existing_html}  # Skip fetching
)

data = response.json()
print(data['data'])
response = client.auto_extract(
    'https://example.com/product',
    params={
        'proxy_type': 'premium',
        'auto_proxy': False,
        'js': True,
        'scroll_down': True,
        'wait_for': '.product-details',
        'wait_for_timeout': 5000,
        'timeout': 60000,
        'force_refresh': True,
        'auto_captcha_solve': True,
        'provider': 'openai',
        'model': 'gpt-4o'
    }
)

data = response.json()
print(f"Page type: {data['page_type']}")
print(data['data'])
print(f"Credits used: {response.headers.get('ujb-credits')}")

markdown()

Convert web pages to clean, LLM-optimized markdown.

response = client.markdown('https://example.com/article')

data = response.json()
print(data['markdown'])
response = client.markdown(
    'https://docs.example.com/guide',
    params={
        'filter': 'bm25',
        'query': 'installation instructions'
    }
)

data = response.json()
print(data['markdown'])
print(data['references'])
response = client.markdown(
    'https://example.com/spa-page',
    params={
        'filter': 'fit',
        'citations': True,
        'js': True,
        'wait': 3000,
        'wait_for_selector': '.content-loaded',
        'timeout': 120,
        'proxy_type': 'premium'
    }
)

data = response.json()
print(data['markdown'])
print(data['fit_markdown'])
print(data['markdown_with_citations'])
print(data['references'])
print(f"Credits used: {response.headers.get('ujb-credits')}")

Convenience Methods

get_pdf()

Generate a PDF of a web page.

response = client.get_pdf(
    'https://example.com',
    params={'js': True, 'wait_for': 2000}
)

data = response.json()
# data['pdf'] contains base64-encoded PDF

get_screenshot()

Capture a screenshot of a web page.

response = client.get_screenshot(
    'https://example.com',
    params={'js': True, 'screenshot_fullpage': True}
)

data = response.json()
# data['screenshot'] contains base64-encoded image
response = client.get_screenshot(
    'https://example.com',
    params={'screenshot_partial': '.hero-section'}
)

data = response.json()
# data['screenshot'] contains base64-encoded image

get_html()

Get clean HTML content.

response = client.get_html(
    'https://example.com',
    params={'js': True, 'strip_tags': 'script,style'}
)

data = response.json()
print(data['html'])

Scrape Parameters

INFO — Calling convention

The Python SDK uses positional url and a params dict: client.scrape('https://example.com', params={'js': True}). All responses are requests.Response objects — use .json() for JSON responses or .text for HTML.

Parameter Type Required Default Description
url string Yes The URL to scrape (positional argument).
js boolean No False Enable JavaScript rendering.
response_type string No html Output format: 'html', 'screenshot', 'pdf', 'raw'.
json boolean No False When true, returns a JSON response instead of raw content.
timeout int No 60 Maximum number of seconds to wait before the request times out.
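wait_for `str | int` No None CSS selector to wait for (e.g. '.dynamic-content'), or a number of milliseconds to wait (e.g. 2000), before the page is returned.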
wait_for_timeout int No None Timeout in milliseconds for the wait_for parameter.
js_timeout int No 30000 Timeout for JavaScript execution in milliseconds.
device string No desktop Device to emulate: 'desktop', 'mobile', or specific device name.
extract_rules dict No None Rules for structured data extraction using CSS selectors.
proxy_type string No rotating Proxy type: 'rotating', 'advanced', 'premium', 'residential', 'mobile', 'custom'.
proxy_country string No US Country ISO code when using premium proxy.
auto_proxy boolean No False Automatically try different proxies until one succeeds.
proxy_session string No None Alphanumeric identifier to route requests through the same proxy instance.
auto_captcha_solve boolean No False Enable automatic CAPTCHA detection and solving.
auto_captcha_solve_timeout int No 120000 Timeout in milliseconds for CAPTCHA solving.

AI Scraper Parameters

Parameter Type Required Default Description
url string Yes The URL to scrape.
prompt string No Natural language instruction describing what data to extract. Required unless schema is provided.
schema dict No JSON schema defining the expected structure of extracted data. When provided, prompt becomes optional.
temperature float No 0.0 LLM temperature (0.0-1.0). Lower = more deterministic.
js bool No True Enable JavaScript rendering.
proxy_type string No auto Proxy type: 'rotating', 'advanced', 'premium', 'residential'. Auto-selects if not specified.
proxy_country string No None ISO country code for proxy location.
timeout int No 120 Request timeout in seconds.
wait_for `str | int` No None CSS selector to wait for, or a number of milliseconds to wait, before extraction.

Markdown Parameters

Parameter Type Required Default Description
url string Yes The URL to convert to markdown.
filter string No fit Content filter: 'raw' (full page), 'fit' (main content), 'bm25' (relevance-ranked with query).
query string No None Search query for BM25 relevance filtering. Required when filter is 'bm25'.
citations bool No True (GET) / False (POST) Include citation references in the markdown output. Default is True for GET requests, False for POST requests.
js bool No True Enable JavaScript rendering for dynamic pages.
wait int No None Milliseconds to wait after page load before conversion.
wait_for_selector string No None CSS selector to wait for before conversion.
timeout int No 60 Request timeout in seconds.
proxy str No None Custom proxy URL to use for the request.
proxy_type str No "" (auto_proxy) Proxy type: 'rotating', 'advanced', 'premium', 'residential', 'residential_us', 'residential_geo'. If not set, auto_proxy selects the best proxy automatically.
auto_captcha_solve bool No True Enable automatic CAPTCHA detection and solving.
auto_captcha_solve_timeout int No 0 Timeout for CAPTCHA solving in milliseconds.

AutoExtract Parameters

Parameter Type Required Default Description
url string Yes The URL to extract data from.
html string No None Pre-fetched HTML content. Skips fetching step when provided.
proxy_type string No "" (auto_proxy) Proxy type: 'rotating', 'advanced', 'premium', 'residential', 'residential_us', 'residential_geo'.
auto_proxy bool No True Automatic proxy selection for best results.
js bool No True Enable JavaScript rendering.
scroll_down bool No False Scroll down the page before extraction.
wait_for string No None CSS selector to wait for before extraction.
wait_for_timeout int No 30000 Timeout in milliseconds for the wait_for selector.
timeout int No 120000 Overall request timeout in milliseconds.
force_refresh bool No False Force regeneration of extraction rules even if cached.
auto_captcha_solve bool No True Enable automatic CAPTCHA detection and solving.
auto_captcha_solve_timeout int No 0 Timeout in milliseconds for CAPTCHA solving.
provider string No None LLM provider for rule generation (e.g., 'openai', 'anthropic', 'google').
model string No None LLM model to use for rule generation.

Error Handling

from requests.exceptions import HTTPError, ConnectionError, Timeout

try:
    response = client.scrape('https://example.com')
    # Convert 4xx/5xx responses into HTTPError so they are handled below.
    response.raise_for_status()
except HTTPError as e:
    print(f'HTTP Error: {e.response.status_code}')
    # Error bodies are not guaranteed to be a JSON object (e.g. an HTML
    # error page from a proxy). An exception raised inside this handler
    # would NOT be caught by the clauses below, so fall back to the raw
    # body instead of crashing.
    try:
        message = e.response.json().get("message")
    except (ValueError, AttributeError):
        message = e.response.text
    print(f'Message: {message}')
except Timeout:
    # Checked before ConnectionError: requests' ConnectTimeout subclasses
    # both, and should be reported as a timeout.
    print('Request timed out')
except ConnectionError:
    print('Network connection error')
except Exception as e:
    # Last-resort boundary for anything unexpected.
    print(f'Error: {e}')
else:
    # Only reached when the request succeeded.
    print(response.text)

Integration Examples

With Pandas

import os

import pandas as pd
from ujeebu_python import UjeebuClient

# The API key is read from the environment; os must be imported for this.
client = UjeebuClient(os.environ['UJEEBU_API_KEY'])

# Scrape product data: each element matching '.product' becomes one record
# with the fields listed under 'data'.
response = client.scrape(
    'https://example.com/products',
    params={
        'extract_rules': {
            'products': {
                'selector': '.product',
                'type': 'list',
                'data': {
                    'name': '.title',
                    'price': '.price',
                    'rating': '.rating',
                },
            },
        },
    },
)

# The extracted records are under result -> products; load them as rows.
data = response.json()
df = pd.DataFrame(data['result']['products'])
print(df.head())

Async with asyncio

import asyncio
import os
from concurrent.futures import ThreadPoolExecutor

from ujeebu_python import UjeebuClient

# The API key is read from the environment; os must be imported for this.
client = UjeebuClient(os.environ['UJEEBU_API_KEY'])
urls = ['https://example1.com', 'https://example2.com', 'https://example3.com']


async def scrape_all(urls):
    """Scrape all ``urls`` concurrently and return their responses.

    The SDK is built on requests, so ``client.scrape`` is a blocking call;
    each call is therefore run in a worker thread and awaited from the
    event loop.

    Args:
        urls: Iterable of URL strings to scrape.

    Returns:
        A list of responses, in the same order as ``urls``.
    """
    # Inside a coroutine the running loop is the one to use;
    # get_running_loop() is the recommended call since Python 3.7.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor() as executor:
        tasks = [
            loop.run_in_executor(executor, client.scrape, url)
            for url in urls
        ]
        return await asyncio.gather(*tasks)


results = asyncio.run(scrape_all(urls))
for result in results:
    print(len(result.text))
Ready to build?

Spin up an API key in 60 seconds

Free tier: 5,000 credits, no card, full access to every endpoint on this page.