Ujeebu SDKs

Python SDK

The official Ujeebu SDK for Python. Built with the popular requests library, it provides a Pythonic interface perfect for data science, web scraping, and automation.

Installation

# Using pip
pip install ujeebu-python

# Using Poetry
poetry add ujeebu-python

# Using Pipenv
pipenv install ujeebu-python

Requirements:

Python 3.7 or higher
requests library (automatically installed)

Quick Start

from ujeebu_python import UjeebuClient

# Create a client authenticated with your API key
client = UjeebuClient('your-api-key')

# Scrape a page with JavaScript rendering enabled, asking for HTML output
scrape_options = {'js': True, 'response_type': 'html'}
response = client.scrape('https://example.com', params=scrape_options)

# The SDK returns a requests.Response object; .text holds the HTML
print(response.text)

Authentication

The SDK requires an API key for authentication. Get yours from the Ujeebu Dashboard.

Using Environment Variables (Recommended)

# Option 1: read the key straight from the process environment
import os
from ujeebu_python import UjeebuClient

api_key = os.environ['UJEEBU_API_KEY']
client = UjeebuClient(api_key)

# Option 2: call load_dotenv() (python-dotenv) first, then read the key
import os
from dotenv import load_dotenv
from ujeebu_python import UjeebuClient

load_dotenv()
client = UjeebuClient(os.getenv('UJEEBU_API_KEY'))

WARNING - Security

Never hardcode API keys in your source code or commit them to version control.

Core Methods

scrape()

Scrape web pages with various rendering and extraction options.

# Example 1: basic scrape with default settings
response = client.scrape('https://example.com')
print(response.text)

# Example 2: JavaScript rendering with a 5000 ms JS timeout;
# wait_for is given a CSS selector here
js_options = {
    'js': True,
    'js_timeout': 5000,
    'wait_for': '.dynamic-content',
}
response = client.scrape('https://example.com', params=js_options)

print(response.text)

# Example 3: structured extraction, mapping field names to CSS selectors
article_fields = {
    'headline': 'h2',
    'author': '.author',
}
rules = {
    'title': 'h1',
    'articles': {
        'selector': '.article',
        'type': 'list',
        'data': article_fields,
    },
}
response = client.scrape('https://example.com', params={'extract_rules': rules})

data = response.json()
print(data['result'])

extract()

Extract clean article content from web pages.

# Example 1: extract an article and print its main fields
response = client.extract('https://example.com/article')

data = response.json()
article = data['article']
for field in ('title', 'author', 'text', 'pub_date'):
    print(article[field])

# Example 2: extraction with extra options passed through params
extract_options = {
    'strip_tags': 'script,style,nav',
    'images': True,
}
response = client.extract('https://example.com/article', params=extract_options)

serp()

Get structured search engine results.

# Example 1: web search (search_type 'search')
web_query = {
    'search': 'artificial intelligence',
    'search_type': 'search',
    'lang': 'en',
    'results_count': 20,
}
response = client.serp(params=web_query)

data = response.json()
print(data['organic_results'])
print(data['knowledge_graph'])

# Example 2: news search (search_type 'news')
news_query = {
    'search': 'latest technology news',
    'search_type': 'news',
    'lang': 'en',
    'results_count': 10,
}
response = client.serp(params=news_query)

data = response.json()
for news_item in data['news']:
    print(news_item['title'])

# Example 3: image search (search_type 'images')
image_query = {
    'search': 'beautiful landscapes',
    'search_type': 'images',
    'results_count': 50,
}
response = client.serp(params=image_query)

data = response.json()
for picture in data['images']:
    print(picture['link'])

preview()

Generate preview cards for URLs (similar to social media link previews).

# Fetch the preview card for a URL and print each of its fields
response = client.preview('https://example.com/article')

data = response.json()
for field in ('title', 'description', 'image', 'author', 'site_name'):
    print(data[field])

markdown()

Convert web pages to clean, LLM-optimized markdown.

# Example 1: convert a page to markdown with default settings
article_url = 'https://example.com/article'
response = client.markdown(article_url)

data = response.json()
print(data['markdown'])

# Example 2: BM25 relevance filtering; 'query' is required when filter is 'bm25'
bm25_options = {
    'filter': 'bm25',
    'query': 'installation instructions',
}
response = client.markdown('https://docs.example.com/guide', params=bm25_options)

data = response.json()
print(data['markdown'])
print(data['references'])

# Example 3: main-content ('fit') filter with citations on a JS-rendered page
spa_options = {
    'filter': 'fit',
    'citations': True,
    'js': True,
    'wait': 3000,                               # milliseconds after page load
    'wait_for_selector': '.content-loaded',     # CSS selector to wait for
    'timeout': 120,                             # request timeout in seconds
    'proxy_type': 'premium',
}
response = client.markdown('https://example.com/spa-page', params=spa_options)

data = response.json()
for key in ('markdown', 'fit_markdown', 'markdown_with_citations', 'references'):
    print(data[key])
# The 'ujb-credits' response header is read for the credit count
print(f"Credits used: {response.headers.get('ujb-credits')}")

Convenience Methods

get_pdf()

Generate a PDF of a web page.

# Request a PDF with JavaScript rendering; wait_for is given an integer here
pdf_options = {'js': True, 'wait_for': 2000}
response = client.get_pdf('https://example.com', params=pdf_options)

data = response.json()
# data['pdf'] contains base64-encoded PDF

get_screenshot()

Capture a screenshot of a web page.

# Example 1: JavaScript rendering with screenshot_fullpage enabled
fullpage_options = {'js': True, 'screenshot_fullpage': True}
response = client.get_screenshot('https://example.com', params=fullpage_options)

data = response.json()
# data['screenshot'] contains base64-encoded image

# Example 2: screenshot_partial set to the selector string '.hero-section'
partial_options = {'screenshot_partial': '.hero-section'}
response = client.get_screenshot('https://example.com', params=partial_options)

data = response.json()
# data['screenshot'] contains base64-encoded image

get_html()

Get clean HTML content.

# Fetch HTML with JavaScript rendering, passing strip_tags 'script,style'
html_options = {'js': True, 'strip_tags': 'script,style'}
response = client.get_html('https://example.com', params=html_options)

data = response.json()
print(data['html'])

Scrape Parameters

INFO - Calling convention

The Python SDK uses positional url and a params dict: client.scrape('https://example.com', params={'js': True}). All responses are requests.Response objects - use .json() for JSON responses or .text for HTML.

Parameter	Type	Required	Default	Description
`url`	`string`	Yes		The URL to scrape (positional argument).
`js`	`boolean`	No	`False`	Enable JavaScript rendering.
`response_type`	`string`	No	`html`	Output format: 'html', 'screenshot', 'pdf', 'raw'.
`json`	`boolean`	No	`False`	When true, returns a JSON response instead of raw content.
`timeout`	`int`	No	`60`	Maximum number of seconds before request timeout.
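`wait_for`	`str | int`	No	`None`	CSS selector to wait for (string) or number of milliseconds to wait (integer) before returning the result.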
`wait_for_timeout`	`int`	No	`None`	Timeout in milliseconds for the wait_for parameter.
`js_timeout`	`int`	No	`30000`	Timeout for JavaScript execution in milliseconds.
`device`	`string`	No	`desktop`	Device to emulate: 'desktop', 'mobile', or specific device name.
`extract_rules`	`dict`	No	`None`	Rules for structured data extraction using CSS selectors.
`proxy_type`	`string`	No	`rotating`	Proxy type: 'rotating', 'advanced', 'premium', 'residential', 'mobile', 'custom'.
`proxy_country`	`string`	No	`US`	Country ISO code when using premium proxy.
`auto_proxy`	`boolean`	No	`False`	Automatically try different proxies until one succeeds.
`proxy_session`	`string`	No	`None`	Alphanumeric identifier to route requests through the same proxy instance.
`auto_captcha_solve`	`boolean`	No	`False`	Enable automatic CAPTCHA detection and solving.
`auto_captcha_solve_timeout`	`int`	No	`120000`	Timeout in milliseconds for CAPTCHA solving.

Markdown Parameters

Parameter	Type	Required	Default	Description
`url`	`string`	Yes		The URL to convert to markdown.
`filter`	`string`	No	`fit`	Content filter: 'raw' (full page), 'fit' (main content), 'bm25' (relevance-ranked with query).
`query`	`string`	No	`None`	Search query for BM25 relevance filtering. Required when filter is 'bm25'.
`citations`	`bool`	No	`True (GET) / False (POST)`	Include citation references in the markdown output. Default is True for GET requests, False for POST requests.
`js`	`bool`	No	`True`	Enable JavaScript rendering for dynamic pages.
`wait`	`int`	No	`None`	Milliseconds to wait after page load before conversion.
`wait_for_selector`	`string`	No	`None`	CSS selector to wait for before conversion.
`timeout`	`int`	No	`60`	Request timeout in seconds.
`proxy`	`str`	No	`None`	Custom proxy URL to use for the request.
`proxy_type`	`str`	No	`"" (auto_proxy)`	Proxy type: 'rotating', 'advanced', 'premium', 'residential', 'residential_us', 'residential_geo'. If not set, auto_proxy selects the best proxy automatically.
`auto_captcha_solve`	`bool`	No	`True`	Enable automatic CAPTCHA detection and solving.
`auto_captcha_solve_timeout`	`int`	No	`0`	Timeout for CAPTCHA solving in milliseconds.

Error Handling

from requests.exceptions import HTTPError, ConnectionError, Timeout

try:
    response = client.scrape('https://example.com')
    # Turn 4xx/5xx responses into an HTTPError
    response.raise_for_status()
    print(response.text)
except HTTPError as e:
    print(f'HTTP Error: {e.response.status_code}')
    # The error body is not guaranteed to be JSON (for example an HTML error
    # page), so fall back to the raw text instead of raising inside the handler.
    try:
        message = e.response.json().get("message")
    except ValueError:
        message = e.response.text
    print(f'Message: {message}')
except Timeout:
    # Must precede ConnectionError: requests' ConnectTimeout subclasses both,
    # so the original order reported connect timeouts as connection errors.
    print('Request timed out')
except ConnectionError:
    print('Network connection error')
except Exception as e:
    print(f'Error: {e}')

Integration Examples

With Pandas

import os

import pandas as pd
from ujeebu_python import UjeebuClient

# os is needed to read the API key from the environment
client = UjeebuClient(os.environ['UJEEBU_API_KEY'])

# Scrape product data: one list entry per '.product' element
response = client.scrape(
    'https://example.com/products',
    params={
        'extract_rules': {
            'products': {
                'selector': '.product',
                'type': 'list',
                'data': {
                    'name': '.title',
                    'price': '.price',
                    'rating': '.rating'
                }
            }
        }
    }
)

# Load the extracted product list into a DataFrame
data = response.json()
df = pd.DataFrame(data['result']['products'])
print(df.head())

Async with asyncio

import asyncio
import os
from concurrent.futures import ThreadPoolExecutor
from ujeebu_python import UjeebuClient

# os is needed to read the API key from the environment
client = UjeebuClient(os.environ['UJEEBU_API_KEY'])
urls = ['https://example1.com', 'https://example2.com', 'https://example3.com']

async def scrape_all(urls):
    """Scrape every URL concurrently and return the responses in input order.

    Each client.scrape call is handed to a thread pool via run_in_executor
    so the requests overlap; asyncio.gather collects the results in the
    same order as ``urls``.
    """
    # get_running_loop() is the supported way to obtain the loop inside a
    # coroutine (available since Python 3.7).
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor() as executor:
        tasks = [
            loop.run_in_executor(executor, client.scrape, url)
            for url in urls
        ]
        return await asyncio.gather(*tasks)

results = asyncio.run(scrape_all(urls))
for result in results:
    print(len(result.text))

Ready to build?

Spin up an API key in 60 seconds

Free tier: 5,000 credits, no card, full access to every endpoint on this page.

Get free API key or try the playground →