import requests
import json
import time
# Extraction rules sent with every scrape request. The single "reviews" rule
# selects each [data-hook='review'] element ("multiple": True, "type": "obj")
# and attaches four text-typed child rules, one per entry below.
_REVIEW_FIELD_SELECTORS = (
    ("reviewer_name", ".a-profile-name"),
    ("rating", "i[data-hook='review-star-rating'] span"),
    ("title", "a[data-hook='review-title'] span"),
    ("review_text", "span[data-hook='review-body'] span"),
)
extract_rules = {
    "reviews": {
        "selector": "[data-hook='review']",
        "type": "obj",
        "multiple": True,
        "children": {
            field_name: {"selector": css_selector, "type": "text"}
            for field_name, css_selector in _REVIEW_FIELD_SELECTORS
        },
    }
}
def scrape_amazon_reviews(product_asin, page=1, timeout=120):
    """Fetch one page of product reviews through the Ujeebu scrape endpoint.

    Args:
        product_asin: ASIN interpolated into the Amazon product-reviews URL.
        page: Review page number; values above 1 append ``?pageNumber=<page>``.
        timeout: Seconds ``requests`` may wait on the API before giving up.

    Returns:
        The ``"result"`` member of the decoded JSON response body.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        KeyError: The JSON body has no ``"result"`` key.
    """
    review_url = f"https://www.amazon.com/product-reviews/{product_asin}/"
    if page > 1:
        review_url += f"?pageNumber={page}"

    response = requests.post(
        "https://api.ujeebu.com/scrape",
        headers={
            "ApiKey": "YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json={
            "url": review_url,
            "js": True,
            "wait_for": "[data-hook='review']",
            "extract_rules": extract_rules,
        },
        # requests applies no timeout by default; without one a stalled
        # connection would block the whole scrape indefinitely.
        timeout=timeout,
    )
    # Surface HTTP-level failures explicitly instead of failing later with an
    # unrelated KeyError on an error body.
    response.raise_for_status()
    return response.json()["result"]
# Scrape multiple pages
all_reviews = []
last_page = 5
for page in range(1, last_page + 1):
    data = scrape_amazon_reviews("B08N5WRWNW", page)
    # Treat a missing or null "reviews" entry as an empty page.
    all_reviews.extend(data.get("reviews") or [])
    if page < last_page:
        time.sleep(2)  # Rate limiting between requests; pointless after the last one
print(f"Scraped {len(all_reviews)} reviews")
const axios = require('axios');
// Extraction rules sent with every scrape request. The single `reviews` rule
// selects each [data-hook='review'] element (type 'obj', multiple: true) and
// attaches one text-typed child rule per [field, selector] pair listed below.
const extractRules = {
  reviews: {
    selector: "[data-hook='review']",
    type: 'obj',
    multiple: true,
    children: Object.fromEntries(
      [
        ['reviewer_name', '.a-profile-name'],
        ['rating', "i[data-hook='review-star-rating'] span"],
        ['title', "a[data-hook='review-title'] span"],
        ['review_text', "span[data-hook='review-body'] span"],
      ].map(([field, selector]) => [field, { selector, type: 'text' }]),
    ),
  },
};
/**
 * Collects reviews for one product by posting one scrape job per review page
 * to the Ujeebu scrape endpoint, sequentially, pausing 2 s between pages.
 *
 * @param {string} productAsin - ASIN interpolated into the product-reviews URL.
 * @param {number} [maxPages=5] - Number of review pages to request, starting at 1.
 * @returns {Promise<Array>} Every `result.reviews` entry, in page order.
 */
async function scrapeReviews(productAsin, maxPages = 5) {
  const collected = [];
  let page = 1;

  while (page <= maxPages) {
    // Page 1 uses the bare reviews URL; later pages add ?pageNumber=<n>.
    let targetUrl = `https://www.amazon.com/product-reviews/${productAsin}/`;
    if (page > 1) {
      targetUrl += `?pageNumber=${page}`;
    }

    const payload = {
      url: targetUrl,
      js: true,
      wait_for: "[data-hook='review']",
      extract_rules: extractRules,
    };
    const requestConfig = {
      headers: {
        'ApiKey': 'YOUR_API_KEY',
        'Content-Type': 'application/json',
      },
    };

    const { data } = await axios.post('https://api.ujeebu.com/scrape', payload, requestConfig);
    const pageReviews = data.result.reviews || [];
    collected.push(...pageReviews);

    // Pause between requests; skipped once the final page has been fetched.
    if (page < maxPages) {
      await new Promise((resolve) => {
        setTimeout(resolve, 2000);
      });
    }
    page += 1;
  }

  return collected;
}
// Usage: log the review count on success. Failures are reported and turned
// into a non-zero exit code instead of surfacing as an unhandled rejection.
scrapeReviews('B08N5WRWNW', 5)
  .then((reviews) => console.log(`Found ${reviews.length} reviews`))
  .catch((err) => {
    console.error('Failed to scrape reviews:', err);
    process.exitCode = 1;
  });
# POST a JSON scrape job for the first review page of B08N5WRWNW. The payload
# sets "js": true, a "wait_for" selector, and the "reviews" extraction rule.
curl --request POST "https://api.ujeebu.com/scrape" \
  --header "ApiKey: YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
"url": "https://www.amazon.com/product-reviews/B08N5WRWNW/",
"js": true,
"wait_for": "[data-hook=review]",
"extract_rules": {
"reviews": {
"selector": "[data-hook=review]",
"type": "obj",
"multiple": true,
"children": {
"reviewer_name": {"selector": ".a-profile-name", "type": "text"},
"rating": {"selector": "i[data-hook=review-star-rating] span", "type": "text"},
"title": {"selector": "a[data-hook=review-title] span", "type": "text"},
"review_text": {"selector": "span[data-hook=review-body] span", "type": "text"}
}
}
}
}'
from ujeebu_python import UjeebuClient
# Initialize the client. "YOUR_API_KEY" looks like a placeholder value;
# presumably it must be replaced with a real key before running.
ujeebu = UjeebuClient(api_key="YOUR_API_KEY")
# Define extraction rules: one "reviews" rule that selects every
# [data-hook='review'] element as an object ("multiple" is True) and carries
# four text-typed child rules.
extract_rules = dict(
    reviews=dict(
        selector="[data-hook='review']",
        type="obj",
        multiple=True,
        children=dict(
            reviewer_name=dict(selector=".a-profile-name", type="text"),
            rating=dict(selector="i[data-hook='review-star-rating'] span", type="text"),
            title=dict(selector="a[data-hook='review-title'] span", type="text"),
            review_text=dict(selector="span[data-hook='review-body'] span", type="text"),
        ),
    )
)
# Scrape a single page of reviews (the loop that follows walks the pages)
def scrape_amazon_reviews(product_asin, page=1):
    """Fetch one page of product reviews via ``ujeebu.scrape_with_rules``.

    Args:
        product_asin: ASIN interpolated into the Amazon product-reviews URL.
        page: Review page number; values above 1 append ``?pageNumber=<page>``.

    Returns:
        The ``'result'`` member of ``response.json()``.

    Raises:
        KeyError: The decoded response has no ``'result'`` key.
    """
    review_url = f"https://www.amazon.com/product-reviews/{product_asin}/"
    if page > 1:
        review_url += f"?pageNumber={page}"

    response = ujeebu.scrape_with_rules(
        url=review_url,
        extract_rules=extract_rules,
        params={
            "js": True,
            "wait_for": "[data-hook='review']",
        },
    )
    return response.json()['result']
# Scrape multiple pages
all_reviews = []
for page in range(1, 6):
    data = scrape_amazon_reviews("B08N5WRWNW", page)
    # Treat a missing or null "reviews" entry as an empty page.
    all_reviews.extend(data.get("reviews") or [])
print(f"Scraped {len(all_reviews)} reviews")
const { UjeebuClient } = require('@ujeebu-org/ujeebu-sdk');
// Initialize the client with the key read from the UJEEBU_API_KEY environment
// variable; if that variable is unset, `undefined` is passed to the constructor.
const client = new UjeebuClient(process.env.UJEEBU_API_KEY);
// Define extraction rules: one `reviews` rule that selects every
// [data-hook='review'] element as an object (multiple: true) and carries four
// text-typed child rules built by `textRule`.
const textRule = (selector) => ({ selector, type: 'text' });
const extractRules = {
  reviews: {
    selector: "[data-hook='review']",
    type: 'obj',
    multiple: true,
    children: {
      reviewer_name: textRule('.a-profile-name'),
      rating: textRule("i[data-hook='review-star-rating'] span"),
      title: textRule("a[data-hook='review-title'] span"),
      review_text: textRule("span[data-hook='review-body'] span"),
    },
  },
};
/**
 * Collects reviews for one product by calling `client.scrapeWithRules` once
 * per review page, sequentially, pausing 2 s between pages.
 *
 * @param {string} productAsin - ASIN interpolated into the product-reviews URL.
 * @param {number} [maxPages=5] - Number of review pages to request, starting at 1.
 * @returns {Promise<Array>} Every `data.result.reviews` entry, in page order.
 */
async function scrapeReviews(productAsin, maxPages = 5) {
  const collected = [];
  let page = 1;

  while (page <= maxPages) {
    // Page 1 uses the bare reviews URL; later pages add ?pageNumber=<n>.
    let targetUrl = `https://www.amazon.com/product-reviews/${productAsin}/`;
    if (page > 1) {
      targetUrl += `?pageNumber=${page}`;
    }

    const scrapeOptions = {
      js: true,
      wait_for: "[data-hook='review']",
    };
    const { data } = await client.scrapeWithRules(targetUrl, extractRules, scrapeOptions);
    const pageReviews = data.result.reviews || [];
    collected.push(...pageReviews);

    // Pause between requests; skipped once the final page has been fetched.
    if (page < maxPages) {
      await new Promise((resolve) => {
        setTimeout(resolve, 2000);
      });
    }
    page += 1;
  }

  return collected;
}
// Usage: log the review count on success. Failures are reported and turned
// into a non-zero exit code instead of surfacing as an unhandled rejection.
scrapeReviews('B08N5WRWNW', 5)
  .then((reviews) => console.log(`Found ${reviews.length} reviews`))
  .catch((err) => {
    console.error('Failed to scrape reviews:', err);
    process.exitCode = 1;
  });
package main
import (
"fmt"
"log"
"time"
"github.com/ujeebu/ujeebu-go"
)
// main scrapes review pages 1 through 5 for a fixed ASIN with the Ujeebu
// client, collects every map-shaped entry found under the "reviews" key of
// each response, and prints the total.
func main() {
	// Initialize the client
	client, err := ujeebu.NewClient("YOUR-API-KEY")
	if err != nil {
		log.Fatalf("Failed to create client: %v", err)
	}

	// Define extraction rules: one "reviews" rule with four text-typed children.
	extractRules := map[string]any{
		"reviews": map[string]any{
			"selector": "[data-hook='review']",
			"type":     "obj",
			"multiple": true,
			"children": map[string]any{
				"reviewer_name": map[string]any{
					"selector": ".a-profile-name",
					"type":     "text",
				},
				"rating": map[string]any{
					"selector": "i[data-hook='review-star-rating'] span",
					"type":     "text",
				},
				"title": map[string]any{
					"selector": "a[data-hook='review-title'] span",
					"type":     "text",
				},
				"review_text": map[string]any{
					"selector": "span[data-hook='review-body'] span",
					"type":     "text",
				},
			},
		},
	}

	// Scrape multiple pages
	const lastPage = 5
	allReviews := []map[string]any{}
	for page := 1; page <= lastPage; page++ {
		// Rate limiting. Pausing at the top of the iteration means a failed
		// page (which continues below) is still followed by the delay, and
		// no time is spent sleeping after the final page.
		if page > 1 {
			time.Sleep(2 * time.Second)
		}

		url := fmt.Sprintf("https://www.amazon.com/product-reviews/B08N5WRWNW/?pageNumber=%d", page)
		params := ujeebu.ScrapeParams{
			URL:          url,
			Js:           true,
			WaitFor:      "[data-hook='review']",
			ExtractRules: extractRules,
		}

		response, _, err := client.Scrape(params)
		if err != nil {
			// Best effort: log the failure and move on to the next page.
			log.Printf("Error scraping page %d: %v", page, err)
			continue
		}

		// Entries that are not shaped as expected are skipped silently.
		reviews, ok := response["reviews"].([]any)
		if !ok {
			continue
		}
		for _, review := range reviews {
			if r, ok := review.(map[string]any); ok {
				allReviews = append(allReviews, r)
			}
		}
	}

	fmt.Printf("Scraped %d reviews\n", len(allReviews))
}