# POST a scrape request for https://news.ycombinator.com/ with "js" set to true
# (presumably enables JavaScript rendering -- confirm against the API docs).
# The extract rules ask for one object per `tr.athing` row with three fields:
# title and url come from `.titleline > a`; points is computed by a function
# that reads `.score` from the row's next sibling and yields null if absent.
# The '"'"' sequences embed a literal single quote inside the single-quoted
# JSON payload (close quote, double-quoted ', reopen quote).
curl --request POST "https://api.ujeebu.com/scrape" \
  --header "ApiKey: YOUR_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
"url": "https://news.ycombinator.com/",
"js": true,
"extract_rules": {
"stories": {
"selector": "tr.athing",
"type": "obj",
"multiple": true,
"children": {
"title": {"selector": ".titleline > a", "type": "text"},
"url": {"selector": ".titleline > a", "type": "link"},
"points": {"type": "fn", "fn": "return $parent.nextElementSibling?.querySelector('"'"'.score'"'"')?.textContent || null;"}
}
}
}
}'
import requests
# Extraction rules sent with the scrape request: one object per `tr.athing`
# row. Fields with a "selector" are read from inside that row; the "fn" fields
# run a snippet that looks at the row's next sibling element and return null
# when the expected node is not there.
extract_rules = {
    "stories": {
        "selector": "tr.athing",
        "type": "obj",
        "multiple": True,
        "children": {
            "rank": {"selector": ".rank", "type": "text"},
            "title": {"selector": ".titleline > a", "type": "text"},
            "url": {"selector": ".titleline > a", "type": "link"},
            "domain": {"selector": ".sitestr", "type": "text"},
            "points": {
                "type": "fn",
                "fn": "return $parent.nextElementSibling?.querySelector('.score')?.textContent || null;",
            },
            "author": {
                "type": "fn",
                "fn": "return $parent.nextElementSibling?.querySelector('.hnuser')?.textContent || null;",
            },
            "time": {
                "type": "fn",
                "fn": "return $parent.nextElementSibling?.querySelector('.age')?.textContent || null;",
            },
            "comments_text": {
                "type": "fn",
                "fn": "const links = Array.from($parent.nextElementSibling?.querySelectorAll('.subtext a') || []); return links.length ? links[links.length-1].textContent : null;",
            },
        },
    }
}


def format_story(story):
    """Return the two display lines for one extracted story.

    Args:
        story: dict with "rank" and "title" keys, and optionally "points"
            and "author".

    Returns:
        A list of two strings: "<rank> <title>" and " <points> by <author>".

    Raises:
        KeyError: if "rank" or "title" is missing.
    """
    # The extraction functions return null when a node is missing, so the key
    # can be present with value None. dict.get's default only covers an absent
    # key; `or` makes both the absent and the None case fall back to "N/A".
    points = story.get("points") or "N/A"
    author = story.get("author") or "N/A"
    return [f"{story['rank']} {story['title']}", f" {points} by {author}"]


def main():
    """POST the scrape request and print the first five extracted stories."""
    response = requests.post(
        "https://api.ujeebu.com/scrape",
        headers={
            "ApiKey": "YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json={
            "url": "https://news.ycombinator.com/",
            "js": True,
            "extract_rules": extract_rules,
        },
        # requests waits indefinitely unless a timeout is given. 120 seconds
        # is a generous guess, not a documented limit -- tune as needed.
        timeout=120,
    )
    # Fail with a clear HTTP error rather than a KeyError on an error payload.
    response.raise_for_status()
    stories = response.json()["result"]["stories"]
    for story in stories[:5]:
        for line in format_story(story):
            print(line)


if __name__ == "__main__":
    main()
const axios = require('axios');
// Extraction rules sent with the scrape request: one object per `tr.athing`
// row. Fields with a `selector` are read from inside that row; the `fn` fields
// run a snippet that looks at the row's next sibling element and return null
// when the expected node is not there.
const extractRules = {
  stories: {
    selector: 'tr.athing',
    type: 'obj',
    multiple: true,
    children: {
      rank: { selector: '.rank', type: 'text' },
      title: { selector: '.titleline > a', type: 'text' },
      url: { selector: '.titleline > a', type: 'link' },
      domain: { selector: '.sitestr', type: 'text' },
      points: {
        type: 'fn',
        fn: "return $parent.nextElementSibling?.querySelector('.score')?.textContent || null;",
      },
      author: {
        type: 'fn',
        fn: "return $parent.nextElementSibling?.querySelector('.hnuser')?.textContent || null;",
      },
    },
  },
};

/**
 * POSTs the scrape request and logs the first five extracted stories.
 *
 * The request lives in an async function because this file loads axios with
 * `require` (CommonJS), where a top-level `await` is a syntax error.
 *
 * @returns {Promise<void>} Resolves once the stories have been logged.
 */
async function main() {
  const response = await axios.post(
    'https://api.ujeebu.com/scrape',
    {
      url: 'https://news.ycombinator.com/',
      js: true,
      extract_rules: extractRules,
    },
    {
      headers: { ApiKey: 'YOUR_API_KEY' },
    },
  );
  const { stories } = response.data.result;
  for (const story of stories.slice(0, 5)) {
    console.log(`${story.rank} ${story.title}`);
    // `||` is deliberate: null and empty strings both fall back to 'N/A'.
    console.log(` ${story.points || 'N/A'} by ${story.author || 'N/A'}`);
  }
}

// Report a failed request explicitly instead of leaving the promise floating.
main().catch((err) => {
  console.error('Scrape request failed:', err.message);
  process.exitCode = 1;
});