The scraping agent API is used to manage web scraping agents under an Agenty account. Use this API to create a new web scraper, modify an existing scraper, etc.
Create a scraping agent
Endpoint:
Method: POST
URL: https://api.agenty.com/v2/agents
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Body:
{
"name": "Books price scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"version": 4,
"config": {
"url": "http://books.toscrape.com/",
"code": null,
"form": {
"actions": [],
"enabled": false
},
"login": {
"actions": [],
"enabled": false
},
"retry": {
"enabled": true,
"max_try": 0,
"try_interval": 0
},
"device": "desktop",
"header": {},
"logout": {
"actions": [],
"enabled": false
},
"browser": "agenty",
"viewport": {
"width": 1920,
"height": 969
},
"wait_for": {
"type": "selector",
"value": null
},
"anonymous": {
"proxy": true,
"country": null,
"proxy_type": null,
"skip_resource_types": []
},
"block_ads": true,
"lazy_load": false,
"pagination": {
"enabled": true,
"type": "click",
"selector": "",
"script": null,
"max_pages": 10
},
"user_agent": null,
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "name",
"type": "CSS",
"selector": "#ajax-load-more h3",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"join": false,
"postprocessing": [],
"formatter": []
},
{
"name": "date",
"type": "CSS",
"selector": ".entry-meta",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "discription",
"type": "CSS",
"selector": ".entry-meta+ p",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "image",
"type": "CSS",
"selector": "#ajax-load-more .wp-post-image",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
}
],
"from": null,
"schema": null
}
],
"authenticate": null,
"go_to_options": {
"referer": null,
"timeout": 500,
"wait_until": []
},
"block_trackers": true,
"reject_resource_types": [],
"reject_request_pattern": [],
"set_javascript_enabled": true
}
}
Responses:
Status: OK | Code: 200
{
"agent_id": "o432e26w46",
"account_id": 14,
"user_id": 29,
"project_id": null,
"name": "Books price scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"tags": [],
"version": 1,
"config": {
"url": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "name",
"type": "CSS",
"selector": "#ajax-load-more h3",
"extract": "TEXT",
"attribute": null,
"from": null,
"join": false
},
{
"name": "date",
"type": "CSS",
"selector": ".entry-meta",
"extract": "TEXT",
"attribute": null,
"from": null,
"join": false
},
{
"name": "discription",
"type": "CSS",
"selector": ".entry-meta+ p",
"extract": "TEXT",
"attribute": null,
"from": null,
"join": false
},
{
"name": "image",
"type": "CSS",
"selector": "#ajax-load-more .wp-post-image",
"extract": "ATTR",
"attribute": "src",
"from": null,
"join": false
}
],
"query": null
}
],
"browser": "agenty",
"user_agent": null,
"wait_for": {
"type": "selector",
"value": null,
"hidden": false,
"visible": false,
"timeout": 0
},
"go_to_options": {
"timeout": 500,
"wait_until": [],
"referer": null
},
"device": "desktop",
"frame": null,
"viewport": {
"width": 1920,
"height": 969
},
"authenticate": null,
"login": {
"enabled": false,
"actions": []
},
"logout": {
"enabled": false,
"actions": []
},
"pagination": {
"enabled": true,
"type": "click",
"value": null,
"container": null,
"max_pages": 10
},
"headers": [],
"retry": {
"enabled": true,
"max_try": 0,
"try_interval": 0
},
"form": {
"enabled": false,
"actions": []
},
"lazy_load": false,
"block_ads": true,
"block_trackers": true,
"set_javascript_enabled": true,
"reject_resource_types": [],
"reject_request_pattern": [],
"code": null,
"developer_mode": false,
"anonymous": {
"proxy": true,
"proxy_type": null,
"country": null,
"skip_resource_types": []
}
},
"is_public": false,
"is_managed": false,
"created_at": "0001-01-01T00:00:00Z",
"updated_at": null,
"icon": null,
"scheduler": null,
"scripts": null
}
Get a scraping agent by ID
Endpoint:
Method: GET
URL: https://api.agenty.com/v2/agents/scraping/gowlyyg3dp
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Responses:
Status: OK | Code: 200
{
"config": {
"sourceurl": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "PRODUCT_NAME",
"type": "CSS",
"selector": "h3 a",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "PRICE",
"type": "CSS",
"selector": ".price_color",
"extract": "TEXT",
"attribute": "",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": null,
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "IMAGE",
"type": "CSS",
"selector": ".thumbnail",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "DETAILS_PAGE_URL",
"type": "CSS",
"selector": ".product_pod h3 a",
"extract": "ATTR",
"attribute": "href",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
}
]
}
],
"engine": {
"name": "default",
"loadjavascript": true,
"loadimages": false,
"timeout": 30,
"viewport": {
"width": 1280,
"height": 600
}
},
"waitafterpageload": null,
"login": {
"enabled": false,
"type": null,
"data": []
},
"logout": null,
"pagination": {
"enabled": true,
"type": "CLICK",
"selector": ".next a",
"maxpages": 50
},
"header": {
"method": "GET",
"encoding": "utf-8",
"data": [
{
"key": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"key": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
},
{
"key": "Accept-Language",
"value": "*"
}
]
},
"autoredirect": {
"enabled": true,
"maxautoredirect": 3
},
"failretry": {
"enabled": true,
"maxtry": 3,
"tryinterval": 2,
"timeout": 0
},
"proxy": {
"enabled": false,
"type": null,
"reference": null
},
"throttling": {
"enabled": false,
"type": null,
"seconds": 0
},
"formsubmit": {
"enabled": false,
"data": []
},
"profiles": null,
"meta": null,
"input": {
"type": "SOURCE",
"reference": null
}
},
"agent_id": "gowlyyg3dp",
"project_id": null,
"name": "Books scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"version": 2,
"created_at": "2019-03-09T03:01:19",
"updated_at": "2019-03-09T03:03:07",
"cron_expression": null,
"schedule_description": null,
"next_auto_run_at": null,
"access_group_id": 0
}
Update a scraping agent
Endpoint:
Method: PUT
URL: https://api.agenty.com/v1/agents/scraping/{{AGENT_ID}}
Headers:
Key | Value | Description |
---|---|---|
Content-Type | application/json |
Query params:
Key | Value | Description |
---|---|---|
apikey | {{API_KEY}} |
Body:
{
"name": "Books scraping agent",
"description": "This agent will extract the product list, prices, image and detail page hyperlink from books.toscrape.com website",
"type": "scraping",
"config": {
"sourceurl": "http://books.toscrape.com/",
"collections": [
{
"name": "Collection1",
"fields": [
{
"name": "PRODUCT_NAME",
"type": "CSS",
"selector": "h3 a",
"extract": "TEXT",
"attribute": null,
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [],
"formatter": []
},
{
"name": "PRICE",
"type": "CSS",
"selector": ".price_color",
"extract": "TEXT",
"attribute": "",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": null,
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "IMAGE",
"type": "CSS",
"selector": ".thumbnail",
"extract": "ATTR",
"attribute": "src",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
},
{
"name": "DETAILS_PAGE_URL",
"type": "CSS",
"selector": ".product_pod h3 a",
"extract": "ATTR",
"attribute": "href",
"from": null,
"visible": true,
"cleantrim": true,
"joinresult": false,
"postprocessing": [
{
"function": "Insert",
"parameters": [
{
"name": "Input",
"value": "http://books.toscrape.com/"
}
]
}
],
"formatter": []
}
]
}
],
"engine": {
"name": "default",
"loadjavascript": true,
"loadimages": false,
"timeout": 30,
"viewport": {
"width": 1280,
"height": 600
}
},
"waitafterpageload": null,
"login": {
"enabled": false,
"type": null,
"data": []
},
"logout": null,
"pagination": {
"enabled": true,
"type": "CLICK",
"selector": ".next a",
"maxpages": 50
},
"header": {
"method": "GET",
"encoding": "utf-8",
"data": [
{
"key": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
},
{
"key": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
},
{
"key": "Accept-Language",
"value": "*"
}
]
},
"autoredirect": {
"enabled": true,
"maxautoredirect": 3
},
"failretry": {
"enabled": true,
"maxtry": 3,
"tryinterval": 2,
"timeout": 0
},
"proxy": {
"enabled": false,
"type": null,
"reference": null
},
"throttling": {
"enabled": false,
"type": null,
"seconds": 0
},
"formsubmit": {
"enabled": false,
"data": []
},
"meta": null,
"input": {
"type": "SOURCE",
"reference": null
}
}
}
Responses:
Status: OK | Code: 200
{
"status_code": 200,
"message": "Agent with id: gowlyyg3dp updated successfully"
}