用 Docker 一条命令即可部署 Huginn,主要是搭配 RSS 阅读器使用:
docker run -d -p 3000:3000 ghcr.io/yhdsl/huginn:latest
这次主要是为了自定义爬取内容,
筛选掉一些我用不上的(比如奶粉之类)。下面是可以直接导入 Huginn 的场景(Scenario)JSON:
{
"schema_version": 1,
"name": "什么值得买榜单",
"description": "关键词里面自己修改",
"source_url": false,
"guid": "3038bbb808e3628363d6d97ea85b50d5",
"tag_fg_color": "#ffffff",
"tag_bg_color": "#f33535",
"icon": "gear",
"exported_at": "2024-01-19T06:25:19Z",
"agents": [
{
  "type": "Agents::TriggerAgent",
  "name": "什么值得买-筛选数据(关键词)",
  "disabled": false,
  "guid": "0635876a2d42933095f5463f1b0d95bc",
  "options": {
    "expected_receive_period_in_days": "2",
    "keep_event": "true",
    "rules": [
      {
        "type": "!regex",
        "value": "酒|咖啡|手机|收藏|窖|过期|水饺|抽纸|老抽|生抽|牛肉|鸭|话费|奶粉|生蚝|羊肉|海鲜|螺狮粉|螺蛳粉",
        "path": "title"
      }
    ]
  },
  "keep_events_for": 259200,
  "propagate_immediately": true
},
{
  "type": "Agents::WebsiteAgent",
  "name": "什么值得买-获取数据",
  "disabled": false,
  "guid": "c693a156bc391111f3bd6ff08fb1ced2",
  "options": {
    "expected_update_period_in_days": "2",
    "url": "{{ url }}",
    "type": "html",
    "mode": "all",
    "extract": {
      "title": {
        "xpath": "//*[@id=\"feed-main\"]/div[2]/div/div[1]/h1",
        "value": "normalize-space(.) "
      },
      "content": {
        "xpath": "//*[@id=\"feed-main\"]/div[3]/article/div[1]",
        "value": "normalize-space(.) "
      },
      "photo": {
        "xpath": "//*[@id=\"feed-main\"]/div[2]/a/img",
        "value": "@src"
      },
      "worth": {
        "xpath": "//*[@id=\"rating_worthy_num\"]",
        "value": "normalize-space(.) "
      },
      "worth_percent": {
        "xpath": "//*[@id=\"rating_all_num\"]",
        "value": "normalize-space(.) "
      },
      "comment": {
        "xpath": "//*[@id=\"content\"]/div/div[1]/div[3]/a/span",
        "value": "normalize-space(.) "
      },
      "not_worth": {
        "xpath": "//*[@id=\"rating_unworthy_num\"]",
        "value": "normalize-space(.) "
      },
      "link_to_buy": {
        "xpath": "//*[@id=\"feed-main\"]/div[2]/a",
        "value": "@href"
      }
    }
  },
  "schedule": "every_12h",
  "keep_events_for": 259200,
  "propagate_immediately": true
},
{
  "type": "Agents::WebsiteAgent",
  "name": "什么值得买-源数据",
  "disabled": false,
  "guid": "c7c2f2cd1fff9b75c390fb6d1a3f0d54",
  "options": {
    "expected_update_period_in_days": "2",
    "url": "https://faxian.smzdm.com/h2s0t0f0c0p1/",
    "type": "html",
    "mode": "on_change",
    "extract": {
      "url": {
        "xpath": "//*[@id=\"feed-main-list\"]/li/div/div[1]/a[1]",
        "value": "@href"
      },
      "title": {
        "xpath": "//*[@id=\"feed-main-list\"]/li/div/h5/a",
        "value": "normalize-space(.)"
      },
      "img": {
        "xpath": "//*[@id=\"feed-main-list\"]/li/div/div[1]/a[1]/img",
        "value": "@src"
      },
      "price": {
        "xpath": "//*[@id=\"feed-main-list\"]/li/div/div[2]",
        "value": "normalize-space(.)"
      },
      "link": {
        "xpath": "//*[@id=\"feed-main-list\"]/li/div/div[5]/div[2]/div/div/a[1]",
        "value": "@href"
      }
    }
  },
  "schedule": "every_10m",
  "keep_events_for": 259200,
  "propagate_immediately": true
},
{
  "type": "Agents::DataOutputAgent",
  "name": "什么值得买-RSS",
  "disabled": false,
  "guid": "cc405cd989e17453bb9f5aacb0ab7ab3",
  "options": {
    "secrets": [
      "smzdm"
    ],
    "expected_receive_period_in_days": 2,
    "template": {
      "title": "什么值得买-热门榜",
      "description": "{{content}}",
      "item": {
        "title": "{{title}}",
        "description": "<p>{{content}}</p><p>值{{worth}} 不值{{not_worth}} {{worth_percent}}</p><div class=\"img_description\"></div><p></p><img src=\"{{photo}}\" referrerpolicy=\"no-referrer\"><div class=\"img_description\"></div><p><a href=\"{{link_to_buy}}\">购买链接</a></p>",
        "link": "{{link_to_buy}}"
      },
      "link": "{{link_to_buy}}"
    },
    "ns_media": "true"
  },
  "propagate_immediately": true
}
],
"links": [
{
"source": 0,
"receiver": 1
},
{
"source": 1,
"receiver": 3
},
{
"source": 2,
"receiver": 0
}
],
"control_links": [
]
}
标签:xpath,feed,2024.01,19,value,爬取,div,main,id
From: https://www.cnblogs.com/mokou/p/17974597