2024-08-14 SiteCrawler 网页源码解析(ChatApi)

该接口来自ChatApi,调用时仍然需要扣除相应的tokens。所需字段说明如下:

1,chatapi-token:需要填入你在chatapi中申请的令牌

2,url:需要请求的url,请带上标准的协议前缀(http:// 或 https://),例如https://www.google.com

3,js_render:

是否启用js渲染,填入False或者True。

建议默认为False,否则会加载过多内容。

部分页面不支持直接请求,此时需要填入True后再次测试。

导入如下插件配置,并且在工作流中灵活使用。

{
"nodes": [
{
"nodeId": "pluginInput",
"name": "自定义插件输入",
"intro": "可以配置插件需要哪些输入,利用这些输入来运行插件",
"avatar": "core/workflow/template/workflowStart",
"flowNodeType": "pluginInput",
"showStatus": false,
"position": {
"x": 140.82673714191208,
"y": -298.5868443770263
},
"version": "481",
"inputs": [
{
"renderTypeList": [
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "chatapi-token",
"label": "chatapi-token",
"description": "获取chatapi中的令牌写入",
"required": true
},
{
"renderTypeList": [
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "url",
"label": "url",
"description": "需要抓取的url链接,请严格填入标准地址,例如https://www.google.com/",
"required": true
},
{
"renderTypeList": [
"reference"
],
"selectedTypeIndex": 0,
"valueType": "string",
"canEdit": true,
"key": "js_render",
"label": "js_render",
"description": "是否支持js渲染,填入False或者True。\n建议默认为False,否则会加载过多内容,\n部分页面不支持直接请求,所以需要填入True后再次测试。",
"required": true
}
],
"outputs": [
{
"id": "chatapi-token",
"valueType": "string",
"key": "chatapi-token",
"label": "chatapi-token",
"type": "hidden"
},
{
"id": "text",
"valueType": "string",
"key": "url",
"label": "url",
"type": "hidden"
},
{
"id": "js_render",
"valueType": "string",
"key": "js_render",
"label": "js_render",
"type": "hidden"
}
]
},
{
"nodeId": "pluginOutput",
"name": "自定义插件输出",
"intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出",
"avatar": "core/workflow/template/pluginOutput",
"flowNodeType": "pluginOutput",
"showStatus": false,
"position": {
"x": 1730.6356700358813,
"y": -192.42221566861426
},
"version": "481",
"inputs": [
{
"renderTypeList": [
"reference"
],
"valueType": "string",
"canEdit": true,
"key": "result",
"label": "result",
"description": "",
"value": [
"fxeuqI9GnWvO",
"qLUQfhG0ILRX"
]
}
],
"outputs": []
},
{
"nodeId": "bqKyItJbuSk0",
"name": "HTTP 请求",
"intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)",
"avatar": "core/workflow/template/httpRequest",
"flowNodeType": "httpRequest468",
"showStatus": true,
"position": {
"x": 647.4675982173143,
"y": -569.4085725632642
},
"version": "481",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": [
"addInputParam"
],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "core.module.input.description.HTTP Dynamic Input",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
}
},
{
"key": "system_httpMethod",
"renderTypeList": [
"custom"
],
"valueType": "string",
"label": "",
"value": "POST",
"required": true
},
{
"key": "system_httpReqUrl",
"renderTypeList": [
"hidden"
],
"valueType": "string",
"label": "",
"description": "core.module.input.description.Http Request Url",
"placeholder": "https://api.ai.com/getInventory",
"required": false,
"value": "https://chatapi.aisws.com/v1/chat/completions"
},
{
"key": "system_httpHeader",
"renderTypeList": [
"custom"
],
"valueType": "any",
"value": [
{
"key": "content-type",
"type": "string",
"value": "application/json"
},
{
"key": "Authorization",
"type": "string",
"value": "{{chatapi-token}}"
}
],
"label": "",
"description": "core.module.input.description.Http Request Header",
"placeholder": "core.module.input.description.Http Request Header",
"required": false
},
{
"key": "system_httpParams",
"renderTypeList": [
"hidden"
],
"valueType": "any",
"value": [],
"label": "",
"required": false
},
{
"key": "system_httpJsonBody",
"renderTypeList": [
"hidden"
],
"valueType": "any",
"value": "{\r\n \"model\": \"sitecrawler\",\r\n \"messages\": [\r\n {\r\n \"role\": \"user\",\r\n \"content\": {\r\n \"requesturl\": \"{{url}}\",\r\n \"javascript_rendering\": \"{{js_render}}\"\r\n }\r\n }\r\n ]\r\n}",
"label": "",
"required": false
},
{
"renderTypeList": [
"reference"
],
"valueType": "string",
"canEdit": true,
"key": "chatapi-token",
"label": "chatapi-token",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": [
"pluginInput",
"chatapi-token"
]
},
{
"renderTypeList": [
"reference"
],
"valueType": "string",
"canEdit": true,
"key": "url",
"label": "url",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": [
"pluginInput",
"text"
]
},
{
"renderTypeList": [
"reference"
],
"valueType": "string",
"canEdit": true,
"key": "js_render",
"label": "js_render",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": [
"pluginInput",
"js_render"
]
}
],
"outputs": [
{
"id": "error",
"key": "error",
"label": "请求错误",
"description": "HTTP请求错误信息,成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "httpRawResponse",
"key": "httpRawResponse",
"label": "原始响应",
"required": true,
"description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。",
"valueType": "any",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"customFieldConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": false
}
},
{
"id": "oUUcZSdecHln",
"valueType": "string",
"type": "dynamic",
"key": "choices",
"label": "choices"
}
]
},
{
"nodeId": "fxeuqI9GnWvO",
"name": "代码运行",
"intro": "执行一段简单的脚本代码,通常用于进行复杂的数据处理。",
"avatar": "core/workflow/template/codeRun",
"flowNodeType": "code",
"showStatus": true,
"position": {
"x": 1189.8054509734725,
"y": -459.8882963268526
},
"version": "482",
"inputs": [
{
"key": "system_addInputParam",
"renderTypeList": [
"addInputParam"
],
"valueType": "dynamic",
"label": "",
"required": false,
"description": "这些变量会作为代码的运行的输入参数",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
}
},
{
"key": "codeType",
"renderTypeList": [
"hidden"
],
"label": "",
"value": "js"
},
{
"key": "code",
"renderTypeList": [
"custom"
],
"label": "",
"value": "function main({ choices }) {\n // 首先解析 choices 字符串为 JSON 对象\n let parsedChoices;\n try {\n parsedChoices = JSON.parse(choices);\n } catch (e) {\n return {\n result: \"Invalid input: choices cannot be parsed as JSON.\"\n };\n }\n\n // 检查解析后的数据是否符合预期\n if (!Array.isArray(parsedChoices) || parsedChoices.length === 0) {\n return {\n result: \"Invalid input: parsed choices is not an array or is empty.\"\n };\n }\n\n // 获取 message.content 对象\n const message = parsedChoices[0].message;\n if (!message || !message.content) {\n return {\n result: \"Invalid input: message or content is missing.\"\n };\n }\n\n const content = message.content;\n\n // 提取 extracted_html 和 error_flag 数据\n const extractedHtml = content.extracted_html || \"\";\n const errorFlag = content.error_flag || \"\";\n\n // 判断 extracted_html 是否为空,如果为空,则返回 error_flag 的值\n const result = extractedHtml ? extractedHtml : errorFlag;\n\n // 返回结果\n return {\n result: result\n };\n}\n"
},
{
"renderTypeList": [
"reference"
],
"valueType": "string",
"canEdit": true,
"key": "choices",
"label": "choices",
"customInputConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": true
},
"required": true,
"value": [
"bqKyItJbuSk0",
"oUUcZSdecHln"
]
}
],
"outputs": [
{
"id": "system_rawResponse",
"key": "system_rawResponse",
"label": "完整响应数据",
"valueType": "object",
"type": "static"
},
{
"id": "error",
"key": "error",
"label": "运行错误",
"description": "代码运行错误信息,成功时返回空",
"valueType": "object",
"type": "static"
},
{
"id": "system_addOutputParam",
"key": "system_addOutputParam",
"type": "dynamic",
"valueType": "dynamic",
"label": "",
"customFieldConfig": {
"selectValueTypeList": [
"string",
"number",
"boolean",
"object",
"arrayString",
"arrayNumber",
"arrayBoolean",
"arrayObject",
"any",
"chatHistory",
"datasetQuote",
"dynamic",
"selectApp",
"selectDataset"
],
"showDescription": false,
"showDefaultValue": false
},
"description": "将代码中 return 的对象作为输出,传递给后续的节点。变量名需要对应 return 的 key"
},
{
"id": "qLUQfhG0ILRX",
"type": "dynamic",
"key": "result",
"valueType": "string",
"label": "result"
}
]
}
],
"edges": [
{
"source": "pluginInput",
"target": "bqKyItJbuSk0",
"sourceHandle": "pluginInput-source-right",
"targetHandle": "bqKyItJbuSk0-target-left"
},
{
"source": "bqKyItJbuSk0",
"target": "fxeuqI9GnWvO",
"sourceHandle": "bqKyItJbuSk0-source-right",
"targetHandle": "fxeuqI9GnWvO-target-left"
},
{
"source": "fxeuqI9GnWvO",
"target": "pluginOutput",
"sourceHandle": "fxeuqI9GnWvO-source-right",
"targetHandle": "pluginOutput-target-left"
}
]
}
请登录后发表评论

    没有回复内容