前置准备
用讯飞大模型3.5搭建好应用,具体操作可以看我的这篇:讯飞星火大模型API,实名认证免费领一年有效期的200万Token。在控制台的左侧有星火知识库,实名认证过就可以开通免费的部分。用这个纯粹是因为免费,关于这个大模型的使用体验啥的不做评价;大家也可以选择自己喜欢的其他模型,如使用其他模型,则下文代码中的API接口调用部分需要自行根据所选模型的文档说明进行调整。
文档准备
爬虫demo爬取的是百度百科——藜麦词条的数据。这个demo有bug:百度百科网页的class_类名隔一段时间会变,如果执行报错了,就打开百度百科——藜麦词条的网页源码,找到这一段内容的类名替换即可,不会操作的话可以留言,我出教程。(因为我还没学过这块,所以只会替换类名这样的傻瓜式操作,如果有更好的方法,感谢各位大佬的指点)
#文档准备
import requests
from bs4 import BeautifulSoup
url = "https://baike.baidu.com/item/%E8%97%9C%E9%BA%A6/5843874"

# CSS class names used to locate the entry title and its paragraphs. The
# hashed suffixes change on the site from time to time; when the lookup below
# fails, open the page source and update these three values.
TITLE_BOX_CLASS = "lemmaTitleBox_dR4Nr"
CONTENT_CLASS = "J-lemma-content"
PARAGRAPH_CLASS = "para_VW7X7 content_V8j6o MARK_MODULE"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() turns an HTTP error page into an explicit failure.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Get the entry title and the container that holds the paragraphs.
title_box = soup.find("div", class_=TITLE_BOX_CLASS)
title_tag = title_box.find("h1") if title_box is not None else None
content_box = soup.find("div", class_=CONTENT_CLASS)
if title_tag is None or content_box is None:
    # Without this check a changed class name shows up as an opaque
    # "'NoneType' object has no attribute 'find'" error.
    raise RuntimeError(
        "Could not locate the title/content elements; the page's CSS class "
        "names have probably changed - update TITLE_BOX_CLASS / "
        "CONTENT_CLASS / PARAGRAPH_CLASS from the page source."
    )

title = title_tag.text
summary_list = content_box.find_all("div", class_=PARAGRAPH_CLASS)
print('词条:' + title)
print('简介:')
# Write every paragraph to the text file (separated by a blank line) and echo
# it. The encoding is explicit so the Chinese text is stored as UTF-8
# regardless of the platform's default codec.
with open("./藜.txt", "w", encoding="utf-8") as file:
    for summary in summary_list:
        file.write(summary.text + "\n\n")
        print(summary.text)
文档上传
# -*- coding:utf-8 -*-
import hashlib
import base64
import hmac
import time
import random
from urllib.parse import urlencode
import json
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
class Document_Upload:
    """Build the signed headers and request payloads for a document upload.

    The signature is computed from the application id, the API secret and a
    timestamp string, all of which are supplied by the caller.
    """

    def __init__(self, APPId, APISecret, timestamp):
        """Store the credentials and the timestamp (a string) used for signing."""
        self.APPId = APPId
        self.APISecret = APISecret
        self.Timestamp = timestamp

    def get_origin_signature(self):
        """Return the hex MD5 digest of ``APPId + Timestamp`` (UTF-8 encoded)."""
        raw = (self.APPId + self.Timestamp).encode("utf-8")
        return hashlib.md5(raw).hexdigest()

    def get_signature(self):
        """Return the base64 text of HMAC-SHA1(APISecret, origin signature)."""
        # Get the original (MD5) signature.
        signature_origin = self.get_origin_signature()
        # Sign that text with the API secret as the HMAC key.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha1,
        ).digest()
        # Base64-encode the binary digest.
        return base64.b64encode(digest).decode(encoding='utf-8')

    def get_header(self):
        """Return the authentication headers: appId, timestamp and signature."""
        return {
            "appId": self.APPId,
            "timestamp": self.Timestamp,
            "signature": self.get_signature(),
        }

    @staticmethod
    def _new_boundary():
        """Return a multipart boundary: a dash prefix plus 29 random digits."""
        # Integer bounds are required: random.randint() rejects float
        # arguments such as 1e28 on Python 3.12+, and ``1e29 - 1`` cannot be
        # represented exactly as a float anyway.
        return '------------------' + str(random.randint(10 ** 28, 10 ** 29 - 1))

    # Submit a file that is reachable over the network.
    def get_body(self):
        """Return a ``MultipartEncoder`` form describing a remote file.

        The ``url`` / ``fileName`` / ``callbackUrl`` values are placeholders
        that must be replaced before the form is actually sent.
        """
        body = {
            "file": "",
            "url": "文件网络地址 例如: https://xxx.xx.com/xxx.pdf",
            "fileName": "文件名, 例如:xxx.pdf",
            "fileType": "wiki",
            "callbackUrl": "your_callbackUrl"
        }
        form = MultipartEncoder(
            fields=body,
            boundary=self._new_boundary()
        )
        return form

    # Submit a local file. Note that File is a path string, e.g. "./藜.txt".
    def get_files_and_body(self, File):
        """Return ``(files, body)`` for uploading the local file at ``File``.

        ``files`` maps ``'file'`` to the file opened in binary mode; the
        caller owns that handle and is responsible for closing it once the
        request has been sent. ``body`` holds the accompanying form fields.
        """
        body = {
            "url": "",
            "fileName": File,
            "fileType": "wiki",
            "needSummary": False,
            "stepByStep": False,
            "callbackUrl": "your_callbackUrl",
        }
        files = {'file': open(File, 'rb')}
        return files, body
def getFileId(Id, Secret, FileId):
    """Upload a local file and return the ``fileId`` from the server's reply.

    Args:
        Id: application id from the open-platform console.
        Secret: API secret belonging to that application.
        FileId: despite its name, this is the *path* of the local file to
            upload (e.g. "./藜.txt"); the name is kept for compatibility
            with existing callers.

    Returns:
        The value of ``data.fileId`` in the JSON response.

    Raises:
        RuntimeError: if the response does not contain ``data.fileId``.
    """
    # Create an application in the open-platform console
    # (https://console.xfyun.cn) first and pass its credentials in.
    APPId = Id
    APISecret = Secret
    curTime = str(int(time.time()))
    request_url = "https://chatdoc.xfyun.cn/openapi/fileUpload"
    document_upload = Document_Upload(APPId, APISecret, curTime)
    headers = document_upload.get_header()

    # ****************** Submit a network file
    # body = document_upload.get_body()
    # headers['Content-Type'] = body.content_type
    # response = requests.post(request_url, data=body, headers=headers)

    # ****************** Submit a local file
    # Use the function's own argument here; the previous code read a global
    # named ``File`` and only worked when the calling script defined one.
    files, body = document_upload.get_files_and_body(FileId)
    try:
        response = requests.post(request_url, files=files, data=body, headers=headers)
    finally:
        # get_files_and_body() hands us an open handle; release it.
        files['file'].close()

    payload = response.json()
    try:
        return payload['data']['fileId']
    except (KeyError, TypeError) as err:
        # Show the server's reply instead of a bare KeyError/TypeError.
        raise RuntimeError(f"fileUpload did not return a fileId: {payload}") from err
# 文档上传成功
问答检索
# -*- coding:utf-8 -*-
import hashlib
import base64
import hmac
import time
from urllib.parse import urlencode
import json
import websocket
import _thread as thread
import ssl
class Document_Q_And_A:
    """Build the signed URL, headers and request body for the document chat.

    The signature is computed from the application id, the API secret and a
    timestamp string, all of which are supplied by the caller.
    """

    def __init__(self, APPId, APISecret, TimeStamp, OriginUrl):
        """Store the credentials, the timestamp string and the base URL."""
        self.appId = APPId
        self.apiSecret = APISecret
        self.timeStamp = TimeStamp
        self.originUrl = OriginUrl

    def get_origin_signature(self):
        """Return the hex MD5 digest of ``appId + timeStamp`` (UTF-8 encoded)."""
        raw = (self.appId + self.timeStamp).encode("utf-8")
        return hashlib.md5(raw).hexdigest()

    def get_signature(self):
        """Return the base64 text of HMAC-SHA1(apiSecret, origin signature)."""
        # Get the original (MD5) signature.
        signature_origin = self.get_origin_signature()
        # Sign that text with the API secret as the HMAC key.
        digest = hmac.new(
            self.apiSecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha1,
        ).digest()
        # Base64-encode the binary digest.
        return base64.b64encode(digest).decode(encoding='utf-8')

    def get_header(self):
        """Return JSON request headers carrying appId, timestamp and signature."""
        return {
            "Content-Type": "application/json",
            "appId": self.appId,
            "timestamp": self.timeStamp,
            "signature": self.get_signature(),
        }

    def get_url(self):
        """Return ``originUrl`` with appId, timestamp and signature as a query string."""
        signature = self.get_signature()
        # The separator before ``timestamp`` must be a literal "&"; it had
        # been mangled into "×tamp" (the HTML entity "&times" rendered as a
        # character), which produced an invalid query string.
        # The query is assembled by hand on purpose: per the original
        # author's note, urlencode() garbles the signature.
        return (
            self.originUrl
            + "?"
            + f'appId={self.appId}&timestamp={self.timeStamp}&signature={signature}'
        )

    def get_body(self, content, FileId):
        """Return the request payload asking ``content`` about file ``FileId``.

        The payload carries a fixed ``chatExtends`` block (prompt template,
        filter score, temperature), the single file id and one user message.
        """
        data = {
            "chatExtends": {
                "wikiPromptTpl": "请将以下内容作为已知信息:\n<wikicontent>\n请根据以上内容回答用户的问题。\n问题:<wikiquestion>\n回答:",
                "wikiFilterScore": 0.83,
                "temperature": 0.5
            },
            "fileIds": [
                FileId
            ],
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ]
        }
        return data
# Callback invoked when the websocket reports an error.
def on_error(ws, error):
    """Print the error behind a ``### error:`` prefix; ``ws`` is not used."""
    report = ("### error:", error)
    print(*report)
# Callback invoked when the websocket has been closed.
def on_close(ws, close_status_code, close_msg):
    """Print a closed banner followed by the close code and close reason."""
    print("### closed ###")
    details = (
        ("关闭代码:", close_status_code),
        ("关闭原因:", close_msg),
    )
    for label, value in details:
        print(label, value)
# Callback invoked once the websocket connection is established.
def on_open(ws):
    """Start a new thread that runs ``run(ws)``."""
    worker_args = (ws,)
    thread.start_new_thread(run, worker_args)
def run(ws, *args):
    """Serialize ``ws.question`` to JSON and send it over ``ws``.

    Any extra positional arguments are accepted and ignored.
    """
    ws.send(json.dumps(ws.question))
# Callback invoked for every message received on the websocket.
def on_message(ws, message):
    """Print the answer text carried by one server message.

    ``message`` is a JSON document. A non-zero ``code`` is reported and the
    socket is closed. Otherwise the ``content`` text (if any) is printed
    without a trailing newline, and the socket is closed once ``status`` is 2.
    """
    data = json.loads(message)
    code = data['code']
    if code != 0:
        print(f'请求错误: {code}, {data}')
        ws.close()
        return

    # Not every successful frame carries "content"; indexing it directly
    # raised KeyError, which showed up as "### error: 'content'" in the
    # output. Treat a missing/empty value as "nothing to print".
    content = data.get("content") or ""
    status = data.get("status")
    print(content, end='')
    if status == 2:
        ws.close()
def GPT(Id, Secret, FileId, content):
    """Ask ``content`` about the uploaded file ``FileId`` over a websocket.

    Builds a signed URL and request body with ``Document_Q_And_A``, prints the
    URL, then runs a ``WebSocketApp`` wired to the module's callbacks until
    the connection ends. Returns nothing.
    """
    # Create an application in the open-platform console
    # (https://console.xfyun.cn) first and pass its credentials in.
    timestamp = str(int(time.time()))
    chat_endpoint = "wss://chatdoc.xfyun.cn/openapi/chat"
    client = Document_Q_And_A(Id, Secret, timestamp, chat_endpoint)

    signed_url = client.get_url()
    print(signed_url)
    _unused_headers = client.get_header()  # built as in the original; never sent
    question = client.get_body(content, FileId)

    # Turn off the websocket library's trace output.
    websocket.enableTrace(False)
    callbacks = {
        "on_message": on_message,
        "on_error": on_error,
        "on_close": on_close,
        "on_open": on_open,
    }
    ws = websocket.WebSocketApp(signed_url, **callbacks)
    ws.appid = Id
    # run() reads this attribute and sends it once the connection is open.
    ws.question = question
    # NOTE(review): cert_reqs=CERT_NONE is passed here, i.e. certificate
    # verification is requested to be off - confirm this is intended.
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
# 文档问答成功
Id和Secret在讯飞开放平台的控制台星火知识库页面找到
# Script entry point: upload the local document, then ask one question about it.
if __name__ == '__main__':
    # Placeholder credentials; replace them with the application's real
    # Id and Secret before running.
    Id = '*******'
    Secret = '*****'
    # Path of the local text file to upload.
    File = './藜.txt'
    # Upload the file and keep the id returned for it.
    FileId = getFileId(Id,Secret,File)
    # The question to ask about the uploaded document.
    content = '藜怎么防治虫害?'
    # The answer is printed by the websocket callbacks as it arrives.
    GPT(Id, Secret,FileId,content)
输出结果
### error: 'content'
### error: 'content'
藜麦的常见虫害有象甲虫、金针虫、蝼蛄、黄条跳甲、横纹菜蝽、萹蓄齿胫叶甲、潜叶蝇、蚜虫、夜蛾等。对于这些虫害的防治,可以采取以下方法:
1. 每亩用3%的辛硫磷颗粒剂2-2.5千克于耕地前均匀撒施,随耕地翻入土中。
2. 每亩用40%的辛硫磷乳油250毫升,加水1-2千克,拌细土20-25千克配成毒土,撒施地面翻入土中,这样可以有效防治地下害虫。
此外,在藜麦8叶龄时,需要将行中的杂草、病株及残株拔掉,提高整齐度和增加通风透光,同时进行根部培土,以防止后期倒伏。### closed ###
关闭代码: None
关闭原因: None
标签:body,知识库,get,Demo,self,浅学,signature,import,def
From: https://blog.csdn.net/qq_44894943/article/details/137050618