概要:本文调用多个 API 接口,对不同类型的数据(直接输入的文本、网页、csv 数据文件)进行情绪分析,并利用 Flask 框架与前端联调,将自己的情绪分析项目部署到服务器端,实现下图功能。(第一篇文章,小小记录一下,要是有帮助就点个赞吧)
一. 免费申请百度api并调用
首先在百度智能云中申请免费的自然语言处理api
选择自己需要的接口领取(我的已经领取过了)领取链接
接着点击应用列表创建应用获得API Key和Secret Key,并通过下方的API在线调试进行鉴权。
接下来就是在自己的项目中调用api接口。(可以参考百度API在线调试中的技术文档也可以查看下方我自己项目中的调用)
# Credentials used for the Baidu API calls (read by get_access_token)
# NOTE(review): these look like real keys hard-coded in source; presumably they
# should be loaded from configuration and rotated — confirm before publishing
API_KEY = "AFjZINkuAXFRfnvBAlKoBj0v"
SECRET_KEY = "TXNz7MClhlihE4HZ9JYRv7ZMNkpfOUVO"
# Obtain the access token used by the upload route's API call
def get_access_token():
    """Request an access token from the Baidu OAuth endpoint.

    Returns:
        str: the ``access_token`` field of the OAuth reply.

    Raises:
        requests.RequestException: on network failure or timeout.
        RuntimeError: if the reply contains no ``access_token`` (for example
            because the API key / secret key pair was rejected).
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # requests waits forever by default, so always bound the call
    reply = requests.post(url, params=params, timeout=10).json()
    token = reply.get("access_token")
    if not token:
        # Previously str(None) == "None" was returned and silently used as a token
        raise RuntimeError("Baidu OAuth reply contained no access_token: {}".format(reply))
    return str(token)
# Call the comment opinion-extraction API
def analyze_comments(comments):
    """Send comment text to the Baidu ``comment_tag`` endpoint.

    Args:
        comments: the comment text to analyse, passed as the ``text`` field.

    Returns:
        The decoded JSON reply of the API.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag?charset=UTF-8&access_token=" + get_access_token()
    payload = json.dumps({
        "text": comments,
        "type": 9  # type 9 selects the real-estate category
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Bound the wait: without a timeout a stalled API call blocks the worker forever
    response = requests.post(url, headers=headers, data=payload, timeout=10)
    return response.json()
二. 利用flask框架构建后端,HTML作为前端
首先确立自己用到哪些python库,下面是笔者项目所用到的依赖。在项目根目录下创建requirements.txt文件
Flask==2.0.1
requests==2.26.0
Jinja2==3.0.1
beautifulsoup4==4.9.3
lxml==4.6.3
from flask import Flask, request, render_template, redirect, url_for, session
import requests
import json
from bs4 import BeautifulSoup
import re
import random
import csv
根目录下创建运行程序app.py,前端目录templates(存储前端代码),static目录存储css文件。
设计调用百度api的函数:
首先定义get_emotion函数,该函数目的是调用两个百度api接口
def get_emotion(data, api_type):
# Define the token and URL used for the Baidu sentiment-analysis API.
# This excerpt only shows the endpoint selection; unknown api_type values yield None.
# NOTE(review): the access token is hard-coded; presumably it expires and should
# come from get_access_token() instead — confirm
token = '24.7dfc12283b63985bdc6bd19c33d0c39c.2592000.1723607279.282335-94742082'
if api_type == 'sentiment':
url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?charset=UTF-8&access_token={}'.format(token)
elif api_type == 'emotion':
url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion?access_token={}'.format(token)
else:
return None
# Call the comment opinion-extraction API
def analyze_comments(comments):
    """Send comment text to the Baidu ``comment_tag`` endpoint.

    Args:
        comments: the comment text to analyse, passed as the ``text`` field.

    Returns:
        The decoded JSON reply of the API.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag?charset=UTF-8&access_token=" + get_access_token()
    payload = json.dumps({
        "text": comments,
        "type": 9  # type 9 selects the real-estate category
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Bound the wait: without a timeout a stalled API call blocks the worker forever
    response = requests.post(url, headers=headers, data=payload, timeout=10)
    return response.json()
1.直接在网站上输入文本进行情绪分析
- 设计根据不同api_type渲染不同模板。这里利用flask框架定义功能1的逻辑
# Route that analyses text typed directly into the form
@app.route('/input', methods=['GET', 'POST'])
def input_text():
    """Show the input form (GET) or analyse the submitted text (POST).

    The form supplies ``text`` and ``api_type``; the result of ``get_emotion``
    is rendered with the template matching the chosen API type, and a falsy
    result is reported as a 500 error.
    """
    if request.method != 'POST':
        return render_template('input.html')

    text = request.form['text']
    api_type = request.form['api_type']
    result = get_emotion(text, api_type)
    if not result:
        return "情感分析失败", 500

    if api_type == 'sentiment':
        # Map the numeric class to its display label: 2 / 1 / anything else
        if result['sentiment'] == 2:
            sentiment_text = "积极"
        elif result['sentiment'] == 1:
            sentiment_text = "中性"
        else:
            sentiment_text = "消极"
        return render_template('result.html', text=None, result=result, sentiment_text=sentiment_text)
    if api_type == 'emotion':
        return render_template('result_emotion.html', text=None, result=result)
    return render_template('input.html')
设计该路由对应的前端input.html(笔者不太会前端,界面有点丑陋)。页面中定义了一个包含两个选项的下拉框,可以选择调用何种api
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>直接输入文本分析</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <h1>直接输入文本分析</h1>
    <!-- Posts the text and the selected API type to the /input route -->
    <form action="/input" method="post">
        <div class="form-group">
            <label for="text">文本:</label>
            <textarea id="text" name="text" required></textarea>
        </div>
        <div class="form-group">
            <label for="api_type_text">API类型:</label>
            <select id="api_type_text" name="api_type">
                <option value="sentiment">情感倾向分析</option>
                <option value="emotion">对话情绪识别</option>
            </select>
        </div>
        <button type="submit">分析文本</button>
    </form>
    <!-- The unmatched closing div that used to follow the form was removed -->
</body>
</html>
2.输入网站地址利用 beautifulsoup4库解析网页源码获得文本再调用api进行情绪分析
此处由于涉及到爬虫,为防止被所爬网页检测,定义了多个user_agent模拟不同用户
# Predefined list of User-Agent strings; get_html picks one at random per request.
# NOTE: the 5th and the last entry are the same string.
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7",
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
]
若所爬取到的内容太大、超过百度api的输入字数上限,则需要进行文章切割,并遍历切分后的文本,将得到的积极指数和消极指数累加,最后根据累加结果计算情感类别并返回。
大概逻辑如下:
# Split text into consecutive pieces of at most `lenth` characters
def cut_text(text, lenth):
    """Cut ``text`` into consecutive pieces of ``lenth`` characters.

    Slicing is used instead of the regex ``'.{n}'`` because ``.`` does not
    match newlines: with multi-line text the regex skipped characters, and the
    leftover tail computed from the match count overlapped earlier pieces or
    grew far beyond ``lenth``.

    Args:
        text: the string to split.
        lenth: maximum number of characters per piece; must be positive.

    Returns:
        list[str]: the pieces in order; joined together they equal ``text``.
        No empty trailing piece is produced, and empty input gives ``[]``.

    Raises:
        ValueError: if ``lenth`` is not positive.
    """
    if lenth <= 0:
        raise ValueError("lenth must be a positive integer")
    return [text[start:start + lenth] for start in range(0, len(text), lenth)]
def get_emotion(data, api_type):
    """Run Baidu sentiment or emotion analysis on a piece of text.

    Text whose UTF-8 encoding is shorter than 2048 bytes is sent in one
    request; longer text is split with ``cut_text`` and analysed piece by piece.

    Args:
        data: the text to analyse (str).
        api_type: ``'sentiment'`` or ``'emotion'``.

    Returns:
        ``'sentiment'``: a dict with ``confidence``, ``positive``, ``negative``
        and ``sentiment`` (2 positive, 1 neutral, 0 negative). For split text
        ``confidence`` is None and ``positive`` / ``negative`` are the sums
        over all analysed pieces.
        ``'emotion'``: ``{'emotions': [...]}`` holding the API's ``items``.
        None if ``api_type`` is unknown or the API gave no usable result.
    """
    endpoints = {
        'sentiment': 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?charset=UTF-8&access_token={}',
        'emotion': 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion?access_token={}',
    }
    if api_type not in endpoints:
        return None
    # Ask for a current token rather than embedding one: a pasted token stops
    # working once it expires
    url = endpoints[api_type].format(get_access_token())
    max_text_bytes = 2048

    def request_items(text):
        """Send one piece of text; return the reply's non-empty 'items' list, else None."""
        try:
            res = requests.post(url, data=json.dumps({'text': text}), timeout=10)
            reply = res.json()
        except (requests.RequestException, ValueError):
            # Network failure, timeout, or a body that is not JSON
            return None
        items = reply.get('items') if isinstance(reply, dict) else None
        return items if items else None

    if len(data.encode()) < max_text_bytes:
        items = request_items(data)
        if items is None:
            return None
        if api_type == 'sentiment':
            first = items[0]
            return {
                'confidence': first['confidence'],
                'positive': first['positive_prob'],
                'negative': first['negative_prob'],
                'sentiment': first['sentiment']
            }
        return {'emotions': items}

    print("文章切分")
    # A character needs at most 4 bytes in UTF-8, so pieces of this many
    # characters stay below the byte threshold checked above; the former
    # 1500-character pieces reach about 4500 bytes for Chinese text.
    # NOTE(review): the emotion endpoint may enforce a smaller limit — confirm
    chunk_chars = (max_text_bytes - 1) // 4
    pieces = [piece for piece in cut_text(data, chunk_chars) if piece]
    replies = [items for items in map(request_items, pieces) if items is not None]
    if not replies:
        # Every piece failed: report failure instead of a made-up neutral result
        return None

    if api_type == 'sentiment':
        sum_positive = sum((items[0]['positive_prob'] for items in replies), 0.0)
        sum_negative = sum((items[0]['negative_prob'] for items in replies), 0.0)
        if sum_positive > sum_negative:
            sentiment = 2
        elif sum_positive == sum_negative:
            sentiment = 1
        else:
            sentiment = 0
        return {
            'confidence': None,
            'positive': sum_positive,
            'negative': sum_negative,
            'sentiment': sentiment
        }
    return {'emotions': [item for items in replies for item in items]}
然后定义函数 get_html.(此为获取网站源码并解析的逻辑)
- 使用 random.choice(USER_AGENTS) 从预定义的 USER_AGENTS 列表中随机选择一个 User-Agent 字符串。
- 将这个 User-Agent 放入 headers 字典中,用于模拟浏览器访问。
- 使用 requests.get 方法发送一个 GET 请求到指定的 url,并带上 headers。
- 从响应对象 response 中提取网页的 HTML 内容,并存储在变量 html 中。
- 使用 BeautifulSoup 库解析 HTML 内容,解析器选择 lxml。
- 选择所有 <p> 标签并提取其文本内容,最后返回提取的文本内容。
def get_html(url):
    headers = {
        'User-Agent': random.choice(USER_AGENTS)  # 随机选择一个User-Agent
    }  # 模拟浏览器访问
    response = requests.get(url, headers=headers)  # 请求访问网站
    response.encoding = 'utf-8'  # 手动指定编码格式为utf-8
    html = response.text  # 获取网页源码
    soup = BeautifulSoup(html, 'lxml')  # 初始化BeautifulSoup库,并设置解析器
    a = soup.select('p')
    text = ""
    for i in a:
        text += i.text
    return text
前端代码crawling.html如下
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>抓取网站文本分析</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <h1>抓取网站文本分析</h1>
    <!-- Posts the URL, the API type and the optional show_text flag to /crawling -->
    <form action="/crawling" method="post">
        <label for="url">URL:</label>
        <input type="text" id="url" name="url" required>
        <br>
        <label for="api_type">API类型:</label>
        <select id="api_type" name="api_type">
            <option value="sentiment">情感分析</option>
            <option value="emotion">情绪分析</option>
        </select>
        <br>
        <input type="checkbox" id="show_text" name="show_text" value="True">
        <label for="show_text">显示文本</label>
        <br>
        <button type="submit">分析</button>
    </form>
</body>
</html>
3.上传csv数据集进行情感分析(评论数据要在名为content列下)
定义上传路由 @app.route('/upload', methods=['GET', 'POST'])。
def upload():
    if request.method == 'POST':
        file = request.files['file']
        if file and file.filename.endswith('.csv'):
            comments = []
            csv_reader = csv.reader(file.read().decode('utf-8').splitlines())
            header = next(csv_reader)  # 跳过标题行
            if 'content' not in header:
                return "上传的csv数据集文件中必须有包含的评论列", 400
            content_index = header.index('content')  # 定义评论列名为“content”
            for row in csv_reader:
                if len(row) > content_index:  # 确保该行有足够的列
                    comments.append(row[content_index])
            comments_text = "\n".join(comments)
再调用 analyze_comments 函数对合并后的评论进行分析,并将分析结果传递给模板 result_pinglun.html 进行渲染并返回给用户:
            result = analyze_comments(comments_text)
            positive_comments = set()
            negative_comments = set()
            for item in result.get('items', []):
                comment_str = f"{item['prop']}: {item['adj']} - {item['abstract']}"
                if item['sentiment'] == 2:  # 2 表示积极情绪
                    positive_comments.add(comment_str)
                elif item['sentiment'] == 0:  # 0 表示消极情绪
                    negative_comments.add(comment_str)
            return render_template('result_pinglun.html', positive_comments=positive_comments, negative_comments=negative_comments)
        else:
            return "未找到文件,请选择要上传的文件", 400
    return render_template('upload.html')
前端upload.html如下
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>上传csv数据集</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
</head>
<body>
    <h1>请上传数据集</h1>
    <!-- multipart/form-data is required so the file reaches the /upload route -->
    <form action="/upload" method="post" enctype="multipart/form-data">
        <input type="file" name="file" accept=".csv"><br>
        <button type="submit">上传</button>
    </form>
</body>
</html>
关于主页的前端设计,笔者是选用的响应式模板进行修改。
下面是部分前端
</div>
<div class="offcanvas-body ms-lg-auto d-flex flex-column h-100">
<ul class="navbar-nav">
<li class="nav-item">
<a class="nav-link">情绪分析小站</a>
</li>
<li class="nav-item dropdown">
<a href="#" data-bs-toggle="dropdown" class="nav-link dropdown-toggle">注册和登录</a>
<ul class="dropdown-menu">
<li class="dropdown dropdown-submenu dropend">
<a href="{{ url_for('register') }}" class="dropdown-item dropdown-toggle">注册</a>
<li class="dropdown dropdown-submenu dropend">
<a href="{{ url_for('login') }}" class="dropdown-item dropdown-toggle">登录</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" data-bs-toggle="dropdown" class="nav-link dropdown-toggle">功能设计和使用教程</a>
<div class="dropdown-menu dropdown-lg">
<div class="dropdown-lg-content">
<div>
<h6 class="dropdown-header">功能
</h6>
<ul class="list-unstyled">
<li>
<a href="crawling" class="dropdown-item">网站文本抓取</a>
</li>
<li>
<a href="input" class="dropdown-item">输出文本分析</a>
</li>
<li>
<a href="upload" class="dropdown-item">数据集文本分析</a>
</li>
<li>
<a href="tutorial" class="dropdown-item">使用教程</a>
</li>
</ul>
</div>
三. 最后由于篇幅已经过长,关于部署到云服务器并通过域名访问网站的实现后期再写。
购买的阿里云香港服务器(不需要备案)
通过宝塔部署flask项目.
有关的Nginx、uWSGI配置问题
需要源文件可以找作者拿。
附完整后端代码
from flask import Flask, request, render_template, redirect, url_for, session
import requests
import json
from bs4 import BeautifulSoup
import re
import random
import csv
# Flask application object; the routes below register themselves on it
app = Flask(__name__)
app.secret_key = 'your_secret_key' # Secret key used for the session; placeholder value, replace before deploying
# Credentials used for the Baidu API calls (read by get_access_token)
# NOTE(review): these look like real keys hard-coded in source; presumably they
# should be loaded from configuration and rotated — confirm before publishing
API_KEY = "AFjZINkuAXFRfnvBAlKoBj0v"
SECRET_KEY = "TXNz7MClhlihE4HZ9JYRv7ZMNkpfOUVO"
# Mock user database: username -> plain-text password, kept in memory only
# (entries added by the register route live in this dict)
USERS = {
'user1': 'password1',
'user2': 'password2'
}
# Obtain the access token used by the upload route's API call
def get_access_token():
    """Request an access token from the Baidu OAuth endpoint.

    Returns:
        str: the ``access_token`` field of the OAuth reply.

    Raises:
        requests.RequestException: on network failure or timeout.
        RuntimeError: if the reply contains no ``access_token`` (for example
            because the API key / secret key pair was rejected).
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # requests waits forever by default, so always bound the call
    reply = requests.post(url, params=params, timeout=10).json()
    token = reply.get("access_token")
    if not token:
        # Previously str(None) == "None" was returned and silently used as a token
        raise RuntimeError("Baidu OAuth reply contained no access_token: {}".format(reply))
    return str(token)
# Call the comment opinion-extraction API
def analyze_comments(comments):
    """Send comment text to the Baidu ``comment_tag`` endpoint.

    Args:
        comments: the comment text to analyse, passed as the ``text`` field.

    Returns:
        The decoded JSON reply of the API.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag?charset=UTF-8&access_token=" + get_access_token()
    payload = json.dumps({
        "text": comments,
        "type": 9  # category type 9 is the one selected here
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Bound the wait: without a timeout a stalled API call blocks the worker forever
    response = requests.post(url, headers=headers, data=payload, timeout=10)
    return response.json()
# Upload route
@app.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form (GET) or analyse an uploaded csv of comments (POST).

    The csv must have a header row containing a ``content`` column. All values
    of that column are joined with newlines, sent to ``analyze_comments``, and
    the extracted opinions are grouped into positive (sentiment 2) and
    negative (sentiment 0) sets for the ``result_pinglun.html`` template.

    Returns 400 when the file is missing, is not a ``.csv``, is not UTF-8,
    is empty, or lacks the ``content`` column.
    """
    if request.method == 'POST':
        import io  # local import keeps this block self-contained

        file = request.files['file']
        # Accept ".CSV" as well; browsers keep the user's file name casing
        if file and file.filename.lower().endswith('.csv'):
            try:
                # utf-8-sig drops the BOM that Excel writes; otherwise the first
                # header cell becomes '\ufeffcontent' and the column is "missing"
                decoded = file.read().decode('utf-8-sig')
            except UnicodeDecodeError:
                return "文件编码错误,请上传UTF-8编码的csv文件", 400
            # newline='' lets the csv module handle line breaks inside quoted
            # fields, which splitlines() would cut into separate rows
            csv_reader = csv.reader(io.StringIO(decoded, newline=''))
            header = next(csv_reader, None)  # header row; None for an empty file
            if header is None or 'content' not in header:
                return "上传的csv数据集文件中必须有包含的评论列", 400
            content_index = header.index('content')  # the comment column is named "content"
            comments = [
                row[content_index]
                for row in csv_reader
                if len(row) > content_index  # skip rows too short to have the column
            ]
            comments_text = "\n".join(comments)
            result = analyze_comments(comments_text)
            positive_comments = set()
            negative_comments = set()
            for item in result.get('items', []):
                comment_str = f"{item['prop']}: {item['adj']} - {item['abstract']}"
                if item['sentiment'] == 2:  # 2 means positive
                    positive_comments.add(comment_str)
                elif item['sentiment'] == 0:  # 0 means negative
                    negative_comments.add(comment_str)
            return render_template('result_pinglun.html', positive_comments=positive_comments, negative_comments=negative_comments)
        else:
            return "未找到文件,请选择要上传的文件", 400
    return render_template('upload.html')
# Home page route
@app.route('/')
def index():
    """Render the home page template."""
    page = render_template('index.html')
    return page
# Registration route
@app.route('/register', methods=['GET', 'POST'])
def register():
    """Show the registration form (GET) or create a user (POST).

    A new username is stored in the in-memory ``USERS`` dict together with the
    submitted password and the client is redirected to the login page; an
    existing username is rejected with status 400.
    """
    if request.method != 'POST':
        return render_template('register.html')

    form = request.form
    username = form['username']
    password = form['password']
    if username in USERS:
        return "用户名已存在", 400
    USERS[username] = password
    login_page = url_for('login')
    return redirect(login_page)
# Login route
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Show the login form (GET) or check the submitted credentials (POST).

    On a match with ``USERS`` the username is stored in the session and the
    client is redirected to the home page; otherwise status 401 is returned.
    """
    if request.method != 'POST':
        return render_template('login.html')

    username = request.form['username']
    password = request.form['password']
    # Reject unknown users and wrong passwords alike
    if username not in USERS or USERS[username] != password:
        return "登录失败", 401
    session['username'] = username
    return redirect(url_for('index'))
# URL analysis route
@app.route('/crawling', methods=['GET', 'POST'])
def crawling():
    """Show the crawl form (GET) or analyse the text of a web page (POST).

    The form supplies ``url``, ``api_type`` and the optional ``show_text``
    checkbox. The page text from ``get_html`` is passed to ``get_emotion`` and
    rendered with the template matching the API type; the text itself is only
    handed to the template when ``show_text`` was ticked.

    Returns 400 for a URL that is not http(s), 502 when the page cannot be
    fetched, and 500 when the analysis yields no result.
    """
    if request.method == 'POST':
        from urllib.parse import urlparse  # local import keeps this block self-contained

        url = request.form['url'].strip()
        api_type = request.form['api_type']
        show_text = request.form.get('show_text', False)

        # The URL comes straight from the user: only fetch real web addresses.
        # NOTE: this does not stop requests to internal hosts; add a host
        # allow-list or IP check before exposing the service publicly.
        parsed = urlparse(url)
        if parsed.scheme not in ('http', 'https') or not parsed.netloc:
            return "请输入以 http:// 或 https:// 开头的有效网址", 400
        try:
            text = get_html(url)
        except requests.RequestException:
            # Unreachable host, timeout, invalid address, ...
            return "无法访问该网址", 502

        result = get_emotion(text, api_type)
        if result:
            shown_text = text if show_text else None
            if api_type == 'sentiment':
                sentiment_text = "积极" if result['sentiment'] == 2 else ("中性" if result['sentiment'] == 1 else "消极")
                return render_template('result.html', text=shown_text, result=result, sentiment_text=sentiment_text)
            elif api_type == 'emotion':
                return render_template('result_emotion.html', text=shown_text, result=result)
        else:
            return "情感分析失败", 500
    return render_template('crawling.html')
# Route that analyses text typed directly into the form
@app.route('/input', methods=['GET', 'POST'])
def input_text():
    """Show the input form (GET) or analyse the submitted text (POST).

    The form supplies ``text`` and ``api_type``; the result of ``get_emotion``
    is rendered with the template matching the chosen API type, and a falsy
    result is reported as a 500 error.
    """
    if request.method != 'POST':
        return render_template('input.html')

    text = request.form['text']
    api_type = request.form['api_type']
    result = get_emotion(text, api_type)
    if not result:
        return "情感分析失败", 500

    if api_type == 'sentiment':
        # Map the numeric class to its display label: 2 / 1 / anything else
        if result['sentiment'] == 2:
            sentiment_text = "积极"
        elif result['sentiment'] == 1:
            sentiment_text = "中性"
        else:
            sentiment_text = "消极"
        return render_template('result.html', text=None, result=result, sentiment_text=sentiment_text)
    if api_type == 'emotion':
        return render_template('result_emotion.html', text=None, result=result)
    return render_template('input.html')
# Usage tutorial page
@app.route('/tutorial')
def tutorial():
    """Render the tutorial template."""
    page = render_template('tutorial.html')
    return page
# Predefined list of User-Agent strings; get_html picks one at random per request.
# NOTE: the 5th and the last entry are the same string.
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7",
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
]
# Split text into consecutive pieces of at most `lenth` characters
def cut_text(text, lenth):
    """Cut ``text`` into consecutive pieces of ``lenth`` characters.

    Slicing is used instead of the regex ``'.{n}'`` because ``.`` does not
    match newlines: with multi-line text the regex skipped characters, and the
    leftover tail computed from the match count overlapped earlier pieces or
    grew far beyond ``lenth``.

    Args:
        text: the string to split.
        lenth: maximum number of characters per piece; must be positive.

    Returns:
        list[str]: the pieces in order; joined together they equal ``text``.
        No empty trailing piece is produced, and empty input gives ``[]``.

    Raises:
        ValueError: if ``lenth`` is not positive.
    """
    if lenth <= 0:
        raise ValueError("lenth must be a positive integer")
    return [text[start:start + lenth] for start in range(0, len(text), lenth)]
def get_emotion(data, api_type):
    """Run Baidu sentiment or emotion analysis on a piece of text.

    Text whose UTF-8 encoding is shorter than 2048 bytes is sent in one
    request; longer text is split with ``cut_text`` and analysed piece by piece.

    Args:
        data: the text to analyse (str).
        api_type: ``'sentiment'`` or ``'emotion'``.

    Returns:
        ``'sentiment'``: a dict with ``confidence``, ``positive``, ``negative``
        and ``sentiment`` (2 positive, 1 neutral, 0 negative). For split text
        ``confidence`` is None and ``positive`` / ``negative`` are the sums
        over all analysed pieces.
        ``'emotion'``: ``{'emotions': [...]}`` holding the API's ``items``.
        None if ``api_type`` is unknown or the API gave no usable result.
    """
    endpoints = {
        'sentiment': 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?charset=UTF-8&access_token={}',
        'emotion': 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion?access_token={}',
    }
    if api_type not in endpoints:
        return None
    # Ask for a current token rather than embedding one: a pasted token stops
    # working once it expires
    url = endpoints[api_type].format(get_access_token())
    max_text_bytes = 2048

    def request_items(text):
        """Send one piece of text; return the reply's non-empty 'items' list, else None."""
        try:
            res = requests.post(url, data=json.dumps({'text': text}), timeout=10)
            reply = res.json()
        except (requests.RequestException, ValueError):
            # Network failure, timeout, or a body that is not JSON
            return None
        items = reply.get('items') if isinstance(reply, dict) else None
        return items if items else None

    if len(data.encode()) < max_text_bytes:
        items = request_items(data)
        if items is None:
            return None
        if api_type == 'sentiment':
            first = items[0]
            return {
                'confidence': first['confidence'],
                'positive': first['positive_prob'],
                'negative': first['negative_prob'],
                'sentiment': first['sentiment']
            }
        return {'emotions': items}

    print("文章切分")
    # A character needs at most 4 bytes in UTF-8, so pieces of this many
    # characters stay below the byte threshold checked above; the former
    # 1500-character pieces reach about 4500 bytes for Chinese text.
    # NOTE(review): the emotion endpoint may enforce a smaller limit — confirm
    chunk_chars = (max_text_bytes - 1) // 4
    pieces = [piece for piece in cut_text(data, chunk_chars) if piece]
    replies = [items for items in map(request_items, pieces) if items is not None]
    if not replies:
        # Every piece failed: report failure instead of a made-up neutral result
        return None

    if api_type == 'sentiment':
        sum_positive = sum((items[0]['positive_prob'] for items in replies), 0.0)
        sum_negative = sum((items[0]['negative_prob'] for items in replies), 0.0)
        if sum_positive > sum_negative:
            sentiment = 2
        elif sum_positive == sum_negative:
            sentiment = 1
        else:
            sentiment = 0
        return {
            'confidence': None,
            'positive': sum_positive,
            'negative': sum_negative,
            'sentiment': sentiment
        }
    return {'emotions': [item for items in replies for item in items]}
def get_html(url):
    """Download a web page and return the concatenated text of its ``<p>`` tags.

    Args:
        url: address of the page to fetch.

    Returns:
        str: the text of every ``<p>`` element, joined without a separator
        (empty when the page has no ``<p>`` elements).

    Raises:
        requests.RequestException: on network failure, timeout or invalid URL.
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS)  # pick a random User-Agent to look like a browser
    }
    # Bound the wait so a slow or silent site cannot hang the request handler
    response = requests.get(url, headers=headers, timeout=10)
    response.encoding = 'utf-8'  # decode the body as utf-8 regardless of the declared charset
    soup = BeautifulSoup(response.text, 'lxml')  # parse the page source with the lxml parser
    # join() builds the result in one pass instead of repeated string +=
    return "".join(paragraph.text for paragraph in soup.select('p'))
@app.route('/logout')
def logout():
    """Remove the username from the session and redirect to the home page."""
    session.pop('username', None)
    home_page = url_for('index')
    return redirect(home_page)
if __name__ == '__main__':
    import os

    # Debug mode turns on the interactive debugger, which lets anyone who can
    # trigger an error run code on the server. Keep it off unless explicitly
    # requested with FLASK_DEBUG=1 (e.g. on a local development machine).
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
标签:return,api,Flask,text,附超,json,HTML,result,data
From: https://blog.csdn.net/2401_83561560/article/details/141028321