I built a website with Dash (Python) that displays all the data from a CSV file in a table that can be filtered.
I want users to be able to export the data from that table: when no filter is applied they should get the full dataset, and when the data is filtered they should get the filtered data.
For this I use the dcc.Download component, which works well, and I use df (from df = pd.read_csv("./data.csv")) as a global variable so that I can reuse it in my export callback.
Here is my code:
from dash import Dash, dash_table, dcc, html, State
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import csv
import time
import xlsxwriter
from datetime import datetime
from dash_extensions.enrich import Output, DashProxy, Input, MultiplexerTransform
import os

app = DashProxy(external_stylesheets=[dbc.themes.BOOTSTRAP], transforms=[MultiplexerTransform()])
server = app.server

df = pd.read_csv("./data.csv")
df = df.fillna("NaN")
PAGE_SIZE = 20

# Layout
app.layout = html.Div(children=[
    dcc.Download(id="download-dataframe-csv"),
    dbc.Card([
        dbc.CardBody([
            dash_table.DataTable(
                id='table-sorting-filtering',
                columns=[{'name': i, 'id': i} for i in df.columns],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
                sort_action='custom',
                sort_mode='multi',
                sort_by=[],
                style_data={'text-align': 'center'},
                style_header={
                    'backgroundColor': 'white',
                    'fontWeight': 'bold',
                    'text-align': 'center'
                },
                style_cell={'padding': '5px'},
                style_as_list_view=True,
            )]
        )],
        style={"margin-left": "15px", "margin-right": "15px"}
    ),
    html.Button("Export", id="button_export"),  # export button used by the download callback below
    dcc.Interval(
        id='interval-component',
        interval=1*1000,  # in milliseconds
        n_intervals=0
    ),
])

operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains ']]

def split_filter_part(filter_part):
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]
                value_part = value_part.strip()
                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part
                return name, operator_type[0].strip(), value
    return [None] * 3

# Display data in table and manage filtering
@app.callback(
    Output('table-sorting-filtering', 'data'),
    Input('table-sorting-filtering', "page_current"),
    Input('table-sorting-filtering', "page_size"),
    Input('table-sorting-filtering', 'sort_by'),
    Input('table-sorting-filtering', 'filter_query'),
    Input('interval-component', 'n_intervals'))
def update_table(page_current, page_size, sort_by, filter, n):
    global df
    global date_time
    df = pd.read_csv("./data.csv")
    df = df.fillna("NaN")
    date_time = last_modification_time_of_csv("./data.csv")  # helper defined elsewhere (not shown)
    filtering_expressions = filter.split(' && ')
    for filter_part in filtering_expressions:
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            df = df.loc[getattr(df[col_name], operator)(filter_value)]
        elif operator == 'contains':
            if type(filter_value) is str:
                df = df.loc[df[col_name].str.contains(filter_value)]
    if len(sort_by):
        df = df.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[
                col['direction'] == 'asc'
                for col in sort_by
            ],
            inplace=False
        )
    page = page_current
    size = page_size
    return df.iloc[page * size: (page + 1) * size].to_dict('records')

# Export button
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    prevent_initial_call=True,
)
def export_on_click(n_clicks):
    global df
    return dcc.send_data_frame(df.to_excel, "export.xlsx")

if __name__ == '__main__':
    app.run_server(debug=True, host='0.0.0.0')
I have noticed some problems when exporting the data. Sometimes it works fine, and sometimes the export contains data I never filtered at all. So I am wondering whether a global variable is a good solution here, since several users use my website at the same time.
I saw that there is a way to do this in Dash with State, but I don't really understand how it works or whether it is something I can use in my case.
Can anyone help me?
Thanks
You're right: in Dash, using a global variable to store data that multiple users work with is not a good solution. It leads to inconsistent data and errors between users, because all users share the same copy of the data.
A better approach is to store the filtered dataframe in a dcc.Store component and then read it back in your export callback, as in the sketch and the full updated code below.
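To make the mechanism easier to see in isolation, here is a minimal, self-contained sketch of the dcc.Store + State round trip. Everything in it (the component ids, the text box, the naive contains-filter) is placeholder material for illustration, not taken from your app. The important point is that State only reads the current value of the store when the callback runs; it is the button click (the Input) that actually triggers it.

from dash import Dash, dcc, html, Input, Output, State
import pandas as pd

app = Dash(__name__)

app.layout = html.Div([
    dcc.Input(id="text-filter", placeholder="filter value"),  # placeholder filter control
    dcc.Store(id="store-filtered"),                           # holds the filtered records per browser session
    html.Button("Export", id="btn-export"),
    dcc.Download(id="download"),
])

# Writes the filtered records into the store whenever the filter value changes.
@app.callback(Output("store-filtered", "data"), Input("text-filter", "value"))
def filter_data(value):
    df = pd.read_csv("./data.csv")
    if value:
        # naive contains-filter across all columns, just for the sketch
        mask = df.astype(str).apply(lambda col: col.str.contains(value, regex=False)).any(axis=1)
        df = df[mask]
    return df.to_dict("records")  # must be JSON-serialisable

# Reads the stored records with State; only the button click triggers the download.
@app.callback(Output("download", "data"),
              Input("btn-export", "n_clicks"),
              State("store-filtered", "data"),
              prevent_initial_call=True)
def export(n_clicks, stored):
    return dcc.send_data_frame(pd.DataFrame(stored).to_excel, "export.xlsx")

if __name__ == "__main__":
    app.run_server(debug=True)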
Here is the full updated code:
from dash import Dash, dash_table, dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
from dash_extensions.enrich import Output, DashProxy, Input, MultiplexerTransform

app = DashProxy(external_stylesheets=[dbc.themes.BOOTSTRAP], transforms=[MultiplexerTransform()])
server = app.server

df = pd.read_csv("./data.csv")
df = df.fillna("NaN")
PAGE_SIZE = 20

app.layout = html.Div(
    children=[
        dcc.Download(id="download-dataframe-csv"),
        dbc.Card(
            [
                dbc.CardBody(
                    [
                        dash_table.DataTable(
                            id='table-sorting-filtering',
                            columns=[{'name': i, 'id': i} for i in df.columns],
                            page_current=0,
                            page_size=PAGE_SIZE,
                            page_action='custom',
                            filter_action='custom',
                            filter_query='',
                            sort_action='custom',
                            sort_mode='multi',
                            sort_by=[],
                            style_data={'text-align': 'center'},
                            style_header={
                                'backgroundColor': 'white',
                                'fontWeight': 'bold',
                                'text-align': 'center'
                            },
                            style_cell={'padding': '5px'},
                            style_as_list_view=True,
                        )
                    ]
                )
            ],
            style={"margin-left": "15px", "margin-right": "15px"}
        ),
        dcc.Store(id='filtered-data'),  # holds the filtered data
        html.Button("Download filtered data", id="button_export"),
        dcc.Interval(
            id='interval-component',
            interval=1 * 1000,  # in milliseconds
            n_intervals=0
        ),
    ]
)

operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains ']]

def split_filter_part(filter_part):
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]
                value_part = value_part.strip()
                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part
                return name, operator_type[0].strip(), value
    return [None] * 3

# Display data in table and manage filtering
@app.callback(
    Output('table-sorting-filtering', 'data'),
    Output('filtered-data', 'data'),  # write the filtered data into dcc.Store
    Input('table-sorting-filtering', "page_current"),
    Input('table-sorting-filtering', "page_size"),
    Input('table-sorting-filtering', 'sort_by'),
    Input('table-sorting-filtering', 'filter_query'),
    Input('interval-component', 'n_intervals'))
def update_table(page_current, page_size, sort_by, filter, n):
    df = pd.read_csv("./data.csv")
    df = df.fillna("NaN")
    filtering_expressions = filter.split(' && ')
    for filter_part in filtering_expressions:
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            df = df.loc[getattr(df[col_name], operator)(filter_value)]
        elif operator == 'contains':
            if type(filter_value) is str:
                df = df.loc[df[col_name].str.contains(filter_value)]
    if len(sort_by):
        df = df.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[
                col['direction'] == 'asc'
                for col in sort_by
            ],
            inplace=False
        )
    page = page_current
    size = page_size
    return df.iloc[page * size: (page + 1) * size].to_dict('records'), df.to_dict('records')

# Export button
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    State('filtered-data', 'data'),  # read the filtered data back from dcc.Store
    prevent_initial_call=True,
)
def export_on_click(n_clicks, filtered_data):
    df_filtered = pd.DataFrame.from_records(filtered_data)
    # to_excel needs an Excel writer engine such as openpyxl or xlsxwriter installed
    return dcc.send_data_frame(df_filtered.to_excel, "export.xlsx")

if __name__ == '__main__':
    app.run_server(debug=True, host='0.0.0.0')
In this updated code:

- We added a dcc.Store component that holds the filtered dataframe.
- The update_table callback now also returns the filtered dataframe, which is written into the dcc.Store component.
- The export_on_click callback now reads the filtered dataframe back from the dcc.Store component and exports it to an Excel file.

With these changes there is no longer any need for a global variable, and each user gets their own copy of the filtered data to export.
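One trade-off to be aware of: dcc.Store keeps the filtered records in the user's browser, so with a very large CSV the whole filtered dataset travels between server and client on every table update. If that ever becomes a problem, a possible alternative is sketched below: pass the table's current filter_query and sort_by into the export callback as State and re-apply the same filtering there, so only the query strings are needed at export time. This is only a sketch, not part of the updated code above; it reuses the operators list and split_filter_part() defined earlier and would replace the export_on_click callback shown above.

# Alternative export callback (sketch): re-apply the table's current filter and sort
# server-side at download time instead of keeping the filtered dataframe in dcc.Store.
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    State('table-sorting-filtering', 'filter_query'),
    State('table-sorting-filtering', 'sort_by'),
    prevent_initial_call=True,
)
def export_on_click(n_clicks, filter_query, sort_by):
    dff = pd.read_csv("./data.csv").fillna("NaN")
    # same filtering logic as update_table, applied to a fresh copy of the data
    for filter_part in filter_query.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains' and isinstance(filter_value, str):
            dff = dff.loc[dff[col_name].str.contains(filter_value)]
    if sort_by:
        dff = dff.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[col['direction'] == 'asc' for col in sort_by],
        )
    return dcc.send_data_frame(dff.to_excel, "export.xlsx")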