标签：apple scaler keras dataset preprocessing prices import stock sklearn

sklearn.preprocessing + keras

sklearn 的数据预处理可以对业务数据进行规范化，和规范化后的数据还原，

经常跟其他的模型配合使用。

例如如下情况：

https://github.com/influxdata/influxdb-client-python/blob/master/notebooks/stock-predictions.ipynb

Example InfluxDB Jupyter notebook.

This example demonstrates how to query data from InfluxDB 2.0 using Flux and predict the stock price. (ML example using Keras)

Prerequisites

import testing dataset before running this notebook using python3 ./stock_predictions_import_data.py
install the following dependencies
- pip3 install keras
- pip3 install matplotlib
- pip3 install pyplot
- pip3 install tensorflow
- pip3 install scikit-learn

# Import a Client

import os
import sys

sys.path.insert(0, os.path.abspath('../'))

from __future__ import print_function

import math
import os

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
# Import the layers from the public `keras.layers` namespace. The
# `keras.layers.core` / `keras.layers.recurrent` submodules are internal
# paths that newer Keras releases no longer provide.
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from influxdb_client import InfluxDBClient
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

# parameters to be set ("optimum" hyperparameters obtained from grid search):
# - look_back: number of past time steps in each input window; it is passed
#   to create_dataset() and used as the LSTM input length further down
# - epochs / batch_size: forwarded unchanged to model.fit()
look_back = 7
epochs = 100
batch_size = 32

# fix random seed for reproducibility
# NOTE(review): this seeds NumPy's global RNG only; whether it is enough to
# make the Keras training run deterministic depends on the backend - confirm.
np.random.seed(7)

# alternative data source: read all prices from a CSV file using pandas
#prices_dataset =  pd.read_csv('./prices-split-adjusted.csv', header=0)

# read prices from InfluxDB 2.0
# The Flux query below selects the whole time range (start: 0 up to now) of
# the "financial-analysis" measurement, keeps only the "close" field of the
# AAPL symbol, drops the bookkeeping columns and renames `_value` to `close`
# so the resulting DataFrame exposes the prices as `prices_dataset.close`.
client = InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org", debug=False)
query='''
from(bucket:"my-bucket")
        |> range(start: 0, stop: now())
        |> filter(fn: (r) => r._measurement == "financial-analysis")
        |> filter(fn: (r) => r.symbol == "AAPL")
        |> filter(fn: (r) => r._field == "close")
        |> drop(columns: ["_start", "result", "_stop", "table", "_field","_measurement"])
        |> rename(columns: {_value: "close"})
'''
# run the query and show the first rows as a quick sanity check
prices_dataset = client.query_api().query_data_frame(org="my-org", query=query)
display(prices_dataset.head())

# save Apple's stock values as type of floating point number
apple_stock_prices = prices_dataset.close.values.astype('float32')

# reshape to a column vector: the scaler works on 2-D (n_samples, n_features) input
apple_stock_prices = apple_stock_prices.reshape(-1, 1)

# chronological split point: the first 67% of the series is used for training,
# the remainder for testing
train_size = int(len(apple_stock_prices) * 0.67)
test_size = len(apple_stock_prices) - train_size

# normalize the dataset
# The scaler is fitted on the training rows only, so the min/max of the
# held-out test period do not leak into training. The same fitted scaling is
# then applied to the whole series, which means test values may fall slightly
# outside [0, 1]; scaler.inverse_transform() still restores the original prices.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(apple_stock_prices[:train_size])
apple_stock_prices = scaler.transform(apple_stock_prices)

# split data into training set and test set
train, test = apple_stock_prices[:train_size, :], apple_stock_prices[train_size:, :]

print('Split data into training set and test set... Number of training samples/ test samples:', len(train), len(test))

# convert an array of values into a time series dataset 
# in form 
#                     X                     Y
# t-look_back+1, t-look_back+2, ..., t     t+1

def create_dataset(dataset, look_back):
    """Slice a 2-D series into overlapping input windows and next-step targets.

    Only column 0 of ``dataset`` is read. Window ``k`` holds rows
    ``k .. k + look_back - 1`` and its target is row ``k + look_back``.

    Args:
        dataset: 2-D array indexed as ``dataset[row, 0]``.
        look_back: number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: ``X`` stacks the windows (one per
        row) and ``Y`` holds the matching targets.
        ``len(dataset) - look_back - 1`` samples are produced, so the last
        row of ``dataset`` is never used as a target; the plotting offsets
        later in this script rely on exactly that sample count.
    """
    sample_count = len(dataset) - look_back - 1
    windows = [dataset[k:k + look_back, 0] for k in range(sample_count)]
    targets = [dataset[k + look_back, 0] for k in range(sample_count)]
    return np.array(windows), np.array(targets)

# convert Apple's stock price data into time series dataset
# (each sample is a window of `look_back` scaled prices, the target is the next price)
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# reshape input of the LSTM to be format [samples, time steps, features]
# (a trailing axis of size 1 is added: there is a single feature, the scaled close price)
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

# create and fit the LSTM network:
# one LSTM layer with 4 units reading `look_back` time steps of 1 feature,
# followed by a single-unit Dense layer that outputs the predicted value;
# trained with mean squared error loss and the Adam optimizer
model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size)

# print the layer/parameter overview of the fitted model
model.summary()

# make predictions (still in the scaled value range)
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# invert predictions and targets to unscaled
# The 1-D target vectors are wrapped in a list so the scaler receives 2-D
# input; the result is therefore a single row, read back as index [0] below.
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

# calculate root mean squared error (in original price units, after the inverse scaling above)
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

# Training predictions on a NaN-filled canvas with the shape of the full
# series: the first prediction belongs to index `look_back`, because each
# sample consumes `look_back` preceding values.
trainPredictPlot = np.full_like(apple_stock_prices, np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

# Test predictions on their own NaN-filled canvas, shifted past the training
# part and the test set's own look-back window; NaN cells are not drawn.
testPredictPlot = np.full_like(apple_stock_prices, np.nan)
testPredictPlot[len(trainPredict) + 2 * look_back + 1:len(apple_stock_prices) - 1, :] = testPredict

# Draw the unscaled baseline first, then both prediction overlays.
for series in (scaler.inverse_transform(apple_stock_prices), trainPredictPlot, testPredictPlot):
    plt.plot(series)
plt.show()

preprocessing

https://scikit-learn.org/stable/modules/preprocessing.html

from sklearn import preprocessing
import numpy as np
# toy training matrix: 3 samples (rows) x 3 features (columns)
X_train = np.array([[ 1., -1.,  2.],
                    [ 2.,  0.,  0.],
                    [ 0.,  1., -1.]])
# fit() learns the scaling statistics from X_train and returns the fitted scaler
scaler = preprocessing.StandardScaler().fit(X_train)
# bare expressions like the ones below only display their value in a notebook/REPL
scaler

# statistics learned by fit(), exposed as the `mean_` attribute
scaler.mean_

# scaling factors learned by fit(), exposed as the `scale_` attribute
scaler.scale_

# apply the learned scaling to the data
X_scaled = scaler.transform(X_train)

对于模型训练前需要进行规范化，

模型预测值需要反规范化的情况，例如上面的时间序列

对于这种情况，不仅仅模型需要可保存，

规范化转换器也需要可保存，

joblib提供保存功能：

https://www.codenong.com/41993565/#google_vignette

# joblib is a standalone package (installed together with scikit-learn);
# the old `sklearn.externals.joblib` alias has been removed from scikit-learn,
# so import joblib directly.
import joblib

# persist the fitted scaler so predictions can be un-scaled in a later session
scaler_filename = "scaler.save"
joblib.dump(scaler, scaler_filename)

# And now to load...
# NOTE: joblib files are pickle-based; only load files from a trusted source.
scaler = joblib.load(scaler_filename)

标签：apple,scaler,keras,dataset,preprocessing,prices,import,stock,sklearn
From： https://www.cnblogs.com/lightsong/p/17963926

sklearn.preprocessing + keras

sklearn.preprocessing + keras

Example InfluxDB Jupyter notebook.

Prerequisites

preprocessing

相关文章

赞助商

阅读排行