Autoencoder
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import numpy as np

print("start")

def train_model():
    mnist = tf.keras.datasets.mnist
    # Load the data: 60k training images, 10k test images
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Scale pixels to [-0.5, 0.5] and flatten each 28x28 grayscale image to 784 values
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)
    # Dimensionality of the latent space
    latent_dim = 128
    # Input: a flattened 28x28 grayscale image
    input_img = tf.keras.Input(shape=(784,))
    # Encoder
    encoded = Dense(128, activation="relu")(input_img)
    encoded = Dense(64, activation="relu")(encoded)
    encoded = Dense(10, activation="relu")(encoded)
    encoder_output = Dense(latent_dim)(encoded)
    # Decoder
    dencoded = Dense(10, activation="relu")(encoder_output)
    dencoded = Dense(64, activation="relu")(dencoded)
    dencoded = Dense(128, activation="relu")(dencoded)
    dencoded = Dense(784, activation="tanh")(dencoded)
    autoencoder = Model(input_img, dencoded)
    encoder = Model(input_img, encoder_output)
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5)
    autoencoder.compile(optimizer=adam_optimizer, loss="mse")
    autoencoder.fit(x_train, x_train, epochs=5, batch_size=10, shuffle=True)
    autoencoder.save("autoencoder.h5")
    #encoder.save("encoder.h5")
    # Encode the test set and scatter the first two latent dimensions, colored by label
    encoded_imgs = encoder.predict(x_test)
    print(encoded_imgs.shape)
    plt.scatter(encoded_imgs[:, 0], encoded_imgs[:, 1], c=y_test)
    plt.show()

train_model()
print("end")
Prediction
#coding=utf-8
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

print("start")

def cv2_display(src):
    cv2.imshow('src', src)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def predict_model():
    mnist = tf.keras.datasets.mnist
    # Load the data: 60k training images, 10k test images
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Keep the first 10 test images and save the first one as a reference
    x_test = x_test[:10]
    cv2.imwrite("test.png", x_test[0])
    # Scale pixels to [-0.5, 0.5] and flatten to 784 values
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)
    # Load the trained autoencoder and reconstruct the test images
    autoencoder = load_model("autoencoder.h5")
    moto_img = autoencoder.predict(x_test)
    print(moto_img.shape)
    # Undo the preprocessing and save the first reconstruction
    moto_src = np.reshape(moto_img[0], (28, 28))
    moto_src = (moto_src + 0.5) * 255.0
    cv2.imwrite("test_output.png", moto_src)

predict_model()
print("end")
Original image
Reconstructed image (the autoencoder's output)
Training the encoder and decoder separately on your own data
Encoder
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

print("start")

def cv2_display(src):
    cv2.imshow('src', src)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def train_model():
    mnist = tf.keras.datasets.mnist
    # Load the data: 60k training images, 10k test images
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Scale pixels to [-0.5, 0.5] and flatten to 784 values
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)
    # Dimensionality of the latent space
    latent_dim = 128
    # Encoder: flattened 28x28 image -> 128-dimensional code
    input_img_1 = tf.keras.Input(shape=(784,))
    encoded = Dense(128, activation="relu")(input_img_1)
    encoded = Dense(64, activation="relu")(encoded)
    encoded = Dense(10, activation="relu")(encoded)
    encoder_output = Dense(latent_dim)(encoded)
    encoder = Model(input_img_1, encoder_output)
    # Note: the encoder is saved with its freshly initialized weights;
    # no training happens in this script.
    encoder.save("transform_128_encoder.h5")
    # Encode the training and test sets and save the codes for the decoder
    Y_train = encoder.predict(x_train)
    Y_test = encoder.predict(x_test)
    np.save("Y_train.npy", Y_train)
    np.save("Y_test.npy", Y_test)

train_model()
print("end")
Note: this converts each 28x28 handwritten digit into a 128-dimensional vector; the dimensionality can be chosen freely.
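For example, a minimal sketch of encoding a single test image into its 128-dimensional vector, assuming transform_128_encoder.h5 was saved by the script above:

# Minimal sketch: encode one MNIST test image with the saved encoder.
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

(_, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
img = x_test[0].astype('float32') / 255.0 - 0.5   # same preprocessing as above
img = img.reshape(1, 784)                          # batch of one flattened image

encoder = load_model("transform_128_encoder.h5")
code = encoder.predict(img)
print(code.shape)   # expected: (1, 128)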
Decoder
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

print("start")

def cv2_display(src):
    cv2.imshow('src', src)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def train_model():
    mnist = tf.keras.datasets.mnist
    # Load the data: 60k training images, 10k test images
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Scale pixels to [-0.5, 0.5] and flatten to 784 values (these are the training targets)
    x_train = x_train.astype('float32') / 255.0 - 0.5
    x_test = x_test.astype('float32') / 255.0 - 0.5
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    print(x_train.shape, x_test.shape)
    # Decoder: 128-dimensional code -> flattened 28x28 image
    input_img_2 = tf.keras.Input(shape=(128,))
    dencoded = Dense(10, activation="relu")(input_img_2)
    dencoded = Dense(64, activation="relu")(dencoded)
    dencoded = Dense(128, activation="relu")(dencoded)
    dencoded = Dense(784, activation="tanh")(dencoded)
    dencoder = Model(input_img_2, dencoded)
    # Train the decoder to map the saved codes back to the original images
    Y = np.load("Y_train.npy")
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    dencoder.compile(optimizer=adam_optimizer, loss="mse")
    dencoder.fit(Y, x_train, epochs=100, batch_size=60, shuffle=True)
    dencoder.save("transform_784_encoder.h5")

train_model()
print("end")
Note: the decoder maps the 128-dimensional vectors back to handwritten digits. It has to be trained, since it performs the inverse of the encoding operation.
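Once the decoder has been trained and saved, the two models can be chained into a full round trip. A minimal sketch, assuming both transform_128_encoder.h5 and transform_784_encoder.h5 exist:

# Minimal sketch: full round trip, image -> 128-D code -> reconstructed image.
# Assumes both models were saved by the scripts above.
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model

(_, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
img = x_test[0].astype('float32') / 255.0 - 0.5
img = img.reshape(1, 784)

encoder = load_model("transform_128_encoder.h5")
dencoder = load_model("transform_784_encoder.h5")

code = encoder.predict(img)      # (1, 128)
recon = dencoder.predict(code)   # (1, 784)
recon = (recon.reshape(28, 28) + 0.5) * 255.0
cv2.imwrite("roundtrip_output.png", recon)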
Reconstructing the data
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
import matplotlib.pyplot as plt
import numpy as np
import cv2

print("start")

def cv2_display(src):
    cv2.imshow('src', src)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def predict_model():
    # Load the saved 128-dimensional codes for the test set
    Y = np.load("Y_test.npy")
    print(Y.shape)
    # Load the trained decoder and reconstruct the images
    dencoder = load_model("transform_784_encoder.h5")
    encoded_imgs = dencoder.predict(Y)
    print(encoded_imgs.shape)
    # Undo the preprocessing and save the first reconstruction
    predict_src = np.reshape(encoded_imgs[0], (28, 28))
    predict_src = (predict_src + 0.5) * 255.0
    cv2.imwrite("1_output.png", predict_src)

predict_model()
print("end")
Note: the reconstruction differs slightly from the original; fine details are missing and the image is noticeably less sharp.
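To put a rough number on that quality drop, the reconstructions can be compared with the originals, for example with mean squared error and PSNR. A small sketch, assuming Y_test.npy and transform_784_encoder.h5 were produced by the scripts above:

# Rough sketch: measure reconstruction quality over the whole test set.
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

(_, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_test = x_test.astype('float32') / 255.0 - 0.5     # same preprocessing as above
x_test = x_test.reshape(x_test.shape[0], -1)

dencoder = load_model("transform_784_encoder.h5")
recon = dencoder.predict(np.load("Y_test.npy"))

mse = np.mean((recon - x_test) ** 2, axis=1)   # per-image MSE in [-0.5, 0.5] space
psnr = 10.0 * np.log10(1.0 / mse)              # data range is 1.0, so MAX^2 = 1
print("mean MSE:", mse.mean(), "mean PSNR (dB):", psnr.mean())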
Conclusion
The same approach can also be applied to color images. It recovers the rough outline, but the details are far from the original, so a different network is needed to get better results.
Below is the color-image output produced with this scheme.
Color input image
Color output image
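For reference, only the input and output sizes of the network need to change to try the same fully connected scheme on color images. A minimal sketch, assuming 32x32 RGB inputs such as CIFAR-10 (the dataset choice and layer sizes here are illustrative, not from the original code):

# Illustrative sketch: the same dense autoencoder on flattened color images.
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

(x_train, _), _ = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype('float32') / 255.0 - 0.5
x_train = x_train.reshape(x_train.shape[0], -1)   # 32*32*3 = 3072 values per image

inp = tf.keras.Input(shape=(3072,))
h = Dense(512, activation="relu")(inp)
h = Dense(128, activation="relu")(h)
code = Dense(128)(h)                              # latent code
h = Dense(128, activation="relu")(code)
h = Dense(512, activation="relu")(h)
out = Dense(3072, activation="tanh")(h)

autoencoder = Model(inp, out)
autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.fit(x_train, x_train, epochs=5, batch_size=64, shuffle=True)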
I will post an update once I find a better approach.