Variational Autoencoder
Don't try to understand the VAE from the network architecture first. Start from the computation:

$$p(z \mid X) = \frac{p(X \mid z)\,p(z)}{p(X)} = \frac{p(X \mid z)\,p(z)}{\int p(X \mid z; \theta)\,p(z)\,dz}$$

Use the KL divergence to measure the information loss between the two distributions:

$$\mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z \mid x)\big) = \mathbb{E}_q[\log q_\lambda(z \mid x)] - \mathbb{E}_q[\log p(x, z)] + \log p(x)$$

Rearranging slightly:

$$\log p(x) - \mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z \mid x)\big) = \mathbb{E}_q[\log p(x, z)] - \mathbb{E}_q[\log q_\lambda(z \mid x)] = \mathbb{E}_q[\log p(x \mid z)] - \mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z)\big) = \mathrm{ELBO}(\lambda)$$
The right-hand side is what we optimize:
The first term on the right: maximize the expected log-likelihood -- this is the decoder.
The second term on the right: minimize the KL divergence -- this is the encoder.
This is how the method connects to the autoencoder.
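The middle equality above deserves one explicit step: factor the joint as $p(x, z) = p(x \mid z)\,p(z)$ and group the prior with the variational posterior:

$$\mathbb{E}_q[\log p(x, z)] - \mathbb{E}_q[\log q_\lambda(z \mid x)] = \mathbb{E}_q[\log p(x \mid z)] - \mathbb{E}_q\!\left[\log \frac{q_\lambda(z \mid x)}{p(z)}\right] = \mathbb{E}_q[\log p(x \mid z)] - \mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z)\big)$$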
Note:
The decoder term $p(X \mid z)$ does not measure the error between $x$ and $\hat{x}$; it maximizes the likelihood $\log p(x, z) = \log p(x \mid z)\,p(z)$.
The encoder outputs the mean and variance of a normal distribution, but sampling applies the reparameterization trick; see the sketch below.
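A minimal numpy sketch of that sampling step (illustrative only; the function and variable names here are mine, not part of the code further down):

import numpy as np

def sample_latent(mean, log_var, rng=np.random):
    # Reparameterization trick: z = mean + std * epsilon, epsilon ~ N(0, I).
    # Sampling N(mean, var) directly is not differentiable w.r.t. the encoder
    # outputs; moving the randomness into epsilon makes the path from
    # (mean, log_var) to z a deterministic, differentiable transformation.
    std = np.exp(0.5 * log_var)                # log-variance -> standard deviation
    epsilon = rng.standard_normal(mean.shape)
    return mean + std * epsilon

# Example: a batch of 4 samples in a 2-D latent space.
mu = np.zeros((4, 2))
log_var = np.zeros((4, 2))                     # unit variance in every dimension
z = sample_latent(mu, log_var)
print(z.shape)                                 # (4, 2)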
Variational Auto-Encoder (VAE)
From the network-architecture perspective: the loss for each sample $x_i$ is $l_i(\theta, \phi) = -\mathbb{E}_{z \sim q_\theta(z \mid x_i)}[\log p_\phi(x_i \mid z)] + \mathrm{KL}\big(q_\theta(z \mid x_i)\,\|\,p(z)\big)$. The first term is the reconstruction loss for $x$; the second term is a regularizer.
From the probabilistic perspective: $p(x, z) = p(x \mid z)\,p(z)$ factors the joint into likelihood and prior. Inference requires the posterior $p(z \mid x) = \frac{p(x \mid z)\,p(z)}{p(x)}$, whose denominator $p(x)$ (the evidence) involves an intractable integral, so we use a variational approximation: $q_\lambda(z \mid x)$ stands in for $p(z \mid x)$. Use the KL divergence to measure the information loss between the two distributions: $\mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z \mid x)\big) = \mathbb{E}_q[\log q_\lambda(z \mid x)] - \mathbb{E}_q[\log p(x, z)] + \log p(x)$. Setting the evidence term aside, define $\mathrm{ELBO}(\lambda) = \mathbb{E}_q[\log p(x, z)] - \mathbb{E}_q[\log q_\lambda(z \mid x)]$, which gives $\log p(x) = \mathrm{ELBO}(\lambda) + \mathrm{KL}\big(q_\lambda(z \mid x)\,\|\,p(z \mid x)\big)$. By Jensen's inequality the KL divergence is always $\geq 0$, and $\log p(x)$ is fixed, so minimizing the KL divergence is equivalent to maximizing the ELBO.
For a single sample, $\mathrm{ELBO}_i(\lambda) = \mathbb{E}_{q_\lambda(z \mid x_i)}[\log p(x_i \mid z)] - \mathrm{KL}\big(q_\lambda(z \mid x_i)\,\|\,p(z)\big)$, which we optimize with stochastic gradient descent. Rewriting slightly to show the inference and generative network parameters: $\mathrm{ELBO}_i(\theta, \phi) = \mathbb{E}_{q_\theta(z \mid x_i)}[\log p_\phi(x_i \mid z)] - \mathrm{KL}\big(q_\theta(z \mid x_i)\,\|\,p(z)\big)$. At this point $\mathrm{ELBO}_i(\theta, \phi) = -l_i(\theta, \phi)$.
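A numpy sketch of how one SGD step estimates $-\mathrm{ELBO}_i$ from a single Monte Carlo sample, assuming a Gaussian $q_\theta$ and a Bernoulli decoder (the function names and shapes are mine, for illustration):

import numpy as np

def neg_elbo_single_sample(x, mean, log_var, decode):
    # One-sample Monte Carlo estimate of l_i = -ELBO_i.
    #   x:             flattened binary input, shape (d,)
    #   mean, log_var: encoder outputs, q(z|x) = N(mean, exp(log_var)), shape (k,)
    #   decode:        function z -> Bernoulli means p(x|z) in (0, 1), shape (d,)
    # Reparameterized sample z ~ q(z|x).
    z = mean + np.exp(0.5 * log_var) * np.random.standard_normal(mean.shape)
    x_hat = decode(z)
    # -E_q[log p(x|z)] approximated with the single sample: binary cross-entropy.
    reconstruction = -np.sum(x * np.log(x_hat) + (1 - x) * np.log(1 - x_hat))
    # KL(N(mean, var) || N(0, I)) has a closed form, so no sampling is needed.
    kl = -0.5 * np.sum(1 + log_var - mean**2 - np.exp(log_var))
    return reconstruction + kl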
Appendix: the KL divergence between two multivariate normal distributions:
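For $k$-dimensional Gaussians $\mathcal{N}(\mu_1, \Sigma_1)$ and $\mathcal{N}(\mu_2, \Sigma_2)$, the closed form is:

$$\mathrm{KL}\big(\mathcal{N}(\mu_1, \Sigma_1)\,\|\,\mathcal{N}(\mu_2, \Sigma_2)\big) = \frac{1}{2}\left[\operatorname{tr}\!\left(\Sigma_2^{-1}\Sigma_1\right) + (\mu_2 - \mu_1)^{\top}\Sigma_2^{-1}(\mu_2 - \mu_1) - k + \ln\frac{\det\Sigma_2}{\det\Sigma_1}\right]$$

With $q = \mathcal{N}(\mu, \operatorname{diag}(\sigma^2))$ and $p = \mathcal{N}(0, I)$ this reduces to the term used in the loss code below:

$$\mathrm{KL}(q\,\|\,p) = -\frac{1}{2}\sum_{j=1}^{k}\left(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\right)$$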
The figure here is from VAE(4)——实现, which has more detailed diagrams. See also: Caffe code to accompany my Tutorial on Variational Autoencoders.
VAE.py
from __future__ import (
    division,
    print_function,
    absolute_import
)
from six.moves import range
import tensorflow as tf
import tflearn
def encode(incoming, intermediate_dim=None, latent_dim=None):
    with tf.variable_op_scope([incoming], 'Encoder') as scope:
        name = scope.name
        net = tflearn.fully_connected(incoming, intermediate_dim)
        net = tflearn.batch_normalization(net)
        net = tflearn.activation(net, activation='relu')
        net = tflearn.fully_connected(net, intermediate_dim)
        net = tflearn.batch_normalization(net)
        net = tflearn.activation(net, activation='relu')
        net = tflearn.fully_connected(net, intermediate_dim)
        net = tflearn.batch_normalization(net)
        h = tflearn.activation(net, activation='relu', name='H')
        mean = tflearn.fully_connected(h, latent_dim, name='Mean')
        log_var = tflearn.fully_connected(h, latent_dim, name='LogVariance')
        std = tf.exp(0.5 * log_var, name='StandardDeviation')
        epsilon = tf.random_normal(tf.shape(log_var), name='Epsilon')
        # Reparameterization trick: z = mean + std * epsilon with epsilon ~ N(0, I),
        # so the sampling step stays differentiable w.r.t. mean and log_var.
        z = tf.add(mean, tf.mul(std, epsilon), name='SampleLatentVariable')
        tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, z)
    return z, mean, log_var
def decode(incoming, intermediate_dim=None, original_shape=None):
    with tf.variable_op_scope([incoming], 'Decoder') as scope:
        name = scope.name
        net = tflearn.fully_connected(incoming, intermediate_dim)
        net = tflearn.batch_normalization(net)
        net = tflearn.activation(net, activation='relu')
        net = tflearn.fully_connected(net, intermediate_dim)
        net = tflearn.batch_normalization(net)
        net = tflearn.activation(net, activation='relu')
        net = tflearn.fully_connected(net, intermediate_dim)
        net = tflearn.batch_normalization(net)
        h = tflearn.activation(net, activation='relu', name='H')
        # Sigmoid keeps the reconstructed pixel values in [0, 1].
        mean = tflearn.fully_connected(h, original_shape[0], activation='sigmoid',
                                       name='Mean')
        tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, mean)
    return mean
encoder.py
from __future__ import (
    division,
    print_function,
    absolute_import
)
from six.moves import range
import tensorflow as tf
import tflearn
import vae
from tflearn.datasets import mnist
import numpy as np
from skimage import io
batch_size = 128
latent_dim = 2
intermediate_dim = 512
X, Y, testX, testY = mnist.load_data()
original_shape = X.shape[1:]
original_shape = [original_shape[i] for i in range(len(original_shape))]
input_shape = [None] + original_shape
x = tflearn.input_data(shape=input_shape)
z, z_mean, z_log_var = vae.encode(x, intermediate_dim=intermediate_dim,
                                  latent_dim=latent_dim)
x_decoded_mean = vae.decode(z, intermediate_dim=intermediate_dim,
                            original_shape=original_shape)
def vae_loss(y_pred, y_true):
    with tf.variable_op_scope([y_pred, y_true], 'Loss') as scope:
        name = scope.name
        # y_pred (the decoder output) is already sigmoid-activated, so compute
        # the binary cross-entropy directly rather than feeding it through
        # sigmoid_cross_entropy_with_logits, which expects raw logits.
        binary_cross_entropy_loss = -tf.reduce_sum(
            y_true * tf.log(y_pred + 1e-10)
            + (1.0 - y_true) * tf.log(1.0 - y_pred + 1e-10),
            reduction_indices=1)
        # Closed-form KL(N(z_mean, exp(z_log_var)) || N(0, I)), summed over the
        # latent dimensions; z_mean and z_log_var are captured from the encoder
        # graph built above.
        kullback_leibler_divergence_loss = -0.5 * tf.reduce_sum(
            1 + z_log_var - tf.pow(z_mean, 2) - tf.exp(z_log_var),
            reduction_indices=1)
        loss = tf.reduce_mean(binary_cross_entropy_loss + kullback_leibler_divergence_loss)
        tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, loss)
    return loss
vae = tflearn.regression(x_decoded_mean, optimizer='adam', loss=vae_loss,
                         metric=None)
vae = tflearn.DNN(vae, tensorboard_verbose=0,
                  checkpoint_path='model_variational_autoencoder',
                  max_checkpoints=10)
# Train as an autoencoder: the input X is also the reconstruction target.
vae.fit(X, X, n_epoch=100, batch_size=batch_size,
        run_id='variational_auto_encoder')
generator.py
from __future__ import (
    division,
    print_function,
    absolute_import
)
from six.moves import range
import tensorflow as tf
import tflearn
from tflearn.datasets import mnist
import vae
import numpy as np
from skimage import io
original_dim = 784
latent_dim = 2
intermediate_dim = 512
model_file = 'model_variational_autoencoder-43000'
X, Y, testX, testY = mnist.load_data()
original_shape = X.shape[1:]
original_shape = [original_shape[i] for i in range(len(original_shape))]
with tf.Graph().as_default():
    input_shape = [None] + original_shape
    x = tflearn.input_data(shape=input_shape)
    z, mean, logvar = vae.encode(x, intermediate_dim=intermediate_dim,
                                 latent_dim=latent_dim)
    encoder = tflearn.DNN(z)
    # Restore only the variables under the 'Encoder' scope from the checkpoint.
    optargs = {'scope_for_restore': 'Encoder'}
    encoder.load(model_file, **optargs)
    mean_encoder = tflearn.DNN(mean)
    mean_encoder.load(model_file, **optargs)
    logvar_encoder = tflearn.DNN(logvar)
    logvar_encoder.load(model_file, **optargs)
with tf.Graph().as_default():
    # Build a digit generator that can sample from the learned distribution.
    decoder_input = tflearn.input_data(shape=[None, latent_dim])
    gen_decoded_mean = vae.decode(decoder_input, intermediate_dim=intermediate_dim,
                                  original_shape=original_shape)
    generator = tflearn.DNN(gen_decoded_mean)
    generator.load(model_file, scope_for_restore='Decoder')
digit_size = 28
n = 15
linspace = 1000
figure = np.zeros((digit_size * n, digit_size * n))
grid_x = np.linspace(-linspace, linspace, n)
grid_y = np.linspace(-linspace, linspace, n)
# Decode an n-by-n grid of latent points and tile the digits into one image.
for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi] + [0 for k in range(2, latent_dim)]])
        x_decoded = generator.predict(z_sample)
        digit = np.reshape(x_decoded[0], [digit_size, digit_size])
        figure[i * digit_size : (i + 1) * digit_size,
               j * digit_size : (j + 1) * digit_size] = digit
figure *= 255
figure = figure.astype(np.uint8)
io.imsave('vae_z.png', figure)
figure = np.zeros((digit_size * n, digit_size * n), dtype=np.float16)
# Pick one random training image and encode it.
testX = tflearn.data_utils.shuffle(X)[0][0:1]
testMean = mean_encoder.predict(testX)[0]
testLogVar = logvar_encoder.predict(testX)[0]
# Span roughly +/- 4 standard deviations around the encoded test digit.
std = [np.exp(0.5 * testLogVar[i]) * 4 for i in range(2)]
grid_x = np.linspace(-std[0], std[0], n) + testMean[0]
grid_y = np.linspace(-std[1], std[1], n) + testMean[1]
for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi] + [testMean[k] for k in range(2, latent_dim)]])
        x_decoded = generator.predict(z_sample)
        digit = np.reshape(x_decoded[0], [digit_size, digit_size])
        figure[i * digit_size : (i + 1) * digit_size,
               j * digit_size : (j + 1) * digit_size] = digit
figure *= 255
figure = figure.astype(np.uint8)
io.imsave('vae_std.png', figure)
References:
- The Unreasonable Confusion of Variational Autoencoders
- Tutorial on Variational Autoencoders
- VAE(3)——公式与实现
- 变分自编码器(Variational Autoencoder, VAE)通俗教程
- 没有任何公式——直观的理解变分自动编码器VAE: https://yq.aliyun.com/articles/68410
The encoder's parameters are the mean and standard deviation of the normal distribution $P(Z \mid X)$ (the main diagonal of the covariance matrix, so a single vector suffices); a sample is then obtained via $z = \mu + \sigma \odot \epsilon$.