Image-Generation-Using-Convolutional-Autoencoder/conv_autoencoder.py at main · wiguider/Image-Generation-Using-Convolutional-Autoencoder · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
import os
import pickle

import numpy as np
import tensorflow as tf
from tensorflow.keras import Model, backend as K
from tensorflow.keras.layers import Activation, BatchNormalization, Conv2D, Conv2DTranspose, Dense, Flatten, Input, Lambda, ReLU, Reshape
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam

# Eager execution evaluates operations immediately instead of first building a
# complete graph. This implementation does not work with eager execution, so
# it is disabled here, at import time, before any model is built.
tf.compat.v1.disable_eager_execution()


class Autoencoder:
    """
    Autoencoder represents a Deep Convolutional autoencoder architecture with
    mirrored encoder and decoder components.

    The encoder is a stack of ``Conv2D -> ReLU -> BatchNormalization`` blocks
    followed by a Dense bottleneck of size ``latent_space_dim``. The decoder
    maps a latent vector back through a Dense layer, a Reshape and a stack of
    ``Conv2DTranspose`` blocks, ending in a sigmoid activation.

    Args:
        input_shape: Shape of one input image, e.g. ``(28, 28, 1)``.
        conv_filters: Number of filters of each conv layer, e.g. ``(2, 4, 8)``.
        conv_kernels: Kernel size of each conv layer, e.g. ``(3, 5, 3)``.
        conv_strides: Stride of each conv layer, e.g. ``(1, 2, 2)``.
        latent_space_dim (int): Size of the bottleneck, e.g. ``2``.
    """

    def __init__(self,
                 input_shape,
                 conv_filters,
                 conv_kernels,
                 conv_strides,
                 latent_space_dim):
        self.input_shape = input_shape  # [28, 28, 1]
        self.conv_filters = conv_filters  # [2, 4, 8]
        self.conv_kernels = conv_kernels  # [3, 5, 3]
        self.conv_strides = conv_strides  # [1, 2, 2]
        self.latent_space_dim = latent_space_dim  # 2

        # The three Keras models; all of them are created by _build().
        self.encoder = None
        self.decoder = None
        self.model = None

        self._num_conv_layers = len(conv_filters)
        # Shape of the last conv block's output (without the batch axis);
        # recorded by the encoder so the decoder can mirror it.
        self._shape_before_bottleneck = None
        self._model_input = None

        self._build()

    def summary(self):
        """Prints the summary of the encoder, decoder,
        and of the whole autoencoder.
        """
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.0001):
        """
        Configures the model for training with Adam and a mean squared error loss.

        Args:
            learning_rate (float, optional): The learning rate of the optimizer.
            Defaults to 0.0001.
        """
        optimizer = Adam(learning_rate=learning_rate)
        mse_loss = MeanSquaredError()
        self.model.compile(optimizer=optimizer, loss=mse_loss)

    def train(self, x_train, batch_size, num_epochs):
        """
        Trains the model for a fixed number of epochs (iterations on a dataset).

        The same object is passed to ``Model.fit`` as both the input and the
        target, because the autoencoder learns to reproduce its input.

        Args:
            x_train: Training images, e.g. a Numpy array (or array-like) or a
                TensorFlow tensor whose samples match ``input_shape``.
                NOTE(review): inputs that yield their own targets
                (``tf.data`` datasets, generators, ``keras.utils.Sequence``)
                are presumably not suitable here, since ``x_train`` is also
                passed as ``y`` -- confirm against the Keras ``fit`` docs.
            batch_size (Integer or `None`): Number of samples per gradient
                update. It is forwarded unchanged to ``Model.fit``.
            num_epochs (Integer): Number of epochs to train the model.
                An epoch is an iteration over the entire `x_train` data provided.
        """
        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       epochs=num_epochs,
                       shuffle=True)

    def save(self, folder_path="."):
        """Creates a folder if it does not exist in the given path.
        And, saves the parameters and weights of the model in `folder_path`.

        Args:
            folder_path (str, optional): Path of the folder.
            Defaults to the path of the current directory.
        """
        self._create_folder_if_it_doesnt_exist(folder_path)
        self._save_parameters(folder_path)
        self._save_weights(folder_path)

    def load_weights(self, weights_path):
        """Loads the weights of the model saved in the given path.

        Args:
            weights_path (str): Path of the file where the weights are saved.
        """
        self.model.load_weights(weights_path)

    def reconstruct(self, images):
        """Given a list of images, extracts their latent representations and reconstructs similar images.

        Args:
            images (Input images): A list of Numpy 3d arrays.

        Returns:
            reconstructed_images, latent_representations: Respectively, the reconstructed images and their latent representations.
        """
        latent_representations = self.encoder.predict(images)
        reconstructed_images = self.decoder.predict(latent_representations)
        return reconstructed_images, latent_representations

    @classmethod
    def load(cls, folder_path="."):
        """Loads the model from the given folder `folder_path`.

        The folder is expected to contain the ``parameters.pkl`` and
        ``weights.h5`` files written by :meth:`save`.

        Args:
            folder_path (str, optional): Path of the folder.
            Defaults to the path of the current directory.

        Returns:
            Autoencoder: The model saved in the given folder.
        """
        parameters_path = os.path.join(folder_path, "parameters.pkl")
        # SECURITY: unpickling can execute arbitrary code. Only load folders
        # that were produced by `save()` and come from a trusted source.
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        # The pickled list holds the constructor arguments in positional order.
        autoencoder = cls(*parameters)
        weights_path = os.path.join(folder_path, "weights.h5")
        autoencoder.load_weights(weights_path)
        return autoencoder

    def _create_folder_if_it_doesnt_exist(self, folder_path):
        """Creates a folder if it does not exist in the given path.

        Args:
            folder_path (str): Path of the folder.
        """
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(folder_path, exist_ok=True)

    def _save_parameters(self, folder_path):
        """Saves the parameters of the model.

        The constructor arguments are pickled as a list, in positional order,
        to ``parameters.pkl`` so that :meth:`load` can rebuild the model.

        Args:
            folder_path (str): Path of the folder where the parameters will be saved.
        """
        parameters = [
            self.input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim,
        ]
        save_path = os.path.join(folder_path, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)

    def _save_weights(self, folder_path):
        """Saves the weights of the model.

        Args:
            folder_path (str):  Path of the folder where the weights will be saved.
        """
        file_path = os.path.join(folder_path, "weights.h5")
        self.model.save_weights(file_path)

    def _build(self):
        """Builds the model.

        The encoder must be built first: it records the shape before the
        bottleneck, which the decoder needs in order to mirror it.
        """
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_autoencoder(self):
        """Builds the autoencoder by chaining the encoder and the decoder.
        """
        model_input = self._model_input
        model_output = self.decoder(self.encoder(model_input))
        self.model = Model(model_input, model_output, name="autoencoder")

    def _build_decoder(self):
        """Builds the decoder.
        """
        decoder_input = self._add_decoder_input()
        dense_layer = self._add_dense_layer(decoder_input)
        reshape_layer = self._add_reshape_layer(dense_layer)
        conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
        decoder_output = self._add_decoder_output(conv_transpose_layers)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _add_decoder_input(self):
        """Instantiates a Keras tensor and sets the input shape of the decoder.

        Returns:
            A `tensor`.
        """
        # Keras documents `shape` as a tuple; a bare int is not accepted by
        # every Keras version, so wrap the latent size explicitly.
        return Input(shape=(self.latent_space_dim,), name="decoder_input")

    def _add_dense_layer(self, decoder_input):
        """Adds a Dense layer to the decoder.

        The layer has as many units as there are values in the tensor that
        enters the encoder's bottleneck, so it can be reshaped back to it.

        Args:
            decoder_input (tensor): Input of the decoder

        Returns:
            tensor: Output of the Dense layer.
        """
        # np.prod returns a NumPy integer; cast to a built-in int because
        # `units` is documented as a positive (Python) integer.
        num_neurons = int(np.prod(self._shape_before_bottleneck))  # [1, 2, 4] -> 8
        dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
        return dense_layer

    def _add_reshape_layer(self, dense_layer):
        """Reshapes the dense layer's output to the shape recorded before the
        encoder's bottleneck.
        """
        return Reshape(self._shape_before_bottleneck)(dense_layer)

    def _add_conv_transpose_layers(self, x):
        """Adds conv transpose blocks."""
        # Loop through the conv layers in reverse order and stop before the
        # first one (index 0): its kernel and stride are used by the decoder's
        # output layer instead.
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = self._add_conv_transpose_layer(layer_index, x)
        return x

    def _add_conv_transpose_layer(self, layer_index, x):
        """Adds a conv transpose block (Conv2DTranspose + ReLU + batch
        normalization) to the graph of layers in the decoder.

        Args:
            layer_index (Integer): Index into ``conv_filters``,
                ``conv_kernels`` and ``conv_strides`` of the encoder layer
                whose settings this block reuses.
            x (tensor): The graph of layers in the decoder.

        Returns:
            tensor: The graph of layers in the decoder plus a conv_transpose_layer.
        """
        # Decoder blocks are numbered from 1 in the order they are added,
        # i.e. the reverse of the encoder's layer indices.
        layer_num = self._num_conv_layers - layer_index
        conv_transpose_layer = Conv2DTranspose(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"decoder_conv_transpose_layer_{layer_num}"
        )
        x = conv_transpose_layer(x)
        x = ReLU(name=f"decoder_relu_{layer_num}")(x)
        x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
        return x

    def _add_decoder_output(self, x):
        """Adds an output layer to the graph of layers in the decoder.

        The final Conv2DTranspose has as many filters as the input has
        channels and is followed by a sigmoid activation.

        Args:
            x (tensor): The graph of layers in the decoder.

        Returns:
            tensor: The graph of layers in the decoder plus the output layer.
        """
        conv_transpose_layer = Conv2DTranspose(
            filters=self.input_shape[-1],
            kernel_size=self.conv_kernels[0],
            strides=self.conv_strides[0],
            padding="same",
            name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
        )
        x = conv_transpose_layer(x)
        output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
        return output_layer

    def _build_encoder(self):
        """Builds the encoder.
        """
        encoder_input = self._add_encoder_input()
        conv_layers = self._add_conv_layers(encoder_input)
        bottleneck = self._add_bottleneck(conv_layers)
        # Kept so that the full autoencoder can share the encoder's input.
        self._model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _add_encoder_input(self):
        """Instantiates a Keras tensor and sets the input shape of the encoder.

        Returns:
            A `tensor`.
        """
        return Input(shape=self.input_shape, name="encoder_input")

    def _add_conv_layers(self, encoder_input):
        """Creates all convolutional blocks in encoder."""
        x = encoder_input
        for layer_index in range(self._num_conv_layers):
            x = self._add_conv_layer(layer_index, x)
        return x

    def _add_conv_layer(self, layer_index, x):
        """Adds a convolutional block to a graph of layers, consisting of
        conv 2d + ReLU + batch normalization.
        """
        layer_number = layer_index + 1  # layer names are 1-based
        conv_layer = Conv2D(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"encoder_conv_layer_{layer_number}"
        )
        x = conv_layer(x)
        x = ReLU(name=f"encoder_relu_{layer_number}")(x)
        x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
        return x

    def _add_bottleneck(self, x):
        """Flatten data and add bottleneck (Dense layer)."""
        # Remember the per-sample shape (batch axis dropped) for the decoder.
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)
        x = Dense(self.latent_space_dim, name="encoder_output")(x)
        return x


class VAE(Autoencoder):
    """
    VAE represents a Deep Convolutional variational autoencoder architecture
    with mirrored encoder and decoder components.

    The architecture of VAE is similar to the one of Autoencoder,
    but, the samples are mapped in a different way hence the loss function is different.
    More precisely, rather than mapping input data to points in the latent space,
    VAE maps input data to parameters of a distribution that describe where a
    datum "should" be mapped (probabilistically) in the latent space, according to its features.

    The loss references the symbolic ``mu`` and ``log_variance`` tensors of the
    encoder, so it relies on eager execution being disabled (done at module level).
    """

    def __init__(self, input_shape, conv_filters, conv_kernels, conv_strides, latent_space_dim):
        # Weight of the reconstruction term relative to the KL term.
        self.reconstruction_loss_weight = 1000
        # Outputs of the "mu" and "log_variance" Dense layers; set by
        # _add_bottleneck() while the parent constructor builds the encoder.
        self.mu = None
        self.log_variance = None

        # Autoencoder.__init__ already calls self._build(), which dispatches
        # to the overridden _add_bottleneck(). Building a second time would
        # only duplicate every layer in the graph.
        super().__init__(input_shape, conv_filters, conv_kernels, conv_strides, latent_space_dim)

    def _calculate_kl_loss(self, y_target, y_predicted):
        """
        Computes the KL divergence between the encoder's diagonal Gaussian
        N(mu, exp(log_variance)) and the standard normal prior N(0, I).

        kl_loss = -0.5 * sum(1 + log_variance - mu^2 - exp(log_variance))

        Args:
            y_target (tensor): Unused; present so the method matches the
                ``fn(y_true, y_pred)`` signature Keras expects.
            y_predicted (tensor): Unused; see ``y_target``.

        Returns:
            tensor: One KL value per sample (summed over the latent axis).
        """
        kl_loss = -0.5 * K.sum(
            1 + self.log_variance - K.square(self.mu) - K.exp(self.log_variance),
            axis=1
        )
        return kl_loss

    def _calculate_combined_loss(self, y_target, y_predicted):
        """
        Combines the reconstruction loss and the latent KL divergence.

        combined_loss = reconstruction_loss_weight * reconstruction_loss + kl_loss

        The KL term is ADDED: it is a penalty that keeps the latent
        distribution close to the standard normal prior.

        Args:
            y_target (tensor): Images the model should reproduce.
            y_predicted (tensor): Images produced by the decoder.

        Returns:
            tensor: The combined loss.
        """
        reconstruction_loss = tf.keras.losses.MeanSquaredError()(y_target, y_predicted)
        kl_loss = self._calculate_kl_loss(y_target, y_predicted)

        combined_loss = self.reconstruction_loss_weight * reconstruction_loss \
                        + kl_loss
        return combined_loss

    def compile(self, learning_rate=0.0001):
        """
        Configures the model for training.

        Args:
            learning_rate (float, optional): The learning rate of the optimizer.
            Defaults to 0.0001.
        """
        optimizer = Adam(learning_rate=learning_rate)
        # The loss function is a combination of reconstruction loss and the
        # latent KL divergence.
        # NOTE: the built-in 'KLDivergence' metric compares target and
        # reconstructed pixels; it is kept for backward compatibility. The
        # latent KL term actually used in the loss is reported by the
        # additional `_calculate_kl_loss` metric.
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=['MeanSquaredError', 'KLDivergence',
                                    self._calculate_kl_loss])

    def _add_bottleneck(self, x):
        """Flatten data and add bottleneck with Gaussian sampling (Dense
        layers for the distribution parameters followed by a sampling layer).
        """
        # Remember the per-sample shape (batch axis dropped) for the decoder.
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)

        # Mean of the multivariate Gaussian in the latent space.
        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        # Log of the diagonal of the same Gaussian's covariance matrix.
        self.log_variance = Dense(self.latent_space_dim,
                                  name="log_variance")(x)

        def sample_point_from_normal_distribution(args):
            """
            Given mu and log_variance, draws a point from N(mu, exp(log_variance)).

            A random variable 'epsilon', whose components are independently
            sampled from a standard normal distribution, carries the
            stochasticity: it is multiplied element-wise with the standard
            deviation exp(log_variance / 2) and shifted by mu.
            """
            mu, log_variance = args
            # Take the shape from the layer's own input rather than from
            # self.mu, so the function only depends on its arguments.
            epsilon = K.random_normal(shape=K.shape(mu), mean=0.,
                                      stddev=1.)
            sampled_point = mu + K.exp(log_variance / 2) * epsilon
            return sampled_point

        x = Lambda(sample_point_from_normal_distribution,
                   name="encoder_output")([self.mu, self.log_variance])
        return x


if __name__ == "__main__":
    # Demo: build a VAE for 28x28 single-channel images and print the
    # summaries of its encoder, decoder and full model.
    autoencoder = VAE(
        # shape of the images in the dataset
        input_shape=(28, 28, 1),
        # number of filters in each convolutional layer
        conv_filters=(32, 64, 64, 64),
        # kernel size of each convolutional layer
        conv_kernels=(3, 3, 3, 3),
        # stride of each convolutional layer
        conv_strides=(1, 2, 2, 1),
        # dimension of the latent space
        latent_space_dim=2,
    )
    autoencoder.summary()