5151import os
5252
5353
54+ # def encoder_model():
55+ # model = Sequential()
56+ #
57+ # # 10x128x128
58+ # model.add(Conv3D(filters=128,
59+ # strides=(1, 4, 4),
60+ # kernel_size=(3, 11, 11),
61+ # padding='same',
62+ # input_shape=(int(VIDEO_LENGTH/2), 128, 128, 3)))
63+ # model.add(TimeDistributed(BatchNormalization()))
64+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
65+ # model.add(TimeDistributed(Dropout(0.5)))
66+ #
67+ # # 10x32x32
68+ # model.add(Conv3D(filters=64,
69+ # strides=(1, 2, 2),
70+ # kernel_size=(3, 5, 5),
71+ # padding='same'))
72+ # model.add(TimeDistributed(BatchNormalization()))
73+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
74+ # model.add(TimeDistributed(Dropout(0.5)))
75+ #
76+ # # 10x16x16
77+ # model.add(Conv3D(filters=64,
78+ # strides=(1, 1, 1),
79+ # kernel_size=(3, 3, 3),
80+ # padding='same'))
81+ # model.add(TimeDistributed(BatchNormalization()))
82+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
83+ # model.add(TimeDistributed(Dropout(0.5)))
84+ #
85+ # return model
86+
87+
def encoder_model():
    """Build the convolutional frame encoder.

    Encodes a half-length clip of 128x128 RGB frames into a per-frame
    16x16 feature map with 128 channels, using three TimeDistributed
    Conv2D stages, each followed by BatchNorm -> LeakyReLU -> Dropout.

    Returns:
        Model: Keras functional model mapping
            (VIDEO_LENGTH/2, 128, 128, 3) -> (VIDEO_LENGTH/2, 16, 16, 128).
    """
    # NOTE(review): VIDEO_LENGTH is a module-level constant defined
    # elsewhere in this file; assumed to be an even frame count.
    inputs = Input(shape=(int(VIDEO_LENGTH / 2), 128, 128, 3))

    # Stage 1: 128x128 -> 32x32 ('same' padding, stride 4), 64 filters.
    conv_1 = TimeDistributed(Conv2D(filters=64,
                                    kernel_size=(11, 11),
                                    strides=(4, 4),
                                    padding='same'))(inputs)
    x = TimeDistributed(BatchNormalization())(conv_1)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    # Dropout 0.4 here vs. 0.5 in later stages — intentional per the
    # original; lighter regularization on the first stage.
    x = TimeDistributed(Dropout(0.4))(x)

    # Stage 2: 32x32 -> 16x16 ('same' padding, stride 2), 128 filters.
    conv_2 = TimeDistributed(Conv2D(filters=128,
                                    kernel_size=(5, 5),
                                    strides=(2, 2),
                                    padding='same'))(x)
    x = TimeDistributed(BatchNormalization())(conv_2)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_2 = TimeDistributed(Dropout(0.5))(x)

    # Stage 3: 16x16 -> 16x16 ('same' padding, stride 1), 128 filters.
    conv_3 = TimeDistributed(Conv2D(filters=128,
                                    kernel_size=(3, 3),
                                    strides=(1, 1),
                                    padding='same'))(out_2)
    x = TimeDistributed(BatchNormalization())(conv_3)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    in_rep = TimeDistributed(Dropout(0.5))(x)

    model = Model(inputs=inputs, outputs=in_rep)

    return model
86131
87132
88133def decoder_model ():
89- inputs = Input (shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 64 ))
134+ inputs = Input (shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 128 ))
90135
91136 # 10x16x16
92- convlstm_1 = ConvLSTM2D (filters = 64 ,
137+ convlstm_1 = ConvLSTM2D (filters = 128 ,
93138 kernel_size = (3 , 3 ),
94139 strides = (1 , 1 ),
95140 padding = 'same' ,
@@ -99,27 +144,27 @@ def decoder_model():
99144 x = TimeDistributed (LeakyReLU (alpha = 0.2 ))(x )
100145 out_1 = TimeDistributed (Dropout (0.5 ))(x )
101146
102- flat_1 = TimeDistributed (Flatten ())(out_1 )
103- aclstm_1 = GRU (units = 16 * 16 ,
104- recurrent_dropout = 0.2 ,
105- return_sequences = True )(flat_1 )
106- x = TimeDistributed (BatchNormalization ())(aclstm_1 )
107- dense_1 = TimeDistributed (Dense (units = 16 * 16 , activation = 'softmax' ))(x )
108- a1_reshape = Reshape (target_shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 1 ))(dense_1 )
109- a1 = AttnLossLayer ()(a1_reshape )
110- dot_1 = multiply ([out_1 , a1 ])
147+ # flat_1 = TimeDistributed(Flatten())(out_1)
148+ # aclstm_1 = GRU(units=16 * 16,
149+ # recurrent_dropout=0.2,
150+ # return_sequences=True)(flat_1)
151+ # x = TimeDistributed(BatchNormalization())(aclstm_1)
152+ # dense_1 = TimeDistributed(Dense(units=16 * 16, activation='softmax'))(x)
153+ # a1_reshape = Reshape(target_shape=(int(VIDEO_LENGTH/2), 16, 16, 1))(dense_1)
154+ # a1 = AttnLossLayer()(a1_reshape)
155+ # dot_1 = multiply([out_1, a1])
111156
112157 convlstm_2 = ConvLSTM2D (filters = 64 ,
113158 kernel_size = (3 , 3 ),
114159 strides = (1 , 1 ),
115160 padding = 'same' ,
116161 return_sequences = True ,
117- recurrent_dropout = 0.2 )(dot_1 )
162+ recurrent_dropout = 0.2 )(out_1 )
118163 x = TimeDistributed (BatchNormalization ())(convlstm_2 )
119164 h_2 = TimeDistributed (LeakyReLU (alpha = 0.2 ))(x )
120165 out_2 = UpSampling3D (size = (1 , 2 , 2 ))(h_2 )
121166
122- skip_upsamp_1 = UpSampling3D (size = (1 , 2 , 2 ))(dot_1 )
167+ skip_upsamp_1 = UpSampling3D (size = (1 , 2 , 2 ))(out_1 )
123168 res_1 = concatenate ([out_2 , skip_upsamp_1 ])
124169
125170 # 10x32x32
@@ -330,14 +375,14 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
330375 encoder = encoder_model ()
331376 decoder = decoder_model ()
332377 autoencoder = autoencoder_model (encoder , decoder )
333- autoencoder .compile (loss = "mean_absolute_error " , optimizer = OPTIM_A )
378+ autoencoder .compile (loss = "mean_squared_error " , optimizer = OPTIM_A )
334379
335380 # Build attention layer output
336- intermediate_decoder = Model (inputs = decoder .layers [0 ].input , outputs = decoder .layers [10 ].output )
337- mask_gen_1 = Sequential ()
338- mask_gen_1 .add (encoder )
339- mask_gen_1 .add (intermediate_decoder )
340- mask_gen_1 .compile (loss = 'mean_absolute_error ' , optimizer = OPTIM_A )
381+ # intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
382+ # mask_gen_1 = Sequential()
383+ # mask_gen_1.add(encoder)
384+ # mask_gen_1.add(intermediate_decoder)
385+ # mask_gen_1.compile(loss='mean_squared_error ', optimizer=OPTIM_A)
341386
342387 run_utilities (encoder , decoder , autoencoder , ENC_WEIGHTS , DEC_WEIGHTS )
343388
@@ -423,9 +468,9 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
423468 decoder .save_weights (os .path .join (CHECKPOINT_DIR , 'decoder_epoch_' + str (epoch ) + '.h5' ), True )
424469
425470 # Save predicted attention mask per epoch
426- predicted_attn = mask_gen_1 .predict (X_train , verbose = 0 )
427- a_pred = np .reshape (predicted_attn , newshape = (BATCH_SIZE , int (VIDEO_LENGTH / 2 ), 16 , 16 , 1 ))
428- np .save (os .path .join (ATTN_WEIGHTS_DIR , 'attention_weights_gen1_' + str (epoch ) + '.npy' ), a_pred )
471+ # predicted_attn = mask_gen_1.predict(X_train, verbose=0)
472+ # a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, int(VIDEO_LENGTH/2), 16, 16, 1))
473+ # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)
429474
430475 # End TensorBoard Callback
431476 # TC.on_train_end('_')
0 commit comments