Skip to content

Commit cfa569f

Browse files
committed
Separating out motion vector
1 parent 27731e8 commit cfa569f

11 files changed

Lines changed: 817 additions & 52 deletions

File tree

code/autoencoder_model/scripts/attention_autoencoder.py

Lines changed: 90 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -51,45 +51,90 @@
5151
import os
5252

5353

54+
# def encoder_model():
55+
# model = Sequential()
56+
#
57+
# # 10x128x128
58+
# model.add(Conv3D(filters=128,
59+
# strides=(1, 4, 4),
60+
# kernel_size=(3, 11, 11),
61+
# padding='same',
62+
# input_shape=(int(VIDEO_LENGTH/2), 128, 128, 3)))
63+
# model.add(TimeDistributed(BatchNormalization()))
64+
# model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
65+
# model.add(TimeDistributed(Dropout(0.5)))
66+
#
67+
# # 10x32x32
68+
# model.add(Conv3D(filters=64,
69+
# strides=(1, 2, 2),
70+
# kernel_size=(3, 5, 5),
71+
# padding='same'))
72+
# model.add(TimeDistributed(BatchNormalization()))
73+
# model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
74+
# model.add(TimeDistributed(Dropout(0.5)))
75+
#
76+
# # 10x16x16
77+
# model.add(Conv3D(filters=64,
78+
# strides=(1, 1, 1),
79+
# kernel_size=(3, 3, 3),
80+
# padding='same'))
81+
# model.add(TimeDistributed(BatchNormalization()))
82+
# model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
83+
# model.add(TimeDistributed(Dropout(0.5)))
84+
#
85+
# return model
86+
87+
5488
def encoder_model():
55-
model = Sequential()
89+
inputs = Input(shape=(int(VIDEO_LENGTH / 2), 128, 128, 3))
5690

5791
# 10x128x128
58-
model.add(Conv3D(filters=128,
59-
strides=(1, 4, 4),
60-
kernel_size=(3, 11, 11),
61-
padding='same',
62-
input_shape=(int(VIDEO_LENGTH/2), 128, 128, 3)))
63-
model.add(TimeDistributed(BatchNormalization()))
64-
model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
65-
model.add(TimeDistributed(Dropout(0.5)))
92+
conv_1 = TimeDistributed(Conv2D(filters=64,
93+
strides=(4, 4),
94+
kernel_size=(11, 11),
95+
padding='same'))(inputs)
96+
x = TimeDistributed(BatchNormalization())(conv_1)
97+
x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
98+
x = TimeDistributed(Dropout(0.4))(x)
6699

67100
# 10x32x32
68-
model.add(Conv3D(filters=64,
69-
strides=(1, 2, 2),
70-
kernel_size=(3, 5, 5),
71-
padding='same'))
72-
model.add(TimeDistributed(BatchNormalization()))
73-
model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
74-
model.add(TimeDistributed(Dropout(0.5)))
101+
conv_2 = TimeDistributed(Conv2D(filters=128,
102+
strides=(2, 2),
103+
kernel_size=(5, 5),
104+
padding='same'))(x)
105+
x = TimeDistributed(BatchNormalization())(conv_2)
106+
x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
107+
out_2 = TimeDistributed(Dropout(0.5))(x)
75108

76109
# 10x16x16
77-
model.add(Conv3D(filters=64,
78-
strides=(1, 1, 1),
79-
kernel_size=(3, 3, 3),
80-
padding='same'))
81-
model.add(TimeDistributed(BatchNormalization()))
82-
model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
83-
model.add(TimeDistributed(Dropout(0.5)))
110+
conv_3 = TimeDistributed(Conv2D(filters=128,
111+
strides=(1, 1),
112+
kernel_size=(3, 3),
113+
padding='same'))(out_2)
114+
x = TimeDistributed(BatchNormalization())(conv_3)
115+
x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
116+
in_rep = TimeDistributed(Dropout(0.5))(x)
117+
118+
# res_1 = concatenate([out_2, in_rep])
119+
#
120+
# clstm_1 = ConvLSTM2D(filters=1,
121+
# kernel_size=(3, 3),
122+
# strides=(1, 1),
123+
# padding='same',
124+
# return_sequences=False,
125+
# activation='relu',
126+
# recurrent_dropout=0.2)(res_1)
127+
# tr_in = add([in_rep, clstm_1])
128+
model = Model(inputs=inputs, outputs=in_rep)
84129

85130
return model
86131

87132

88133
def decoder_model():
89-
inputs = Input(shape=(int(VIDEO_LENGTH/2), 16, 16, 64))
134+
inputs = Input(shape=(int(VIDEO_LENGTH/2), 16, 16, 128))
90135

91136
# 10x16x16
92-
convlstm_1 = ConvLSTM2D(filters=64,
137+
convlstm_1 = ConvLSTM2D(filters=128,
93138
kernel_size=(3, 3),
94139
strides=(1, 1),
95140
padding='same',
@@ -99,27 +144,27 @@ def decoder_model():
99144
x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
100145
out_1 = TimeDistributed(Dropout(0.5))(x)
101146

102-
flat_1 = TimeDistributed(Flatten())(out_1)
103-
aclstm_1 = GRU(units=16 * 16,
104-
recurrent_dropout=0.2,
105-
return_sequences=True)(flat_1)
106-
x = TimeDistributed(BatchNormalization())(aclstm_1)
107-
dense_1 = TimeDistributed(Dense(units=16 * 16, activation='softmax'))(x)
108-
a1_reshape = Reshape(target_shape=(int(VIDEO_LENGTH/2), 16, 16, 1))(dense_1)
109-
a1 = AttnLossLayer()(a1_reshape)
110-
dot_1 = multiply([out_1, a1])
147+
# flat_1 = TimeDistributed(Flatten())(out_1)
148+
# aclstm_1 = GRU(units=16 * 16,
149+
# recurrent_dropout=0.2,
150+
# return_sequences=True)(flat_1)
151+
# x = TimeDistributed(BatchNormalization())(aclstm_1)
152+
# dense_1 = TimeDistributed(Dense(units=16 * 16, activation='softmax'))(x)
153+
# a1_reshape = Reshape(target_shape=(int(VIDEO_LENGTH/2), 16, 16, 1))(dense_1)
154+
# a1 = AttnLossLayer()(a1_reshape)
155+
# dot_1 = multiply([out_1, a1])
111156

112157
convlstm_2 = ConvLSTM2D(filters=64,
113158
kernel_size=(3, 3),
114159
strides=(1, 1),
115160
padding='same',
116161
return_sequences=True,
117-
recurrent_dropout=0.2)(dot_1)
162+
recurrent_dropout=0.2)(out_1)
118163
x = TimeDistributed(BatchNormalization())(convlstm_2)
119164
h_2 = TimeDistributed(LeakyReLU(alpha=0.2))(x)
120165
out_2 = UpSampling3D(size=(1, 2, 2))(h_2)
121166

122-
skip_upsamp_1 = UpSampling3D(size=(1, 2, 2))(dot_1)
167+
skip_upsamp_1 = UpSampling3D(size=(1, 2, 2))(out_1)
123168
res_1 = concatenate([out_2, skip_upsamp_1])
124169

125170
# 10x32x32
@@ -330,14 +375,14 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
330375
encoder = encoder_model()
331376
decoder = decoder_model()
332377
autoencoder = autoencoder_model(encoder, decoder)
333-
autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A)
378+
autoencoder.compile(loss="mean_squared_error", optimizer=OPTIM_A)
334379

335380
# Build attention layer output
336-
intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
337-
mask_gen_1 = Sequential()
338-
mask_gen_1.add(encoder)
339-
mask_gen_1.add(intermediate_decoder)
340-
mask_gen_1.compile(loss='mean_absolute_error', optimizer=OPTIM_A)
381+
# intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
382+
# mask_gen_1 = Sequential()
383+
# mask_gen_1.add(encoder)
384+
# mask_gen_1.add(intermediate_decoder)
385+
# mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_A)
341386

342387
run_utilities(encoder, decoder, autoencoder, ENC_WEIGHTS, DEC_WEIGHTS)
343388

@@ -423,9 +468,9 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
423468
decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)
424469

425470
# Save predicted attention mask per epoch
426-
predicted_attn = mask_gen_1.predict(X_train, verbose=0)
427-
a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, int(VIDEO_LENGTH/2), 16, 16, 1))
428-
np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)
471+
# predicted_attn = mask_gen_1.predict(X_train, verbose=0)
472+
# a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, int(VIDEO_LENGTH/2), 16, 16, 1))
473+
# np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)
429474

430475
# End TensorBoard Callback
431476
# TC.on_train_end('_')

code/autoencoder_model/scripts/config_aa.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,8 @@
2929

3030
VAL_DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/val/'
3131

32-
# TEST_DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/test/'
33-
TEST_DATA_DIR= '/local_home/JAAD_Dataset/fun_experiments/resized/'
32+
TEST_DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/test/'
33+
# TEST_DATA_DIR= '/local_home/JAAD_Dataset/fun_experiments/resized/'
3434

3535
MODEL_DIR = './../' + path_var + 'models'
3636
if not os.path.exists(MODEL_DIR):
@@ -69,14 +69,14 @@
6969
IMG_SIZE = (128, 128, 3)
7070
ATTN_COEFF = 0
7171
KL_COEFF = 0
72-
RAM_DECIMATE = True
72+
RAM_DECIMATE = False
7373

7474
# -------------------------------------------------
7575
# Network configuration:
7676
print ("Loading network/training configuration.")
7777
print ("Config file: " + str(__name__))
7878

79-
BATCH_SIZE = 20
79+
BATCH_SIZE = 10
8080
NB_EPOCHS_AUTOENCODER = 40
8181

8282
OPTIM_A = Adam(lr=0.0001, beta_1=0.5)
13 Bytes
Binary file not shown.
Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,97 @@
1+
from __future__ import absolute_import
2+
from __future__ import division
3+
from __future__ import print_function
4+
5+
from keras.optimizers import SGD
6+
from keras.optimizers import Adam
7+
from keras.optimizers import adadelta
8+
from keras.optimizers import rmsprop
9+
from keras.layers import Layer
10+
from keras import backend as K
11+
K.set_image_dim_ordering('tf')
12+
import socket
13+
import os
14+
15+
# -------------------------------------------------
16+
# Background config:
17+
hostname = socket.gethostname()
18+
if hostname == 'baymax':
19+
path_var = 'baymax/'
20+
elif hostname == 'walle':
21+
path_var = 'walle/'
22+
elif hostname == 'bender':
23+
path_var = 'bender/'
24+
else:
25+
path_var = 'zhora/'
26+
27+
DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/train/'
28+
# DATA_DIR= '/local_home/data/KITTI_data/'
29+
30+
VAL_DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/val/'
31+
32+
TEST_DATA_DIR= '/local_home/JAAD_Dataset/iros/resized_imgs_128/test/'
33+
# TEST_DATA_DIR= '/local_home/JAAD_Dataset/fun_experiments/resized/'
34+
35+
MODEL_DIR = './../' + path_var + 'models'
36+
if not os.path.exists(MODEL_DIR):
37+
os.mkdir(MODEL_DIR)
38+
39+
CHECKPOINT_DIR = './../' + path_var + 'checkpoints'
40+
if not os.path.exists(CHECKPOINT_DIR):
41+
os.mkdir(CHECKPOINT_DIR)
42+
43+
ATTN_WEIGHTS_DIR = './../' + path_var + 'attn_weights'
44+
if not os.path.exists(ATTN_WEIGHTS_DIR):
45+
os.mkdir(ATTN_WEIGHTS_DIR)
46+
47+
GEN_IMAGES_DIR = './../' + path_var + 'generated_images'
48+
if not os.path.exists(GEN_IMAGES_DIR):
49+
os.mkdir(GEN_IMAGES_DIR)
50+
51+
LOG_DIR = './../' + path_var + 'logs'
52+
if not os.path.exists(LOG_DIR):
53+
os.mkdir(LOG_DIR)
54+
55+
TF_LOG_DIR = './../' + path_var + 'tf_logs'
56+
if not os.path.exists(TF_LOG_DIR):
57+
os.mkdir(TF_LOG_DIR)
58+
59+
TEST_RESULTS_DIR = './../' + path_var + 'test_results'
60+
if not os.path.exists(TEST_RESULTS_DIR):
61+
os.mkdir(TEST_RESULTS_DIR)
62+
63+
PRINT_MODEL_SUMMARY = True
64+
SAVE_MODEL = True
65+
PLOT_MODEL = True
66+
SAVE_GENERATED_IMAGES = True
67+
SHUFFLE = True
68+
VIDEO_LENGTH = 32
69+
IMG_SIZE = (128, 128, 3)
70+
ATTN_COEFF = 0
71+
KL_COEFF = 0
72+
RAM_DECIMATE = False
73+
74+
# -------------------------------------------------
75+
# Network configuration:
76+
print ("Loading network/training configuration.")
77+
print ("Config file: " + str(__name__))
78+
79+
BATCH_SIZE = 10
80+
NB_EPOCHS_AUTOENCODER = 40
81+
82+
OPTIM_A = Adam(lr=0.0001, beta_1=0.5)
83+
# OPTIM_A = SGD(lr=0.000001, momentum=0.5, nesterov=True)
84+
# OPTIM_A = rmsprop(lr=0.00001)
85+
86+
lr_schedule = [10, 20, 30] # epoch_step
87+
88+
def schedule(epoch_idx):
89+
if (epoch_idx + 1) < lr_schedule[0]:
90+
return 0.0001
91+
elif (epoch_idx + 1) < lr_schedule[1]:
92+
return 0.0001 # lr_decay_ratio = 10
93+
elif (epoch_idx + 1) < lr_schedule[2]:
94+
return 0.0001
95+
return 0.0001
96+
97+

0 commit comments

Comments
 (0)