5151import os
5252
5353
54+ # def encoder_model():
55+ # model = Sequential()
56+ #
57+ # # 10x128x128
58+ # model.add(Conv3D(filters=128,
59+ # strides=(1, 4, 4),
60+ # kernel_size=(3, 11, 11),
61+ # padding='same',
62+ # input_shape=(int(VIDEO_LENGTH/2), 128, 128, 3)))
63+ # model.add(TimeDistributed(BatchNormalization()))
64+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
65+ # model.add(TimeDistributed(Dropout(0.5)))
66+ #
67+ # # 10x32x32
68+ # model.add(Conv3D(filters=64,
69+ # strides=(1, 2, 2),
70+ # kernel_size=(3, 5, 5),
71+ # padding='same'))
72+ # model.add(TimeDistributed(BatchNormalization()))
73+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
74+ # model.add(TimeDistributed(Dropout(0.5)))
75+ #
76+ # # 10x16x16
77+ # model.add(Conv3D(filters=64,
78+ # strides=(1, 1, 1),
79+ # kernel_size=(3, 3, 3),
80+ # padding='same'))
81+ # model.add(TimeDistributed(BatchNormalization()))
82+ # model.add(TimeDistributed(LeakyReLU(alpha=0.2)))
83+ # model.add(TimeDistributed(Dropout(0.5)))
84+ #
85+ # return model
86+
87+
def encoder_model():
    """Build the convolutional frame encoder.

    Encodes a half-length clip of 128x128 RGB frames into a per-frame
    16x16 feature map with 128 channels, using three TimeDistributed
    Conv2D stages, each followed by BatchNorm -> LeakyReLU -> Dropout.

    Returns:
        Model: Keras functional model mapping
            (VIDEO_LENGTH/2, 128, 128, 3) -> (VIDEO_LENGTH/2, 16, 16, 128).
    """
    # NOTE(review): VIDEO_LENGTH is a module-level constant defined
    # elsewhere in this file; assumed to be an even frame count.
    inputs = Input(shape=(int(VIDEO_LENGTH / 2), 128, 128, 3))

    # Stage 1: 128x128 -> 32x32 ('same' padding, stride 4), 64 filters.
    conv_1 = TimeDistributed(Conv2D(filters=64,
                                    kernel_size=(11, 11),
                                    strides=(4, 4),
                                    padding='same'))(inputs)
    x = TimeDistributed(BatchNormalization())(conv_1)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    # Dropout 0.4 here vs. 0.5 in later stages — intentional per the
    # original; lighter regularization on the first stage.
    x = TimeDistributed(Dropout(0.4))(x)

    # Stage 2: 32x32 -> 16x16 ('same' padding, stride 2), 128 filters.
    conv_2 = TimeDistributed(Conv2D(filters=128,
                                    kernel_size=(5, 5),
                                    strides=(2, 2),
                                    padding='same'))(x)
    x = TimeDistributed(BatchNormalization())(conv_2)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_2 = TimeDistributed(Dropout(0.5))(x)

    # Stage 3: 16x16 -> 16x16 ('same' padding, stride 1), 128 filters.
    conv_3 = TimeDistributed(Conv2D(filters=128,
                                    kernel_size=(3, 3),
                                    strides=(1, 1),
                                    padding='same'))(out_2)
    x = TimeDistributed(BatchNormalization())(conv_3)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    in_rep = TimeDistributed(Dropout(0.5))(x)

    model = Model(inputs=inputs, outputs=in_rep)

    return model
86131
87132
88133def decoder_model ():
89- inputs = Input (shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 64 ))
134+ inputs = Input (shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 128 ))
90135
91136 # 10x16x16
92- convlstm_1 = ConvLSTM2D (filters = 64 ,
137+ convlstm_1 = ConvLSTM2D (filters = 128 ,
93138 kernel_size = (3 , 3 ),
94139 strides = (1 , 1 ),
95140 padding = 'same' ,
@@ -99,27 +144,27 @@ def decoder_model():
99144 x = TimeDistributed (LeakyReLU (alpha = 0.2 ))(x )
100145 out_1 = TimeDistributed (Dropout (0.5 ))(x )
101146
102- flat_1 = TimeDistributed (Flatten ())(out_1 )
103- aclstm_1 = GRU (units = 16 * 16 ,
104- recurrent_dropout = 0.2 ,
105- return_sequences = True )(flat_1 )
106- x = TimeDistributed (BatchNormalization ())(aclstm_1 )
107- dense_1 = TimeDistributed (Dense (units = 16 * 16 , activation = 'softmax' ))(x )
108- a1_reshape = Reshape (target_shape = (int (VIDEO_LENGTH / 2 ), 16 , 16 , 1 ))(dense_1 )
109- a1 = AttnLossLayer ()(a1_reshape )
110- dot_1 = multiply ([out_1 , a1 ])
147+ # flat_1 = TimeDistributed(Flatten())(out_1)
148+ # aclstm_1 = GRU(units=16 * 16,
149+ # recurrent_dropout=0.2,
150+ # return_sequences=True)(flat_1)
151+ # x = TimeDistributed(BatchNormalization())(aclstm_1)
152+ # dense_1 = TimeDistributed(Dense(units=16 * 16, activation='softmax'))(x)
153+ # a1_reshape = Reshape(target_shape=(int(VIDEO_LENGTH/2), 16, 16, 1))(dense_1)
154+ # a1 = AttnLossLayer()(a1_reshape)
155+ # dot_1 = multiply([out_1, a1])
111156
112157 convlstm_2 = ConvLSTM2D (filters = 64 ,
113158 kernel_size = (3 , 3 ),
114159 strides = (1 , 1 ),
115160 padding = 'same' ,
116161 return_sequences = True ,
117- recurrent_dropout = 0.2 )(dot_1 )
162+ recurrent_dropout = 0.2 )(out_1 )
118163 x = TimeDistributed (BatchNormalization ())(convlstm_2 )
119164 h_2 = TimeDistributed (LeakyReLU (alpha = 0.2 ))(x )
120165 out_2 = UpSampling3D (size = (1 , 2 , 2 ))(h_2 )
121166
122- skip_upsamp_1 = UpSampling3D (size = (1 , 2 , 2 ))(dot_1 )
167+ skip_upsamp_1 = UpSampling3D (size = (1 , 2 , 2 ))(out_1 )
123168 res_1 = concatenate ([out_2 , skip_upsamp_1 ])
124169
125170 # 10x32x32
@@ -330,14 +375,14 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
330375 encoder = encoder_model ()
331376 decoder = decoder_model ()
332377 autoencoder = autoencoder_model (encoder , decoder )
333- autoencoder .compile (loss = "mean_absolute_error " , optimizer = OPTIM_A )
378+ autoencoder .compile (loss = "mean_squared_error " , optimizer = OPTIM_A )
334379
335380 # Build attention layer output
336- intermediate_decoder = Model (inputs = decoder .layers [0 ].input , outputs = decoder .layers [10 ].output )
337- mask_gen_1 = Sequential ()
338- mask_gen_1 .add (encoder )
339- mask_gen_1 .add (intermediate_decoder )
340- mask_gen_1 .compile (loss = 'mean_absolute_error ' , optimizer = OPTIM_A )
381+ # intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
382+ # mask_gen_1 = Sequential()
383+ # mask_gen_1.add(encoder)
384+ # mask_gen_1.add(intermediate_decoder)
385+ # mask_gen_1.compile(loss='mean_squared_error ', optimizer=OPTIM_A)
341386
342387 run_utilities (encoder , decoder , autoencoder , ENC_WEIGHTS , DEC_WEIGHTS )
343388
@@ -423,9 +468,9 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
423468 decoder .save_weights (os .path .join (CHECKPOINT_DIR , 'decoder_epoch_' + str (epoch ) + '.h5' ), True )
424469
425470 # Save predicted attention mask per epoch
426- predicted_attn = mask_gen_1 .predict (X_train , verbose = 0 )
427- a_pred = np .reshape (predicted_attn , newshape = (BATCH_SIZE , int (VIDEO_LENGTH / 2 ), 16 , 16 , 1 ))
428- np .save (os .path .join (ATTN_WEIGHTS_DIR , 'attention_weights_gen1_' + str (epoch ) + '.npy' ), a_pred )
471+ # predicted_attn = mask_gen_1.predict(X_train, verbose=0)
472+ # a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, int(VIDEO_LENGTH/2), 16, 16, 1))
473+ # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)
429474
430475 # End TensorBoard Callback
431476 # TC.on_train_end('_')
0 commit comments