@@ -116,9 +116,37 @@ def pretrained_c3d():
116116 print (c3d .summary ())
117117
118118 inputs = Input (shape = (16 , 128 , 208 , 3 ))
119- resized = TimeDistributed (Lambda (lambda image : tf .image .resize_images (image , (112 , 112 ))))(inputs )
120119
121- c3d_out = c3d (resized )
120+ # lstm_1 = ConvLSTM2D(filters=256,
121+ # kernel_size=(3, 3),
122+ # strides=(1, 1),
123+ # padding='same',
124+ # return_sequences=True,
125+ # recurrent_dropout=0.5)(conv_10)
126+ # lstm_1 = TimeDistributed(BatchNormalization())(lstm_1)
127+ # lstm_1 = TimeDistributed(LeakyReLU(alpha=0.2))(lstm_1)
128+ #
129+ # lstm_2 = ConvLSTM2D(filters=256,
130+ # kernel_size=(3, 3),
131+ # strides=(1, 1),
132+ # padding='same',
133+ # return_sequences=True,
134+ # recurrent_dropout=0.5)(lstm_1)
135+ # lstm_2 = TimeDistributed(BatchNormalization())(lstm_2)
136+ # lstm_2 = TimeDistributed(LeakyReLU(alpha=0.2))(lstm_2)
137+ #
138+ # lstm_3 = ConvLSTM2D(filters=256,
139+ # kernel_size=(3, 3),
140+ # strides=(1, 1),
141+ # padding='same',
142+ # return_sequences=False,
143+ # recurrent_dropout=0.5)(lstm_2)
144+ # lstm_3 = BatchNormalization()(lstm_3)
145+ # lstm_3 = LeakyReLU(alpha=0.2)(lstm_3)
146+ #
147+ # resized = TimeDistributed(Lambda(lambda image: tf.image.resize_images(image, (112, 112))))(inputs)
148+
149+ c3d_out = c3d (inputs )
122150
123151 dense = Dense (units = 1024 , activation = 'relu' , kernel_regularizer = regularizers .l2 (0.01 ))(c3d_out )
124152 x = BatchNormalization ()(dense )
@@ -139,6 +167,78 @@ def pretrained_c3d():
139167 return model
140168
141169
def c3d_scratch():
    """Build an untrained C3D-style 3D-convolutional classifier.

    The network takes inputs of shape ``(16, 128, 208, 3)`` and stacks five
    convolution groups (3x3x3 kernels, ReLU, 'same' padding), each followed
    by max pooling, then three fully connected layers. The last layer has
    one sigmoid unit per entry of the module-level ``simple_ped_set``.

    Returns:
        An uncompiled Keras ``Sequential`` model; the caller is responsible
        for calling ``compile`` on it.
    """

    def conv(filters, name, **extra):
        # Every convolution in this network shares kernel size, activation
        # and padding; only the width and the layer name vary.
        return Conv3D(filters=filters,
                      kernel_size=(3, 3, 3),
                      activation='relu',
                      padding='same',
                      name=name,
                      **extra)

    def pool(name, window=(2, 2, 2)):
        # Pool size and stride are always equal here (non-overlapping pools).
        return MaxPooling3D(pool_size=window, strides=window,
                            padding='valid', name=name)

    model = Sequential()

    # Layers are instantiated in the same order they are stacked, so any
    # auto-generated layer names stay stable.
    stack = [
        # 1st layer group: pool1 leaves the first (16-long) axis untouched.
        conv(64, 'conv1', input_shape=(16, 128, 208, 3)),
        pool('pool1', window=(1, 2, 2)),
        # 2nd layer group
        conv(64, 'conv2'),
        pool('pool2'),
        # 3rd layer group
        conv(128, 'conv3a'),
        conv(256, 'conv3b'),
        pool('pool3'),
        # 4th layer group
        conv(512, 'conv4a'),
        conv(512, 'conv4b'),
        pool('pool4'),
        # 5th layer group: one element of zero padding is appended to the
        # end of the last two spatial axes before the final pooling step.
        conv(512, 'conv5a'),
        conv(512, 'conv5b'),
        ZeroPadding3D(padding=((0, 0), (0, 1), (0, 1)), name='zeropad5'),
        pool('pool5'),
        Flatten(),
        # FC layers group
        Dense(512, activation='relu', name='fc6'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='relu', name='fc7'),
        BatchNormalization(),
        Dropout(.5),
        Dense(len(simple_ped_set), activation='sigmoid', name='fc8'),
    ]
    for layer in stack:
        model.add(layer)

    return model
240+
241+
142242def set_trainability (model , trainable ):
143243 model .trainable = trainable
144244 for layer in model .layers :
@@ -420,37 +520,35 @@ def get_action_classes(action_labels):
420520 if len (a_clean ) == 0 :
421521 a_clean = ['no ped' ]
422522
423- ped_actions_per_frame = list (set (a_clean ))
523+ ped_actions_per_frame = list (set ([ a . lower () for a in a_clean ] ))
424524 simple_ped_actions_per_frame = []
425525 encoded_ped_action = np .zeros (shape = (len (simple_ped_set )), dtype = np .float32 )
426526 for action in ped_actions_per_frame :
427527 # Get ped action number and map it to simple set
428- if action not in ped_actions :
528+ if action . lower () not in ped_actions :
429529 print ("Unknown action in labels. Exiting." )
430530 print (action )
431531 exit (0 )
432532 if action .lower () == 'standing' :
433533 ped_action = simple_ped_set .index ('standing' )
434534 simple_ped_actions_per_frame .append (ped_action )
435535 if action .lower () == 'crossing' :
436- ped_action = simple_ped_set .index ('standing ' )
536+ ped_action = simple_ped_set .index ('crossing ' )
437537 simple_ped_actions_per_frame .append (ped_action )
438538 if action .lower () == 'no ped' :
439- ped_action = simple_ped_set .index ('standing ' )
539+ ped_action = simple_ped_set .index ('no ped ' )
440540 simple_ped_actions_per_frame .append (ped_action )
441541
442- ped_action = ped_actions .index (action )
443- if ((ped_action == ped_actions .index ('nod' )) or
444- (ped_action == ped_actions .index ('looking' )) or
445- (ped_action == ped_actions .index ('nod' )) or
446- (ped_action == ped_actions .index ('handwave' ))):
447- continue
448- else :
449- ped_action = map_to_simple (ped_action )
450- simple_ped_actions_per_frame .append (ped_action )
542+ # ped_action = ped_actions.index(action)
543+ # if ((ped_action == ped_actions.index('nod')) or
544+ # (ped_action == ped_actions.index('looking')) or
545+ # (ped_action == ped_actions.index('nod')) or
546+ # (ped_action == ped_actions.index('handwave'))):
547+ # continue
548+ # else:
549+ # ped_action = map_to_simple(ped_action)
550+ # simple_ped_actions_per_frame.append(ped_action)
451551
452- simple_ped_actions_per_frame = set (simple_ped_actions_per_frame )
453- print (a_clean )
454552 # if 5 in simple_ped_action_per_frame:
455553 # action = 5
456554 # if 6 in simple_ped_action_per_frame:
@@ -459,24 +557,25 @@ def get_action_classes(action_labels):
459557 # action = 1
460558 # if 4 in simple_ped_action_per_frame:
461559 # action = 4
462- # if 0 in simple_ped_action_per_frame:
463- # action = 0
464- # if 2 in simple_ped_action_per_frame:
465- # action = 2
466- # if 3 in simple_ped_action_per_frame:
467- # action = 3
560+
561+ # if 2 in simple_ped_actions_per_frame:
562+ # act = 2
563+ # if 0 in simple_ped_actions_per_frame:
564+ # act = 0
565+ # if 1 in simple_ped_actions_per_frame:
566+ # act = 1
468567 #
469- # encoded_ped_action = to_categorical(action , len(simple_ped_set))
470- # count[action ] = count[action ] + 1
568+ # encoded_ped_action = to_categorical(act , len(simple_ped_set))
569+ # count[act ] = count[act ] + 1
471570
472571 for action in simple_ped_actions_per_frame :
473572 count [action ] = count [action ] + 1
474573 # Add all unique categorical one-hot vectors
475574 encoded_ped_action = encoded_ped_action + to_categorical (action , len (simple_ped_set ))
476575
477- if (sum (encoded_ped_action ) == 0 ):
478- print ( simple_ped_actions_per_frame )
479- print ( a_clean )
576+ # if (sum(encoded_ped_action) == 0):
577+ # print (ped_actions_per_frame )
578+ # print (encoded_ped_action )
480579
481580 # if (sum(encoded_ped_action) > 1):
482581 # print (simple_ped_action_per_frame)
@@ -485,15 +584,18 @@ def get_action_classes(action_labels):
485584
486585 ped_action_classes = np .asarray (ped_action_classes )
487586 ped_action_classes = np .reshape (ped_action_classes , newshape = (ped_action_classes .shape [0 :2 ]))
488- exit (0 )
489587 return ped_action_classes , count
490588
491589
def remove_zero_classes(videos_list, simple_ped_actions_per_frame):
    """Drop rows of ``videos_list`` whose target label vector sums to zero.

    For every row ``i`` the entry ``videos_list[i, CLASS_TARGET_INDEX]`` is
    used as an index into ``simple_ped_actions_per_frame``; if the values
    found there sum to 0, the row is removed.

    Args:
        videos_list: 2-D array indexed as ``videos_list[i, CLASS_TARGET_INDEX]``.
        simple_ped_actions_per_frame: indexable collection of per-frame
            label vectors (presumably the encoded classes returned by
            ``get_action_classes`` -- confirm against the caller).

    Returns:
        ``videos_list`` with the offending rows deleted along axis 0. When
        no row qualifies, the input object is returned unchanged.
    """
    empty_rows = [
        row
        for row in range(len(videos_list))
        if sum(simple_ped_actions_per_frame[videos_list[row, CLASS_TARGET_INDEX]]) == 0
    ]

    if not empty_rows:
        return videos_list

    # A single np.delete call removes all rows at once; deleting them one
    # by one would copy the whole array for every removed row.
    return np.delete(videos_list, empty_rows, axis=0)
499601
@@ -709,7 +811,7 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
709811 ped_action_classes , ped_class_count = get_action_classes (action_labels = action_labels )
710812 print ("Training Stats: " + str (ped_class_count ))
711813
712- videos_list = remove_zero_classes (videos_list , ped_action_classes )
814+ # videos_list = remove_zero_classes(videos_list, ped_action_classes)
713815 classwise_videos_list , count = get_classwise_data (videos_list , ped_action_classes )
714816 videos_list = prob_subsample (classwise_videos_list , count )
715817
@@ -726,12 +828,14 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
726828 # Load test action annotations
727829 test_action_labels = hkl .load (os .path .join (TEST_DATA_DIR , 'annotations_test_208.hkl' ))
728830 test_ped_action_classes , test_ped_class_count = get_action_classes (test_action_labels )
831+ test_videos_list = remove_zero_classes (test_videos_list , test_ped_action_classes )
729832 print ("Test Stats: " + str (test_ped_class_count ))
730833
731834 # Build the Spatio-temporal Autoencoder
732835 print ("Creating models." )
733836 # Build stacked classifier
734837 classifier = pretrained_c3d ()
838+ # classifier = c3d_scratch()
735839 classifier .compile (loss = "binary_crossentropy" ,
736840 optimizer = OPTIM_C ,
737841 # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
@@ -742,9 +846,9 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
742846 n_videos = videos_list .shape [0 ]
743847 n_test_videos = test_videos_list .shape [0 ]
744848 NB_ITERATIONS = int (n_videos / BATCH_SIZE )
745- # NB_ITERATIONS = 1
849+ # NB_ITERATIONS = 5
746850 NB_TEST_ITERATIONS = int (n_test_videos / BATCH_SIZE )
747- # NB_TEST_ITERATIONS = 1
851+ # NB_TEST_ITERATIONS = 5
748852
749853 # Setup TensorBoard Callback
750854 TC_cla = tb_callback .TensorBoard (log_dir = TF_LOG_CLA_DIR , histogram_freq = 0 , write_graph = False , write_images = False )
@@ -807,10 +911,26 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
807911 for j in range (int (VIDEO_LENGTH )):
808912 class_num_past = np .argmax (y_orig_classes [k , j ])
809913 class_num_y = np .argmax (ped_pred_class [k ])
810- cv2 .putText (pred_seq , 'truth: ' + simple_ped_set [class_num_past ],
914+ # label_true = simple_ped_set[class_num_past]
915+ # label_pred = simple_ped_set[class_num_y]
916+
917+ label_true = str (y_orig_classes [k , j ])
918+ label_pred = str ([round (float (i ), 2 ) for i in ped_pred_class [k ]])
919+
920+ # if (y_orig_classes[k, j] > 0.5):
921+ # label_true = "crossing"
922+ # else:
923+ # label_true = "not crossing"
924+ #
925+ # if (ped_pred_class[k] > 0.5):
926+ # label_pred = "crossing"
927+ # else:
928+ # label_pred = "not crossing"
929+
930+ cv2 .putText (pred_seq , 'truth: ' + label_true ,
811931 (2 + j * (208 ), 94 + k * 128 ), font , 0.5 , (255 , 255 , 255 ), 1 ,
812932 cv2 .LINE_AA )
813- cv2 .putText (pred_seq , simple_ped_set [ class_num_y ] ,
933+ cv2 .putText (pred_seq , label_pred ,
814934 (2 + j * (208 ), 114 + k * 128 ), font , 0.5 , (255 , 255 , 255 ), 1 ,
815935 cv2 .LINE_AA )
816936
@@ -849,19 +969,38 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
849969 for j in range (int (VIDEO_LENGTH )):
850970 class_num_past = np .argmax (y_orig_classes [k , j ])
851971 class_num_y = np .argmax (test_ped_pred_class [k ])
852- cv2 .putText (pred_seq , 'truth: ' + simple_ped_set [class_num_past ],
972+ # label_true = simple_ped_set[class_num_past]
973+ # label_pred = simple_ped_set[class_num_y]
974+ label_true = str (y_orig_classes [k , j ])
975+ label_pred = str ([round (float (i ), 2 ) for i in ped_pred_class [k ]])
976+
977+ #
978+ # if (y_orig_classes[k, j] > 0.5):
979+ # label_true = "crossing"
980+ # else:
981+ # label_true = "not crossing"
982+ #
983+ # if (test_ped_pred_class[k] > 0.5):
984+ # label_pred = "crossing"
985+ # else:
986+ # label_pred = "not crossing"
987+
988+ cv2 .putText (pred_seq , 'truth: ' + label_true ,
853989 (2 + j * (208 ), 94 + k * 128 ), font , 0.5 , (255 , 255 , 255 ), 1 ,
854990 cv2 .LINE_AA )
855- cv2 .putText (pred_seq , simple_ped_set [ class_num_y ] ,
991+ cv2 .putText (pred_seq , label_pred ,
856992 (2 + j * (208 ), 114 + k * 128 ), font , 0.5 , (255 , 255 , 255 ), 1 ,
857993 cv2 .LINE_AA )
858994
859995 cv2 .imwrite (os .path .join (CLA_GEN_IMAGES_DIR , str (epoch ) + "_" + str (index ) + "_cla_test_pred.png" ), pred_seq )
860996
861- # then after each epoch/iteration
997+ # then after each epoch
862998 avg_c_loss = np .mean (np .asarray (c_loss , dtype = np .float32 ), axis = 0 )
863999 avg_test_c_loss = np .mean (np .asarray (test_c_loss , dtype = np .float32 ), axis = 0 )
8641000
1001+ print (np .asarray (y_train_true ))
1002+ print (np .asarray (y_train_pred ))
1003+
8651004 train_prec , train_rec , train_fbeta , train_support = get_sklearn_metrics (np .asarray (y_train_true ),
8661005 np .asarray (y_train_pred ),
8671006 avg = None )
0 commit comments