Skip to content

Commit 17f0f4d

Browse files
committed
Change
1 parent 326b38a commit 17f0f4d

3 files changed

Lines changed: 188 additions & 68 deletions

File tree

code/autoencoder_model/scripts/config_sigc.py

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -84,36 +84,17 @@
8484
RAM_DECIMATE = True
8585
RETRAIN_CLASSIFIER = True
8686
CLASS_TARGET_INDEX = 8
87-
ROT_MAX = 10
88-
SFT_H_MAX = 0.05
89-
SFT_V_MAX = 0.05
87+
ROT_MAX = 5
88+
SFT_H_MAX = 0.02
89+
SFT_V_MAX = 0.02
9090
ZOOM_MAX = 0.2
9191
BRIGHT_RANGE_L = 0.5
9292
BRIGHT_RANGE_H = 1.5
9393

9494
ped_actions = ['slow down', 'standing', 'walking', 'speed up', 'nod', 'unknown',
9595
'clear path', 'handwave', 'crossing', 'looking', 'no ped']
9696

97-
simple_ped_set = ['standing' ,'crossing', 'no ped']
98-
99-
100-
101-
102-
103-
driver_actions = ['moving slow', 'slowing down', 'standing', 'speeding up', 'moving fast']
104-
simple_driver_set = ['slow down', 'stop', 'speed up']
105-
106-
joint_action_set = ['moving slow', 'slowing down', 'standing', 'speeding up', 'moving fast',
107-
'slow down', 'standing', 'moving fast', 'speed up', 'look', 'nod', 'unknown',
108-
'moving slow', 'flasher signal', 'looking' , 'handwave', 'clear path',
109-
'stopped', 'slowing down', 'crossing', 'speeding up']
110-
111-
formatted_joint_action_set = ['car moving slow', 'car slowing down', 'car standing', 'car speeding up', 'car moving fast',
112-
'ped slow down', 'ped standing', 'ped moving fast', 'ped speed up', 'ped look',
113-
'ped nod', 'ped unknown', 'ped moving slow', 'ped flasher signal', 'ped looking' ,
114-
'ped handwave', 'ped clear path', 'ped stopped', 'ped slowing down', 'ped crossing',
115-
'ped speeding up']
116-
97+
simple_ped_set = ['standing', 'crossing', 'no ped']
11798

11899
# -------------------------------------------------
119100
# Network configuration:
@@ -144,9 +125,9 @@
144125

145126
def schedule(epoch_idx):
    """Return the learning rate to use for the given epoch.

    The epoch number (``epoch_idx + 1``; the index is presumably zero-based,
    as supplied by a Keras learning-rate scheduler callback -- confirm against
    the caller) is compared with the three boundaries stored in
    ``lr_schedule``, a sequence defined outside this block. The rate drops by
    a factor of 10 at each of the first two boundaries.

    :param epoch_idx: index of the epoch about to be trained.
    :return: the learning rate as a float.
    """
    epoch_number = epoch_idx + 1

    if epoch_number < lr_schedule[0]:
        return 0.0001
    if epoch_number < lr_schedule[1]:
        return 0.00001  # lr_decay_ratio = 10
    # The third boundary currently yields the same rate as the final
    # fall-through; it is still evaluated so that lr_schedule keeps its
    # three-entry contract.
    if epoch_number < lr_schedule[2]:
        return 0.000001
    return 0.000001

code/autoencoder_model/scripts/sigmoid_classifier.py

Lines changed: 179 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,37 @@ def pretrained_c3d():
116116
print (c3d.summary())
117117

118118
inputs = Input(shape=(16, 128, 208, 3))
119-
resized = TimeDistributed(Lambda(lambda image: tf.image.resize_images(image, (112, 112))))(inputs)
120119

121-
c3d_out = c3d(resized)
120+
# lstm_1 = ConvLSTM2D(filters=256,
121+
# kernel_size=(3, 3),
122+
# strides=(1, 1),
123+
# padding='same',
124+
# return_sequences=True,
125+
# recurrent_dropout=0.5)(conv_10)
126+
# lstm_1 = TimeDistributed(BatchNormalization())(lstm_1)
127+
# lstm_1 = TimeDistributed(LeakyReLU(alpha=0.2))(lstm_1)
128+
#
129+
# lstm_2 = ConvLSTM2D(filters=256,
130+
# kernel_size=(3, 3),
131+
# strides=(1, 1),
132+
# padding='same',
133+
# return_sequences=True,
134+
# recurrent_dropout=0.5)(lstm_1)
135+
# lstm_2 = TimeDistributed(BatchNormalization())(lstm_2)
136+
# lstm_2 = TimeDistributed(LeakyReLU(alpha=0.2))(lstm_2)
137+
#
138+
# lstm_3 = ConvLSTM2D(filters=256,
139+
# kernel_size=(3, 3),
140+
# strides=(1, 1),
141+
# padding='same',
142+
# return_sequences=False,
143+
# recurrent_dropout=0.5)(lstm_2)
144+
# lstm_3 = BatchNormalization()(lstm_3)
145+
# lstm_3 = LeakyReLU(alpha=0.2)(lstm_3)
146+
#
147+
# resized = TimeDistributed(Lambda(lambda image: tf.image.resize_images(image, (112, 112))))(inputs)
148+
149+
c3d_out = c3d(inputs)
122150

123151
dense = Dense(units=1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(c3d_out)
124152
x = BatchNormalization()(dense)
@@ -139,6 +167,78 @@ def pretrained_c3d():
139167
return model
140168

141169

170+
def c3d_scratch():
    """Build an untrained C3D-style 3D-convolutional classifier.

    The network takes inputs of shape (16, 128, 208, 3) -- presumably
    (frames, height, width, channels); confirm against the data generator --
    and ends in a sigmoid layer with one unit per entry of ``simple_ped_set``.
    No weights are loaded here; the model is returned uncompiled.

    :return: a Keras ``Sequential`` model.
    """

    def conv(filters, name, **extra):
        # Every convolution in the stack shares kernel, activation and padding.
        return Conv3D(filters=filters,
                      kernel_size=(3, 3, 3),
                      activation='relu',
                      padding='same',
                      name=name,
                      **extra)

    def pool(name, temporal=2):
        # Pool window and stride are always equal; only the first pooling
        # layer leaves the temporal axis untouched (temporal=1).
        window = (temporal, 2, 2)
        return MaxPooling3D(pool_size=window, strides=window,
                            padding='valid', name=name)

    model = Sequential()

    stack = (
        # 1st layer group
        conv(64, 'conv1', input_shape=(16, 128, 208, 3)),
        pool('pool1', temporal=1),
        # 2nd layer group
        conv(64, 'conv2'),
        pool('pool2'),
        # 3rd layer group
        conv(128, 'conv3a'),
        conv(256, 'conv3b'),
        pool('pool3'),
        # 4th layer group
        conv(512, 'conv4a'),
        conv(512, 'conv4b'),
        pool('pool4'),
        # 5th layer group
        conv(512, 'conv5a'),
        conv(512, 'conv5b'),
        # Pad one element at the end of each of the last two spatial axes
        # before the final pooling step.
        ZeroPadding3D(padding=((0, 0), (0, 1), (0, 1)), name='zeropad5'),
        pool('pool5'),
        Flatten(),
        # FC layers group
        Dense(512, activation='relu', name='fc6'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='relu', name='fc7'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(len(simple_ped_set), activation='sigmoid', name='fc8'),
    )
    for layer in stack:
        model.add(layer)

    return model
240+
241+
142242
def set_trainability(model, trainable):
143243
model.trainable = trainable
144244
for layer in model.layers:
@@ -420,37 +520,35 @@ def get_action_classes(action_labels):
420520
if len(a_clean) == 0:
421521
a_clean = ['no ped']
422522

423-
ped_actions_per_frame = list(set(a_clean))
523+
ped_actions_per_frame = list(set([a.lower() for a in a_clean]))
424524
simple_ped_actions_per_frame = []
425525
encoded_ped_action = np.zeros(shape=(len(simple_ped_set)), dtype=np.float32)
426526
for action in ped_actions_per_frame:
427527
# Get ped action number and map it to simple set
428-
if action not in ped_actions:
528+
if action.lower() not in ped_actions:
429529
print ("Unknown action in labels. Exiting.")
430530
print (action)
431531
exit(0)
432532
if action.lower() == 'standing':
433533
ped_action = simple_ped_set.index('standing')
434534
simple_ped_actions_per_frame.append(ped_action)
435535
if action.lower() == 'crossing':
436-
ped_action = simple_ped_set.index('standing')
536+
ped_action = simple_ped_set.index('crossing')
437537
simple_ped_actions_per_frame.append(ped_action)
438538
if action.lower() == 'no ped':
439-
ped_action = simple_ped_set.index('standing')
539+
ped_action = simple_ped_set.index('no ped')
440540
simple_ped_actions_per_frame.append(ped_action)
441541

442-
ped_action = ped_actions.index(action)
443-
if ((ped_action == ped_actions.index('nod')) or
444-
(ped_action == ped_actions.index('looking')) or
445-
(ped_action == ped_actions.index('nod')) or
446-
(ped_action == ped_actions.index('handwave'))):
447-
continue
448-
else:
449-
ped_action = map_to_simple(ped_action)
450-
simple_ped_actions_per_frame.append(ped_action)
542+
# ped_action = ped_actions.index(action)
543+
# if ((ped_action == ped_actions.index('nod')) or
544+
# (ped_action == ped_actions.index('looking')) or
545+
# (ped_action == ped_actions.index('nod')) or
546+
# (ped_action == ped_actions.index('handwave'))):
547+
# continue
548+
# else:
549+
# ped_action = map_to_simple(ped_action)
550+
# simple_ped_actions_per_frame.append(ped_action)
451551

452-
simple_ped_actions_per_frame = set(simple_ped_actions_per_frame)
453-
print (a_clean)
454552
# if 5 in simple_ped_action_per_frame:
455553
# action = 5
456554
# if 6 in simple_ped_action_per_frame:
@@ -459,24 +557,25 @@ def get_action_classes(action_labels):
459557
# action = 1
460558
# if 4 in simple_ped_action_per_frame:
461559
# action = 4
462-
# if 0 in simple_ped_action_per_frame:
463-
# action = 0
464-
# if 2 in simple_ped_action_per_frame:
465-
# action = 2
466-
# if 3 in simple_ped_action_per_frame:
467-
# action = 3
560+
561+
# if 2 in simple_ped_actions_per_frame:
562+
# act = 2
563+
# if 0 in simple_ped_actions_per_frame:
564+
# act = 0
565+
# if 1 in simple_ped_actions_per_frame:
566+
# act = 1
468567
#
469-
# encoded_ped_action = to_categorical(action, len(simple_ped_set))
470-
# count[action] = count[action] + 1
568+
# encoded_ped_action = to_categorical(act, len(simple_ped_set))
569+
# count[act] = count[act] + 1
471570

472571
for action in simple_ped_actions_per_frame:
473572
count[action] = count[action] + 1
474573
# Add all unique categorical one-hot vectors
475574
encoded_ped_action = encoded_ped_action + to_categorical(action, len(simple_ped_set))
476575

477-
if (sum(encoded_ped_action) == 0):
478-
print(simple_ped_actions_per_frame)
479-
print(a_clean)
576+
# if (sum(encoded_ped_action) == 0):
577+
# print (ped_actions_per_frame)
578+
# print (encoded_ped_action)
480579

481580
# if (sum(encoded_ped_action) > 1):
482581
# print (simple_ped_action_per_frame)
@@ -485,15 +584,18 @@ def get_action_classes(action_labels):
485584

486585
ped_action_classes = np.asarray(ped_action_classes)
487586
ped_action_classes = np.reshape(ped_action_classes, newshape=(ped_action_classes.shape[0:2]))
488-
exit(0)
489587
return ped_action_classes, count
490588

491589

492590
def remove_zero_classes(videos_list, simple_ped_actions_per_frame, target_index=None):
    """Drop the rows of ``videos_list`` whose target frame has an all-zero label.

    For every row, the entry in column ``target_index`` is used as an index
    into ``simple_ped_actions_per_frame``; if the label vector found there
    sums to zero, the row is removed.

    :param videos_list: 2-D numpy array; each row's ``target_index`` column
        must hold a valid index into ``simple_ped_actions_per_frame``.
    :param simple_ped_actions_per_frame: indexable collection of per-frame
        label vectors (presumably multi-hot encodings -- confirm against
        ``get_action_classes``).
    :param target_index: column of ``videos_list`` to inspect. Defaults to the
        module-level ``CLASS_TARGET_INDEX`` when omitted, which is the
        original behaviour.
    :return: ``videos_list`` itself when nothing is removed, otherwise a new
        array without the offending rows.
    """
    if target_index is None:
        target_index = CLASS_TARGET_INDEX

    zero_rows = [
        row_idx
        for row_idx, row in enumerate(videos_list)
        if sum(simple_ped_actions_per_frame[row[target_index]]) == 0
    ]
    if not zero_rows:
        return videos_list

    # Delete all offending rows in a single call: np.delete copies the whole
    # array each time, so deleting row by row costs one full copy per row.
    return np.delete(videos_list, np.asarray(zero_rows, dtype=np.intp), axis=0)
499601

@@ -709,7 +811,7 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
709811
ped_action_classes, ped_class_count = get_action_classes(action_labels=action_labels)
710812
print("Training Stats: " + str(ped_class_count))
711813

712-
videos_list = remove_zero_classes(videos_list, ped_action_classes)
814+
# videos_list = remove_zero_classes(videos_list, ped_action_classes)
713815
classwise_videos_list, count = get_classwise_data(videos_list, ped_action_classes)
714816
videos_list = prob_subsample(classwise_videos_list, count)
715817

@@ -726,12 +828,14 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
726828
# Load test action annotations
727829
test_action_labels = hkl.load(os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl'))
728830
test_ped_action_classes, test_ped_class_count = get_action_classes(test_action_labels)
831+
test_videos_list = remove_zero_classes(test_videos_list, test_ped_action_classes)
729832
print("Test Stats: " + str(test_ped_class_count))
730833

731834
# Build the Spatio-temporal Autoencoder
732835
print ("Creating models.")
733836
# Build stacked classifier
734837
classifier = pretrained_c3d()
838+
# classifier = c3d_scratch()
735839
classifier.compile(loss="binary_crossentropy",
736840
optimizer=OPTIM_C,
737841
# metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
@@ -742,9 +846,9 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
742846
n_videos = videos_list.shape[0]
743847
n_test_videos = test_videos_list.shape[0]
744848
NB_ITERATIONS = int(n_videos/BATCH_SIZE)
745-
# NB_ITERATIONS = 1
849+
# NB_ITERATIONS = 5
746850
NB_TEST_ITERATIONS = int(n_test_videos/BATCH_SIZE)
747-
# NB_TEST_ITERATIONS = 1
851+
# NB_TEST_ITERATIONS = 5
748852

749853
# Setup TensorBoard Callback
750854
TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False)
@@ -807,10 +911,26 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
807911
for j in range(int(VIDEO_LENGTH )):
808912
class_num_past = np.argmax(y_orig_classes[k, j])
809913
class_num_y = np.argmax(ped_pred_class[k])
810-
cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_past],
914+
# label_true = simple_ped_set[class_num_past]
915+
# label_pred = simple_ped_set[class_num_y]
916+
917+
label_true = str(y_orig_classes[k, j])
918+
label_pred = str([round(float(i), 2) for i in ped_pred_class[k]])
919+
920+
# if (y_orig_classes[k, j] > 0.5):
921+
# label_true = "crossing"
922+
# else:
923+
# label_true = "not crossing"
924+
#
925+
# if (ped_pred_class[k] > 0.5):
926+
# label_pred = "crossing"
927+
# else:
928+
# label_pred = "not crossing"
929+
930+
cv2.putText(pred_seq, 'truth: ' + label_true,
811931
(2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1,
812932
cv2.LINE_AA)
813-
cv2.putText(pred_seq, simple_ped_set[class_num_y],
933+
cv2.putText(pred_seq, label_pred,
814934
(2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
815935
cv2.LINE_AA)
816936

@@ -849,19 +969,38 @@ def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
849969
for j in range(int(VIDEO_LENGTH)):
850970
class_num_past = np.argmax(y_orig_classes[k, j])
851971
class_num_y = np.argmax(test_ped_pred_class[k])
852-
cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_past],
972+
# label_true = simple_ped_set[class_num_past]
973+
# label_pred = simple_ped_set[class_num_y]
974+
label_true = str(y_orig_classes[k, j])
975+
label_pred = str([round(float(i), 2) for i in ped_pred_class[k]])
976+
977+
#
978+
# if (y_orig_classes[k, j] > 0.5):
979+
# label_true = "crossing"
980+
# else:
981+
# label_true = "not crossing"
982+
#
983+
# if (test_ped_pred_class[k] > 0.5):
984+
# label_pred = "crossing"
985+
# else:
986+
# label_pred = "not crossing"
987+
988+
cv2.putText(pred_seq, 'truth: ' + label_true,
853989
(2 + j * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1,
854990
cv2.LINE_AA)
855-
cv2.putText(pred_seq, simple_ped_set[class_num_y],
991+
cv2.putText(pred_seq, label_pred,
856992
(2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
857993
cv2.LINE_AA)
858994

859995
cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_test_pred.png"), pred_seq)
860996

861-
# then after each epoch/iteration
997+
# then after each epoch
862998
avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
863999
avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0)
8641000

1001+
print (np.asarray(y_train_true))
1002+
print (np.asarray(y_train_pred))
1003+
8651004
train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(np.asarray(y_train_true),
8661005
np.asarray(y_train_pred),
8671006
avg=None)

0 commit comments

Comments
 (0)