-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepare_data.py
More file actions
105 lines (85 loc) · 3.57 KB
/
prepare_data.py
File metadata and controls
105 lines (85 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
from extractor import Extractor
import cv2
from videolibs import crop_center_square
"""
Trick: convert a frame from BGR to RGB by reordering the channel axis:
image = image[:, :, [2, 1, 0]]
"""
def load_video(path, max_frames, resize):
    """Read frames from a video file into a single NumPy array.

    Each frame that is read is passed through ``crop_center_square``,
    resized with ``cv2.resize`` to ``resize``, and has its last axis
    reordered with ``[2, 1, 0]`` (BGR -> RGB, per the note in this file).

    Args:
        path: Location of the video, handed to ``cv2.VideoCapture``.
        max_frames: Stop as soon as this many frames have been collected.
            The count is compared with ``==`` after each append, so a value
            it can never equal (such as 0) means the whole video is read.
        resize: Target size forwarded unchanged to ``cv2.resize``.

    Returns:
        ``np.array`` built from the list of processed frames (an empty
        array when no frame could be read).
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        # Prime the loop with the first read; every later read happens only
        # after the frame limit has been checked.
        grabbed, image = capture.read()
        while grabbed:
            image = cv2.resize(crop_center_square(image), resize)
            # Swap the first and third channels.
            collected.append(image[:, :, [2, 1, 0]])
            if len(collected) == max_frames:
                break
            grabbed, image = capture.read()
    finally:
        # Always give the capture handle back, even if processing raised.
        capture.release()
    return np.array(collected)
"""
Finally, we put all the pieces together to create the data-processing utility.
Note: the Ellipsis object (three dots, `...`) selects all remaining dimensions
when indexing or slicing a multidimensional NumPy array.
.numpy() converts a Tensor to a NumPy array.
"""
def prepare_all_videos(df, root_dir, label_processor, img_size, max_seq_length, nb_features):
    """Build per-video feature sequences, padding masks and encoded labels.

    For every row of ``df`` the video is loaded with ``load_video``, each of
    its frames (up to ``max_seq_length``) is passed through
    ``Extractor.extract``, and the result is stored in a zero-padded array.

    Args:
        df: Table with a ``"video_name"`` column (paths relative to
            ``root_dir``) and a ``"tag"`` column (labels).
        root_dir: Directory joined in front of every video name.
        label_processor: Callable applied to the labels reshaped to
            ``(num_samples, 1)``; its result must expose ``.numpy()``
            (presumably a Keras lookup layer -- confirm against the caller).
        img_size: Side length used both for frame resizing and for the
            extractor's ``image_shape=(img_size, img_size, 3)``.
        max_seq_length: Number of time steps kept per video.
        nb_features: Length of the feature vector stored per frame; assumed
            to match what ``Extractor.extract`` returns -- TODO confirm.

    Returns:
        ``((frame_features, frame_masks), labels)`` where
        ``frame_features`` is float32 of shape
        ``(num_samples, max_seq_length, nb_features)``, ``frame_masks`` is
        bool of shape ``(num_samples, max_seq_length)`` (True = real frame,
        False = padding), and ``labels`` is the ``label_processor`` output
        converted with ``.numpy()``.
    """
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values
    # Add a trailing axis so the processor receives one label per row.
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what gets fed to the sequence
    # model. Both start zero-filled, so untouched time steps are padding.
    frame_masks = np.zeros(shape=(num_samples, max_seq_length), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, max_seq_length, nb_features), dtype="float32"
    )

    # Build the feature extractor once and reuse it for every video.
    model = Extractor(image_shape=(img_size, img_size, 3))

    for idx, path in enumerate(video_paths):
        frames = load_video(
            path=os.path.join(root_dir, path),
            max_frames=max_seq_length,
            resize=(img_size, img_size),
        )
        # An unreadable video yields an empty array, i.e. zero usable frames.
        length = min(max_seq_length, frames.shape[0])

        # Write straight into this video's row. The previous implementation
        # filled temporary (1, ...) arrays and copied them over with a bare
        # `.squeeze()`, which also collapsed a size-1 feature axis and made
        # the assignment fail when nb_features == 1.
        for j in range(length):
            # `frames[None, j, :]` keeps a leading batch axis of size 1.
            frame_features[idx, j, :] = model.extract(frames[None, j, :])
        frame_masks[idx, :length] = True  # True = not masked, False = masked

    # NOTE: nothing is persisted here; callers that want to cache the result
    # can np.save the three arrays and read them back with load_all_features.
    return (frame_features, frame_masks), labels
def load_all_features(features_file, mask_file, lables_file):
    """Load previously saved feature, mask and label arrays from disk.

    Each file is opened in binary mode and read with ``np.load``. The files
    are closed explicitly once read, instead of leaving the handles to the
    garbage collector.

    Args:
        features_file: Path of the file holding the frame features.
        mask_file: Path of the file holding the frame masks.
        lables_file: Path of the file holding the labels (parameter name
            kept as-is for backward compatibility with keyword callers).

    Returns:
        ``((frame_features, frame_masks), labels)`` -- the same layout that
        ``prepare_all_videos`` returns.
    """
    with open(features_file, mode='rb') as handle:
        frame_features = np.load(handle)
    with open(mask_file, mode='rb') as handle:
        frame_masks = np.load(handle)
    with open(lables_file, mode='rb') as handle:
        labels = np.load(handle)
    return (frame_features, frame_masks), labels