From 0214cafd9cd00cc8676fe0063c35cab5391f09e9 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 2 Jul 2020 14:25:01 -0400 Subject: [PATCH 01/25] Initial commit of collect_images --- scripts/collect_images.py | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 scripts/collect_images.py diff --git a/scripts/collect_images.py b/scripts/collect_images.py new file mode 100644 index 00000000..cb31b4a0 --- /dev/null +++ b/scripts/collect_images.py @@ -0,0 +1,44 @@ +import datetime +import json + +# from Embedded2.src.jetson.db ... import ... + +""" +Put this file one folder up from the stored images. +Eg. /local/b/embedvis/imgs contains images, /local/b/embedvis/collect_images.py + +Collect images of non-goggle detections from the database. +Upload images to Google Drive. +Email end-user with the Drive link. +""" + + +def query_db(): + """Get image filenames. Probably just a SQL query.""" + pass + + +def upload_images(imgs): + """ + For each filename returned by query_db, upload image + and its relevant metadata (eg. face coords) to Drive. + @param imgs: [str, str, ...] + """ + + current_date = datetime.datetime.now().strftime("%m-%d-%Y") + + # TODO how should metadata be transferred? JSON file? + with open(current_date + '.json', 'w') as meta_file: + for i in imgs: + # 1. append image metadata + # 2. upload image + image_metadata = [] + json.dump(image_metadata, meta_file) + pass + + # upload metadata json file to Drive + + +if __name__ == "__main__": + # call the methods + pass From bf3390368f324ea499702ce6115212fd250363fc Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Tue, 7 Jul 2020 09:40:21 -0400 Subject: [PATCH 02/25] Initial collect_images commit. 
Deleted login.json from Git --- scripts/collect_images.py | 68 +++++++++++++++++++++++++--------- src/jetson/db/db_connection.py | 4 +- src/jetson/db/login.json | 11 ------ 3 files changed, 54 insertions(+), 29 deletions(-) delete mode 100644 src/jetson/db/login.json diff --git a/scripts/collect_images.py b/scripts/collect_images.py index cb31b4a0..c5eeb7c9 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -2,6 +2,7 @@ import json # from Embedded2.src.jetson.db ... import ... +# from wherever import email method """ Put this file one folder up from the stored images. @@ -13,32 +14,65 @@ """ -def query_db(): - """Get image filenames. Probably just a SQL query.""" - pass +# TODO rename method +def get_metadata(): + """ + Get image filenames and other relevant metadata from the database. + @return: A list of dictionaries with the metadata for each image TODO describe the metadata + + Query: + SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, + i.image_name, i.init_vector from bbox AS b, image as i where b.image_name == i.image_name and b.goggles == False + """ + # make sql connection + # execute query + + # for everything returned: + # combine everything into a dictionary + # append dictionary to list + # return list of dictionaries + # TODO just json.dump entire list? + return [] -def upload_images(imgs): +# TODO don't need this method if json.dump ing all dictionaries at once +def organize_metadata(metadata): """ - For each filename returned by query_db, upload image - and its relevant metadata (eg. face coords) to Drive. - @param imgs: [str, str, ...] + Create metadata file needed for decrypting images. + @param metadata: the list of dictionaries returned by get_metadata """ - current_date = datetime.datetime.now().strftime("%m-%d-%Y") + with open(meta_file, 'w') as m: + for x in metadata: + # append image metadata - # TODO how should metadata be transferred? JSON file? 
- with open(current_date + '.json', 'w') as meta_file: - for i in imgs: - # 1. append image metadata - # 2. upload image + # use metadata param image_metadata = [] - json.dump(image_metadata, meta_file) - pass + json.dump(image_metadata, m) - # upload metadata json file to Drive +def upload_files(metadata): + """ + For each filename returned by get_metadata, upload image + to Drive. Upload the day's metadata file. + @param metadata: the list of dictionaries returned by get_metadata + """ + + for image in metadata: + # upload image using rclone + pass + + # upload metadata json file to Drive + # subprocess rclone copy meta_file [name of Drive in rclone]: + + +# TODO call Seoyoung's method to email if __name__ == "__main__": + current_date = datetime.datetime.now().strftime("%m-%d-%Y") + meta_file = current_date + '.json' + # call the methods - pass + metadata = get_metadata() + organize_metadata(metadata) + upload_files(metadata) diff --git a/src/jetson/db/db_connection.py b/src/jetson/db/db_connection.py index 24e6565b..72e95892 100644 --- a/src/jetson/db/db_connection.py +++ b/src/jetson/db/db_connection.py @@ -1,5 +1,7 @@ +import datetime + import mysql.connector -from .config import get_config +from config import get_config from contextlib import contextmanager, closing diff --git a/src/jetson/db/login.json b/src/jetson/db/login.json deleted file mode 100644 index 65b553e4..00000000 --- a/src/jetson/db/login.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "SQL_HOST" : "mydb.itap.purdue.edu", - "USER_NAME" : "", - "PASSWORD" : "", - "KEYSPACE" : "pawar4", - - "FTPHOST" : "128.46.75.117", - "FTPUSER" : "", - "FTPPASS" : "" - -} \ No newline at end of file From bafd011334f104d4906b6b114d49fc28ff02254e Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Fri, 10 Jul 2020 16:11:38 -0400 Subject: [PATCH 03/25] Add prepare_images. Refactoring. 
--- scripts/collect_images.py | 75 +++++++++++++++++++++---------------- scripts/extract_features.py | 2 +- scripts/prepare_images.py | 50 +++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 34 deletions(-) create mode 100644 scripts/prepare_images.py diff --git a/scripts/collect_images.py b/scripts/collect_images.py index c5eeb7c9..5c0b6639 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -1,18 +1,23 @@ +import argparse import datetime import json -# from Embedded2.src.jetson.db ... import ... +from Embedded2.src.jetson.db.db_connection import sql_cursor + # from wherever import email method """ Put this file one folder up from the stored images. -Eg. /local/b/embedvis/imgs contains images, /local/b/embedvis/collect_images.py +Eg. on the HELPS machine: if /local/b/embedvis/imgs contains images, +this file's path should be /local/b/embedvis/collect_images.py Collect images of non-goggle detections from the database. -Upload images to Google Drive. +Upload images and metadata to Google Drive. Email end-user with the Drive link. """ +METADATA_FILE = 'metadata.json' + # TODO rename method def get_metadata(): @@ -22,36 +27,37 @@ def get_metadata(): Query: SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, - i.image_name, i.init_vector from bbox AS b, image as i where b.image_name == i.image_name and b.goggles == False + i.image_name, i.init_vector from bbox AS b, image as i where b.image_name=i.image_name and b.goggles=False """ - # make sql connection - # execute query - # for everything returned: - # combine everything into a dictionary - # append dictionary to list + metadata = [] - # return list of dictionaries - # TODO just json.dump entire list? - return [] - -# TODO don't need this method if json.dump ing all dictionaries at once -def organize_metadata(metadata): - """ - Create metadata file needed for decrypting images. 
- @param metadata: the list of dictionaries returned by get_metadata - """ - - with open(meta_file, 'w') as m: - for x in metadata: - # append image metadata - - # use metadata param - image_metadata = [] - json.dump(image_metadata, m) - - -def upload_files(metadata): + # make sql connection + # execute query + with sql_cursor() as cursor: + try: + cursor.execute('USE goggles') + cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, ' + 'i.image_name, i.init_vector from bbox AS b, image as i where ' + 'b.image_name=i.image_name and b.goggles=False') + + for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor: + metadata.append({'image_name': image_name, + 'x_min': x_min, + 'y_min': y_min, + 'x_max': x_max, + 'y_max': y_max, + 'init_vector': init_vector + }) + except Exception as e: + print(e) + + with open(METADATA_FILE, 'w') as meta_file: + json.dump(metadata, meta_file) + return metadata + + +def upload_files(metadata, dir): """ For each filename returned by get_metadata, upload image to Drive. Upload the day's metadata file. 
@@ -60,19 +66,22 @@ def upload_files(metadata): for image in metadata: # upload image using rclone + # subprocess rclone copy os.path.join(dir, image['image_name']) [Drive name] pass # upload metadata json file to Drive - # subprocess rclone copy meta_file [name of Drive in rclone]: + # subprocess rclone copy METADATA_FILE [Drive name]: # TODO call Seoyoung's method to email if __name__ == "__main__": + parser = argparse.ArgumentParser('Collect images.') + parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') + current_date = datetime.datetime.now().strftime("%m-%d-%Y") meta_file = current_date + '.json' # call the methods metadata = get_metadata() - organize_metadata(metadata) - upload_files(metadata) + upload_files(metadata, args.directory) diff --git a/scripts/extract_features.py b/scripts/extract_features.py index c9346a03..9aaa7a50 100644 --- a/scripts/extract_features.py +++ b/scripts/extract_features.py @@ -83,7 +83,7 @@ def hook(model, input, output): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run classification on a dataset') parser.add_argument('--directory', '-d', type=str, required=True, help='(Relative) Directory location of dataset') - parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda") + parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") args = parser.parse_args() diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py new file mode 100644 index 00000000..4d78b22e --- /dev/null +++ b/scripts/prepare_images.py @@ -0,0 +1,50 @@ +import argparse +import json +import os + +""" +After having run collect_images, decrypt the associated images +(if necessary) and combine images together into a short video (using metadata). 
+""" + +METADATA_FILE = 'metadata.json' + + +def decrypt_images(dir): + # ask for decryption key + with open(os.path.join(dir, METADATA_FILE)) as meta_file: + metadata = json.load(meta_file) + # use face coords to find where to decrypt in video frame + # decrypt + pass + + +def make_videos(dir): + # use a heuristic (such as images within 5 seconds of each other) + # to combine similar images into one video for easier viewing + with open(os.path.join(dir, METADATA_FILE)) as meta_file: + metadata = json.load(meta_file) + # for each image, if within 5 seconds of the previous one, + # concatenate them and make them into a video + pass + + +if __name__ == "__main__": + parser = argparse.ArgumentParser('Combine images into a short video for easier viewing.' + 'Decrypt if needed.') + parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be prepared.') + parser.add_argument('--decrypt', default=False, action='store_true', help='Decrypt faces in the images.') + parser.add_argument('--make_videos', '-m', default=False, action='store_true', + help='Combine frames from the same time period into a single video.') + + args = parser.parse_args() + + if not args.decrypt and not args.make_videos: + print('No options selected. Please select at least one of --decrypt or --make_videos.') + exit(0) + + if args.decrypt: + decrypt_images(args.directory) + + if args.make_videos(): + make_videos(args.directory) From 3872d31c267f2cd3d82e628a7dfda8ce6c57d4a7 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Sun, 12 Jul 2020 20:43:25 -0400 Subject: [PATCH 04/25] Minor fixes to work with database querying. 
--- scripts/collect_images.py | 14 ++++++++------ src/jetson/db/db_connection.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 5c0b6639..04ba2467 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -2,7 +2,7 @@ import datetime import json -from Embedded2.src.jetson.db.db_connection import sql_cursor +from src.jetson.db.db_connection import sql_cursor # from wherever import email method @@ -43,10 +43,10 @@ def get_metadata(): for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor: metadata.append({'image_name': image_name, - 'x_min': x_min, - 'y_min': y_min, - 'x_max': x_max, - 'y_max': y_max, + 'x_min': float(x_min), # JSON cannot serialize Decimals. + 'y_min': float(y_min), # If there is a better way to do this, someone let me know. + 'x_max': float(x_max), + 'y_max': float(y_max), 'init_vector': init_vector }) except Exception as e: @@ -57,11 +57,13 @@ def get_metadata(): return metadata +# TODO make folder with date to contain images and metadata file def upload_files(metadata, dir): """ For each filename returned by get_metadata, upload image to Drive. Upload the day's metadata file. 
@param metadata: the list of dictionaries returned by get_metadata + @param dir: the folder containing the images to upload """ for image in metadata: @@ -78,9 +80,9 @@ def upload_files(metadata, dir): if __name__ == "__main__": parser = argparse.ArgumentParser('Collect images.') parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') + args = parser.parse_args() current_date = datetime.datetime.now().strftime("%m-%d-%Y") - meta_file = current_date + '.json' # call the methods metadata = get_metadata() diff --git a/src/jetson/db/db_connection.py b/src/jetson/db/db_connection.py index 72e95892..6ed54f85 100644 --- a/src/jetson/db/db_connection.py +++ b/src/jetson/db/db_connection.py @@ -1,9 +1,9 @@ import datetime import mysql.connector -from config import get_config from contextlib import contextmanager, closing +from src.jetson.db.config import get_config class Table: def __init__(self): From 55e6f8f14c5ee845fe22ea8c52795dcdb4eec05f Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Sun, 12 Jul 2020 21:31:09 -0400 Subject: [PATCH 05/25] Initial commit. 
Renaming variables and understanding evaluator.py --- scripts/annotator.py | 199 ++++++++++++++++++++++ scripts/evaluator.py | 384 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 583 insertions(+) create mode 100644 scripts/annotator.py create mode 100644 scripts/evaluator.py diff --git a/scripts/annotator.py b/scripts/annotator.py new file mode 100644 index 00000000..6a85df7f --- /dev/null +++ b/scripts/annotator.py @@ -0,0 +1,199 @@ +from __future__ import print_function +import os +import argparse +import torch +import torch.backends.cudnn as cudnn +import numpy as np +from data import cfg_mnet, cfg_re50 +from layers.functions.prior_box import PriorBox +from utils.nms.py_cpu_nms import py_cpu_nms +import cv2 +from models.retinaface import RetinaFace +from utils.box_utils import decode, decode_landm +import time +import json + +parser = argparse.ArgumentParser(description='Retinaface') + +parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth', + type=str, help='Trained state_dict file path to open') +parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50') +parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') +parser.add_argument('--confidence_threshold', default=0.5, type=float, help='confidence_threshold') +parser.add_argument('--top_k', default=1000, type=int, help='top_k') +parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold') +parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k') +parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results') +parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, help='directory to store detected labels') +parser.add_argument('--input_directory', default='test_videos/', type=str, help='directory where test videos are located') + + +args = parser.parse_args() 
+CLASSES = ['Glasses/', 'Goggles/', 'Neither/'] +CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', 'Pose_45_degrees_up/', + 'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/'] + +def check_keys(model, pretrained_state_dict): + ckpt_keys = set(pretrained_state_dict.keys()) + model_keys = set(model.state_dict().keys()) + used_pretrained_keys = model_keys & ckpt_keys + unused_pretrained_keys = ckpt_keys - model_keys + missing_keys = model_keys - ckpt_keys + print('Missing keys:{}'.format(len(missing_keys))) + print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) + print('Used keys:{}'.format(len(used_pretrained_keys))) + assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' + return True + + +def remove_prefix(state_dict, prefix): + ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' + print('remove prefix \'{}\''.format(prefix)) + f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x + return {f(key): value for key, value in state_dict.items()} + + +def load_model(model, pretrained_path, load_to_cpu): + print('Loading pretrained model from {}'.format(pretrained_path)) + if load_to_cpu: + pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) + else: + device = torch.cuda.current_device() + pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) + if "state_dict" in pretrained_dict.keys(): + pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') + else: + pretrained_dict = remove_prefix(pretrained_dict, 'module.') + check_keys(model, pretrained_dict) + model.load_state_dict(pretrained_dict, strict=False) + return model + + +def create_directories(root_directory, create_class_condition_directories=False): + if not os.path.isdir(root_directory): + os.mkdir(root_directory) + + + 
+def get_storage_location(output_directory, video_filename, input_directory): + save_dir = output_directory + video_filename.strip(input_directory).strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_' + #create_directories(save_dir) + + return save_dir + + + +def get_videos(input_directory): + filenames = [] + for dirName, subdirList, fileList in os.walk(input_directory): + for filename in fileList: + ext = '.' + filename.split('.')[-1] + if ext in ['.mov','.mp4','.avi', '.MOV']: + filenames.append(dirName + '/' + filename) + + return filenames + + +if __name__ == '__main__': + create_directories(args.output_directory, create_class_condition_directories=True) + + torch.set_grad_enabled(False) + cfg = None + if args.network == "mobile0.25": + cfg = cfg_mnet + elif args.network == "resnet50": + cfg = cfg_re50 + # net and model + net = RetinaFace(cfg=cfg, phase = 'test') + net = load_model(net, args.trained_model, args.cpu) + net.eval() + print('Finished loading model!') + print(net) + cudnn.benchmark = True + device = torch.device("cpu" if args.cpu else "cuda") + net = net.to(device) + + resize = 0.4 + + video_files = get_videos(args.input_directory) + + for video in video_files: + cap = cv2.VideoCapture(video) + storage_location = get_storage_location(args.output_directory, video, args.input_directory) + create_directories(storage_location) + print ("Video: ", video) + + + # testing begin + if cap.isOpened(): + frame_number = 0 + while True: + ret, img_raw = cap.read() + if not ret: + break + img = np.float32(img_raw) + img = cv2.resize(img, (int(img.shape[1]*resize), int(img.shape[0]*resize))) + + im_height, im_width, _ = img.shape + scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) + img -= (104, 117, 123) + img = img.transpose(2, 0, 1) + img = torch.from_numpy(img).unsqueeze(0) + img = img.to(device) + scale = scale.to(device) + + tic = time.time() + loc, conf, landms = net(img) # forward pass + #print('net 
forward time: {:.4f}'.format(time.time() - tic)) + + priorbox = PriorBox(cfg, image_size=(im_height, im_width)) + priors = priorbox.forward() + priors = priors.to(device) + prior_data = priors.data + boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) + boxes = boxes * scale / resize + boxes = boxes.cpu().numpy() + scores = conf.squeeze(0).data.cpu().numpy()[:, 1] + landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) + scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2], img.shape[3], img.shape[2], + img.shape[3], img.shape[2]]) + scale1 = scale1.to(device) + landms = landms * scale1 / resize + landms = landms.cpu().numpy() + + # ignore low scores + inds = np.where(scores > args.confidence_threshold)[0] + boxes = boxes[inds] + landms = landms[inds] + scores = scores[inds] + + # keep top-K before NMS + order = scores.argsort()[::-1][:args.top_k] + boxes = boxes[order] + landms = landms[order] + scores = scores[order] + + # do NMS + dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) + keep = py_cpu_nms(dets, args.nms_threshold) + # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) + dets = dets[keep, :] + landms = landms[keep] + + # keep top-K faster NMS + dets = dets[:args.keep_top_k, :] + landms = landms[:args.keep_top_k, :] + + #dets = np.concatenate((dets, landms), axis=1) + output_file = storage_location + str(frame_number) + '.txt' + f = open(output_file, "w") + for detection in dets: + for coord in detection: + f.write(str(coord) + " ") + f.write("\n") + f.close() + + frame_number += 1 + + exit(0) diff --git a/scripts/evaluator.py b/scripts/evaluator.py new file mode 100644 index 00000000..96965698 --- /dev/null +++ b/scripts/evaluator.py @@ -0,0 +1,384 @@ +import os +import cv2 +import argparse +import torch +import time +import warnings +import json +import numpy as np + +from main import FaceDetector, Classifier + +VIDEO_EXT = 
['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] + +""" +Use this script with annotator.py . Videos to be evaluated should be in this structure: + +- [Top-level dir] +--- Goggles +---------- [goggle videos] +--- Glasses +---------- [glasses videos] +--- Neither +---------- [neither videos] +""" + + +class Evaluator(): + def __init__(self, cuda, detector, classifier, input_directory, annotation_path): + """ + Evaluates face detection and goggle classification performance. + Goggle Classification accuracy is given by average class accuracy and individual + video accuracy. + Face detection accuracy is given by precision and recall values. + + Args: + cuda: A bool value that specifies if cuda shall be used + detector: A string path to a .pth weights file for a face detection model + classifier: A string path to a .pth weights file for a goggle classification model + input_directory: Directory containing test videos to run Evaluator on + annotation_path: Directory containing annotation files + """ + + if cuda and torch.cuda.is_available(): + torch.set_default_tensor_type('torch.cuda.FloatTensor') + self.device = torch.device('cuda:0') + else: + torch.set_default_tensor_type('torch.FloatTensor') + self.device = torch.device('cpu') + + if os.path.exists("eval/det_results.txt"): + os.remove("eval/det_results.txt") + + self.detector = FaceDetector(detector=detector, cuda=cuda and torch.cuda.is_available(), + set_default_dev=True) + self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device) + self.video_filenames = self.get_video_files(input_directory) + self.results = {'Goggles': + {'average_class_accuracy': 0.0, + 'number_of_videos': 0, + 'individual_video_results': {} + }, + 'Glasses': + {'average_class_accuracy': 0.0, + 'number_of_videos': 0, + 'individual_video_results': {} + }, + 'Neither': + {'average_class_accuracy': 0.0, + 'number_of_videos': 0, + 'individual_video_results': {} + } + } + self.class_label = '' + self.condition = '' + 
self.cap = '' + self.video = '' + self.evaluate(annotation_path) + + def evaluate(self, annotation_path: str): + """ + Evaluates every video file in the input directory containing test videos and + stores results in self.results. + To understand the format of self.results dict, check the constructor + + Args: + annotation_path - Directory containing all the annotations of face detections + """ + total_videos_processed = 0 + for video_file in self.video_filenames: + self.video = video_file + print(f"Processing {self.video} ...") + + self.class_label = self.get_class_label() + self.condition = self.get_condition() + self.cap = cv2.VideoCapture(self.video) + + if self.cap.isOpened(): + classification_result = self.evaluate_classifications() # Also contains boxes + self.record_results(classification_result) + total_videos_processed += 1 + print(f"{self.video} : Done") + + else: + print(f"Unable to open video {self.video}") + continue + self.calculate_average_class_accuracy() + detection_results = self.evaluate_detections(annotation_path, "eval/det_results.txt") + + print(f"\n {total_videos_processed} videos processed!") + + def calculate_average_class_accuracy(self): + """ + Calculates the average class accuracy for each class and stores it in the + self.results dict. 
+ """ + for class_label in self.results: + if self.results[class_label]['number_of_videos'] > 0: + self.results[class_label]['average_class_accuracy'] = self.results[class_label][ + 'average_class_accuracy'] / \ + self.results[class_label]['number_of_videos'] + + def record_results(self, result): + """ + Records all the results in the self.results dict + + Args: + result(List) - contains the classification accuracy and inference time + """ + self.results[self.class_label]['number_of_videos'] += 1 + self.results[self.class_label]['average_class_accuracy'] += result[0] + self.results[self.class_label]['individual_video_results'][self.video] = {} + self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] + self.results[self.class_label]['individual_video_results'][self.video]["inference_time"] = result[1] + self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition + + def record_detections(self, file, detections): + """ + Save detections in a file for evaluation + Args: + file (str): Records detections here + detections (List): contains all the bounding boxes and confidence values + """ + f = open(file, "a+") + for detection in detections: + for element in detection: + f.write(str(element)) + f.write("|") + f.write("\n") + f.close() + + def infer(self): + """ + Performs inference on a video by using the face detection + and goggle classification models + It returns: + 1) inference_dict: the number of inferences for each class. + 2) average_inference_time: a float containing the average inference time for the whole video + """ + bboxes = [] + preds = [] + inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0} + frame_counter = 0 + start_time = time.time() + + while True: + ret, img = self.cap.read() + if not ret: + break + # img = cv2.resize(img, (640, 480)) #Set this to the input shape of image for faster processing. 
(Remember to do the same in annotator) + frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str( + frame_counter) + boxes = self.detector.detect(img) # Also contains confidence + box_no_conf = [] + if len(boxes) != 0: + for box in boxes: + x1 = max(0, box[0]) + y1 = max(0, box[1]) + x2 = min(img.shape[1], box[2]) + y2 = min(img.shape[0], box[3]) + conf = box[4] + face = img[int(y1):int(y2), int(x1):int(x2), :] + label = self.classifier.classifyFace(face) + preds.append(label.item()) + bboxes.append([frame_id, x1, y1, x2, y2, conf]) + + inference_dict["Goggles"] += preds.count(1) + inference_dict["Glasses"] += preds.count(0) + inference_dict["Neither"] += preds.count(2) + + total_time = time.time() - start_time + if frame_counter > 0: + average_inference_time = total_time / frame_counter + else: + average_inference_time = -1 # Empty video file + + # TODO make eval/det_results.txt a global variable DETECTION_FILE + self.record_detections("eval/det_results.txt", bboxes) + return inference_dict, average_inference_time + + def get_class_label(self): + """ + Get class label [Goggles / Glasses / Neither] that the image belongs to + """ + if '/Goggles/' in self.video or '/goggles/' in self.video: + class_label = 'Goggles' + elif '/Glasses/' in self.video or '/glasses/' in self.video: + class_label = 'Glasses' + else: + class_label = 'Neither' + + return class_label + + def get_condition(self): + """ + Get condition [Ideal, low_lighting etc. 
] that the image belongs to + """ + return self.video.split('/')[-2] + + def get_ground_truth_detections(self, directory): + """ + Get ground truth detection labels (from annotation file) + """ + ground_truths = {} + + for file in os.listdir(directory): + f = open(directory + file, "r") + key = file.strip('.txt') + content = f.readlines() + f.close() + + content = [list(map(float, x.strip(' \n').split(' '))) for x in content] + ground_truths[key] = content + + return ground_truths + + def evaluate_classifications(self): + """ + Returns the accuracy (percentage_of_correct_predictions) of the + predictions for a video + """ + inferences, inference_time = self.infer() + if sum(inferences.values()) == 0: + percentage_of_correct_predictions = 0 + else: + percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values()) + + return percentage_of_correct_predictions, inference_time + + def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5): + """ + Calculates the recall and precision of face detection for a video. + TODO explain what that means... seems like overlap of x and y coords? + + @param annotations_dir: directory containing annotation files (created by annotator.py) + @param detection_dir: directory of predicted detections TODO ??? + @param overlap_threshold: greater than threshold counts as correct, less than is incorrect + """ + + ground_truth_detections = self.get_ground_truth_detections(annotations_dir) + with open(detection_dir, 'r') as f: + # TODO verify variable name accurate + predicted_detections = f.readlines() + + total_ground_truths = 0 + for frame_id in ground_truth_detections: + total_ground_truths += len(ground_truth_detections[frame_id]) + + # TODO ugly parsing and such here. Need to debug it. ==1 means...? 
+ if any(predicted_detections) == 1: + splitlines = [x.strip().split('|') for x in predicted_detections] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[5]) for x in splitlines]) + bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + bboxes = bboxes[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + nd = len(image_ids) + true_pos = np.zeros(nd) + false_pos = np.zeros(nd) + + # TODO for frame in frames? + for d in range(nd): + try: + bbox = bboxes[d, :].astype(float) + max_overlap = -np.inf + bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32) + if bbox_ground_truth_detections.size > 0: + # TODO max and min variable names are backwards? + ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0]) + iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1]) + ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2]) + iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3]) + iw = np.maximum(ixmax - ixmin, 0.) + ih = np.maximum(iymax - iymin, 0.) + # TODO debug. inters = intersection? uni = union? Overlaps is actual value? + inters = iw * ih + uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + + (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) * + (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters) + overlaps = inters / uni + max_overlap = np.max(overlaps) + # jmax = np.argmax(overlaps) + + if max_overlap > overlap_threshold: + true_pos[d] = 1. + else: + false_pos[d] = 1. 
+ + except KeyError: + continue + + print("Total ground truths: ", total_ground_truths) + false_pos = np.cumsum(false_pos) + true_pos = np.cumsum(true_pos) + recall = true_pos / float(total_ground_truths) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps) + else: + recall = -1. + precision = -1. + ap = -1. + + print("Precision: ", precision) + print("Recall: ", recall) + + return precision[len(precision)], recall[len(recall)] # final precision, recall + + def get_video_files(self, input_directory: str): + """ + Gets all the video files in the input directory + """ + filenames = [] + for dirName, subdirList, fileList in os.walk(input_directory): + for filename in fileList: + ext = '.' + filename.split('.')[-1] + if ext in VIDEO_EXT: + filenames.append(dirName + '/' + filename) + + return filenames + + def get_evaluator_results(self): + """ + Returns the dict containing all the test results (self.results) + """ + + return self.results + + +def main(): + if not args.input_directory: + raise Exception("Invalid input directory") + evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path) + individual_video_results = evaluator.get_evaluator_results() + + with open(args.output_file, 'w') as json_file: + json.dump(individual_video_results, json_file, indent=4) + + print(f"\n Output saved at {args.output_file}") + + +if __name__ == "__main__": + warnings.filterwarnings("once") + parser = argparse.ArgumentParser(description="Face detection") + parser.add_argument('--detector', '-t', type=str, default='model_weights/blazeface.pth', + help="Path to a trained face detector .pth file") + parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, + help="Path to a trained classifier .pth file") + parser.add_argument('--cuda', '-c', default=False, action='store_true', 
help="Enable CUDA") + parser.add_argument('--output_file', type=str, default='eval/test1.json', + help="Name of evaluation log") + parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") + parser.add_argument('--annotation_path', type=str, required=True, help="Path to annotation files") + # TODO add store_true args for detection, evaluation (to do separately if desired) + + args = parser.parse_args() + + main() + + exit() From 0e2d495329b521961d741882b453c0e312c06aba Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Tue, 14 Jul 2020 09:21:39 -0400 Subject: [PATCH 06/25] Progress --- scripts/annotator.py | 109 ++++++++++++++++++++++++------------------- scripts/evaluator.py | 24 +++++----- src/jetson/main.py | 70 +++++++++++++-------------- 3 files changed, 105 insertions(+), 98 deletions(-) diff --git a/scripts/annotator.py b/scripts/annotator.py index 6a85df7f..6377bbde 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -4,34 +4,26 @@ import torch import torch.backends.cudnn as cudnn import numpy as np -from data import cfg_mnet, cfg_re50 -from layers.functions.prior_box import PriorBox -from utils.nms.py_cpu_nms import py_cpu_nms +from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50 +from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox +from src.jetson.models.utils.box_utils import nms_numpy, decode_landm, decode import cv2 -from models.retinaface import RetinaFace -from utils.box_utils import decode, decode_landm +from src.jetson.models.Retinaface.retinaface import RetinaFace import time import json -parser = argparse.ArgumentParser(description='Retinaface') +""" +Run the face detector model on TestVideos (on the Drive, also args.input_directory). 
+Save bbox detections to SEPARATE text files for evaluation by evaluator.py +""" -parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth', - type=str, help='Trained state_dict file path to open') -parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50') -parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') -parser.add_argument('--confidence_threshold', default=0.5, type=float, help='confidence_threshold') -parser.add_argument('--top_k', default=1000, type=int, help='top_k') -parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold') -parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k') -parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results') -parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, help='directory to store detected labels') -parser.add_argument('--input_directory', default='test_videos/', type=str, help='directory where test videos are located') +# TODO there's gotta be a better way than saving to text files - -args = parser.parse_args() CLASSES = ['Glasses/', 'Goggles/', 'Neither/'] -CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', 'Pose_45_degrees_up/', - 'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/'] +CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', + 'Pose_45_degrees_up/', + 'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/'] + def check_keys(model, pretrained_state_dict): ckpt_keys = set(pretrained_state_dict.keys()) @@ -39,23 +31,23 @@ def check_keys(model, pretrained_state_dict): used_pretrained_keys = model_keys & ckpt_keys unused_pretrained_keys = ckpt_keys - model_keys missing_keys = 
model_keys - ckpt_keys - print('Missing keys:{}'.format(len(missing_keys))) - print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) - print('Used keys:{}'.format(len(used_pretrained_keys))) + print('Missing keys: {}'.format(len(missing_keys))) + print('Unused checkpoint keys: {}'.format(len(unused_pretrained_keys))) + print('Used keys: {}'.format(len(used_pretrained_keys))) assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' return True def remove_prefix(state_dict, prefix): - ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' + """ Old style model is stored with all names of parameters sharing common prefix 'module.' """ print('remove prefix \'{}\''.format(prefix)) f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x return {f(key): value for key, value in state_dict.items()} -def load_model(model, pretrained_path, load_to_cpu): +def load_model(model, pretrained_path, load_to_cuda): print('Loading pretrained model from {}'.format(pretrained_path)) - if load_to_cpu: + if not load_to_cuda: pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) else: device = torch.cuda.current_device() @@ -69,33 +61,53 @@ def load_model(model, pretrained_path, load_to_cpu): return model -def create_directories(root_directory, create_class_condition_directories=False): +def create_directory(root_directory): if not os.path.isdir(root_directory): os.mkdir(root_directory) - def get_storage_location(output_directory, video_filename, input_directory): - save_dir = output_directory + video_filename.strip(input_directory).strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_' - #create_directories(save_dir) + # TODO ugly filename strip + save_dir = os.path.join(output_directory, video_filename.strip(input_directory) + .strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_') return save_dir - def 
get_videos(input_directory): filenames = [] for dirName, subdirList, fileList in os.walk(input_directory): for filename in fileList: ext = '.' + filename.split('.')[-1] - if ext in ['.mov','.mp4','.avi', '.MOV']: + if ext in ['.mov', '.mp4', '.avi', '.MOV']: filenames.append(dirName + '/' + filename) return filenames if __name__ == '__main__': - create_directories(args.output_directory, create_class_condition_directories=True) + parser = argparse.ArgumentParser(description='Retinaface') + + parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth', + type=str, help='Trained face detector state_dict path') + parser.add_argument('--network', default='resnet50', help='Backbone network. mobile0.25 or resnet50') + # TODO make CUDA arg instead + parser.add_argument('--cuda', '-c', action="store_true", default=False, help='Use CUDA') + parser.add_argument('--confidence_threshold', default=0.5, type=float, help='Bounding box IoU required to count as ' + 'correct') + parser.add_argument('--top_k', default=1000, type=int, help='top_k') + parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold') + parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k') + # TODO not currently used + parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results') + parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, + help='directory to store detected labels') + parser.add_argument('--input_directory', default='test_videos/', type=str, + help='directory where test videos are located') + + args = parser.parse_args() + + create_directory(args.output_directory) torch.set_grad_enabled(False) cfg = None @@ -103,14 +115,17 @@ def get_videos(input_directory): cfg = cfg_mnet elif args.network == "resnet50": cfg = cfg_re50 - # net and model - net = RetinaFace(cfg=cfg, phase = 'test') - net = load_model(net, args.trained_model, args.cpu) + + 
# load the network + net = RetinaFace(cfg=cfg, phase='test') + + # load the model weights # TODO rename method load_model + net = load_model(net, args.trained_model, args.cuda) net.eval() print('Finished loading model!') print(net) cudnn.benchmark = True - device = torch.device("cpu" if args.cpu else "cuda") + device = torch.device("cuda:0" if args.cuda else "cpu") net = net.to(device) resize = 0.4 @@ -120,9 +135,8 @@ def get_videos(input_directory): for video in video_files: cap = cv2.VideoCapture(video) storage_location = get_storage_location(args.output_directory, video, args.input_directory) - create_directories(storage_location) - print ("Video: ", video) - + create_directory(storage_location) + print("Video: ", video) # testing begin if cap.isOpened(): @@ -132,7 +146,9 @@ def get_videos(input_directory): if not ret: break img = np.float32(img_raw) - img = cv2.resize(img, (int(img.shape[1]*resize), int(img.shape[0]*resize))) + img = cv2.resize(img, (int(img.shape[1] * resize), int(img.shape[0] * resize))) + + # TODO does this vvv code appear in Retinaface/ ? 
Or possibly in main.py im_height, im_width, _ = img.shape scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) @@ -144,7 +160,7 @@ def get_videos(input_directory): tic = time.time() loc, conf, landms = net(img) # forward pass - #print('net forward time: {:.4f}'.format(time.time() - tic)) + # print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() @@ -176,8 +192,7 @@ def get_videos(input_directory): # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) - keep = py_cpu_nms(dets, args.nms_threshold) - # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) + keep = nms_numpy(dets, args.nms_threshold) dets = dets[keep, :] landms = landms[keep] @@ -185,8 +200,8 @@ def get_videos(input_directory): dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] - #dets = np.concatenate((dets, landms), axis=1) - output_file = storage_location + str(frame_number) + '.txt' + # dets = np.concatenate((dets, landms), axis=1) + output_file = os.path.join(storage_location, 'frame' + str(frame_number) + '.txt') f = open(output_file, "w") for detection in dets: for coord in detection: diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 96965698..08307468 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -7,22 +7,18 @@ import json import numpy as np -from main import FaceDetector, Classifier +from src.jetson.main import FaceDetector, Classifier VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] """ -Use this script with annotator.py . Videos to be evaluated should be in this structure: - -- [Top-level dir] ---- Goggles ----------- [goggle videos] ---- Glasses ----------- [glasses videos] ---- Neither ----------- [neither videos] +Use this script with annotator.py. +Videos to be evaluated should be from the TestVideos folder on the Drive. """ +# TODO - TODO TODO don't do face detection? 
Would have to manually label faces but we're using a +# TODO - SOTA face detection model that could just empirically be observed to work + class Evaluator(): def __init__(self, cuda, detector, classifier, input_directory, annotation_path): @@ -37,7 +33,7 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path) detector: A string path to a .pth weights file for a face detection model classifier: A string path to a .pth weights file for a goggle classification model input_directory: Directory containing test videos to run Evaluator on - annotation_path: Directory containing annotation files + annotation_path: Directory containing annotation files (output by annotator.py) """ if cuda and torch.cuda.is_available(): @@ -249,7 +245,7 @@ def evaluate_classifications(self): def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5): """ Calculates the recall and precision of face detection for a video. - TODO explain what that means... seems like overlap of x and y coords? + TODO explain what that means... seems like overlap of x and y coords? I.e. IoU? @param annotations_dir: directory containing annotation files (created by annotator.py) @param detection_dir: directory of predicted detections TODO ??? @@ -297,6 +293,7 @@ def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold= iw = np.maximum(ixmax - ixmin, 0.) ih = np.maximum(iymax - iymin, 0.) # TODO debug. inters = intersection? uni = union? Overlaps is actual value? 
+ # TODO import IoU from box_utils should work inters = iw * ih uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) * @@ -374,7 +371,8 @@ def main(): parser.add_argument('--output_file', type=str, default='eval/test1.json', help="Name of evaluation log") parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") - parser.add_argument('--annotation_path', type=str, required=True, help="Path to annotation files") + parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation " + "files") # TODO add store_true args for detection, evaluation (to do separately if desired) args = parser.parse_args() diff --git a/src/jetson/main.py b/src/jetson/main.py index 54ceee25..8c77f1dd 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -10,27 +10,28 @@ from torch.autograd import Variable from torchvision import transforms -from models.utils.transform import BaseTransform -from models.utils.box_utils import decode, do_nms, postprocess +from src.jetson.models.utils.transform import BaseTransform +from src.jetson.models.utils.box_utils import decode, do_nms, postprocess import sys import os import inspect -from AES import Encryption as AESEncryptor +from src.jetson.AES import Encryption as AESEncryptor from threading import Thread import multiprocessing from multiprocessing import Process, Queue, Value -from models.Retinaface.layers.functions.prior_box import PriorBox -from models.Retinaface.data import cfg_mnet as cfg -from models.Retinaface.data import cfg_inference as infer_params +from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox +from src.jetson.models.Retinaface.data import cfg_mnet as cfg +from src.jetson.models.Retinaface.data import cfg_inference as infer_params fileCount = Value('i', 0) -encryptRet = Queue() #Shared memory queue to allow child encryption 
process to return to parent +encryptRet = Queue() # Shared memory queue to allow child encryption process to return to parent + class FaceDetector: - def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default_dev=False): + def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False): """ Creates a FaceDetector object Args: @@ -54,7 +55,6 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default elif ('.pth' in detector and 'blazeface' in detector): from models.BlazeFace.blazeface import BlazeFace - self.net = BlazeFace(self.device) self.net.load_weights(detector) self.net.load_anchors("models/BlazeFace/anchors.npy") @@ -66,17 +66,16 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default elif ('.pth' in detector and 'mobile' in detector): from models.Retinaface.retinaface import RetinaFace, load_model - self.net = RetinaFace(cfg=cfg, phase = 'test') + self.net = RetinaFace(cfg=cfg, phase='test') self.net = load_model(self.net, detector, True) self.model_name = 'retinaface' - self.image_shape = infer_params["image_shape"] #(H, W) + self.image_shape = infer_params["image_shape"] # (H, W) self.resize = infer_params["resize"] self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123)) priorbox = PriorBox(cfg, image_size=self.image_shape) priors = priorbox.forward() self.prior_data = priors.data - self.detection_threshold = detection_threshold if cuda and torch.cuda.is_available(): self.device = torch.device("cuda:0") @@ -88,7 +87,6 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default self.net.to(self.device) self.net.eval() - def detect(self, image: np.ndarray): """ @@ -147,8 +145,8 @@ def detect(self, elif (self.model_name == 'retinaface'): img = (self.transformer(image)[0]).transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) - loc, conf, _ = self.net(img) # forward pass: Returns bounding 
box location, confidence and facial landmark locations - + loc, conf, _ = self.net( + img) # forward pass: Returns bounding box location, confidence and facial landmark locations boxes = decode(loc.data.squeeze(0), self.prior_data, cfg['variance']) boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize) @@ -161,9 +159,6 @@ def detect(self, return bboxes - - - class VideoCapturer(object): def __init__(self, src=0): ''' @@ -179,7 +174,6 @@ def __init__(self, src=0): self.t1.daemon = True self.t1.start() - def update(self): '''Get next frame in video stream''' while self.running.value: @@ -196,6 +190,7 @@ def close(self): self.running.value = False self.t1.join() + class Classifier: def __init__(self, classifier): ''' @@ -207,7 +202,7 @@ def __init__(self, classifier): self.classifier = classifier def classifyFace(self, - face: np.ndarray): + face: np.ndarray): ''' This method initializaes the transforms and classifies the face region Args: @@ -243,8 +238,8 @@ def classifyFace(self, return pred def classifyFrame(self, - img: np.ndarray, - boxes: List[Tuple[np.float64]]): + img: np.ndarray, + boxes: List[Tuple[np.float64]]): ''' This method loops through all the bounding boxes in an image, calls classifyFace method to classify face region and finally draws a box around the face. 
@@ -270,9 +265,9 @@ def classifyFrame(self, label.append(int(self.classifyFace(face).data)) - return label + class Encryptor(object): def __init__(self): ''' @@ -281,7 +276,6 @@ def __init__(self): self.encryptor = AESEncryptor() self.key = self.encryptor.key - def encryptFace(self, coordinates: List[Tuple[int]], img: np.ndarray): ''' @@ -298,8 +292,8 @@ def encryptFace(self, coordinates: List[Tuple[int]], return encryptedImg - def encryptFrame(self, img:np.ndarray, - boxes:List[Tuple[np.float64]]): + def encryptFrame(self, img: np.ndarray, + boxes: List[Tuple[np.float64]]): ''' This method takes the face coordinates, encrypts the facial region, writes encrypted image to file filesystem Args: @@ -333,7 +327,7 @@ def writeImg(img, output_dir): global fileCount face_file_name = os.path.join(output_dir, f'{fileCount.value}.jpg') - #TODO: Remove this print statement after db integration + # TODO: Remove this print statement after db integration print("writing ", face_file_name) if args.write_imgs: cv2.imwrite(face_file_name, img) @@ -370,16 +364,16 @@ def drawFrame(boxes, frame, fps): index = 0 for box in boxes: frame = cv2.putText(frame, - 'label: %s' % class_names[label[index]], - (int(box[0]), int(box[1]-40)), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, - (0, 0, 255)) + 'label: %s' % class_names[label[index]], + (int(box[0]), int(box[1] - 40)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, + (0, 0, 255)) frame = cv2.putText(frame, - 'fps: %.3f' % fps, - (int(box[0]), int(box[1]-20)), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, (0, 0, 255)) + 'fps: %.3f' % fps, + (int(box[0]), int(box[1] - 20)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, (0, 0, 255)) index += 1 @@ -409,13 +403,13 @@ def drawFrame(boxes, frame, fps): encryptor = Encryptor() run_face_detection: bool = True - while run_face_detection: #main video detection loop that will iterate until ESC key is entered + while run_face_detection: # main video detection loop that will iterate until ESC key is entered start_time = time.time() frame = 
capturer.get_frame() boxes = detector.detect(frame) - encryptedImg = frame.copy() #copy memory for encrypting image separate from unencrypted image + encryptedImg = frame.copy() # copy memory for encrypting image separate from unencrypted image if len(boxes) != 0: p1 = Process(target=encryptWorker, args=(encryptor, encryptedImg, boxes, args.output_dir, args.write_imgs)) @@ -427,7 +421,7 @@ def drawFrame(boxes, frame, fps): fps = 1 / (time.time() - start_time) drawFrame(boxes, frame, fps) - #remove frame creation and drawing before deployment + # remove frame creation and drawing before deployment p1.join() if cv2.waitKey(1) == 27: From dc6203d0875efc46c43419aa82af51d7b93d7f6d Mon Sep 17 00:00:00 2001 From: ZPBerg <31778364+ZPBerg@users.noreply.github.com> Date: Tue, 14 Jul 2020 09:27:05 -0400 Subject: [PATCH 07/25] Update evaluator branch with master changes (#4) * Added evaluator for detector and classifier * Made necessary changes to main to run evaluator * detector_type as an argument, compare to list of strings * Retinaface works with GPU Co-authored-by: Aditya Chakraborty Co-authored-by: Aditya Chakraborty <31283807+adityachakra16@users.noreply.github.com> --- src/jetson/main.py | 62 +++++++++++++++++----------- src/jetson/models/utils/box_utils.py | 2 +- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/jetson/main.py b/src/jetson/main.py index 8c77f1dd..eda67324 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -4,6 +4,7 @@ from typing import List, Set, Dict, Tuple, Optional import cv2 +from enum import Enum from PIL import Image import numpy as np import torch @@ -28,14 +29,16 @@ fileCount = Value('i', 0) encryptRet = Queue() # Shared memory queue to allow child encryption process to return to parent +DETECTOR_TYPES = ['blazeface', 'retinaface', 'ssd'] class FaceDetector: - def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False): + def __init__(self, detector: str, detector_type: str, 
detection_threshold=0.7, cuda=True, set_default_dev=False): """ Creates a FaceDetector object Args: detector: A string path to a trained pth file for a ssd model trained in face detection + detector_type: A DetectorType describing which face detector is being used detection_threshold: The minimum threshold for a detection to be considered valid cuda: Whether or not to enable CUDA set_default_dev: Whether or not to set the default device for PyTorch @@ -43,17 +46,16 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.device = torch.device("cpu") - if ('.pth' in detector and 'ssd' in detector): - from models.SSD.ssd import build_ssd + if detector_type == 'ssd': + from src.jetson.models.SSD.ssd import build_ssd self.net = build_ssd('test', 300, 2) self.model_name = 'ssd' self.net.load_state_dict(torch.load(detector, map_location=self.device)) self.transformer = BaseTransform(self.net.size, (104, 117, 123)) - - elif ('.pth' in detector and 'blazeface' in detector): - from models.BlazeFace.blazeface import BlazeFace + elif detector_type == 'blazeface': + from src.jetson.models.BlazeFace.blazeface import BlazeFace self.net = BlazeFace(self.device) self.net.load_weights(detector) @@ -63,8 +65,8 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.net.min_suppression_threshold = 0.3 self.transformer = BaseTransform(128, None) - elif ('.pth' in detector and 'mobile' in detector): - from models.Retinaface.retinaface import RetinaFace, load_model + elif detector_type == 'retinaface': + from src.jetson.models.Retinaface.retinaface import RetinaFace, load_model self.net = RetinaFace(cfg=cfg, phase='test') self.net = load_model(self.net, detector, True) @@ -74,7 +76,7 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123)) priorbox = PriorBox(cfg, image_size=self.image_shape) priors = 
priorbox.forward() - self.prior_data = priors.data + self.prior_data = priors.data.to(device) self.detection_threshold = detection_threshold if cuda and torch.cuda.is_available(): @@ -98,7 +100,7 @@ def detect(self, The bounding boxes of the face(s) that were detected formatted (upper left corner(x, y) , lower right corner(x,y)) """ - if (self.model_name == 'ssd'): + if self.model_name == 'ssd': x = torch.from_numpy(self.transformer(image)[0]).permute(2, 0, 1) x = Variable(x.unsqueeze(0)).to(self.device) y = self.net(x) @@ -109,12 +111,13 @@ def detect(self, while j < detections.shape[2] and detections[0, 1, j, 0] > self.detection_threshold: pt = (detections[0, 1, j, 1:] * scale).cpu().numpy() x1, y1, x2, y2 = pt - bboxes.append((x1, y1, x2, y2)) + conf = detections[0, 1, j, 0].item() + bboxes.append((x1, y1, x2, y2, conf)) j += 1 return bboxes - elif (self.model_name == 'blazeface'): + elif self.model_name == 'blazeface': img = self.transformer(image)[0].astype(np.float32) detections = self.net.predict_on_image(img) @@ -130,6 +133,7 @@ def detect(self, xmin = detections[i, 1] * image.shape[1] ymax = detections[i, 2] * image.shape[0] xmax = detections[i, 3] * image.shape[1] + conf = detections[i, 16] img = img / 127.5 - 1.0 @@ -137,14 +141,15 @@ def detect(self, kp_x = detections[i, 4 + k * 2] * img.shape[1] kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0] - bboxes.append((xmin, ymin, xmax, ymax)) + bboxes.append((xmin, ymin, xmax, ymax, conf)) return bboxes - - elif (self.model_name == 'retinaface'): + elif self.model_name == 'retinaface': img = (self.transformer(image)[0]).transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) + img = img.to(device) + loc, conf, _ = self.net( img) # forward pass: Returns bounding box location, confidence and facial landmark locations @@ -152,9 +157,7 @@ def detect(self, boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize) dets = do_nms(boxes, scores, infer_params["nms_thresh"]) 
- bboxes = [] - for det in dets: - bboxes.append(tuple(dets[0][0:4])) + bboxes = [tuple(det[0:5]) for det in dets] return bboxes @@ -192,14 +195,16 @@ def close(self): class Classifier: - def __init__(self, classifier): + def __init__(self, classifier, cuda: bool): ''' Performs classification of facial region into three classes - [Goggles, Glasses, Neither] Args: classifier - Trained classifier model (Currently, mobilenetv2) + cuda - True if Nvidia GPU is used ''' self.fps = 0 self.classifier = classifier + self.device = cuda def classifyFace(self, face: np.ndarray): @@ -228,7 +233,7 @@ def classifyFace(self, ]) transformed_face = transform(pil_face) face_batch = transformed_face.unsqueeze(0) - device = torch.device("cuda:0" if args.cuda and torch.cuda.is_available() else "cpu") + device = torch.device("cuda:0" if self.device and torch.cuda.is_available() else "cpu") with torch.no_grad(): face_batch = face_batch.to(device) labels = classifier(face_batch) @@ -253,7 +258,7 @@ def classifyFrame(self, label = [] for box in boxes: - x1, y1, x2, y2 = [int(b) for b in box] + x1, y1, x2, y2 = [int(b) for b in box[0:4]] # draw boxes within the frame x1 = max(0, x1) y1 = max(0, y1) @@ -301,7 +306,7 @@ def encryptFrame(self, img: np.ndarray, boxes: facial Coordinates ''' for box in boxes: - x1, y1, x2, y2 = [int(b) for b in box] + x1, y1, x2, y2 = [int(b) for b in box[0:4]] # draw boxes within the frame x1 = max(0, x1) y1 = max(0, y1) @@ -383,13 +388,19 @@ def drawFrame(boxes, frame, fps): if __name__ == "__main__": warnings.filterwarnings("once") parser = argparse.ArgumentParser(description="Face detection") - parser.add_argument('--detector', '-t', type=str, required=True, help="Path to a trained ssd .pth file") + parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file") + parser.add_argument('--detector_type', '-t', type=str, required=True, help="Type of face detector. 
One of " + "blazeface, ssd, or retinaface.") parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda") parser.add_argument('--classifier', type=str, help="Path to a trained classifier .pth file") parser.add_argument('--write_imgs', default=False, help='Write images to output_dir') parser.add_argument('--output_dir', default='encrypted_imgs', type=str, help="Where to output encrypted images") args = parser.parse_args() + if args.detector_type not in DETECTOR_TYPES: + print('Please include a valid detector type (\'blazeface\', \'ssd\', or \'retinaface\'') + exit(1) + device = torch.device('cpu') if args.cuda and torch.cuda.is_available(): device = torch.device('cuda:0') @@ -398,8 +409,9 @@ def drawFrame(boxes, frame, fps): g.eval() capturer = VideoCapturer() - detector = FaceDetector(detector=args.detector, cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) - classifier = Classifier(g) + detector = FaceDetector(detector=args.detector, detector_type=args.detector_type, + cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) + classifier = Classifier(g, args.cuda) encryptor = Encryptor() run_face_detection: bool = True diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py index bed236b3..03a5f513 100644 --- a/src/jetson/models/utils/box_utils.py +++ b/src/jetson/models/utils/box_utils.py @@ -376,7 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor): Returns boxes and confidence scores that are above confidence threshold """ scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]]) - boxes = (boxes * scale / resize_factor).numpy() + boxes = (boxes * scale / resize_factor).to('cpu').numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] # ignore low scores From 5eb307cd71b5540c4edbe48e4e4e13e1181a5280 Mon Sep 17 00:00:00 2001 From: ZPBerg <31778364+ZPBerg@users.noreply.github.com> Date: Tue, 14 Jul 2020 
09:27:05 -0400 Subject: [PATCH 08/25] Update evaluator branch with master changes (#4) * Added evaluator for detector and classifier * Made necessary changes to main to run evaluator * detector_type as an argument, compare to list of strings * Retinaface works with GPU Co-authored-by: Aditya Chakraborty Co-authored-by: Aditya Chakraborty <31283807+adityachakra16@users.noreply.github.com> --- scripts/annotator.py | 3 +- scripts/evaluator.py | 16 ++++--- src/jetson/main.py | 66 ++++++++++++++++------------ src/jetson/models/utils/box_utils.py | 2 +- 4 files changed, 52 insertions(+), 35 deletions(-) diff --git a/scripts/annotator.py b/scripts/annotator.py index 6377bbde..40dd34d8 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -17,7 +17,8 @@ Save bbox detections to SEPARATE text files for evaluation by evaluator.py """ -# TODO there's gotta be a better way than saving to text files +# TODO there's gotta be a better way than saving to 47,000+ text files +# TODO add instructions for running annotator and evaluator CLASSES = ['Glasses/', 'Goggles/', 'Neither/'] CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 08307468..31c98bf8 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -43,10 +43,10 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path) torch.set_default_tensor_type('torch.FloatTensor') self.device = torch.device('cpu') - if os.path.exists("eval/det_results.txt"): - os.remove("eval/det_results.txt") + if os.path.exists("det_results.txt"): + os.remove("det_results.txt") - self.detector = FaceDetector(detector=detector, cuda=cuda and torch.cuda.is_available(), + self.detector = FaceDetector(detector=detector, detector_type='retinaface', cuda=cuda and torch.cuda.is_available(), set_default_dev=True) self.classifier = Classifier(torch.load(classifier, map_location=self.device), 
self.device) self.video_filenames = self.get_video_files(input_directory) @@ -100,7 +100,11 @@ def evaluate(self, annotation_path: str): print(f"Unable to open video {self.video}") continue self.calculate_average_class_accuracy() - detection_results = self.evaluate_detections(annotation_path, "eval/det_results.txt") + + # ------- classification ^^^ detection vvv + + # TODO why is this returning something + #detection_results = self.evaluate_detections(annotation_path, "det_results.txt") print(f"\n {total_videos_processed} videos processed!") @@ -189,8 +193,8 @@ def infer(self): else: average_inference_time = -1 # Empty video file - # TODO make eval/det_results.txt a global variable DETECTION_FILE - self.record_detections("eval/det_results.txt", bboxes) + # TODO make det_results.txt a global variable DETECTION_FILE + self.record_detections("det_results.txt", bboxes) return inference_dict, average_inference_time def get_class_label(self): diff --git a/src/jetson/main.py b/src/jetson/main.py index 8c77f1dd..074d2614 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -4,6 +4,7 @@ from typing import List, Set, Dict, Tuple, Optional import cv2 +from enum import Enum from PIL import Image import numpy as np import torch @@ -23,19 +24,21 @@ import multiprocessing from multiprocessing import Process, Queue, Value from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox -from src.jetson.models.Retinaface.data import cfg_mnet as cfg -from src.jetson.models.Retinaface.data import cfg_inference as infer_params +from src.jetson.models.Retinaface.data.config import cfg_mnet as cfg +from src.jetson.models.Retinaface.data.config import cfg_inference as infer_params fileCount = Value('i', 0) encryptRet = Queue() # Shared memory queue to allow child encryption process to return to parent +DETECTOR_TYPES = ['blazeface', 'retinaface', 'ssd'] class FaceDetector: - def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False): + 
def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, cuda=True, set_default_dev=False): """ Creates a FaceDetector object Args: detector: A string path to a trained pth file for a ssd model trained in face detection + detector_type: A DetectorType describing which face detector is being used detection_threshold: The minimum threshold for a detection to be considered valid cuda: Whether or not to enable CUDA set_default_dev: Whether or not to set the default device for PyTorch @@ -43,17 +46,16 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.device = torch.device("cpu") - if ('.pth' in detector and 'ssd' in detector): - from models.SSD.ssd import build_ssd + if detector_type == 'ssd': + from src.jetson.models.SSD.ssd import build_ssd self.net = build_ssd('test', 300, 2) self.model_name = 'ssd' self.net.load_state_dict(torch.load(detector, map_location=self.device)) self.transformer = BaseTransform(self.net.size, (104, 117, 123)) - - elif ('.pth' in detector and 'blazeface' in detector): - from models.BlazeFace.blazeface import BlazeFace + elif detector_type == 'blazeface': + from src.jetson.models.BlazeFace.blazeface import BlazeFace self.net = BlazeFace(self.device) self.net.load_weights(detector) @@ -63,8 +65,8 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.net.min_suppression_threshold = 0.3 self.transformer = BaseTransform(128, None) - elif ('.pth' in detector and 'mobile' in detector): - from models.Retinaface.retinaface import RetinaFace, load_model + elif detector_type == 'retinaface': + from src.jetson.models.Retinaface.retinaface import RetinaFace, load_model self.net = RetinaFace(cfg=cfg, phase='test') self.net = load_model(self.net, detector, True) @@ -74,7 +76,7 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123)) priorbox = 
PriorBox(cfg, image_size=self.image_shape) priors = priorbox.forward() - self.prior_data = priors.data + self.prior_data = priors.data.to("cuda:0" if cuda else "cpu") self.detection_threshold = detection_threshold if cuda and torch.cuda.is_available(): @@ -98,7 +100,7 @@ def detect(self, The bounding boxes of the face(s) that were detected formatted (upper left corner(x, y) , lower right corner(x,y)) """ - if (self.model_name == 'ssd'): + if self.model_name == 'ssd': x = torch.from_numpy(self.transformer(image)[0]).permute(2, 0, 1) x = Variable(x.unsqueeze(0)).to(self.device) y = self.net(x) @@ -109,12 +111,13 @@ def detect(self, while j < detections.shape[2] and detections[0, 1, j, 0] > self.detection_threshold: pt = (detections[0, 1, j, 1:] * scale).cpu().numpy() x1, y1, x2, y2 = pt - bboxes.append((x1, y1, x2, y2)) + conf = detections[0, 1, j, 0].item() + bboxes.append((x1, y1, x2, y2, conf)) j += 1 return bboxes - elif (self.model_name == 'blazeface'): + elif self.model_name == 'blazeface': img = self.transformer(image)[0].astype(np.float32) detections = self.net.predict_on_image(img) @@ -130,6 +133,7 @@ def detect(self, xmin = detections[i, 1] * image.shape[1] ymax = detections[i, 2] * image.shape[0] xmax = detections[i, 3] * image.shape[1] + conf = detections[i, 16] img = img / 127.5 - 1.0 @@ -137,14 +141,15 @@ def detect(self, kp_x = detections[i, 4 + k * 2] * img.shape[1] kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0] - bboxes.append((xmin, ymin, xmax, ymax)) + bboxes.append((xmin, ymin, xmax, ymax, conf)) return bboxes - - elif (self.model_name == 'retinaface'): + elif self.model_name == 'retinaface': img = (self.transformer(image)[0]).transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) + img = img.to(self.device) + loc, conf, _ = self.net( img) # forward pass: Returns bounding box location, confidence and facial landmark locations @@ -152,9 +157,7 @@ def detect(self, boxes, scores = postprocess(boxes, conf, self.image_shape, 
self.detection_threshold, self.resize) dets = do_nms(boxes, scores, infer_params["nms_thresh"]) - bboxes = [] - for det in dets: - bboxes.append(tuple(dets[0][0:4])) + bboxes = [tuple(det[0:5]) for det in dets] return bboxes @@ -192,14 +195,16 @@ def close(self): class Classifier: - def __init__(self, classifier): + def __init__(self, classifier, cuda: bool): ''' Performs classification of facial region into three classes - [Goggles, Glasses, Neither] Args: classifier - Trained classifier model (Currently, mobilenetv2) + cuda - True if Nvidia GPU is used ''' self.fps = 0 self.classifier = classifier + self.device = cuda def classifyFace(self, face: np.ndarray): @@ -228,7 +233,7 @@ def classifyFace(self, ]) transformed_face = transform(pil_face) face_batch = transformed_face.unsqueeze(0) - device = torch.device("cuda:0" if args.cuda and torch.cuda.is_available() else "cpu") + device = torch.device("cuda:0" if self.device and torch.cuda.is_available() else "cpu") with torch.no_grad(): face_batch = face_batch.to(device) labels = classifier(face_batch) @@ -253,7 +258,7 @@ def classifyFrame(self, label = [] for box in boxes: - x1, y1, x2, y2 = [int(b) for b in box] + x1, y1, x2, y2 = [int(b) for b in box[0:4]] # draw boxes within the frame x1 = max(0, x1) y1 = max(0, y1) @@ -301,7 +306,7 @@ def encryptFrame(self, img: np.ndarray, boxes: facial Coordinates ''' for box in boxes: - x1, y1, x2, y2 = [int(b) for b in box] + x1, y1, x2, y2 = [int(b) for b in box[0:4]] # draw boxes within the frame x1 = max(0, x1) y1 = max(0, y1) @@ -383,13 +388,19 @@ def drawFrame(boxes, frame, fps): if __name__ == "__main__": warnings.filterwarnings("once") parser = argparse.ArgumentParser(description="Face detection") - parser.add_argument('--detector', '-t', type=str, required=True, help="Path to a trained ssd .pth file") + parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file") + parser.add_argument('--detector_type', '-t', 
type=str, required=True, help="Type of face detector. One of " + "blazeface, ssd, or retinaface.") parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda") parser.add_argument('--classifier', type=str, help="Path to a trained classifier .pth file") parser.add_argument('--write_imgs', default=False, help='Write images to output_dir') parser.add_argument('--output_dir', default='encrypted_imgs', type=str, help="Where to output encrypted images") args = parser.parse_args() + if args.detector_type not in DETECTOR_TYPES: + print('Please include a valid detector type (\'blazeface\', \'ssd\', or \'retinaface\'') + exit(1) + device = torch.device('cpu') if args.cuda and torch.cuda.is_available(): device = torch.device('cuda:0') @@ -398,8 +409,9 @@ def drawFrame(boxes, frame, fps): g.eval() capturer = VideoCapturer() - detector = FaceDetector(detector=args.detector, cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) - classifier = Classifier(g) + detector = FaceDetector(detector=args.detector, detector_type=args.detector_type, + cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) + classifier = Classifier(g, args.cuda) encryptor = Encryptor() run_face_detection: bool = True diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py index bed236b3..03a5f513 100644 --- a/src/jetson/models/utils/box_utils.py +++ b/src/jetson/models/utils/box_utils.py @@ -376,7 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor): Returns boxes and confidence scores that are above confidence threshold """ scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]]) - boxes = (boxes * scale / resize_factor).numpy() + boxes = (boxes * scale / resize_factor).to('cpu').numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] # ignore low scores From 75abbfe7de34d82fb80cf0179f532028701539fd Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Wed, 15 
Jul 2020 17:49:45 -0400 Subject: [PATCH 09/25] Update files, fix face_extractor getting files --- scripts/evaluator.py | 4 ++-- scripts/face_extractor.py | 16 ++++++++++------ src/jetson/main.py | 2 +- src/jetson/models/Retinaface/data/__init__.py | 2 +- src/jetson/models/utils/box_utils.py | 1 + 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 31c98bf8..4cac8fae 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -358,7 +358,7 @@ def main(): evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path) individual_video_results = evaluator.get_evaluator_results() - with open(args.output_file, 'w') as json_file: + with open(args.output_file, 'w+') as json_file: json.dump(individual_video_results, json_file, indent=4) print(f"\n Output saved at {args.output_file}") @@ -372,7 +372,7 @@ def main(): parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, help="Path to a trained classifier .pth file") parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") - parser.add_argument('--output_file', type=str, default='eval/test1.json', + parser.add_argument('--output_file', type=str, default='results.json', help="Name of evaluation log") parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation " diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py index a73c5afb..5197f9fe 100644 --- a/scripts/face_extractor.py +++ b/scripts/face_extractor.py @@ -7,11 +7,11 @@ import numpy as np from tqdm import tqdm -from face_detector_threaded import FaceDetector +from src.jetson.main import FaceDetector """ Given a folder of images or videos, run a face detector (literally a FaceDetector) on all images -or 
videos in the folder. Detects and crop all faces in the images or every 1/rate frames from the videos. +or videos in the folder. Detect and crop all faces in the images or every 1/rate frames from the videos. Save the resulting crops as .jpgs in an output folder. """ @@ -27,7 +27,8 @@ def get_images(input_dir): @return: List of image filenames. """ files = [glob(f"{input_dir}/*{e}") for e in IMAGE_EXT] - return files[0] + files = [file for subfile in files for file in subfile] + return files def get_videos(input_dir): @@ -37,16 +38,18 @@ def get_videos(input_dir): @return: List of video filenames. """ files = [glob(f"{input_dir}/*{e}") for e in VIDEO_EXT] - return files[0] + files = [file for subfile in files for file in subfile] + return files def crop_and_save_img(frame, file_num, output_dir): """Run frame through FaceDetector and save the cropped face image.""" if frame is not None and not 0: + print("Searching for a face") boxes = face_detector.detect(frame) for box in boxes: # Get individual coordinates as integers - x1, y1, x2, y2 = [int(b) for b in box] + x1, y1, x2, y2, _ = [int(b) for b in box] face = frame[y1:y2, x1:x2] if face is None or 0 in face.shape: continue @@ -90,6 +93,7 @@ def crop_faces_from_videos(output_dir): parser.add_argument("--input_dir", default="videos", type=str, help="Input directory containing the videos/images.") parser.add_argument('--output_dir', default='face_imgs', type=str, help="Output directory for the extracted faces.") parser.add_argument('--trained_model', default='blazeface.pth', type=str, help="Path to the face detector model.") + parser.add_argument('--detector_type', type=str, help='One of blazeface, ssd, retinaface') parser.add_argument('--images', default=False, action='store_true', help='Crop faces from images instead of videos.') parser.add_argument('--rate', default=5, type=int, help="Crop faces from every 1/rate frames of the video.") @@ -97,7 +101,7 @@ def crop_faces_from_videos(output_dir): 'are all sideways, 
enable this.') args = parser.parse_args() - face_detector = FaceDetector(trained_model=args.trained_model) + face_detector = FaceDetector(args.trained_model, args.detector_type) filenames = get_images(args.input_dir) if args.images else get_videos(args.input_dir) if not os.path.isdir(args.output_dir): diff --git a/src/jetson/main.py b/src/jetson/main.py index 074d2614..e50c16a0 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -37,7 +37,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c """ Creates a FaceDetector object Args: - detector: A string path to a trained pth file for a ssd model trained in face detection + detector: A string path to a trained pth file for a face detection model detector_type: A DetectorType describing which face detector is being used detection_threshold: The minimum threshold for a detection to be considered valid cuda: Whether or not to enable CUDA diff --git a/src/jetson/models/Retinaface/data/__init__.py b/src/jetson/models/Retinaface/data/__init__.py index 311ea72e..5c6eb077 100644 --- a/src/jetson/models/Retinaface/data/__init__.py +++ b/src/jetson/models/Retinaface/data/__init__.py @@ -1 +1 @@ -from models.Retinaface.data.config import cfg_mnet, cfg_re50, cfg_inference +from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50, cfg_inference diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py index 03a5f513..50e6ea77 100644 --- a/src/jetson/models/utils/box_utils.py +++ b/src/jetson/models/utils/box_utils.py @@ -376,6 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor): Returns boxes and confidence scores that are above confidence threshold """ scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]]) + scale = scale.to(boxes.device) boxes = (boxes * scale / resize_factor).to('cpu').numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] From 
bbd0e54614237b1b08535ce4726549669ab864cf Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 16 Jul 2020 14:26:58 -0400 Subject: [PATCH 10/25] Add scripts/utils for misc. helper functions. Auto-rotate videos. --- scripts/evaluator.py | 25 +++++++++++++++++-------- scripts/face_extractor.py | 17 +++++++++++------ scripts/utils.py | 32 ++++++++++++++++++++++++++++++++ src/jetson/main.py | 12 ++++++++---- 4 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 scripts/utils.py diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 4cac8fae..dc59d450 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -1,12 +1,14 @@ -import os -import cv2 import argparse -import torch +import json +import os import time import warnings -import json + +import cv2 import numpy as np +import torch +from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector, Classifier VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] @@ -21,7 +23,7 @@ class Evaluator(): - def __init__(self, cuda, detector, classifier, input_directory, annotation_path): + def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path): """ Evaluates face detection and goggle classification performance. 
Goggle Classification accuracy is given by average class accuracy and individual @@ -46,7 +48,7 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path) if os.path.exists("det_results.txt"): os.remove("det_results.txt") - self.detector = FaceDetector(detector=detector, detector_type='retinaface', cuda=cuda and torch.cuda.is_available(), + self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(), set_default_dev=True) self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device) self.video_filenames = self.get_video_files(input_directory) @@ -104,6 +106,7 @@ def evaluate(self, annotation_path: str): # ------- classification ^^^ detection vvv # TODO why is this returning something + # TODO make it an optional arg to evaluate face detection #detection_results = self.evaluate_detections(annotation_path, "det_results.txt") print(f"\n {total_videos_processed} videos processed!") @@ -162,10 +165,15 @@ def infer(self): frame_counter = 0 start_time = time.time() + # check if the video needs to be rotated + rotate_code = check_rotation(self.video) + while True: ret, img = self.cap.read() if not ret: break + if rotate_code is not None: + correct_rotation(img, rotate_code) # img = cv2.resize(img, (640, 480)) #Set this to the input shape of image for faster processing. 
(Remember to do the same in annotator) frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str( frame_counter) @@ -355,7 +363,7 @@ def get_evaluator_results(self): def main(): if not args.input_directory: raise Exception("Invalid input directory") - evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path) + evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, args.annotation_path) individual_video_results = evaluator.get_evaluator_results() with open(args.output_file, 'w+') as json_file: @@ -367,8 +375,9 @@ def main(): if __name__ == "__main__": warnings.filterwarnings("once") parser = argparse.ArgumentParser(description="Face detection") - parser.add_argument('--detector', '-t', type=str, default='model_weights/blazeface.pth', + parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth', help="Path to a trained face detector .pth file") + parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd") parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, help="Path to a trained classifier .pth file") parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py index 5197f9fe..b5bd6104 100644 --- a/scripts/face_extractor.py +++ b/scripts/face_extractor.py @@ -1,12 +1,15 @@ import argparse -from glob import glob +import math import os import warnings +from glob import glob + import cv2 import numpy as np from tqdm import tqdm +from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector """ @@ -27,6 +30,7 @@ def get_images(input_dir): @return: List of image filenames. 
""" files = [glob(f"{input_dir}/*{e}") for e in IMAGE_EXT] + # convert the 2d list into a 1d list files = [file for subfile in files for file in subfile] return files @@ -38,6 +42,7 @@ def get_videos(input_dir): @return: List of video filenames. """ files = [glob(f"{input_dir}/*{e}") for e in VIDEO_EXT] + # convert the 2d list into a 1d list files = [file for subfile in files for file in subfile] return files @@ -45,11 +50,10 @@ def get_videos(input_dir): def crop_and_save_img(frame, file_num, output_dir): """Run frame through FaceDetector and save the cropped face image.""" if frame is not None and not 0: - print("Searching for a face") boxes = face_detector.detect(frame) for box in boxes: # Get individual coordinates as integers - x1, y1, x2, y2, _ = [int(b) for b in box] + x1, y1, x2, y2, _ = [int(math.ceil(b)) for b in box] face = frame[y1:y2, x1:x2] if face is None or 0 in face.shape: continue @@ -72,15 +76,15 @@ def crop_faces_from_videos(output_dir): for video_file in filenames: print(f"Opening {video_file}") video = cv2.VideoCapture(video_file) + rotate_code = check_rotation(video_file) file_len = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) for frame_num in tqdm(range(file_len)): ret, frame = video.read() if frame_num % args.rate == 0: - # If the video is shot horizontally, flip it so it's in the right orientation - if args.horiz: - frame = cv2.transpose(frame) + if rotate_code is not None: + frame = correct_rotation(frame, rotate_code) crop_and_save_img(frame, file_num, output_dir) file_num += 1 @@ -101,6 +105,7 @@ def crop_faces_from_videos(output_dir): 'are all sideways, enable this.') args = parser.parse_args() + # the FaceDetector will use CUDA if possible face_detector = FaceDetector(args.trained_model, args.detector_type) filenames = get_images(args.input_dir) if args.images else get_videos(args.input_dir) diff --git a/scripts/utils.py b/scripts/utils.py new file mode 100644 index 00000000..5c8361c3 --- /dev/null +++ b/scripts/utils.py @@ -0,0 +1,32 @@ 
+import cv2 +import ffmpeg + +""" +check_rotation and correct_rotation adapted from +https://stackoverflow.com/questions/53097092/frame-from-video-is-upside-down-after-extracting +to handle the fact that some videos store rotation metadata while others do not, +and OpenCV can't tell the difference +""" + + +def check_rotation(path_video_file): + # this returns meta-data of the video file in form of a dictionary + meta_dict = ffmpeg.probe(path_video_file) + + # from the dictionary, meta_dict['streams'][0]['tags']['rotate'] is the key + # we are looking for + rotate_code = None + if 'rotate' not in meta_dict['streams'][0]['tags'].keys(): + return rotate_code + if int(meta_dict['streams'][0]['tags']['rotate']) == 90: + rotate_code = cv2.ROTATE_90_CLOCKWISE + elif int(meta_dict['streams'][0]['tags']['rotate']) == 180: + rotate_code = cv2.ROTATE_180 + elif int(meta_dict['streams'][0]['tags']['rotate']) == 270: + rotate_code = cv2.ROTATE_90_COUNTERCLOCKWISE + + return rotate_code + + +def correct_rotation(frame, rotate_code): + return cv2.rotate(frame, rotate_code) diff --git a/src/jetson/main.py b/src/jetson/main.py index f271db4e..9f65cf8d 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -37,11 +37,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c """ Creates a FaceDetector object Args: -<<<<<<< HEAD - detector: A string path to a trained pth file for a face detection model -======= detector: A string path to a trained pth file for a ssd model trained in face detection ->>>>>>> dc6203d0875efc46c43419aa82af51d7b93d7f6d detector_type: A DetectorType describing which face detector is being used detection_threshold: The minimum threshold for a detection to be considered valid cuda: Whether or not to enable CUDA @@ -63,6 +59,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c self.net = BlazeFace(self.device) self.net.load_weights(detector) + # TODO load_anchors doesn't work if run from 
face_extractor self.net.load_anchors("models/BlazeFace/anchors.npy") self.model_name = 'blazeface' self.net.min_score_thresh = 0.75 @@ -161,6 +158,13 @@ def detect(self, boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize) dets = do_nms(boxes, scores, infer_params["nms_thresh"]) + # scale bbox coords back to original image size + for det in dets: + det[0] *= image.shape[1] / img.shape[3] + det[1] *= image.shape[0] / img.shape[2] + det[2] *= image.shape[1] / img.shape[3] + det[3] *= image.shape[0] / img.shape[2] + bboxes = [tuple(det[0:5]) for det in dets] return bboxes From b372438beeade888bfd5ac9ff12bb1b08498d6b3 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Sat, 18 Jul 2020 17:05:36 -0400 Subject: [PATCH 11/25] Improve results.json output, fix load_anchors not finding anchors.npy --- scripts/evaluator.py | 106 ++++++++++++++++++++----------------------- src/jetson/main.py | 5 +- 2 files changed, 50 insertions(+), 61 deletions(-) diff --git a/scripts/evaluator.py b/scripts/evaluator.py index dc59d450..28f42eb5 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -1,16 +1,18 @@ import argparse import json import os -import time import warnings import cv2 import numpy as np import torch +from tqdm import tqdm from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector, Classifier +DETECTIONS_FILE = 'det_results.txt' +CLASSIFICATION_RESULTS_FILE = 'results.json' VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] """ @@ -18,12 +20,13 @@ Videos to be evaluated should be from the TestVideos folder on the Drive. """ -# TODO - TODO TODO don't do face detection? Would have to manually label faces but we're using a +# TODO - TODO TODO don't evaluate face detection? 
Would have to manually label faces but we're using a # TODO - SOTA face detection model that could just empirically be observed to work +# TODO make comments with @param things class Evaluator(): - def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path): + def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path, rate=1): """ Evaluates face detection and goggle classification performance. Goggle Classification accuracy is given by average class accuracy and individual @@ -36,6 +39,7 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a classifier: A string path to a .pth weights file for a goggle classification model input_directory: Directory containing test videos to run Evaluator on annotation_path: Directory containing annotation files (output by annotator.py) + rate: Run detection and classification on every 1/rate frames """ if cuda and torch.cuda.is_available(): @@ -45,8 +49,8 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a torch.set_default_tensor_type('torch.FloatTensor') self.device = torch.device('cpu') - if os.path.exists("det_results.txt"): - os.remove("det_results.txt") + if os.path.exists(DETECTIONS_FILE): + os.remove(DETECTIONS_FILE) self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(), set_default_dev=True) @@ -72,12 +76,13 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a self.condition = '' self.cap = '' self.video = '' + self.rate = rate self.evaluate(annotation_path) def evaluate(self, annotation_path: str): """ - Evaluates every video file in the input directory containing test videos and - stores results in self.results. + Evaluates (classification and detection) every video file in the input directory + containing test videos and stores results in self.results. 
To understand the format of self.results dict, check the constructor Args: @@ -86,7 +91,7 @@ def evaluate(self, annotation_path: str): total_videos_processed = 0 for video_file in self.video_filenames: self.video = video_file - print(f"Processing {self.video} ...") + print(f"Processing {self.video} ..., video {total_videos_processed}/{len(self.video_filenames)}") self.class_label = self.get_class_label() self.condition = self.get_condition() @@ -107,7 +112,7 @@ def evaluate(self, annotation_path: str): # TODO why is this returning something # TODO make it an optional arg to evaluate face detection - #detection_results = self.evaluate_detections(annotation_path, "det_results.txt") + #detection_results = self.evaluate_detections(annotation_path, DETECTIONS_FILE) print(f"\n {total_videos_processed} videos processed!") @@ -124,21 +129,23 @@ def calculate_average_class_accuracy(self): def record_results(self, result): """ - Records all the results in the self.results dict + Records results of one video in the self.results dict Args: - result(List) - contains the classification accuracy and inference time + result(List) - contains the classification accuracy and inference time and of one video """ self.results[self.class_label]['number_of_videos'] += 1 + # below is just a running sum which gets divided by the number of videos at the end self.results[self.class_label]['average_class_accuracy'] += result[0] self.results[self.class_label]['individual_video_results'][self.video] = {} self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] - self.results[self.class_label]['individual_video_results'][self.video]["inference_time"] = result[1] + self.results[self.class_label]['individual_video_results'][self.video]["num_correct"] = result[1] + self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2] self.results[self.class_label]['individual_video_results'][self.video]["condition"] = 
self.condition def record_detections(self, file, detections): """ - Save detections in a file for evaluation + Save face detections in a file for evaluation Args: file (str): Records detections here detections (List): contains all the bounding boxes and confidence values @@ -154,32 +161,27 @@ def record_detections(self, file, detections): def infer(self): """ Performs inference on a video by using the face detection - and goggle classification models + and goggle classification models. + @param rate: How often to run detection (every 1/rate frames). It returns: 1) inference_dict: the number of inferences for each class. - 2) average_inference_time: a float containing the average inference time for the whole video """ bboxes = [] preds = [] inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0} - frame_counter = 0 - start_time = time.time() # check if the video needs to be rotated rotate_code = check_rotation(self.video) + video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - while True: + for frame_num in tqdm(range(video_len)): ret, img = self.cap.read() - if not ret: - break - if rotate_code is not None: - correct_rotation(img, rotate_code) - # img = cv2.resize(img, (640, 480)) #Set this to the input shape of image for faster processing. 
(Remember to do the same in annotator) - frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str( - frame_counter) - boxes = self.detector.detect(img) # Also contains confidence - box_no_conf = [] - if len(boxes) != 0: + if frame_num % self.rate == 0: + if rotate_code is not None: + correct_rotation(img, rotate_code) + frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str( + frame_num) + boxes = self.detector.detect(img) # Also contains confidence for box in boxes: x1 = max(0, box[0]) y1 = max(0, box[1]) @@ -191,19 +193,12 @@ def infer(self): preds.append(label.item()) bboxes.append([frame_id, x1, y1, x2, y2, conf]) - inference_dict["Goggles"] += preds.count(1) - inference_dict["Glasses"] += preds.count(0) - inference_dict["Neither"] += preds.count(2) + inference_dict["Goggles"] += preds.count(1) + inference_dict["Glasses"] += preds.count(0) + inference_dict["Neither"] += preds.count(2) - total_time = time.time() - start_time - if frame_counter > 0: - average_inference_time = total_time / frame_counter - else: - average_inference_time = -1 # Empty video file - - # TODO make det_results.txt a global variable DETECTION_FILE - self.record_detections("det_results.txt", bboxes) - return inference_dict, average_inference_time + self.record_detections(DETECTIONS_FILE, bboxes) + return inference_dict def get_class_label(self): """ @@ -246,13 +241,13 @@ def evaluate_classifications(self): Returns the accuracy (percentage_of_correct_predictions) of the predictions for a video """ - inferences, inference_time = self.infer() + inferences = self.infer() if sum(inferences.values()) == 0: percentage_of_correct_predictions = 0 else: percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values()) - return percentage_of_correct_predictions, inference_time + return percentage_of_correct_predictions, inferences[self.class_label], sum(inferences.values()) def 
evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5): """ @@ -356,22 +351,9 @@ def get_evaluator_results(self): """ Returns the dict containing all the test results (self.results) """ - return self.results -def main(): - if not args.input_directory: - raise Exception("Invalid input directory") - evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, args.annotation_path) - individual_video_results = evaluator.get_evaluator_results() - - with open(args.output_file, 'w+') as json_file: - json.dump(individual_video_results, json_file, indent=4) - - print(f"\n Output saved at {args.output_file}") - - if __name__ == "__main__": warnings.filterwarnings("once") parser = argparse.ArgumentParser(description="Face detection") @@ -381,15 +363,23 @@ def main(): parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, help="Path to a trained classifier .pth file") parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") - parser.add_argument('--output_file', type=str, default='results.json', - help="Name of evaluation log") parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation " "files") + parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.') # TODO add store_true args for detection, evaluation (to do separately if desired) args = parser.parse_args() - main() + if not args.input_directory: + raise Exception("Invalid input directory") + evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, + args.annotation_path) + individual_video_results = evaluator.get_evaluator_results() + + with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file: + 
json.dump(individual_video_results, json_file, indent=4) + + print(f"\n Output saved at {args.output_file}") exit() diff --git a/src/jetson/main.py b/src/jetson/main.py index 9f65cf8d..c2586c8c 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -59,8 +59,8 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c self.net = BlazeFace(self.device) self.net.load_weights(detector) - # TODO load_anchors doesn't work if run from face_extractor - self.net.load_anchors("models/BlazeFace/anchors.npy") + # assume anchors.npy is in this location relative to the class definition + self.net.load_anchors(os.path.join(os.path.dirname(__file__), "models/BlazeFace/anchors.npy")) self.model_name = 'blazeface' self.net.min_score_thresh = 0.75 self.net.min_suppression_threshold = 0.3 @@ -235,7 +235,6 @@ def classifyFace(self, # the same transforms as applied while training model transform = transforms.Compose([ transforms.Resize(224), - transforms.RandomGrayscale(1), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) From 778c9a2bd9ae6b9664c8936db9aca90c0c69154e Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Tue, 21 Jul 2020 10:03:53 -0400 Subject: [PATCH 12/25] Documenting and refactoring --- scripts/annotator.py | 2 +- scripts/evaluator.py | 251 +++++++++++++++++++++---------------------- scripts/utils.py | 7 +- 3 files changed, 130 insertions(+), 130 deletions(-) diff --git a/scripts/annotator.py b/scripts/annotator.py index 40dd34d8..3b8ed341 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -17,7 +17,7 @@ Save bbox detections to SEPARATE text files for evaluation by evaluator.py """ -# TODO there's gotta be a better way than saving to 47,000+ text files +# TODO output large json file or something # TODO add instructions for running annotator and evaluator CLASSES = ['Glasses/', 'Goggles/', 'Neither/'] diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 28f42eb5..ef8f730f 
100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -6,8 +6,10 @@ import cv2 import numpy as np import torch +import torchvision from tqdm import tqdm +from scripts.goggle_classifier import get_model from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector, Classifier @@ -54,7 +56,11 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(), set_default_dev=True) - self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device) + # TODO check state_dict vs. not + model = get_model() + model.load_state_dict(torch.load(classifier, map_location=self.device)) + self.classifier = Classifier(model, self.device) + #self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device) self.video_filenames = self.get_video_files(input_directory) self.results = {'Goggles': {'average_class_accuracy': 0.0, @@ -76,6 +82,7 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a self.condition = '' self.cap = '' self.video = '' + self.video_len = 0 self.rate = rate self.evaluate(annotation_path) @@ -102,10 +109,10 @@ def evaluate(self, annotation_path: str): self.record_results(classification_result) total_videos_processed += 1 print(f"{self.video} : Done") - else: print(f"Unable to open video {self.video}") continue + self.calculate_average_class_accuracy() # ------- classification ^^^ detection vvv @@ -116,10 +123,109 @@ def evaluate(self, annotation_path: str): print(f"\n {total_videos_processed} videos processed!") + def evaluate_classifications(self): + """ + Returns the accuracy (percentage_of_correct_predictions) of the + predictions for a video + """ + inferences = self.infer() + if sum(inferences.values()) == 0: + percentage_of_correct_predictions = 0 + else: + percentage_of_correct_predictions = 
inferences[self.class_label] / sum(inferences.values()) + + return percentage_of_correct_predictions, inferences, sum(inferences.values()) + + def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file, overlap_threshold=0.5): + """ + Calculates the recall and precision of face detection for a video. + TODO explain what that means... seems like overlap of x and y coords? I.e. IoU? + + @param ground_truth_detections_file: file containing actual face detections (created by annotator.py) + @param predicted_detections_file: file containing predicted face detections + @param overlap_threshold: IoU greater than threshold counts as correct, less than is incorrect + """ + + with open(ground_truth_detections_file) as detect_file: + ground_truth_detections = json.load(detect_file) + + with open(predicted_detections_file, 'r') as prediction_file: + predicted_detections = json.load(prediction_file) + + # TODO fix below based on detections format + total_ground_truths = 0 + for frame_id in ground_truth_detections: + total_ground_truths += len(ground_truth_detections[frame_id]) + + # TODO ugly parsing and such here. Need to debug it. ==1 means...? + if any(predicted_detections) == 1: + splitlines = [x.strip().split('|') for x in predicted_detections] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[5]) for x in splitlines]) + bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + bboxes = bboxes[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + nd = len(image_ids) + true_pos = np.zeros(nd) + false_pos = np.zeros(nd) + + # TODO for frame in frames? 
+ for d in range(nd): + try: + bbox = bboxes[d, :].astype(float) + max_overlap = -np.inf + bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32) + if bbox_ground_truth_detections.size > 0: + # TODO max and min variable names are backwards? + ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0]) + iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1]) + ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2]) + iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3]) + iw = np.maximum(ixmax - ixmin, 0.) + ih = np.maximum(iymax - iymin, 0.) + # TODO debug. inters = intersection? uni = union? Overlaps is actual value? + # TODO import IoU from box_utils should work + inters = iw * ih + uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + + (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) * + (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters) + overlaps = inters / uni + max_overlap = np.max(overlaps) + # jmax = np.argmax(overlaps) + + if max_overlap > overlap_threshold: + true_pos[d] = 1. + else: + false_pos[d] = 1. + + except KeyError: + continue + + print("Total ground truths: ", total_ground_truths) + false_pos = np.cumsum(false_pos) + true_pos = np.cumsum(true_pos) + recall = true_pos / float(total_ground_truths) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps) + else: + recall = -1. + precision = -1. + ap = -1. + + print("Precision: ", precision) + print("Recall: ", recall) + + return precision[len(precision)], recall[len(recall)] # final precision, recall + def calculate_average_class_accuracy(self): """ - Calculates the average class accuracy for each class and stores it in the - self.results dict. 
+ Calculates the average class accuracy for each class and stores it in self.results """ for class_label in self.results: if self.results[class_label]['number_of_videos'] > 0: @@ -131,21 +237,25 @@ def record_results(self, result): """ Records results of one video in the self.results dict - Args: - result(List) - contains the classification accuracy and inference time and of one video + @param result(List) - contains the classification accuracy, + number of predictions for each label, number of detections """ self.results[self.class_label]['number_of_videos'] += 1 # below is just a running sum which gets divided by the number of videos at the end self.results[self.class_label]['average_class_accuracy'] += result[0] self.results[self.class_label]['individual_video_results'][self.video] = {} self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] - self.results[self.class_label]['individual_video_results'][self.video]["num_correct"] = result[1] + self.results[self.class_label]['individual_video_results'][self.video]["glasses"] = result[1]['Glasses'] + self.results[self.class_label]['individual_video_results'][self.video]["goggles"] = result[1]['Goggles'] + self.results[self.class_label]['individual_video_results'][self.video]["neither"] = result[1]['Neither'] self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2] + self.results[self.class_label]['individual_video_results'][self.video]["num_frames"] = self.video_len self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition def record_detections(self, file, detections): """ Save face detections in a file for evaluation + TODO improve how this is stored Args: file (str): Records detections here detections (List): contains all the bounding boxes and confidence values @@ -160,7 +270,7 @@ def record_detections(self, file, detections): def infer(self): """ - Performs inference on a video by using 
the face detection + Performs inference on a video using the face detection and goggle classification models. @param rate: How often to run detection (every 1/rate frames). It returns: @@ -172,11 +282,11 @@ def infer(self): # check if the video needs to be rotated rotate_code = check_rotation(self.video) - video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + self.video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - for frame_num in tqdm(range(video_len)): + for frame_num in tqdm(range(self.video_len)): ret, img = self.cap.read() - if frame_num % self.rate == 0: + if frame_num % self.rate == 0 and img is not None: if rotate_code is not None: correct_rotation(img, rotate_code) frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str( @@ -193,8 +303,8 @@ def infer(self): preds.append(label.item()) bboxes.append([frame_id, x1, y1, x2, y2, conf]) - inference_dict["Goggles"] += preds.count(1) inference_dict["Glasses"] += preds.count(0) + inference_dict["Goggles"] += preds.count(1) inference_dict["Neither"] += preds.count(2) self.record_detections(DETECTIONS_FILE, bboxes) @@ -219,121 +329,6 @@ def get_condition(self): """ return self.video.split('/')[-2] - def get_ground_truth_detections(self, directory): - """ - Get ground truth detection labels (from annotation file) - """ - ground_truths = {} - - for file in os.listdir(directory): - f = open(directory + file, "r") - key = file.strip('.txt') - content = f.readlines() - f.close() - - content = [list(map(float, x.strip(' \n').split(' '))) for x in content] - ground_truths[key] = content - - return ground_truths - - def evaluate_classifications(self): - """ - Returns the accuracy (percentage_of_correct_predictions) of the - predictions for a video - """ - inferences = self.infer() - if sum(inferences.values()) == 0: - percentage_of_correct_predictions = 0 - else: - percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values()) - - return 
percentage_of_correct_predictions, inferences[self.class_label], sum(inferences.values()) - - def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5): - """ - Calculates the recall and precision of face detection for a video. - TODO explain what that means... seems like overlap of x and y coords? I.e. IoU? - - @param annotations_dir: directory containing annotation files (created by annotator.py) - @param detection_dir: directory of predicted detections TODO ??? - @param overlap_threshold: greater than threshold counts as correct, less than is incorrect - """ - - ground_truth_detections = self.get_ground_truth_detections(annotations_dir) - with open(detection_dir, 'r') as f: - # TODO verify variable name accurate - predicted_detections = f.readlines() - - total_ground_truths = 0 - for frame_id in ground_truth_detections: - total_ground_truths += len(ground_truth_detections[frame_id]) - - # TODO ugly parsing and such here. Need to debug it. ==1 means...? - if any(predicted_detections) == 1: - splitlines = [x.strip().split('|') for x in predicted_detections] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[5]) for x in splitlines]) - bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines]) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - sorted_scores = np.sort(-confidence) - bboxes = bboxes[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - - nd = len(image_ids) - true_pos = np.zeros(nd) - false_pos = np.zeros(nd) - - # TODO for frame in frames? - for d in range(nd): - try: - bbox = bboxes[d, :].astype(float) - max_overlap = -np.inf - bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32) - if bbox_ground_truth_detections.size > 0: - # TODO max and min variable names are backwards? 
- ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0]) - iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1]) - ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2]) - iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3]) - iw = np.maximum(ixmax - ixmin, 0.) - ih = np.maximum(iymax - iymin, 0.) - # TODO debug. inters = intersection? uni = union? Overlaps is actual value? - # TODO import IoU from box_utils should work - inters = iw * ih - uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + - (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) * - (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters) - overlaps = inters / uni - max_overlap = np.max(overlaps) - # jmax = np.argmax(overlaps) - - if max_overlap > overlap_threshold: - true_pos[d] = 1. - else: - false_pos[d] = 1. - - except KeyError: - continue - - print("Total ground truths: ", total_ground_truths) - false_pos = np.cumsum(false_pos) - true_pos = np.cumsum(true_pos) - recall = true_pos / float(total_ground_truths) - # avoid divide by zero in case the first detection matches a difficult - # ground truth - precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps) - else: - recall = -1. - precision = -1. - ap = -1. 
- - print("Precision: ", precision) - print("Recall: ", recall) - - return precision[len(precision)], recall[len(recall)] # final precision, recall - def get_video_files(self, input_directory: str): """ Gets all the video files in the input directory @@ -380,6 +375,6 @@ def get_evaluator_results(self): with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file: json.dump(individual_video_results, json_file, indent=4) - print(f"\n Output saved at {args.output_file}") + print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}") exit() diff --git a/scripts/utils.py b/scripts/utils.py index 5c8361c3..3c8d5336 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -1,11 +1,16 @@ import cv2 import ffmpeg +""" +Miscellaneous utility functions that apply to multiple scripts. +""" + + """ check_rotation and correct_rotation adapted from https://stackoverflow.com/questions/53097092/frame-from-video-is-upside-down-after-extracting to handle the fact that some videos store rotation metadata while others do not, -and OpenCV can't tell the difference +and OpenCV can't tell the difference. """ From 275dea45e393e5fd808163bdfa3d1a28e43adcf6 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Wed, 22 Jul 2020 19:19:17 -0400 Subject: [PATCH 13/25] Finish annotator saving to csv. 
check_rotation only rotates .MOV files --- scripts/annotator.py | 221 ++++++++------------------------------ scripts/evaluator.py | 1 + scripts/face_extractor.py | 2 +- scripts/utils.py | 8 +- 4 files changed, 53 insertions(+), 179 deletions(-) diff --git a/scripts/annotator.py b/scripts/annotator.py index 3b8ed341..34e489c1 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -1,65 +1,25 @@ from __future__ import print_function -import os + import argparse -import torch -import torch.backends.cudnn as cudnn -import numpy as np -from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50 -from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox -from src.jetson.models.utils.box_utils import nms_numpy, decode_landm, decode +import csv +import os + import cv2 -from src.jetson.models.Retinaface.retinaface import RetinaFace -import time -import json +import torch +from tqdm import tqdm + +from src.jetson.main import FaceDetector +from scripts.utils import check_rotation, correct_rotation """ -Run the face detector model on TestVideos (on the Drive, also args.input_directory). -Save bbox detections to SEPARATE text files for evaluation by evaluator.py +Run the face detector model on a folder of videos (most recently used on TestVideos from the Drive). +Save bbox detections to a csv file to be compared in evaluator.py. +An earlier version of this script was used to compare Retinaface with +a Mobilenet backbone versus a Resnet backbone; comparison of object +detectors would be its most applicable use. 
""" -# TODO output large json file or something -# TODO add instructions for running annotator and evaluator - -CLASSES = ['Glasses/', 'Goggles/', 'Neither/'] -CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', - 'Pose_45_degrees_up/', - 'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/'] - - -def check_keys(model, pretrained_state_dict): - ckpt_keys = set(pretrained_state_dict.keys()) - model_keys = set(model.state_dict().keys()) - used_pretrained_keys = model_keys & ckpt_keys - unused_pretrained_keys = ckpt_keys - model_keys - missing_keys = model_keys - ckpt_keys - print('Missing keys: {}'.format(len(missing_keys))) - print('Unused checkpoint keys: {}'.format(len(unused_pretrained_keys))) - print('Used keys: {}'.format(len(used_pretrained_keys))) - assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' - return True - - -def remove_prefix(state_dict, prefix): - """ Old style model is stored with all names of parameters sharing common prefix 'module.' 
""" - print('remove prefix \'{}\''.format(prefix)) - f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x - return {f(key): value for key, value in state_dict.items()} - - -def load_model(model, pretrained_path, load_to_cuda): - print('Loading pretrained model from {}'.format(pretrained_path)) - if not load_to_cuda: - pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) - else: - device = torch.cuda.current_device() - pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) - if "state_dict" in pretrained_dict.keys(): - pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') - else: - pretrained_dict = remove_prefix(pretrained_dict, 'module.') - check_keys(model, pretrained_dict) - model.load_state_dict(pretrained_dict, strict=False) - return model +DETECTIONS_FILE = 'detection_results.csv' def create_directory(root_directory): @@ -67,149 +27,60 @@ def create_directory(root_directory): os.mkdir(root_directory) -def get_storage_location(output_directory, video_filename, input_directory): - # TODO ugly filename strip - save_dir = os.path.join(output_directory, video_filename.strip(input_directory) - .strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_') - - return save_dir - - def get_videos(input_directory): filenames = [] for dirName, subdirList, fileList in os.walk(input_directory): for filename in fileList: ext = '.' + filename.split('.')[-1] - if ext in ['.mov', '.mp4', '.avi', '.MOV']: + if ext in ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']: filenames.append(dirName + '/' + filename) return filenames if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Retinaface') - - parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth', - type=str, help='Trained face detector state_dict path') - parser.add_argument('--network', default='resnet50', help='Backbone network. 
mobile0.25 or resnet50') - # TODO make CUDA arg instead + parser = argparse.ArgumentParser(description='Save face detection results') + parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file") + parser.add_argument('--detector_type', '-t', type=str, required=True, help="Type of face detector. One of " + "blazeface, ssd, or retinaface.") parser.add_argument('--cuda', '-c', action="store_true", default=False, help='Use CUDA') - parser.add_argument('--confidence_threshold', default=0.5, type=float, help='Bounding box IoU required to count as ' - 'correct') - parser.add_argument('--top_k', default=1000, type=int, help='top_k') - parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold') - parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k') - # TODO not currently used - parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results') - parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, - help='directory to store detected labels') - parser.add_argument('--input_directory', default='test_videos/', type=str, + parser.add_argument('--input_directory', '-i', default='test_videos/', type=str, help='directory where test videos are located') + parser.add_argument('--output_directory', '-o', default='ground_truth_detections_lowlight/', type=str, + help='directory to store detected labels') args = parser.parse_args() + device = torch.device('cuda:0') if args.cuda and torch.cuda.is_available() else torch.device('cpu') + create_directory(args.output_directory) torch.set_grad_enabled(False) - cfg = None - if args.network == "mobile0.25": - cfg = cfg_mnet - elif args.network == "resnet50": - cfg = cfg_re50 - - # load the network - net = RetinaFace(cfg=cfg, phase='test') - - # load the model weights # TODO rename method load_model - net = load_model(net, args.trained_model, args.cuda) - 
net.eval() - print('Finished loading model!') - print(net) - cudnn.benchmark = True - device = torch.device("cuda:0" if args.cuda else "cpu") - net = net.to(device) - - resize = 0.4 + + # load the face detector + detector = FaceDetector(detector=args.detector, detector_type=args.detector_type, + cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) video_files = get_videos(args.input_directory) for video in video_files: - cap = cv2.VideoCapture(video) - storage_location = get_storage_location(args.output_directory, video, args.input_directory) - create_directory(storage_location) print("Video: ", video) - - # testing begin - if cap.isOpened(): - frame_number = 0 - while True: - ret, img_raw = cap.read() - if not ret: - break - img = np.float32(img_raw) - img = cv2.resize(img, (int(img.shape[1] * resize), int(img.shape[0] * resize))) - - # TODO does this vvv code appear in Retinaface/ ? Or possibly in main.py - - im_height, im_width, _ = img.shape - scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) - img -= (104, 117, 123) - img = img.transpose(2, 0, 1) - img = torch.from_numpy(img).unsqueeze(0) - img = img.to(device) - scale = scale.to(device) - - tic = time.time() - loc, conf, landms = net(img) # forward pass - # print('net forward time: {:.4f}'.format(time.time() - tic)) - - priorbox = PriorBox(cfg, image_size=(im_height, im_width)) - priors = priorbox.forward() - priors = priors.to(device) - prior_data = priors.data - boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) - boxes = boxes * scale / resize - boxes = boxes.cpu().numpy() - scores = conf.squeeze(0).data.cpu().numpy()[:, 1] - landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) - scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], - img.shape[3], img.shape[2], img.shape[3], img.shape[2], - img.shape[3], img.shape[2]]) - scale1 = scale1.to(device) - landms = landms * scale1 / resize - landms = 
landms.cpu().numpy() - - # ignore low scores - inds = np.where(scores > args.confidence_threshold)[0] - boxes = boxes[inds] - landms = landms[inds] - scores = scores[inds] - - # keep top-K before NMS - order = scores.argsort()[::-1][:args.top_k] - boxes = boxes[order] - landms = landms[order] - scores = scores[order] - - # do NMS - dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) - keep = nms_numpy(dets, args.nms_threshold) - dets = dets[keep, :] - landms = landms[keep] - - # keep top-K faster NMS - dets = dets[:args.keep_top_k, :] - landms = landms[:args.keep_top_k, :] - - # dets = np.concatenate((dets, landms), axis=1) - output_file = os.path.join(storage_location, 'frame' + str(frame_number) + '.txt') - f = open(output_file, "w") - for detection in dets: - for coord in detection: - f.write(str(coord) + " ") - f.write("\n") - f.close() - - frame_number += 1 + cap = cv2.VideoCapture(video) + rotate_code = check_rotation(video) + file_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + detections = [] + + for frame_num in tqdm(range(file_len)): + _, frame = cap.read() + if rotate_code is not None: + frame = correct_rotation(frame, rotate_code) + boxes = detector.detect(frame) + detections.append([video, frame_num, boxes]) + + # save detections to csv one video at a time + with open(DETECTIONS_FILE, "a") as f: + writer = csv.writer(f) + writer.writerows(detections) exit(0) diff --git a/scripts/evaluator.py b/scripts/evaluator.py index ef8f730f..d751e882 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -25,6 +25,7 @@ # TODO - TODO TODO don't evaluate face detection? 
Would have to manually label faces but we're using a # TODO - SOTA face detection model that could just empirically be observed to work # TODO make comments with @param things +# TODO load detections as csv file class Evaluator(): diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py index b5bd6104..644a483b 100644 --- a/scripts/face_extractor.py +++ b/scripts/face_extractor.py @@ -20,7 +20,7 @@ warnings.filterwarnings('once') IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG'] -VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV'] +VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV', '.avi', '.AVI'] def get_images(input_dir): diff --git a/scripts/utils.py b/scripts/utils.py index 3c8d5336..bb1ddb16 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -14,15 +14,17 @@ """ -def check_rotation(path_video_file): +def check_rotation(path_video_file: str): + # only .mov files need to be rotated + if path_video_file.split('.')[-1] != '.MOV' or '.mov': + return None + # this returns meta-data of the video file in form of a dictionary meta_dict = ffmpeg.probe(path_video_file) # from the dictionary, meta_dict['streams'][0]['tags']['rotate'] is the key # we are looking for rotate_code = None - if 'rotate' not in meta_dict['streams'][0]['tags'].keys(): - return rotate_code if int(meta_dict['streams'][0]['tags']['rotate']) == 90: rotate_code = cv2.ROTATE_90_CLOCKWISE elif int(meta_dict['streams'][0]['tags']['rotate']) == 180: From 899cd1ccda83c93fb6699f582147c15dd1b5edfd Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 23 Jul 2020 09:35:09 -0400 Subject: [PATCH 14/25] Change csv storage format --- scripts/annotator.py | 8 +- scripts/evaluator.py | 256 ++++++++++++++++++++++--------------------- 2 files changed, 138 insertions(+), 126 deletions(-) diff --git a/scripts/annotator.py b/scripts/annotator.py index 34e489c1..bf51b9f1 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -76,7 +76,13 @@ def get_videos(input_directory): if rotate_code is not None: frame = 
correct_rotation(frame, rotate_code) boxes = detector.detect(frame) - detections.append([video, frame_num, boxes]) + detection = [video, frame_num] + + # each box is one set of face coords + for box in boxes: + for b in box: + detection.append(b) + detections.append(detection) # save detections to csv one video at a time with open(DETECTIONS_FILE, "a") as f: diff --git a/scripts/evaluator.py b/scripts/evaluator.py index d751e882..3ae3f961 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -1,4 +1,5 @@ import argparse +import csv import json import os import warnings @@ -12,8 +13,9 @@ from scripts.goggle_classifier import get_model from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector, Classifier +from src.jetson.models.utils.box_utils import matrix_iou -DETECTIONS_FILE = 'det_results.txt' +PRED_DETECTIONS_FILE = 'detection_predictions.txt' CLASSIFICATION_RESULTS_FILE = 'results.json' VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] @@ -22,27 +24,25 @@ Videos to be evaluated should be from the TestVideos folder on the Drive. """ -# TODO - TODO TODO don't evaluate face detection? Would have to manually label faces but we're using a -# TODO - SOTA face detection model that could just empirically be observed to work + # TODO make comments with @param things -# TODO load detections as csv file class Evaluator(): - def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path, rate=1): + def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file): """ Evaluates face detection and goggle classification performance. Goggle Classification accuracy is given by average class accuracy and individual video accuracy. Face detection accuracy is given by precision and recall values. 
- Args: - cuda: A bool value that specifies if cuda shall be used - detector: A string path to a .pth weights file for a face detection model - classifier: A string path to a .pth weights file for a goggle classification model - input_directory: Directory containing test videos to run Evaluator on - annotation_path: Directory containing annotation files (output by annotator.py) - rate: Run detection and classification on every 1/rate frames + @param cuda: A bool value that specifies if cuda shall be used + @param detector: A string path to a .pth weights file for a face detection model + @param detector_type: One of 'blazeface', 'ssd', 'retinaface'. + @param classifier: A string path to a .pth weights file for a goggle classification model + @param input_directory: Directory containing test videos to run Evaluator on + @param rate: Run detection and classification on every 1/rate frames + @param det_file: CSV generated by annotator.py containing detection results """ if cuda and torch.cuda.is_available(): @@ -52,16 +52,21 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a torch.set_default_tensor_type('torch.FloatTensor') self.device = torch.device('cpu') - if os.path.exists(DETECTIONS_FILE): - os.remove(DETECTIONS_FILE) + if os.path.exists(PRED_DETECTIONS_FILE): + os.remove(PRED_DETECTIONS_FILE) + + self.detector = FaceDetector(detector=detector, detector_type=detector_type, + cuda=cuda and torch.cuda.is_available(), set_default_dev=True) + + weights = torch.load(classifier, map_location=self.device) + if isinstance(weights, dict): + # if the .pth is just a state_dict, we need to + # load the model from goggle_classifier.py + model = get_model() + model.load_state_dict(weights) + weights = model - self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(), - set_default_dev=True) - # TODO check state_dict vs. 
not - model = get_model() - model.load_state_dict(torch.load(classifier, map_location=self.device)) - self.classifier = Classifier(model, self.device) - #self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device) + self.classifier = Classifier(weights, cuda) self.video_filenames = self.get_video_files(input_directory) self.results = {'Goggles': {'average_class_accuracy': 0.0, @@ -85,16 +90,15 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a self.video = '' self.video_len = 0 self.rate = rate - self.evaluate(annotation_path) + self.det_file = det_file + self.evaluate() - def evaluate(self, annotation_path: str): + def evaluate(self): """ Evaluates (classification and detection) every video file in the input directory containing test videos and stores results in self.results. To understand the format of self.results dict, check the constructor - Args: - annotation_path - Directory containing all the annotations of face detections """ total_videos_processed = 0 for video_file in self.video_filenames: @@ -106,8 +110,7 @@ def evaluate(self, annotation_path: str): self.cap = cv2.VideoCapture(self.video) if self.cap.isOpened(): - classification_result = self.evaluate_classifications() # Also contains boxes - self.record_results(classification_result) + self.evaluate_classifications() # Also contains boxes total_videos_processed += 1 print(f"{self.video} : Done") else: @@ -116,18 +119,15 @@ def evaluate(self, annotation_path: str): self.calculate_average_class_accuracy() - # ------- classification ^^^ detection vvv - # TODO why is this returning something - # TODO make it an optional arg to evaluate face detection - #detection_results = self.evaluate_detections(annotation_path, DETECTIONS_FILE) + if self.det_file is not None: + detection_results = self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE) print(f"\n {total_videos_processed} videos processed!") def evaluate_classifications(self): """ - 
Returns the accuracy (percentage_of_correct_predictions) of the - predictions for a video + Run classification on one video, save classification results """ inferences = self.infer() if sum(inferences.values()) == 0: @@ -135,31 +135,35 @@ def evaluate_classifications(self): else: percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values()) - return percentage_of_correct_predictions, inferences, sum(inferences.values()) + self.record_results((percentage_of_correct_predictions, inferences, sum(inferences.values()))) - def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file, overlap_threshold=0.5): + def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file): """ Calculates the recall and precision of face detection for a video. TODO explain what that means... seems like overlap of x and y coords? I.e. IoU? @param ground_truth_detections_file: file containing actual face detections (created by annotator.py) @param predicted_detections_file: file containing predicted face detections - @param overlap_threshold: IoU greater than threshold counts as correct, less than is incorrect """ - with open(ground_truth_detections_file) as detect_file: - ground_truth_detections = json.load(detect_file) + ground_truth_detections = [] + predicted_detections = [] + with open(ground_truth_detections_file, newline='') as detect_file: + reader = csv.reader(detect_file) + for row in reader: + ground_truth_detections.append(row) - with open(predicted_detections_file, 'r') as prediction_file: - predicted_detections = json.load(prediction_file) + with open(predicted_detections_file, newline='') as prediction_file: + reader = csv.reader(prediction_file) + for row in reader: + predicted_detections.append(row) - # TODO fix below based on detections format - total_ground_truths = 0 - for frame_id in ground_truth_detections: - total_ground_truths += len(ground_truth_detections[frame_id]) + total_ground_truths 
= len(ground_truth_detections) + true_pos = 0 + false_pos = 0 - # TODO ugly parsing and such here. Need to debug it. ==1 means...? - if any(predicted_detections) == 1: + for d in predicted_detections: + """ splitlines = [x.strip().split('|') for x in predicted_detections] image_ids = [x[0] for x in splitlines] confidence = np.array([float(x[5]) for x in splitlines]) @@ -170,12 +174,9 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections sorted_scores = np.sort(-confidence) bboxes = bboxes[sorted_ind, :] image_ids = [image_ids[x] for x in sorted_ind] + """ - nd = len(image_ids) - true_pos = np.zeros(nd) - false_pos = np.zeros(nd) - - # TODO for frame in frames? + """# TODO for frame in frames? for d in range(nd): try: bbox = bboxes[d, :].astype(float) @@ -199,89 +200,58 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections max_overlap = np.max(overlaps) # jmax = np.argmax(overlaps) - if max_overlap > overlap_threshold: - true_pos[d] = 1. + if max_overlap > 0.5: + true_pos += 1. else: - false_pos[d] = 1. - - except KeyError: - continue + false_pos += 1. 
+ """ + + # only look at frames where a face was detected + if len(d) > 2: + ground_truth_bboxes = None + pred_bboxes = d[2:6] + + # get matching frame detection from the ground_truth + for video_name, frame_num, _ in ground_truth_detections: + if video_name == d[0] and frame_num == d[1]: + # if the ground truth also detected a face in this frame + if len(_) > 0: + ground_truth_bboxes = _ + break + + if ground_truth_bboxes is not None: + # 0.5 IoU is commonly used to compare bounding boxes + if matrix_iou(np.asarray(pred_bboxes), np.asarray(ground_truth_bboxes)) > 0.5: + true_pos += 1 + else: + false_pos += 1 + else: + # ground truth did not detect a face, but the prediction did + false_pos += 1 print("Total ground truths: ", total_ground_truths) - false_pos = np.cumsum(false_pos) - true_pos = np.cumsum(true_pos) + recall = true_pos / float(total_ground_truths) - # avoid divide by zero in case the first detection matches a difficult - # ground truth + # avoid divide by zero in case the first detection matches a difficult ground truth precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps) - else: - recall = -1. - precision = -1. - ap = -1. 
print("Precision: ", precision) print("Recall: ", recall) - + # TODO difference between ^ and v return precision[len(precision)], recall[len(recall)] # final precision, recall - def calculate_average_class_accuracy(self): - """ - Calculates the average class accuracy for each class and stores it in self.results - """ - for class_label in self.results: - if self.results[class_label]['number_of_videos'] > 0: - self.results[class_label]['average_class_accuracy'] = self.results[class_label][ - 'average_class_accuracy'] / \ - self.results[class_label]['number_of_videos'] - - def record_results(self, result): - """ - Records results of one video in the self.results dict - - @param result(List) - contains the classification accuracy, - number of predictions for each label, number of detections - """ - self.results[self.class_label]['number_of_videos'] += 1 - # below is just a running sum which gets divided by the number of videos at the end - self.results[self.class_label]['average_class_accuracy'] += result[0] - self.results[self.class_label]['individual_video_results'][self.video] = {} - self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] - self.results[self.class_label]['individual_video_results'][self.video]["glasses"] = result[1]['Glasses'] - self.results[self.class_label]['individual_video_results'][self.video]["goggles"] = result[1]['Goggles'] - self.results[self.class_label]['individual_video_results'][self.video]["neither"] = result[1]['Neither'] - self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2] - self.results[self.class_label]['individual_video_results'][self.video]["num_frames"] = self.video_len - self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition - - def record_detections(self, file, detections): - """ - Save face detections in a file for evaluation - TODO improve how this is stored - Args: - file (str): Records 
def calculate_average_class_accuracy(self):
    """
    Calculates the average class accuracy for each class and stores it in self.results.

    record_results() keeps 'average_class_accuracy' as a running sum of the
    per-video accuracies; this divides that sum by 'number_of_videos' for
    every class. Classes without any recorded video are left untouched,
    which also avoids a division by zero.

    The running sum is overwritten in place, so this should be called once,
    after every video has been recorded.
    """
    for class_results in self.results.values():
        number_of_videos = class_results['number_of_videos']
        if number_of_videos > 0:
            class_results['average_class_accuracy'] /= number_of_videos


def record_results(self, result):
    """
    Records results of one video in the self.results dict.
    All of this information is necessary for getting detailed face detection results
    and creating classifier confusion matrices.

    Reads self.class_label, self.video, self.video_len and self.condition
    to decide where the entry goes and to complete it.

    @param result: indexable of three items -
                   result[0]: the classification accuracy for the video,
                   result[1]: dict with the number of predictions for each label
                              (keys 'Glasses', 'Goggles', 'Neither'),
                   result[2]: the number of detections
    """
    class_results = self.results[self.class_label]
    class_results['number_of_videos'] += 1
    # average_class_accuracy is a running sum which gets divided by the
    # number of videos at the end (see calculate_average_class_accuracy)
    class_results['average_class_accuracy'] += result[0]
    class_results['individual_video_results'][self.video] = {
        "accuracy": result[0],
        "glasses": result[1]['Glasses'],
        "goggles": result[1]['Goggles'],
        "neither": result[1]['Neither'],
        "num_detections": result[2],
        "num_frames": self.video_len,
        "condition": self.condition,
    }
parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") - parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation " - "files") + parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py." + "If given, the detections will be compared.") parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.') - # TODO add store_true args for detection, evaluation (to do separately if desired) args = parser.parse_args() if not args.input_directory: raise Exception("Invalid input directory") - evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, - args.annotation_path) + + evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier, args.input_directory, + args.rate, args.detection_file) individual_video_results = evaluator.get_evaluator_results() with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file: From 0f5ded333858b25221e9fcfd350a629780dc5b5b Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 23 Jul 2020 16:34:55 -0400 Subject: [PATCH 15/25] Add IoU calculation --- scripts/evaluator.py | 88 ++++++++++---------------------------------- scripts/utils.py | 32 ++++++++++++++++ 2 files changed, 51 insertions(+), 69 deletions(-) diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 3ae3f961..9f927575 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -7,27 +7,23 @@ import cv2 import numpy as np import torch -import torchvision from tqdm import tqdm from scripts.goggle_classifier import get_model -from scripts.utils import check_rotation, correct_rotation from src.jetson.main import FaceDetector, Classifier -from src.jetson.models.utils.box_utils import 
matrix_iou +from scripts.utils import check_rotation, correct_rotation, bbox_iou -PRED_DETECTIONS_FILE = 'detection_predictions.txt' +PRED_DETECTIONS_FILE = 'detection_predictions.csv' CLASSIFICATION_RESULTS_FILE = 'results.json' VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] """ -Use this script with annotator.py. +Evaluate classification and (optionally) face detection ability on a set of videos. Videos to be evaluated should be from the TestVideos folder on the Drive. +To compare face detection models, run annotator.py first. """ -# TODO make comments with @param things - - class Evaluator(): def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file): """ @@ -98,7 +94,6 @@ def evaluate(self): Evaluates (classification and detection) every video file in the input directory containing test videos and stores results in self.results. To understand the format of self.results dict, check the constructor - """ total_videos_processed = 0 for video_file in self.video_filenames: @@ -119,9 +114,8 @@ def evaluate(self): self.calculate_average_class_accuracy() - # TODO why is this returning something if self.det_file is not None: - detection_results = self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE) + self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE) print(f"\n {total_videos_processed} videos processed!") @@ -140,7 +134,7 @@ def evaluate_classifications(self): def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file): """ Calculates the recall and precision of face detection for a video. - TODO explain what that means... seems like overlap of x and y coords? I.e. IoU? + Defined by 0.5 IoU or greater with ground truth bounding box. 
@param ground_truth_detections_file: file containing actual face detections (created by annotator.py) @param predicted_detections_file: file containing predicted face detections @@ -158,70 +152,26 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections for row in reader: predicted_detections.append(row) - total_ground_truths = len(ground_truth_detections) true_pos = 0 false_pos = 0 for d in predicted_detections: - """ - splitlines = [x.strip().split('|') for x in predicted_detections] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[5]) for x in splitlines]) - bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines]) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - sorted_scores = np.sort(-confidence) - bboxes = bboxes[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - """ - - """# TODO for frame in frames? - for d in range(nd): - try: - bbox = bboxes[d, :].astype(float) - max_overlap = -np.inf - bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32) - if bbox_ground_truth_detections.size > 0: - # TODO max and min variable names are backwards? - ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0]) - iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1]) - ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2]) - iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3]) - iw = np.maximum(ixmax - ixmin, 0.) - ih = np.maximum(iymax - iymin, 0.) - # TODO debug. inters = intersection? uni = union? Overlaps is actual value? 
- # TODO import IoU from box_utils should work - inters = iw * ih - uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + - (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) * - (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters) - overlaps = inters / uni - max_overlap = np.max(overlaps) - # jmax = np.argmax(overlaps) - - if max_overlap > 0.5: - true_pos += 1. - else: - false_pos += 1. - """ - # only look at frames where a face was detected if len(d) > 2: ground_truth_bboxes = None pred_bboxes = d[2:6] # get matching frame detection from the ground_truth - for video_name, frame_num, _ in ground_truth_detections: - if video_name == d[0] and frame_num == d[1]: - # if the ground truth also detected a face in this frame - if len(_) > 0: - ground_truth_bboxes = _ + for detection in ground_truth_detections: + if detection[0] == d[0] and detection[1] == d[1]: + if len(detection) > 2: + # if the ground truth also detected a face in this frame + ground_truth_bboxes = detection[2:6] break if ground_truth_bboxes is not None: # 0.5 IoU is commonly used to compare bounding boxes - if matrix_iou(np.asarray(pred_bboxes), np.asarray(ground_truth_bboxes)) > 0.5: + if bbox_iou(pred_bboxes, ground_truth_bboxes) > 0.5: true_pos += 1 else: false_pos += 1 @@ -229,16 +179,16 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections # ground truth did not detect a face, but the prediction did false_pos += 1 - print("Total ground truths: ", total_ground_truths) + total_ground_truths = len(ground_truth_detections) + print("Total ground truths: ", total_ground_truths) - recall = true_pos / float(total_ground_truths) - # avoid divide by zero in case the first detection matches a difficult ground truth - precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps) + recall = true_pos / float(total_ground_truths) + # avoid divide by zero in case the first detection matches a difficult ground 
def bbox_iou(boxA, boxB):
    """
    Calculate IoU (Intersection over Union) of two bounding boxes.

    Coordinates are treated as inclusive pixel indices, which is why every
    width/height below adds 1. Values may be numbers or numeric strings
    (e.g. rows read back from a CSV file); they are converted to float
    locally and the caller's sequences are never modified. Only the first
    four entries of each box are used, so a trailing confidence value is
    ignored.

    @param boxA: the top left and bottom right coords of the box
                 as a sequence [xmin, ymin, xmax, ymax]
    @param boxB: the other box, same format as boxA.
                 It doesn't matter which one is the ground truth bounding box.
    @return: the IoU as a float; 0.0 when the boxes do not overlap
    """
    a_xmin, a_ymin, a_xmax, a_ymax = (float(v) for v in boxA[:4])
    b_xmin, b_ymin, b_xmax, b_ymax = (float(v) for v in boxB[:4])

    # (x, y)-coordinates of the intersection rectangle
    inter_xmin = max(a_xmin, b_xmin)
    inter_ymin = max(a_ymin, b_ymin)
    inter_xmax = min(a_xmax, b_xmax)
    inter_ymax = min(a_ymax, b_ymax)

    # clamp at 0 so disjoint boxes contribute no (negative) area
    inter_area = max(0.0, inter_xmax - inter_xmin + 1) * max(0.0, inter_ymax - inter_ymin + 1)

    # areas of both rectangles, same inclusive-pixel convention
    boxA_area = (a_xmax - a_xmin + 1) * (a_ymax - a_ymin + 1)
    boxB_area = (b_xmax - b_xmin + 1) * (b_ymax - b_ymin + 1)

    union_area = boxA_area + boxB_area - inter_area
    if union_area <= 0:
        # only reachable with degenerate (inverted) boxes; nothing to overlap
        return 0.0

    return inter_area / union_area
b/scripts/face_extractor.py @@ -10,7 +10,7 @@ from tqdm import tqdm from scripts.utils import check_rotation, correct_rotation -from src.jetson.main import FaceDetector +from src.jetson.face_detector import FaceDetector """ Given a folder of images or videos, run a face detector (literally a FaceDetector) on all images From 2170dfb9abe82fd6ef47fdbe921c3cd5e37255c1 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 23 Jul 2020 19:21:58 -0400 Subject: [PATCH 17/25] Update video_capturer device --- src/jetson/main.py | 3 --- src/jetson/video_capturer.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/jetson/main.py b/src/jetson/main.py index e76be8f7..9101c6b6 100644 --- a/src/jetson/main.py +++ b/src/jetson/main.py @@ -1,4 +1,3 @@ -import argparse import os import time import datetime @@ -8,8 +7,6 @@ import cv2 import torch -from torch.autograd import Variable -from torchvision import transforms from src.jetson.face_detector import FaceDetector from src.jetson.video_capturer import VideoCapturer diff --git a/src/jetson/video_capturer.py b/src/jetson/video_capturer.py index 04319250..9a94b267 100644 --- a/src/jetson/video_capturer.py +++ b/src/jetson/video_capturer.py @@ -32,7 +32,7 @@ def gstreamer_pipeline( class VideoCapturer(object): - def __init__(self, gstreamer, dev=1): + def __init__(self, gstreamer, dev=0): """ This class captures videos using open-cv's VideoCapture object Args: From 1205013006cd948918e65de04298e6af2c45f8d7 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Tue, 28 Jul 2020 10:18:38 -0400 Subject: [PATCH 18/25] Fix .mov mistake --- scripts/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/utils.py b/scripts/utils.py index 708a88a9..657666e3 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -16,7 +16,7 @@ def check_rotation(path_video_file: str): # only .mov files need to be rotated - if path_video_file.split('.')[-1] != '.MOV' or '.mov': + if path_video_file.split('.')[-1].upper() != 
'.MOV': return None # this returns meta-data of the video file in form of a dictionary From f12531253c7c8030c50513147c185e320b5aba9d Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Tue, 28 Jul 2020 12:39:42 -0400 Subject: [PATCH 19/25] Updates --- scripts/collect_images.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 04ba2467..51e0cb9d 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -2,14 +2,14 @@ import datetime import json -from src.jetson.db.db_connection import sql_cursor - -# from wherever import email method +from src.db.db_connection import sql_cursor +from scripts.automatic_notification import send_email """ Put this file one folder up from the stored images. -Eg. on the HELPS machine: if /local/b/embedvis/imgs contains images, +Eg. on ee220clnx1: if /local/b/embedvis/imgs contains images, this file's path should be /local/b/embedvis/collect_images.py +Set up a cron job to run this script daily. Collect images of non-goggle detections from the database. Upload images and metadata to Google Drive. @@ -31,6 +31,7 @@ def get_metadata(): """ metadata = [] + current_date = (datetime.date.today(),) # make sql connection # execute query @@ -38,13 +39,13 @@ def get_metadata(): try: cursor.execute('USE goggles') cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, ' - 'i.image_name, i.init_vector from bbox AS b, image as i where ' - 'b.image_name=i.image_name and b.goggles=False') + 'b.init_vector, i.image_name, i.image_date from BBOX AS b, IMAGE as i where ' + 'b.image_name=i.image_name and i.image_date=? and b.goggles=False', current_date) - for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor: + for (image_name, x_min, y_min, x_max, y_max, init_vector, image_name, image_date) in cursor: metadata.append({'image_name': image_name, 'x_min': float(x_min), # JSON cannot serialize Decimals. 
- 'y_min': float(y_min), # If there is a better way to do this, someone let me know. + 'y_min': float(y_min), # If there is a better way to do this, let me know. 'x_max': float(x_max), 'y_max': float(y_max), 'init_vector': init_vector @@ -75,15 +76,13 @@ def upload_files(metadata, dir): # subprocess rclone copy METADATA_FILE [Drive name]: -# TODO call Seoyoung's method to email +# TODO call Seoyoung's method to email??? if __name__ == "__main__": parser = argparse.ArgumentParser('Collect images.') parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') args = parser.parse_args() - current_date = datetime.datetime.now().strftime("%m-%d-%Y") - # call the methods metadata = get_metadata() upload_files(metadata, args.directory) From 865dba9290d575e69bb5cfdafad34252798ee8c2 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Mon, 3 Aug 2020 17:02:12 -0400 Subject: [PATCH 20/25] Split evaluation script and Evaluator class --- scripts/annotator.py | 3 +- scripts/evaluation.py | 47 +++++++++++++++++++++++ scripts/evaluator.py | 86 ++++++++++++++++--------------------------- 3 files changed, 80 insertions(+), 56 deletions(-) create mode 100644 scripts/evaluation.py diff --git a/scripts/annotator.py b/scripts/annotator.py index 67cb4afe..dbbf9382 100644 --- a/scripts/annotator.py +++ b/scripts/annotator.py @@ -13,7 +13,7 @@ """ Run the face detector model on a folder of videos (most recently used on TestVideos from the Drive). -Save bbox detections to a csv file to be compared in evaluator.py. +Save bbox detections to a csv file to be compared in evaluation.py An earlier version of this script was used to compare Retinaface with a Mobilenet backbone versus a Resnet backbone; comparison of object detectors would be its most applicable use. 
@@ -57,7 +57,6 @@ def get_videos(input_directory): torch.set_grad_enabled(False) - # load the face detector detector = FaceDetector(detector=args.detector, detector_type=args.detector_type, cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True) diff --git a/scripts/evaluation.py b/scripts/evaluation.py new file mode 100644 index 00000000..da7d3e6b --- /dev/null +++ b/scripts/evaluation.py @@ -0,0 +1,47 @@ +import argparse +import json +import warnings + +import torch + +from scripts.evaluator import Evaluator + +PRED_DETECTIONS_FILE = 'detection_predictions.csv' +CLASSIFICATION_RESULTS_FILE = 'results.json' + +""" +Evaluate classification and (optionally) compare face detection models on a set of videos. +Videos to be evaluated should be from the TestVideos folder on the Drive +to get correct labels and conditions. +To compare face detection models, run annotator.py first. +""" + +if __name__ == "__main__": + warnings.filterwarnings("once") + parser = argparse.ArgumentParser(description="Face detection") + parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth', + help="Path to a trained face detector .pth file") + parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd") + parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, + help="Path to a trained classifier .pth file") + parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") + parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") + parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py." 
+ "If given, the detections will be compared.") + parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.') + + args = parser.parse_args() + + if not args.input_directory: + raise Exception("Invalid input directory") + + evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier, + args.input_directory, args.rate, args.detection_file, PRED_DETECTIONS_FILE) + individual_video_results = evaluator.get_evaluator_results() + + with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file: + json.dump(individual_video_results, json_file, indent=4) + + print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}") + + exit() diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 4a896cbb..8c8aba74 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -14,23 +14,17 @@ from src.jetson.classifier import Classifier from scripts.utils import check_rotation, correct_rotation, bbox_iou -PRED_DETECTIONS_FILE = 'detection_predictions.csv' -CLASSIFICATION_RESULTS_FILE = 'results.json' -VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] -""" -Evaluate classification and (optionally) face detection ability on a set of videos. -Videos to be evaluated should be from the TestVideos folder on the Drive. -To compare face detection models, run annotator.py first. -""" +VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] class Evaluator(): - def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file): + def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, + comparison_dets_file, self_dets_file): """ Evaluates face detection and goggle classification performance. - Goggle Classification accuracy is given by average class accuracy and individual - video accuracy. + Goggle Classification accuracy is given by average class accuracy and + accuracy for each individual video. 
Face detection accuracy is given by precision and recall values. @param cuda: A bool value that specifies if cuda shall be used @@ -39,7 +33,9 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r @param classifier: A string path to a .pth weights file for a goggle classification model @param input_directory: Directory containing test videos to run Evaluator on @param rate: Run detection and classification on every 1/rate frames - @param det_file: CSV generated by annotator.py containing detection results + @param comparison_dets_file: CSV generated by annotator.py containing detection results + of another detection model (to be compared) + @param self_dets_file: CSV generated by this class containing detections by self.detector """ if cuda and torch.cuda.is_available(): @@ -49,9 +45,6 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r torch.set_default_tensor_type('torch.FloatTensor') self.device = torch.device('cpu') - if os.path.exists(PRED_DETECTIONS_FILE): - os.remove(PRED_DETECTIONS_FILE) - self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(), set_default_dev=True) @@ -87,15 +80,20 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r self.video = '' self.video_len = 0 self.rate = rate - self.det_file = det_file + self.comparison_dets_file = comparison_dets_file self.evaluate() + self.self_dets_file = self_dets_file + + if os.path.exists(self.self_dets_file): + os.remove(self.self_dets_file) def evaluate(self): """ Evaluates (classification and detection) every video file in the input directory containing test videos and stores results in self.results. 
- To understand the format of self.results dict, check the constructor + To understand the format of the self.results dict, check the constructor """ + total_videos_processed = 0 for video_file in self.video_filenames: self.video = video_file @@ -115,8 +113,8 @@ def evaluate(self): self.calculate_average_class_accuracy() - if self.det_file is not None: - self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE) + if self.comparison_dets_file is not None: + self.evaluate_detections(self.comparison_dets_file, self.self_dets_file) print(f"\n {total_videos_processed} videos processed!") @@ -124,6 +122,7 @@ def evaluate_classifications(self): """ Run classification on one video, save classification results """ + inferences = self.infer() if sum(inferences.values()) == 0: percentage_of_correct_predictions = 0 @@ -135,10 +134,10 @@ def evaluate_classifications(self): def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file): """ Calculates the recall and precision of face detection for a video. - Defined by 0.5 IoU or greater with ground truth bounding box. + A "correct" detection is defined by 0.5 IoU or greater with the bounding box of the comparison detections. 
- @param ground_truth_detections_file: file containing actual face detections (created by annotator.py) - @param predicted_detections_file: file containing predicted face detections + @param ground_truth_detections_file: file containing detections to be compared (created by annotator.py) + @param predicted_detections_file: file containing detections by self.detector """ ground_truth_detections = [] @@ -199,6 +198,7 @@ def infer(self): @return inference_dict: the number of inferences for each class """ + detections = [] preds = [] inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0} @@ -230,8 +230,8 @@ def infer(self): inference_dict["Neither"] += preds.count(2) # save the detections for comparison later - if self.det_file is not None: - with open(PRED_DETECTIONS_FILE, "a") as f: + if self.comparison_dets_file is not None: + with open(self.self_dets_file, "a") as f: writer = csv.writer(f) writer.writerows(detections) @@ -241,6 +241,7 @@ def calculate_average_class_accuracy(self): """ Calculates the average class accuracy for each class and stores it in self.results """ + for class_label in self.results: if self.results[class_label]['number_of_videos'] > 0: self.results[class_label]['average_class_accuracy'] = self.results[class_label][ @@ -254,10 +255,12 @@ def record_results(self, result): and creating classifier confusion matrices. 
@param result(List) - contains the classification accuracy, - number of predictions for each label, number of detections + number of predictions for each label, number of detections (see evaluate_classifications) """ + self.results[self.class_label]['number_of_videos'] += 1 # average_class_accuracy is a running sum which gets divided by the number of videos at the end + # see calculate_average_class_accuracy self.results[self.class_label]['average_class_accuracy'] += result[0] self.results[self.class_label]['individual_video_results'][self.video] = {} self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] @@ -272,6 +275,7 @@ def get_class_label(self): """ Get class label [Goggles / Glasses / Neither] that the image belongs to """ + if '/Goggles/' in self.video or '/goggles/' in self.video: class_label = 'Goggles' elif '/Glasses/' in self.video or '/glasses/' in self.video: @@ -285,12 +289,14 @@ def get_condition(self): """ Get condition [Ideal, low_lighting etc. 
] that the image belongs to """ + return self.video.split('/')[-2] def get_video_files(self, input_directory: str): """ Gets all the video files in the input directory """ + filenames = [] for dirName, subdirList, fileList in os.walk(input_directory): for filename in fileList: @@ -304,35 +310,7 @@ def get_evaluator_results(self): """ Returns the dict containing all the test results (self.results) """ - return self.results - - -if __name__ == "__main__": - warnings.filterwarnings("once") - parser = argparse.ArgumentParser(description="Face detection") - parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth', - help="Path to a trained face detector .pth file") - parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd") - parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str, - help="Path to a trained classifier .pth file") - parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA") - parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files") - parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py." 
- "If given, the detections will be compared.") - parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.') - args = parser.parse_args() - - if not args.input_directory: - raise Exception("Invalid input directory") - - evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier, args.input_directory, - args.rate, args.detection_file) - individual_video_results = evaluator.get_evaluator_results() - - with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file: - json.dump(individual_video_results, json_file, indent=4) + return self.results - print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}") - exit() From f400f7685949f694d8f3137488d81bb4a568ebf0 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Mon, 3 Aug 2020 17:17:55 -0400 Subject: [PATCH 21/25] Add constants file --- scripts/constants.py | 5 +++++ scripts/evaluator.py | 26 ++++---------------------- scripts/face_extractor.py | 4 ++-- 3 files changed, 11 insertions(+), 24 deletions(-) create mode 100644 scripts/constants.py diff --git a/scripts/constants.py b/scripts/constants.py new file mode 100644 index 00000000..9987a69d --- /dev/null +++ b/scripts/constants.py @@ -0,0 +1,5 @@ +# constants that may be used in multiple files + +# support image and video extensions +IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG'] +VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV', '.avi', '.AVI'] \ No newline at end of file diff --git a/scripts/evaluator.py b/scripts/evaluator.py index 8c8aba74..de558191 100644 --- a/scripts/evaluator.py +++ b/scripts/evaluator.py @@ -1,8 +1,5 @@ -import argparse import csv -import json import os -import warnings import cv2 import numpy as np @@ -10,12 +7,10 @@ from tqdm import tqdm from scripts.goggle_classifier import get_model +from scripts.constants import VIDEO_EXT +from scripts.utils import check_rotation, correct_rotation, bbox_iou from src.jetson.face_detector import FaceDetector from 
src.jetson.classifier import Classifier -from scripts.utils import check_rotation, correct_rotation, bbox_iou - - -VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI'] class Evaluator(): @@ -37,7 +32,6 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r of another detection model (to be compared) @param self_dets_file: CSV generated by this class containing detections by self.detector """ - if cuda and torch.cuda.is_available(): torch.set_default_tensor_type('torch.cuda.FloatTensor') self.device = torch.device('cuda:0') @@ -93,7 +87,6 @@ def evaluate(self): containing test videos and stores results in self.results. To understand the format of the self.results dict, check the constructor """ - total_videos_processed = 0 for video_file in self.video_filenames: self.video = video_file @@ -122,7 +115,6 @@ def evaluate_classifications(self): """ Run classification on one video, save classification results """ - inferences = self.infer() if sum(inferences.values()) == 0: percentage_of_correct_predictions = 0 @@ -139,7 +131,6 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections @param ground_truth_detections_file: file containing detections to be compared (created by annotator.py) @param predicted_detections_file: file containing detections by self.detector """ - ground_truth_detections = [] predicted_detections = [] with open(ground_truth_detections_file, newline='') as detect_file: @@ -198,7 +189,6 @@ def infer(self): @return inference_dict: the number of inferences for each class """ - detections = [] preds = [] inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0} @@ -241,7 +231,6 @@ def calculate_average_class_accuracy(self): """ Calculates the average class accuracy for each class and stores it in self.results """ - for class_label in self.results: if self.results[class_label]['number_of_videos'] > 0: self.results[class_label]['average_class_accuracy'] = self.results[class_label][ @@ 
-257,10 +246,9 @@ def record_results(self, result): @param result(List) - contains the classification accuracy, number of predictions for each label, number of detections (see evaluate_classifications) """ - self.results[self.class_label]['number_of_videos'] += 1 - # average_class_accuracy is a running sum which gets divided by the number of videos at the end - # see calculate_average_class_accuracy + # average_class_accuracy is a running sum which gets divided by the number of videos after evaluating all videos + # (see calculate_average_class_accuracy) self.results[self.class_label]['average_class_accuracy'] += result[0] self.results[self.class_label]['individual_video_results'][self.video] = {} self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0] @@ -275,7 +263,6 @@ def get_class_label(self): """ Get class label [Goggles / Glasses / Neither] that the image belongs to """ - if '/Goggles/' in self.video or '/goggles/' in self.video: class_label = 'Goggles' elif '/Glasses/' in self.video or '/glasses/' in self.video: @@ -289,14 +276,12 @@ def get_condition(self): """ Get condition [Ideal, low_lighting etc. 
] that the image belongs to """ - return self.video.split('/')[-2] def get_video_files(self, input_directory: str): """ Gets all the video files in the input directory """ - filenames = [] for dirName, subdirList, fileList in os.walk(input_directory): for filename in fileList: @@ -310,7 +295,4 @@ def get_evaluator_results(self): """ Returns the dict containing all the test results (self.results) """ - return self.results - - diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py index 95d50615..1477dcbd 100644 --- a/scripts/face_extractor.py +++ b/scripts/face_extractor.py @@ -9,6 +9,7 @@ import numpy as np from tqdm import tqdm +from scripts.constants import IMAGE_EXT, VIDEO_EXT from scripts.utils import check_rotation, correct_rotation from src.jetson.face_detector import FaceDetector @@ -19,8 +20,7 @@ """ warnings.filterwarnings('once') -IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG'] -VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV', '.avi', '.AVI'] + def get_images(input_dir): From eca9ab468c22404ae943877e1301a731495167ea Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Wed, 5 Aug 2020 20:24:29 -0400 Subject: [PATCH 22/25] Update ftb transfer location. Make rclone to Drive work on remote server (HELPS machine). --- scripts/collect_images.py | 58 ++++++++++++++++++++++++++------------- scripts/prepare_images.py | 3 ++ src/db/data_insertion.py | 7 +++-- src/db/db_connection.py | 4 --- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 51e0cb9d..2f40b940 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -1,25 +1,26 @@ import argparse import datetime import json +import os +import subprocess from src.db.db_connection import sql_cursor -from scripts.automatic_notification import send_email """ Put this file one folder up from the stored images. -Eg. on ee220clnx1: if /local/b/embedvis/imgs contains images, +Eg. 
on ee220clnx1: if /local/b/embedvis/Nano_Images contains images, this file's path should be /local/b/embedvis/collect_images.py Set up a cron job to run this script daily. +rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/ Collect images of non-goggle detections from the database. Upload images and metadata to Google Drive. -Email end-user with the Drive link. """ -METADATA_FILE = 'metadata.json' +METADATA_FILE = os.path.join(os.path.dirname(__file__), 'metadata.json') +TODAY = datetime.datetime.today().strftime('%Y-%m-%d') -# TODO rename method def get_metadata(): """ Get image filenames and other relevant metadata from the database. @@ -31,7 +32,12 @@ def get_metadata(): """ metadata = [] - current_date = (datetime.date.today(),) + #current_date = (datetime.date.today(),) + + # for testing + date = datetime.datetime(2020, 7, 23) + current_date = (date,) + # for testing # make sql connection # execute query @@ -39,10 +45,10 @@ def get_metadata(): try: cursor.execute('USE goggles') cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, ' - 'b.init_vector, i.image_name, i.image_date from BBOX AS b, IMAGE as i where ' - 'b.image_name=i.image_name and i.image_date=? and b.goggles=False', current_date) + 'b.init_vector, b.goggles from BBOX AS b, IMAGE as i where ' + 'b.image_name=i.image_name and i.image_date=%s and b.goggles=False', current_date) - for (image_name, x_min, y_min, x_max, y_max, init_vector, image_name, image_date) in cursor: + for (image_name, x_min, y_min, x_max, y_max, init_vector, goggles) in cursor: metadata.append({'image_name': image_name, 'x_min': float(x_min), # JSON cannot serialize Decimals. 'y_min': float(y_min), # If there is a better way to do this, let me know. 
@@ -58,31 +64,45 @@ def get_metadata(): return metadata -# TODO make folder with date to contain images and metadata file -def upload_files(metadata, dir): +def upload_files(metadata, dir, rclone_path, remote_name): """ For each filename returned by get_metadata, upload image to Drive. Upload the day's metadata file. @param metadata: the list of dictionaries returned by get_metadata @param dir: the folder containing the images to upload + @param rclone_path: path to rclone installation. Must be an absolute path if on the HELPS machine. + @param remote_name: name of remote location in rclone """ - for image in metadata: - # upload image using rclone - # subprocess rclone copy os.path.join(dir, image['image_name']) [Drive name] - pass + # prevent sending the same image twice (if two faces are detected) + images = [] - # upload metadata json file to Drive - # subprocess rclone copy METADATA_FILE [Drive name]: + # send images to the Drive + for image in metadata: + if image not in images: + images.append(image) + image_path = os.path.join(os.path.dirname(__file__), dir, image['image_name']) + subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)]) + # upload metadata json to the Drive + subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)]) -# TODO call Seoyoung's method to email??? if __name__ == "__main__": parser = argparse.ArgumentParser('Collect images.') parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') + parser.add_argument('--rclone_path', '-r', type=str, default='rclone', help='Path to rclone installation. If not ' + 'on the HELPS machine, the default ' + 'should work (if you have rclone ' + 'installed).') + parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according ' + 'to rclone (default is the Drive ' + 'name on the HELPS machine). 
Don\'t ' + 'include the semicolon.') args = parser.parse_args() # call the methods metadata = get_metadata() - upload_files(metadata, args.directory) + upload_files(metadata, args.directory, args.rclone_path, args.remote_name) + + exit(0) diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py index 4d78b22e..eaa46bca 100644 --- a/scripts/prepare_images.py +++ b/scripts/prepare_images.py @@ -5,8 +5,11 @@ """ After having run collect_images, decrypt the associated images (if necessary) and combine images together into a short video (using metadata). + +This file is assumed to be on the end user's machine. """ +# the metadata file generated by collect_images METADATA_FILE = 'metadata.json' diff --git a/src/db/data_insertion.py b/src/db/data_insertion.py index f6b3b6d9..4f7ab1c4 100644 --- a/src/db/data_insertion.py +++ b/src/db/data_insertion.py @@ -3,6 +3,9 @@ from decimal import Decimal import datetime +# location where the images will be stored (on the HELPS machine) +IMAGE_DIR = '/local/b/embedvis/Nano_Images' + def data_insert(image_name: str, image_date: datetime, image_time: datetime, init_vecs: list, bboxes: list, input_dir: str, labels: list): """Transfer image to remote storage then inserts image metadata and bounding boxes data in database @@ -18,8 +21,8 @@ def data_insert(image_name: str, image_date: datetime, image_time: datetime, ini """ # Below ftp transfer has been commented out for testing purposes - #with ftp_transfer() as transfer: - #transfer(input_dir, './Documents', image_name) + with ftp_transfer() as transfer: + transfer(input_dir, IMAGE_DIR, image_name) sql_insert(IMAGE(image_name, image_date, image_time)) diff --git a/src/db/db_connection.py b/src/db/db_connection.py index d7241ff2..d719104c 100644 --- a/src/db/db_connection.py +++ b/src/db/db_connection.py @@ -1,13 +1,9 @@ -import datetime - import mysql.connector -import datetime from src.db.config import get_config from contextlib import contextmanager, closing import datetime 
-from src.jetson.db.config import get_config class Table: def __init__(self): From 310b7472be4af2c993287b998d9019f78b6937f0 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Wed, 12 Aug 2020 18:50:14 -0400 Subject: [PATCH 23/25] Made rclone path to be fixed. Renamed to decrypt_images (that's all it does now). --- scripts/collect_images.py | 49 ++++++++++++++++-------------------- scripts/decrypt_images.py | 42 +++++++++++++++++++++++++++++++ scripts/prepare_images.py | 53 --------------------------------------- 3 files changed, 64 insertions(+), 80 deletions(-) create mode 100644 scripts/decrypt_images.py delete mode 100644 scripts/prepare_images.py diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 2f40b940..6078a620 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -7,35 +7,35 @@ from src.db.db_connection import sql_cursor """ -Put this file one folder up from the stored images. -Eg. on ee220clnx1: if /local/b/embedvis/Nano_Images contains images, -this file's path should be /local/b/embedvis/collect_images.py -Set up a cron job to run this script daily. +This script should be set up with a cron job to run daily. rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/ Collect images of non-goggle detections from the database. Upload images and metadata to Google Drive. """ -METADATA_FILE = os.path.join(os.path.dirname(__file__), 'metadata.json') +METADATA_FILE = 'metadata.json' +# rclone on ee220clnx1 is an earlier version that doesn't support copying to shared folders +RCLONE_PATH = '/home/shay/a/bergz/rclone-v1.52.2-linux-amd64/rclone' TODAY = datetime.datetime.today().strftime('%Y-%m-%d') def get_metadata(): """ Get image filenames and other relevant metadata from the database. - @return: A list of dictionaries with the metadata for each image TODO describe the metadata + Save metadata to a file for future decryption. 
+ @return: A list of dictionaries with the metadata for each image - Query: - SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, - i.image_name, i.init_vector from bbox AS b, image as i where b.image_name=i.image_name and b.goggles=False + Example list: [ + {'image_name': "0.jpg", 'x_min': 0.0, 'y_min': 0.0, 'x_max': 100.0, 'y_max': 100.0, 'init_vector': "example"} + {'image_name': "1.jpg", 'x_min': 25.0, 'y_min': 25.0, 'x_max': 120.0, 'y_max': 140.0, 'init_vector': "example2"}] """ metadata = [] - #current_date = (datetime.date.today(),) + # current_date = (datetime.date.today(),) # for testing - date = datetime.datetime(2020, 7, 23) + date = datetime.datetime(2020, 8, 10) current_date = (date,) # for testing @@ -64,45 +64,40 @@ def get_metadata(): return metadata -def upload_files(metadata, dir, rclone_path, remote_name): +def upload_files(metadata, dir, remote_name): """ For each filename returned by get_metadata, upload image to Drive. Upload the day's metadata file. @param metadata: the list of dictionaries returned by get_metadata @param dir: the folder containing the images to upload - @param rclone_path: path to rclone installation. Must be an absolute path if on the HELPS machine. 
@param remote_name: name of remote location in rclone """ - # prevent sending the same image twice (if two faces are detected) images = [] # send images to the Drive for image in metadata: + # prevent sending the same image twice (if two faces are detected) if image not in images: images.append(image) - image_path = os.path.join(os.path.dirname(__file__), dir, image['image_name']) - subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)]) + image_path = os.path.join(dir, image['image_name']) + subprocess.run([RCLONE_PATH, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)]) - # upload metadata json to the Drive - subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)]) + # upload metadata.json to the Drive + subprocess.run([RCLONE_PATH, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)]) + os.remove(METADATA_FILE) if __name__ == "__main__": parser = argparse.ArgumentParser('Collect images.') parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') - parser.add_argument('--rclone_path', '-r', type=str, default='rclone', help='Path to rclone installation. If not ' - 'on the HELPS machine, the default ' - 'should work (if you have rclone ' - 'installed).') parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according ' - 'to rclone (default is the Drive ' - 'name on the HELPS machine). Don\'t ' - 'include the semicolon.') + 'to rclone (default is the Drive ' + 'name on ee220clnx1). 
Don\'t ' + 'include the semicolon.') args = parser.parse_args() - # call the methods metadata = get_metadata() - upload_files(metadata, args.directory, args.rclone_path, args.remote_name) + upload_files(metadata, args.directory, args.remote_name) exit(0) diff --git a/scripts/decrypt_images.py b/scripts/decrypt_images.py new file mode 100644 index 00000000..f81e6cf4 --- /dev/null +++ b/scripts/decrypt_images.py @@ -0,0 +1,42 @@ +import argparse +import getpass +import json +import os + +from src.jetson.AES import Encryption + +""" +After having collect_images has run and the output folder has been downloaded, +decrypt the associated images. +This file is assumed to be on the end user's machine. +""" + +# the metadata file generated by collect_images +METADATA_FILE = 'metadata.json' + + +def decrypt_images(dir): + # ask for decryption key + decrypt_key = getpass.getpass('Decryption password: ') + + # convert to PKBDF2 or whatever + + # make decryptor; probably changes once Jason finishes + decryptor = Encryption + + with open(os.path.join(dir, METADATA_FILE)) as meta_file: + metadata = json.load(meta_file) + # use face coords to find where to decrypt in video frame + for image in metadata: + # TODO handle multiple faces in one frame. 
append to coords list + coords = [(image['x_min'], image['y_min'], image['x_max'], image['y_max'])] + init_vector = image['init_vector'] + # overwrite encrypted image + + +if __name__ == "__main__": + parser = argparse.ArgumentParser('Decrypt images.') + parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be decrypted.') + args = parser.parse_args() + + decrypt_images(args.directory) diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py deleted file mode 100644 index eaa46bca..00000000 --- a/scripts/prepare_images.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import json -import os - -""" -After having run collect_images, decrypt the associated images -(if necessary) and combine images together into a short video (using metadata). - -This file is assumed to be on the end user's machine. -""" - -# the metadata file generated by collect_images -METADATA_FILE = 'metadata.json' - - -def decrypt_images(dir): - # ask for decryption key - with open(os.path.join(dir, METADATA_FILE)) as meta_file: - metadata = json.load(meta_file) - # use face coords to find where to decrypt in video frame - # decrypt - pass - - -def make_videos(dir): - # use a heuristic (such as images within 5 seconds of each other) - # to combine similar images into one video for easier viewing - with open(os.path.join(dir, METADATA_FILE)) as meta_file: - metadata = json.load(meta_file) - # for each image, if within 5 seconds of the previous one, - # concatenate them and make them into a video - pass - - -if __name__ == "__main__": - parser = argparse.ArgumentParser('Combine images into a short video for easier viewing.' 
- 'Decrypt if needed.') - parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be prepared.') - parser.add_argument('--decrypt', default=False, action='store_true', help='Decrypt faces in the images.') - parser.add_argument('--make_videos', '-m', default=False, action='store_true', - help='Combine frames from the same time period into a single video.') - - args = parser.parse_args() - - if not args.decrypt and not args.make_videos: - print('No options selected. Please select at least one of --decrypt or --make_videos.') - exit(0) - - if args.decrypt: - decrypt_images(args.directory) - - if args.make_videos(): - make_videos(args.directory) From f8ef93b5a49ebcb05133736dbb5b9779592c7039 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Wed, 12 Aug 2020 19:00:18 -0400 Subject: [PATCH 24/25] Remove testing date --- scripts/collect_images.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 6078a620..01b3cdc0 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -32,12 +32,7 @@ def get_metadata(): """ metadata = [] - # current_date = (datetime.date.today(),) - - # for testing - date = datetime.datetime(2020, 8, 10) - current_date = (date,) - # for testing + current_date = (datetime.date.today(),) # make sql connection # execute query From 50c10c9805507d92582a4efd1684d15cd2f00529 Mon Sep 17 00:00:00 2001 From: ZPBerg Date: Thu, 13 Aug 2020 12:14:50 -0400 Subject: [PATCH 25/25] Changed rclone instructions. --- scripts/collect_images.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/scripts/collect_images.py b/scripts/collect_images.py index 01b3cdc0..572ba299 100644 --- a/scripts/collect_images.py +++ b/scripts/collect_images.py @@ -8,16 +8,14 @@ """ This script should be set up with a cron job to run daily. 
-rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/ +Each user will have to set up their rclone config, in our case pointing to a Google Drive folder: https://rclone.org/drive/ +This can be done with /local/b/embedvis/rclone-v1.52.2-linux-amd64/rclone config Collect images of non-goggle detections from the database. Upload images and metadata to Google Drive. """ METADATA_FILE = 'metadata.json' -# rclone on ee220clnx1 is an earlier version that doesn't support copying to shared folders -RCLONE_PATH = '/home/shay/a/bergz/rclone-v1.52.2-linux-amd64/rclone' -TODAY = datetime.datetime.today().strftime('%Y-%m-%d') def get_metadata(): @@ -45,8 +43,8 @@ def get_metadata(): for (image_name, x_min, y_min, x_max, y_max, init_vector, goggles) in cursor: metadata.append({'image_name': image_name, - 'x_min': float(x_min), # JSON cannot serialize Decimals. - 'y_min': float(y_min), # If there is a better way to do this, let me know. + 'x_min': float(x_min), + 'y_min': float(y_min), 'x_max': float(x_max), 'y_max': float(y_max), 'init_vector': init_vector @@ -59,7 +57,7 @@ def get_metadata(): return metadata -def upload_files(metadata, dir, remote_name): +def upload_files(metadata, dir, rclone_path, remote_name): """ For each filename returned by get_metadata, upload image to Drive. Upload the day's metadata file. 
@@ -69,6 +67,7 @@ def upload_files(metadata, dir, remote_name): """ images = [] + today = datetime.datetime.today().strftime('%Y-%m-%d') # send images to the Drive for image in metadata: @@ -76,23 +75,24 @@ def upload_files(metadata, dir, remote_name): if image not in images: images.append(image) image_path = os.path.join(dir, image['image_name']) - subprocess.run([RCLONE_PATH, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)]) + subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, today)]) # upload metadata.json to the Drive - subprocess.run([RCLONE_PATH, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)]) + subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, today)]) os.remove(METADATA_FILE) if __name__ == "__main__": parser = argparse.ArgumentParser('Collect images.') parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload') - parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according ' - 'to rclone (default is the Drive ' - 'name on ee220clnx1). Don\'t ' - 'include the semicolon.') + parser.add_argument('--rclone_path', '-r', type=str, default='/local/b/embedvis/rclone-v1.52.2-linux-amd64/rclone', + help='Location of rclone binary. Default version on ee220clnx1 doesn\'t support copying to ' + 'shared folders.') + parser.add_argument('--remote_name', type=str, help='Name of remote location according to rclone config. You must ' + 'create your own config.') args = parser.parse_args() metadata = get_metadata() - upload_files(metadata, args.directory, args.remote_name) + upload_files(metadata, args.directory, args.rclone_path, args.remote_name) exit(0)