From 0214cafd9cd00cc8676fe0063c35cab5391f09e9 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 2 Jul 2020 14:25:01 -0400
Subject: [PATCH 01/25] Initial commit of collect_images

---
 scripts/collect_images.py | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 scripts/collect_images.py

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
new file mode 100644
index 00000000..cb31b4a0
--- /dev/null
+++ b/scripts/collect_images.py
@@ -0,0 +1,44 @@
+import datetime
+import json
+
+# from Embedded2.src.jetson.db ... import ...
+
+"""
+Put this file one folder up from the stored images.
+Eg. /local/b/embedvis/imgs contains images, /local/b/embedvis/collect_images.py
+
+Collect images of non-goggle detections from the database.
+Upload images to Google Drive.
+Email end-user with the Drive link.
+"""
+
+
+def query_db():
+    """Get image filenames. Probably just a SQL query."""
+    pass
+
+
+def upload_images(imgs):
+    """
+    For each filename returned by query_db, upload image
+    and its relevant metadata (eg. face coords) to Drive.
+    @param imgs: [str, str, ...]
+    """
+
+    current_date = datetime.datetime.now().strftime("%m-%d-%Y")
+
+    # TODO how should metadata be transferred? JSON file?
+    with open(current_date + '.json', 'w') as meta_file:
+        for i in imgs:
+            # 1. append image metadata
+            # 2. upload image
+            image_metadata = []
+            json.dump(image_metadata, meta_file)
+            pass
+
+        # upload metadata json file to Drive
+
+
+if __name__ == "__main__":
+    # call the methods
+    pass

From bf3390368f324ea499702ce6115212fd250363fc Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Tue, 7 Jul 2020 09:40:21 -0400
Subject: [PATCH 02/25] Initial collect_images commit. Deleted login.json from
 Git

---
 scripts/collect_images.py      | 68 +++++++++++++++++++++++++---------
 src/jetson/db/db_connection.py |  4 +-
 src/jetson/db/login.json       | 11 ------
 3 files changed, 54 insertions(+), 29 deletions(-)
 delete mode 100644 src/jetson/db/login.json

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index cb31b4a0..c5eeb7c9 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -2,6 +2,7 @@
 import json
 
 # from Embedded2.src.jetson.db ... import ...
+# from wherever import email method
 
 """
 Put this file one folder up from the stored images.
@@ -13,32 +14,65 @@
 """
 
 
-def query_db():
-    """Get image filenames. Probably just a SQL query."""
-    pass
+# TODO rename method
+def get_metadata():
+    """
+    Get image filenames and other relevant metadata from the database.
+    @return: A list of dictionaries with the metadata for each image TODO describe the metadata
+
+    Query:
+    SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max,
+    i.image_name, i.init_vector from bbox AS b, image as i where b.image_name == i.image_name and b.goggles == False
+    """
+    # make sql connection
+    # execute query
+
+    # for everything returned:
+            # combine everything into a dictionary
+            # append dictionary to list
 
+    # return list of dictionaries
+    # TODO just json.dump entire list?
+    return []
 
-def upload_images(imgs):
+# TODO don't need this method if json.dump ing all dictionaries at once
+def organize_metadata(metadata):
     """
-    For each filename returned by query_db, upload image
-    and its relevant metadata (eg. face coords) to Drive.
-    @param imgs: [str, str, ...]
+    Create metadata file needed for decrypting images.
+    @param metadata: the list of dictionaries returned by get_metadata
     """
 
-    current_date = datetime.datetime.now().strftime("%m-%d-%Y")
+    with open(meta_file, 'w') as m:
+        for x in metadata:
+            # append image metadata
 
-    # TODO how should metadata be transferred? JSON file?
-    with open(current_date + '.json', 'w') as meta_file:
-        for i in imgs:
-            # 1. append image metadata
-            # 2. upload image
+            # use metadata param
             image_metadata = []
-            json.dump(image_metadata, meta_file)
-            pass
+            json.dump(image_metadata, m)
 
-        # upload metadata json file to Drive
 
+def upload_files(metadata):
+    """
+    For each filename returned by get_metadata, upload image
+    to Drive. Upload the day's metadata file.
+    @param metadata: the list of dictionaries returned by get_metadata
+    """
+
+    for image in metadata:
+        # upload image using rclone
+        pass
+
+    # upload metadata json file to Drive
+    # subprocess rclone copy meta_file [name of Drive in rclone]:
+
+
+# TODO call Seoyoung's method to email
 
 if __name__ == "__main__":
+    current_date = datetime.datetime.now().strftime("%m-%d-%Y")
+    meta_file = current_date + '.json'
+
     # call the methods
-    pass
+    metadata = get_metadata()
+    organize_metadata(metadata)
+    upload_files(metadata)
diff --git a/src/jetson/db/db_connection.py b/src/jetson/db/db_connection.py
index 24e6565b..72e95892 100644
--- a/src/jetson/db/db_connection.py
+++ b/src/jetson/db/db_connection.py
@@ -1,5 +1,7 @@
+import datetime
+
 import mysql.connector
-from .config import get_config
+from config import get_config
 from contextlib import contextmanager, closing
 
 
diff --git a/src/jetson/db/login.json b/src/jetson/db/login.json
deleted file mode 100644
index 65b553e4..00000000
--- a/src/jetson/db/login.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-    "SQL_HOST" : "mydb.itap.purdue.edu",
-    "USER_NAME" : "",
-    "PASSWORD" : "",
-    "KEYSPACE" : "pawar4",
-    
-    "FTPHOST" : "128.46.75.117",
-    "FTPUSER" : "",
-    "FTPPASS" : ""
-    
-}
\ No newline at end of file

From bafd011334f104d4906b6b114d49fc28ff02254e Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Fri, 10 Jul 2020 16:11:38 -0400
Subject: [PATCH 03/25] Add prepare_images. Refactoring.

---
 scripts/collect_images.py   | 75 +++++++++++++++++++++----------------
 scripts/extract_features.py |  2 +-
 scripts/prepare_images.py   | 50 +++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 34 deletions(-)
 create mode 100644 scripts/prepare_images.py

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index c5eeb7c9..5c0b6639 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -1,18 +1,23 @@
+import argparse
 import datetime
 import json
 
-# from Embedded2.src.jetson.db ... import ...
+from Embedded2.src.jetson.db.db_connection import sql_cursor
+
 # from wherever import email method
 
 """
 Put this file one folder up from the stored images.
-Eg. /local/b/embedvis/imgs contains images, /local/b/embedvis/collect_images.py
+Eg. on the HELPS machine: if /local/b/embedvis/imgs contains images, 
+this file's path should be /local/b/embedvis/collect_images.py
 
 Collect images of non-goggle detections from the database.
-Upload images to Google Drive.
+Upload images and metadata to Google Drive.
 Email end-user with the Drive link.
 """
 
+METADATA_FILE = 'metadata.json'
+
 
 # TODO rename method
 def get_metadata():
@@ -22,36 +27,37 @@ def get_metadata():
 
     Query:
     SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max,
-    i.image_name, i.init_vector from bbox AS b, image as i where b.image_name == i.image_name and b.goggles == False
+    i.image_name, i.init_vector from bbox AS b, image as i where b.image_name=i.image_name and b.goggles=False
     """
-    # make sql connection
-    # execute query
 
-    # for everything returned:
-            # combine everything into a dictionary
-            # append dictionary to list
+    metadata = []
 
-    # return list of dictionaries
-    # TODO just json.dump entire list?
-    return []
-
-# TODO don't need this method if json.dump ing all dictionaries at once
-def organize_metadata(metadata):
-    """
-    Create metadata file needed for decrypting images.
-    @param metadata: the list of dictionaries returned by get_metadata
-    """
-
-    with open(meta_file, 'w') as m:
-        for x in metadata:
-            # append image metadata
-
-            # use metadata param
-            image_metadata = []
-            json.dump(image_metadata, m)
-
-
-def upload_files(metadata):
+    # make sql connection
+    # execute query
+    with sql_cursor() as cursor:
+        try:
+            cursor.execute('USE goggles')
+            cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, '
+                           'i.image_name, i.init_vector from bbox AS b, image as i where '
+                           'b.image_name=i.image_name and b.goggles=False')
+
+            for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor:
+                metadata.append({'image_name': image_name,
+                                 'x_min': x_min,
+                                 'y_min': y_min,
+                                 'x_max': x_max,
+                                 'y_max': y_max,
+                                 'init_vector': init_vector
+                                 })
+        except Exception as e:
+            print(e)
+
+    with open(METADATA_FILE, 'w') as meta_file:
+        json.dump(metadata, meta_file)
+    return metadata
+
+
+def upload_files(metadata, dir):
     """
     For each filename returned by get_metadata, upload image
     to Drive. Upload the day's metadata file.
@@ -60,19 +66,22 @@ def upload_files(metadata):
 
     for image in metadata:
         # upload image using rclone
+        # subprocess rclone copy os.path.join(dir, image['image_name']) [Drive name]
         pass
 
     # upload metadata json file to Drive
-    # subprocess rclone copy meta_file [name of Drive in rclone]:
+    # subprocess rclone copy METADATA_FILE [Drive name]:
 
 
 # TODO call Seoyoung's method to email
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser('Collect images.')
+    parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
+
     current_date = datetime.datetime.now().strftime("%m-%d-%Y")
     meta_file = current_date + '.json'
 
     # call the methods
     metadata = get_metadata()
-    organize_metadata(metadata)
-    upload_files(metadata)
+    upload_files(metadata, args.directory)
diff --git a/scripts/extract_features.py b/scripts/extract_features.py
index c9346a03..9aaa7a50 100644
--- a/scripts/extract_features.py
+++ b/scripts/extract_features.py
@@ -83,7 +83,7 @@ def hook(model, input, output):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Run classification on a dataset')
     parser.add_argument('--directory', '-d', type=str, required=True, help='(Relative) Directory location of dataset')
-    parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda")
+    parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
 
     args = parser.parse_args()
 
diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py
new file mode 100644
index 00000000..4d78b22e
--- /dev/null
+++ b/scripts/prepare_images.py
@@ -0,0 +1,50 @@
+import argparse
+import json
+import os
+
+"""
+After having run collect_images, decrypt the associated images
+(if necessary) and combine images together into a short video (using metadata).
+"""
+
+METADATA_FILE = 'metadata.json'
+
+
+def decrypt_images(dir):
+    # ask for decryption key
+    with open(os.path.join(dir, METADATA_FILE)) as meta_file:
+        metadata = json.load(meta_file)
+        # use face coords to find where to decrypt in video frame
+        # decrypt
+        pass
+
+
+def make_videos(dir):
+    # use a heuristic (such as images within 5 seconds of each other)
+    # to combine similar images into one video for easier viewing
+    with open(os.path.join(dir, METADATA_FILE)) as meta_file:
+        metadata = json.load(meta_file)
+        # for each image, if within 5 seconds of the previous one,
+        # concatenate them and make them into a video
+        pass
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser('Combine images into a short video for easier viewing.'
+                                     'Decrypt if needed.')
+    parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be prepared.')
+    parser.add_argument('--decrypt', default=False, action='store_true', help='Decrypt faces in the images.')
+    parser.add_argument('--make_videos', '-m', default=False, action='store_true',
+                        help='Combine frames from the same time period into a single video.')
+
+    args = parser.parse_args()
+
+    if not args.decrypt and not args.make_videos:
+        print('No options selected. Please select at least one of --decrypt or --make_videos.')
+        exit(0)
+
+    if args.decrypt:
+        decrypt_images(args.directory)
+
+    if args.make_videos:  # store_true flag is a plain bool; calling it raised TypeError
+        make_videos(args.directory)

From 3872d31c267f2cd3d82e628a7dfda8ce6c57d4a7 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Sun, 12 Jul 2020 20:43:25 -0400
Subject: [PATCH 04/25] Minor fixes to work with database querying.

---
 scripts/collect_images.py      | 14 ++++++++------
 src/jetson/db/db_connection.py |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 5c0b6639..04ba2467 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -2,7 +2,7 @@
 import datetime
 import json
 
-from Embedded2.src.jetson.db.db_connection import sql_cursor
+from src.jetson.db.db_connection import sql_cursor
 
 # from wherever import email method
 
@@ -43,10 +43,10 @@ def get_metadata():
 
             for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor:
                 metadata.append({'image_name': image_name,
-                                 'x_min': x_min,
-                                 'y_min': y_min,
-                                 'x_max': x_max,
-                                 'y_max': y_max,
+                                 'x_min': float(x_min),  # JSON cannot serialize Decimals.
+                                 'y_min': float(y_min),  # If there is a better way to do this, someone let me know.
+                                 'x_max': float(x_max),
+                                 'y_max': float(y_max),
                                  'init_vector': init_vector
                                  })
         except Exception as e:
@@ -57,11 +57,13 @@ def get_metadata():
     return metadata
 
 
+# TODO make folder with date to contain images and metadata file
 def upload_files(metadata, dir):
     """
     For each filename returned by get_metadata, upload image
     to Drive. Upload the day's metadata file.
     @param metadata: the list of dictionaries returned by get_metadata
+    @param dir: the folder containing the images to upload
     """
 
     for image in metadata:
@@ -78,9 +80,9 @@ def upload_files(metadata, dir):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Collect images.')
     parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
+    args = parser.parse_args()
 
     current_date = datetime.datetime.now().strftime("%m-%d-%Y")
-    meta_file = current_date + '.json'
 
     # call the methods
     metadata = get_metadata()
diff --git a/src/jetson/db/db_connection.py b/src/jetson/db/db_connection.py
index 72e95892..6ed54f85 100644
--- a/src/jetson/db/db_connection.py
+++ b/src/jetson/db/db_connection.py
@@ -1,9 +1,9 @@
 import datetime
 
 import mysql.connector
-from config import get_config
 from contextlib import contextmanager, closing
 
+from src.jetson.db.config import get_config
 
 class Table:
     def __init__(self):

From 55e6f8f14c5ee845fe22ea8c52795dcdb4eec05f Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Sun, 12 Jul 2020 21:31:09 -0400
Subject: [PATCH 05/25] Initial commit. Renaming variables and understanding
 evaluator.py

---
 scripts/annotator.py | 199 ++++++++++++++++++++++
 scripts/evaluator.py | 384 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 583 insertions(+)
 create mode 100644 scripts/annotator.py
 create mode 100644 scripts/evaluator.py

diff --git a/scripts/annotator.py b/scripts/annotator.py
new file mode 100644
index 00000000..6a85df7f
--- /dev/null
+++ b/scripts/annotator.py
@@ -0,0 +1,199 @@
+from __future__ import print_function
+import os
+import argparse
+import torch
+import torch.backends.cudnn as cudnn
+import numpy as np
+from data import cfg_mnet, cfg_re50
+from layers.functions.prior_box import PriorBox
+from utils.nms.py_cpu_nms import py_cpu_nms
+import cv2
+from models.retinaface import RetinaFace
+from utils.box_utils import decode, decode_landm
+import time
+import json
+
+parser = argparse.ArgumentParser(description='Retinaface')
+
+parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
+                    type=str, help='Trained state_dict file path to open')
+parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50')
+parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference')
+parser.add_argument('--confidence_threshold', default=0.5, type=float, help='confidence_threshold')
+parser.add_argument('--top_k', default=1000, type=int, help='top_k')
+parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold')
+parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k')
+parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
+parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, help='directory to store detected labels')
+parser.add_argument('--input_directory', default='test_videos/', type=str, help='directory where test videos are located')
+
+
+args = parser.parse_args()
+CLASSES = ['Glasses/', 'Goggles/', 'Neither/']
+CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', 'Pose_45_degrees_up/',
+            'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/']
+
+def check_keys(model, pretrained_state_dict):
+    ckpt_keys = set(pretrained_state_dict.keys())
+    model_keys = set(model.state_dict().keys())
+    used_pretrained_keys = model_keys & ckpt_keys
+    unused_pretrained_keys = ckpt_keys - model_keys
+    missing_keys = model_keys - ckpt_keys
+    print('Missing keys:{}'.format(len(missing_keys)))
+    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
+    print('Used keys:{}'.format(len(used_pretrained_keys)))
+    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
+    return True
+
+
+def remove_prefix(state_dict, prefix):
+    ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
+    print('remove prefix \'{}\''.format(prefix))
+    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
+    return {f(key): value for key, value in state_dict.items()}
+
+
+def load_model(model, pretrained_path, load_to_cpu):
+    print('Loading pretrained model from {}'.format(pretrained_path))
+    if load_to_cpu:
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+    else:
+        device = torch.cuda.current_device()
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    if "state_dict" in pretrained_dict.keys():
+        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
+    else:
+        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
+    check_keys(model, pretrained_dict)
+    model.load_state_dict(pretrained_dict, strict=False)
+    return model
+
+
+def create_directories(root_directory, create_class_condition_directories=False):
+    if not os.path.isdir(root_directory):
+        os.mkdir(root_directory)
+
+
+
+def get_storage_location(output_directory, video_filename, input_directory):
+    save_dir = output_directory + video_filename.strip(input_directory).strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_'
+    #create_directories(save_dir)
+
+    return save_dir
+
+
+
+def get_videos(input_directory):
+    filenames = []
+    for dirName, subdirList, fileList in os.walk(input_directory):
+        for filename in fileList:
+            ext = '.' + filename.split('.')[-1]
+            if ext in ['.mov','.mp4','.avi', '.MOV']:
+                filenames.append(dirName + '/' + filename)
+
+    return filenames
+
+
+if __name__ == '__main__':
+    create_directories(args.output_directory, create_class_condition_directories=True)
+
+    torch.set_grad_enabled(False)
+    cfg = None
+    if args.network == "mobile0.25":
+        cfg = cfg_mnet
+    elif args.network == "resnet50":
+        cfg = cfg_re50
+    # net and model
+    net = RetinaFace(cfg=cfg, phase = 'test')
+    net = load_model(net, args.trained_model, args.cpu)
+    net.eval()
+    print('Finished loading model!')
+    print(net)
+    cudnn.benchmark = True
+    device = torch.device("cpu" if args.cpu else "cuda")
+    net = net.to(device)
+
+    resize = 0.4
+
+    video_files = get_videos(args.input_directory)
+
+    for video in video_files:
+        cap = cv2.VideoCapture(video)
+        storage_location = get_storage_location(args.output_directory, video, args.input_directory)
+        create_directories(storage_location)
+        print ("Video: ", video)
+
+
+        # testing begin
+        if cap.isOpened():
+            frame_number = 0
+            while True:
+                ret, img_raw = cap.read()
+                if not ret:
+                    break
+                img = np.float32(img_raw)
+                img = cv2.resize(img, (int(img.shape[1]*resize), int(img.shape[0]*resize)))
+
+                im_height, im_width, _ = img.shape
+                scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
+                img -= (104, 117, 123)
+                img = img.transpose(2, 0, 1)
+                img = torch.from_numpy(img).unsqueeze(0)
+                img = img.to(device)
+                scale = scale.to(device)
+
+                tic = time.time()
+                loc, conf, landms = net(img)  # forward pass
+                #print('net forward time: {:.4f}'.format(time.time() - tic))
+
+                priorbox = PriorBox(cfg, image_size=(im_height, im_width))
+                priors = priorbox.forward()
+                priors = priors.to(device)
+                prior_data = priors.data
+                boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
+                boxes = boxes * scale / resize
+                boxes = boxes.cpu().numpy()
+                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
+                landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
+                scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
+                                       img.shape[3], img.shape[2], img.shape[3], img.shape[2],
+                                       img.shape[3], img.shape[2]])
+                scale1 = scale1.to(device)
+                landms = landms * scale1 / resize
+                landms = landms.cpu().numpy()
+
+                # ignore low scores
+                inds = np.where(scores > args.confidence_threshold)[0]
+                boxes = boxes[inds]
+                landms = landms[inds]
+                scores = scores[inds]
+
+                # keep top-K before NMS
+                order = scores.argsort()[::-1][:args.top_k]
+                boxes = boxes[order]
+                landms = landms[order]
+                scores = scores[order]
+
+                # do NMS
+                dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+                keep = py_cpu_nms(dets, args.nms_threshold)
+                # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
+                dets = dets[keep, :]
+                landms = landms[keep]
+
+                # keep top-K faster NMS
+                dets = dets[:args.keep_top_k, :]
+                landms = landms[:args.keep_top_k, :]
+
+                #dets = np.concatenate((dets, landms), axis=1)
+                output_file = storage_location + str(frame_number) + '.txt'
+                f = open(output_file, "w")
+                for detection in dets:
+                    for coord in detection:
+                        f.write(str(coord) + " ")
+                    f.write("\n")
+                f.close()
+
+                frame_number += 1
+
+    exit(0)
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
new file mode 100644
index 00000000..96965698
--- /dev/null
+++ b/scripts/evaluator.py
@@ -0,0 +1,384 @@
+import os
+import cv2
+import argparse
+import torch
+import time
+import warnings
+import json
+import numpy as np
+
+from main import FaceDetector, Classifier
+
+VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
+
+"""
+Use this script with annotator.py . Videos to be evaluated should be in this structure:
+
+- [Top-level dir]
+--- Goggles
+---------- [goggle videos]
+--- Glasses
+---------- [glasses videos]
+--- Neither
+---------- [neither videos]
+"""
+
+
+class Evaluator():
+    def __init__(self, cuda, detector, classifier, input_directory, annotation_path):
+        """
+        Evaluates face detection and goggle classification performance.
+        Goggle Classification accuracy is given by average class accuracy and individual
+        video accuracy.
+        Face detection accuracy is given by precision and recall values.
+
+        Args:
+            cuda: A bool value that specifies if cuda shall be used
+            detector: A string path to a .pth weights file for a face detection model
+            classifier: A string path to a .pth weights file for a goggle classification model
+            input_directory: Directory containing test videos to run Evaluator on
+            annotation_path: Directory containing annotation files
+        """
+
+        if cuda and torch.cuda.is_available():
+            torch.set_default_tensor_type('torch.cuda.FloatTensor')
+            self.device = torch.device('cuda:0')
+        else:
+            torch.set_default_tensor_type('torch.FloatTensor')
+            self.device = torch.device('cpu')
+
+        if os.path.exists("eval/det_results.txt"):
+            os.remove("eval/det_results.txt")
+
+        self.detector = FaceDetector(detector=detector, cuda=cuda and torch.cuda.is_available(),
+                                     set_default_dev=True)
+        self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
+        self.video_filenames = self.get_video_files(input_directory)
+        self.results = {'Goggles':
+                            {'average_class_accuracy': 0.0,
+                             'number_of_videos': 0,
+                             'individual_video_results': {}
+                             },
+                        'Glasses':
+                            {'average_class_accuracy': 0.0,
+                             'number_of_videos': 0,
+                             'individual_video_results': {}
+                             },
+                        'Neither':
+                            {'average_class_accuracy': 0.0,
+                             'number_of_videos': 0,
+                             'individual_video_results': {}
+                             }
+                        }
+        self.class_label = ''
+        self.condition = ''
+        self.cap = ''
+        self.video = ''
+        self.evaluate(annotation_path)
+
+    def evaluate(self, annotation_path: str):
+        """
+        Evaluates every video file in the input directory containing test videos and
+        stores results in self.results.
+        To understand the format of self.results dict, check the constructor
+
+        Args:
+            annotation_path - Directory containing all the annotations of face detections
+        """
+        total_videos_processed = 0
+        for video_file in self.video_filenames:
+            self.video = video_file
+            print(f"Processing {self.video} ...")
+
+            self.class_label = self.get_class_label()
+            self.condition = self.get_condition()
+            self.cap = cv2.VideoCapture(self.video)
+
+            if self.cap.isOpened():
+                classification_result = self.evaluate_classifications()  # Also contains boxes
+                self.record_results(classification_result)
+                total_videos_processed += 1
+                print(f"{self.video} : Done")
+
+            else:
+                print(f"Unable to open video {self.video}")
+                continue
+        self.calculate_average_class_accuracy()
+        detection_results = self.evaluate_detections(annotation_path, "eval/det_results.txt")
+
+        print(f"\n {total_videos_processed} videos processed!")
+
+    def calculate_average_class_accuracy(self):
+        """
+        Calculates the average class accuracy for each class and stores it in the
+        self.results dict.
+        """
+        for class_label in self.results:
+            if self.results[class_label]['number_of_videos'] > 0:
+                self.results[class_label]['average_class_accuracy'] = self.results[class_label][
+                                                                          'average_class_accuracy'] / \
+                                                                      self.results[class_label]['number_of_videos']
+
+    def record_results(self, result):
+        """
+        Records all the results in the self.results dict
+
+        Args:
+            result(List) - contains the classification accuracy and inference time
+        """
+        self.results[self.class_label]['number_of_videos'] += 1
+        self.results[self.class_label]['average_class_accuracy'] += result[0]
+        self.results[self.class_label]['individual_video_results'][self.video] = {}
+        self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
+        self.results[self.class_label]['individual_video_results'][self.video]["inference_time"] = result[1]
+        self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition
+
+    def record_detections(self, file, detections):
+        """
+        Append detections to a file for evaluation, one "|"-delimited row each.
+        Args:
+            file (str): Records detections here
+            detections (List): contains all the bounding boxes and confidence values
+        """
+        # Context manager closes the file even if a write raises part-way.
+        with open(file, "a+") as f:
+            for detection in detections:
+                for element in detection:
+                    f.write(str(element))
+                    f.write("|")
+                f.write("\n")
+
+    def infer(self):
+        """
+        Performs inference on a video by using the face detection
+        and goggle classification models
+        It returns:
+        1) inference_dict: the number of inferences for each class.
+        2) average_inference_time: a float containing the average inference time for the whole video
+        """
+        bboxes = []
+        preds = []
+        inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0}
+        frame_counter = 0
+        start_time = time.time()
+
+        while True:
+            ret, img = self.cap.read()
+            if not ret:
+                break
+            # img = cv2.resize(img, (640, 480))  #Set this to the input shape of image for faster processing. (Remember to do the same in annotator)
+            frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
+                frame_counter)
+            boxes = self.detector.detect(img)  # Also contains confidence
+            if len(boxes) != 0:
+                for box in boxes:
+                    x1 = max(0, box[0])
+                    y1 = max(0, box[1])
+                    x2 = min(img.shape[1], box[2])
+                    y2 = min(img.shape[0], box[3])
+                    conf = box[4]
+                    face = img[int(y1):int(y2), int(x1):int(x2), :]
+                    label = self.classifier.classifyFace(face)
+                    preds.append(label.item())
+                    bboxes.append([frame_id, x1, y1, x2, y2, conf])
+            frame_counter += 1  # one per frame read, so frame ids and the average time are meaningful
+
+        inference_dict["Goggles"] += preds.count(1)  # tally once, after every frame is classified
+        inference_dict["Glasses"] += preds.count(0)
+        inference_dict["Neither"] += preds.count(2)
+
+        total_time = time.time() - start_time
+        if frame_counter > 0:
+            average_inference_time = total_time / frame_counter
+        else:
+            average_inference_time = -1  # Empty video file
+
+        # TODO make eval/det_results.txt a global variable DETECTION_FILE
+        self.record_detections("eval/det_results.txt", bboxes)
+        return inference_dict, average_inference_time
+
+    def get_class_label(self):
+        """
+        Get class label [Goggles / Glasses / Neither] that the image belongs to
+        """
+        if '/Goggles/' in self.video or '/goggles/' in self.video:
+            class_label = 'Goggles'
+        elif '/Glasses/' in self.video or '/glasses/' in self.video:
+            class_label = 'Glasses'
+        else:
+            class_label = 'Neither'
+
+        return class_label
+
+    def get_condition(self):
+        """
+        Get condition [Ideal, low_lighting etc. ] that the image belongs to
+        """
+        return self.video.split('/')[-2]
+
+    def get_ground_truth_detections(self, directory):
+        """
+        Get ground truth detection labels (from annotation file)
+        """
+        ground_truths = {}
+
+        for file in os.listdir(directory):
+            f = open(directory + file, "r")
+            key = file.strip('.txt')
+            content = f.readlines()
+            f.close()
+
+            content = [list(map(float, x.strip(' \n').split(' '))) for x in content]
+            ground_truths[key] = content
+
+        return ground_truths
+
+    def evaluate_classifications(self):
+        """
+        Returns the accuracy (percentage_of_correct_predictions) of the
+        predictions for a video
+        """
+        inferences, inference_time = self.infer()
+        if sum(inferences.values()) == 0:
+            percentage_of_correct_predictions = 0
+        else:
+            percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values())
+
+        return percentage_of_correct_predictions, inference_time
+
+    def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5):
+        """
+        Calculates the recall and precision of face detection for a video.
+        TODO explain what that means... seems like overlap of x and y coords?
+        
+        @param annotations_dir: directory containing annotation files (created by annotator.py)
+        @param detection_dir: directory of predicted detections TODO ???
+        @param overlap_threshold: greater than threshold counts as correct, less than is incorrect
+        """
+
+        ground_truth_detections = self.get_ground_truth_detections(annotations_dir)
+        with open(detection_dir, 'r') as f:
+            # TODO verify variable name accurate
+            predicted_detections = f.readlines()
+
+        total_ground_truths = 0
+        for frame_id in ground_truth_detections:
+            total_ground_truths += len(ground_truth_detections[frame_id])
+
+        # TODO ugly parsing and such here. Need to debug it. ==1 means...?
+        if any(predicted_detections) == 1:
+            splitlines = [x.strip().split('|') for x in predicted_detections]
+            image_ids = [x[0] for x in splitlines]
+            confidence = np.array([float(x[5]) for x in splitlines])
+            bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines])
+
+            # sort by confidence
+            sorted_ind = np.argsort(-confidence)
+            sorted_scores = np.sort(-confidence)
+            bboxes = bboxes[sorted_ind, :]
+            image_ids = [image_ids[x] for x in sorted_ind]
+
+            nd = len(image_ids)
+            true_pos = np.zeros(nd)
+            false_pos = np.zeros(nd)
+
+            # TODO for frame in frames?
+            for d in range(nd):
+                try:
+                    bbox = bboxes[d, :].astype(float)
+                    max_overlap = -np.inf
+                    bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32)
+                    if bbox_ground_truth_detections.size > 0:
+                        # TODO max and min variable names are backwards?
+                        ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0])
+                        iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1])
+                        ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2])
+                        iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3])
+                        iw = np.maximum(ixmax - ixmin, 0.)
+                        ih = np.maximum(iymax - iymin, 0.)
+                        # TODO debug. inters = intersection? uni = union? Overlaps is actual value?
+                        inters = iw * ih
+                        uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) +
+                               (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) *
+                               (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters)
+                        overlaps = inters / uni
+                        max_overlap = np.max(overlaps)
+                        # jmax = np.argmax(overlaps)
+
+                    if max_overlap > overlap_threshold:
+                        true_pos[d] = 1.
+                    else:
+                        false_pos[d] = 1.
+
+                except KeyError:
+                    continue
+
+            print("Total ground truths: ", total_ground_truths)
+            false_pos = np.cumsum(false_pos)
+            true_pos = np.cumsum(true_pos)
+            recall = true_pos / float(total_ground_truths)
+            # avoid divide by zero in case the first detection matches a difficult
+            # ground truth
+            precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
+        else:
+            recall = -1.
+            precision = -1.
+            ap = -1.
+
+        print("Precision: ", precision)
+        print("Recall: ", recall)
+
+        return precision[len(precision)], recall[len(recall)]  # final precision, recall
+
+    def get_video_files(self, input_directory: str):
+        """
+        Gets all the video files in the input directory
+        """
+        filenames = []
+        for dirName, subdirList, fileList in os.walk(input_directory):
+            for filename in fileList:
+                ext = '.' + filename.split('.')[-1]
+                if ext in VIDEO_EXT:
+                    filenames.append(dirName + '/' + filename)
+
+        return filenames
+
+    def get_evaluator_results(self):
+        """
+        Returns the dict containing all the test results (self.results)
+        """
+
+        return self.results
+
+
+def main():
+    if not args.input_directory:
+        raise Exception("Invalid input directory")
+    evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path)
+    individual_video_results = evaluator.get_evaluator_results()
+
+    with open(args.output_file, 'w') as json_file:
+        json.dump(individual_video_results, json_file, indent=4)
+
+    print(f"\n Output saved at {args.output_file}")
+
+
+if __name__ == "__main__":
+    warnings.filterwarnings("once")
+    parser = argparse.ArgumentParser(description="Face detection")
+    parser.add_argument('--detector', '-t', type=str, default='model_weights/blazeface.pth',
+                        help="Path to a trained face detector .pth file")
+    parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
+                        help="Path to a trained classifier .pth file")
+    parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
+    parser.add_argument('--output_file', type=str, default='eval/test1.json',
+                        help="Name of evaluation log")
+    parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
+    parser.add_argument('--annotation_path', type=str, required=True, help="Path to annotation files")
+    # TODO add store_true args for detection, evaluation (to do separately if desired)
+
+    args = parser.parse_args()
+
+    main()
+
+    exit()

From 0e2d495329b521961d741882b453c0e312c06aba Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Tue, 14 Jul 2020 09:21:39 -0400
Subject: [PATCH 06/25] Progress

---
 scripts/annotator.py | 109 ++++++++++++++++++++++++-------------------
 scripts/evaluator.py |  24 +++++-----
 src/jetson/main.py   |  70 +++++++++++++--------------
 3 files changed, 105 insertions(+), 98 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 6a85df7f..6377bbde 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -4,34 +4,26 @@
 import torch
 import torch.backends.cudnn as cudnn
 import numpy as np
-from data import cfg_mnet, cfg_re50
-from layers.functions.prior_box import PriorBox
-from utils.nms.py_cpu_nms import py_cpu_nms
+from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50
+from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox
+from src.jetson.models.utils.box_utils import nms_numpy, decode_landm, decode
 import cv2
-from models.retinaface import RetinaFace
-from utils.box_utils import decode, decode_landm
+from src.jetson.models.Retinaface.retinaface import RetinaFace
 import time
 import json
 
-parser = argparse.ArgumentParser(description='Retinaface')
+"""
+Run the face detector model on TestVideos (on the Drive, also args.input_directory).
+Save bbox detections to SEPARATE text files for evaluation by evaluator.py
+"""
 
-parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
-                    type=str, help='Trained state_dict file path to open')
-parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50')
-parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference')
-parser.add_argument('--confidence_threshold', default=0.5, type=float, help='confidence_threshold')
-parser.add_argument('--top_k', default=1000, type=int, help='top_k')
-parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold')
-parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k')
-parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
-parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str, help='directory to store detected labels')
-parser.add_argument('--input_directory', default='test_videos/', type=str, help='directory where test videos are located')
+# TODO there's gotta be a better way than saving to text files
 
-
-args = parser.parse_args()
 CLASSES = ['Glasses/', 'Goggles/', 'Neither/']
-CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/', 'Pose_45_degrees_up/',
-            'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/']
+CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/',
+              'Pose_45_degrees_up/',
+              'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/']
+
 
 def check_keys(model, pretrained_state_dict):
     ckpt_keys = set(pretrained_state_dict.keys())
@@ -39,23 +31,23 @@ def check_keys(model, pretrained_state_dict):
     used_pretrained_keys = model_keys & ckpt_keys
     unused_pretrained_keys = ckpt_keys - model_keys
     missing_keys = model_keys - ckpt_keys
-    print('Missing keys:{}'.format(len(missing_keys)))
-    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
-    print('Used keys:{}'.format(len(used_pretrained_keys)))
+    print('Missing keys: {}'.format(len(missing_keys)))
+    print('Unused checkpoint keys: {}'.format(len(unused_pretrained_keys)))
+    print('Used keys: {}'.format(len(used_pretrained_keys)))
     assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
     return True
 
 
 def remove_prefix(state_dict, prefix):
-    ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
+    """ Old style model is stored with all names of parameters sharing common prefix 'module.' """
     print('remove prefix \'{}\''.format(prefix))
     f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
     return {f(key): value for key, value in state_dict.items()}
 
 
-def load_model(model, pretrained_path, load_to_cpu):
+def load_model(model, pretrained_path, load_to_cuda):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    if load_to_cpu:
+    if not load_to_cuda:
         pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
     else:
         device = torch.cuda.current_device()
@@ -69,33 +61,53 @@ def load_model(model, pretrained_path, load_to_cpu):
     return model
 
 
-def create_directories(root_directory, create_class_condition_directories=False):
+def create_directory(root_directory):
     if not os.path.isdir(root_directory):
         os.mkdir(root_directory)
 
 
-
 def get_storage_location(output_directory, video_filename, input_directory):
-    save_dir = output_directory + video_filename.strip(input_directory).strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_'
-    #create_directories(save_dir)
+    # TODO ugly filename strip
+    save_dir = os.path.join(output_directory, video_filename.strip(input_directory)
+                            .strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_')
 
     return save_dir
 
 
-
 def get_videos(input_directory):
     filenames = []
     for dirName, subdirList, fileList in os.walk(input_directory):
         for filename in fileList:
             ext = '.' + filename.split('.')[-1]
-            if ext in ['.mov','.mp4','.avi', '.MOV']:
+            if ext in ['.mov', '.mp4', '.avi', '.MOV']:
                 filenames.append(dirName + '/' + filename)
 
     return filenames
 
 
 if __name__ == '__main__':
-    create_directories(args.output_directory, create_class_condition_directories=True)
+    parser = argparse.ArgumentParser(description='Retinaface')
+
+    parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
+                        type=str, help='Trained face detector state_dict path')
+    parser.add_argument('--network', default='resnet50', help='Backbone network. mobile0.25 or resnet50')
+    # TODO make CUDA arg instead
+    parser.add_argument('--cuda', '-c', action="store_true", default=False, help='Use CUDA')
+    parser.add_argument('--confidence_threshold', default=0.5, type=float, help='Bounding box IoU required to count as '
+                                                                                'correct')
+    parser.add_argument('--top_k', default=1000, type=int, help='top_k')
+    parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold')
+    parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k')
+    # TODO not currently used
+    parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
+    parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str,
+                        help='directory to store detected labels')
+    parser.add_argument('--input_directory', default='test_videos/', type=str,
+                        help='directory where test videos are located')
+
+    args = parser.parse_args()
+
+    create_directory(args.output_directory)
 
     torch.set_grad_enabled(False)
     cfg = None
@@ -103,14 +115,17 @@ def get_videos(input_directory):
         cfg = cfg_mnet
     elif args.network == "resnet50":
         cfg = cfg_re50
-    # net and model
-    net = RetinaFace(cfg=cfg, phase = 'test')
-    net = load_model(net, args.trained_model, args.cpu)
+
+    # load the network
+    net = RetinaFace(cfg=cfg, phase='test')
+
+    # load the model weights # TODO rename method load_model
+    net = load_model(net, args.trained_model, args.cuda)
     net.eval()
     print('Finished loading model!')
     print(net)
     cudnn.benchmark = True
-    device = torch.device("cpu" if args.cpu else "cuda")
+    device = torch.device("cuda:0" if args.cuda else "cpu")
     net = net.to(device)
 
     resize = 0.4
@@ -120,9 +135,8 @@ def get_videos(input_directory):
     for video in video_files:
         cap = cv2.VideoCapture(video)
         storage_location = get_storage_location(args.output_directory, video, args.input_directory)
-        create_directories(storage_location)
-        print ("Video: ", video)
-
+        create_directory(storage_location)
+        print("Video: ", video)
 
         # testing begin
         if cap.isOpened():
@@ -132,7 +146,9 @@ def get_videos(input_directory):
                 if not ret:
                     break
                 img = np.float32(img_raw)
-                img = cv2.resize(img, (int(img.shape[1]*resize), int(img.shape[0]*resize)))
+                img = cv2.resize(img, (int(img.shape[1] * resize), int(img.shape[0] * resize)))
+
+                # TODO does this vvv code appear in Retinaface/ ? Or possibly in main.py
 
                 im_height, im_width, _ = img.shape
                 scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
@@ -144,7 +160,7 @@ def get_videos(input_directory):
 
                 tic = time.time()
                 loc, conf, landms = net(img)  # forward pass
-                #print('net forward time: {:.4f}'.format(time.time() - tic))
+                # print('net forward time: {:.4f}'.format(time.time() - tic))
 
                 priorbox = PriorBox(cfg, image_size=(im_height, im_width))
                 priors = priorbox.forward()
@@ -176,8 +192,7 @@ def get_videos(input_directory):
 
                 # do NMS
                 dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
-                keep = py_cpu_nms(dets, args.nms_threshold)
-                # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
+                keep = nms_numpy(dets, args.nms_threshold)
                 dets = dets[keep, :]
                 landms = landms[keep]
 
@@ -185,8 +200,8 @@ def get_videos(input_directory):
                 dets = dets[:args.keep_top_k, :]
                 landms = landms[:args.keep_top_k, :]
 
-                #dets = np.concatenate((dets, landms), axis=1)
-                output_file = storage_location + str(frame_number) + '.txt'
+                # dets = np.concatenate((dets, landms), axis=1)
+                output_file = os.path.join(storage_location, 'frame' + str(frame_number) + '.txt')
                 f = open(output_file, "w")
                 for detection in dets:
                     for coord in detection:
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 96965698..08307468 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -7,22 +7,18 @@
 import json
 import numpy as np
 
-from main import FaceDetector, Classifier
+from src.jetson.main import FaceDetector, Classifier
 
 VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
 """
-Use this script with annotator.py . Videos to be evaluated should be in this structure:
-
-- [Top-level dir]
---- Goggles
----------- [goggle videos]
---- Glasses
----------- [glasses videos]
---- Neither
----------- [neither videos]
+Use this script with annotator.py. 
+Videos to be evaluated should be from the TestVideos folder on the Drive.
 """
 
+# TODO - TODO TODO don't do face detection? Would have to manually label faces but we're using a
+# TODO - SOTA face detection model that could just empirically be observed to work
+
 
 class Evaluator():
     def __init__(self, cuda, detector, classifier, input_directory, annotation_path):
@@ -37,7 +33,7 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path)
             detector: A string path to a .pth weights file for a face detection model
             classifier: A string path to a .pth weights file for a goggle classification model
             input_directory: Directory containing test videos to run Evaluator on
-            annotation_path: Directory containing annotation files
+            annotation_path: Directory containing annotation files (output by annotator.py)
         """
 
         if cuda and torch.cuda.is_available():
@@ -249,7 +245,7 @@ def evaluate_classifications(self):
     def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5):
         """
         Calculates the recall and precision of face detection for a video.
-        TODO explain what that means... seems like overlap of x and y coords?
+        TODO explain what that means... seems like overlap of x and y coords? I.e. IoU?
         
         @param annotations_dir: directory containing annotation files (created by annotator.py)
         @param detection_dir: directory of predicted detections TODO ???
@@ -297,6 +293,7 @@ def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=
                         iw = np.maximum(ixmax - ixmin, 0.)
                         ih = np.maximum(iymax - iymin, 0.)
                         # TODO debug. inters = intersection? uni = union? Overlaps is actual value?
+                        # TODO import IoU from box_utils should work
                         inters = iw * ih
                         uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) +
                                (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) *
@@ -374,7 +371,8 @@ def main():
     parser.add_argument('--output_file', type=str, default='eval/test1.json',
                         help="Name of evaluation log")
     parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
-    parser.add_argument('--annotation_path', type=str, required=True, help="Path to annotation files")
+    parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation "
+                                                                           "files")
     # TODO add store_true args for detection, evaluation (to do separately if desired)
 
     args = parser.parse_args()
diff --git a/src/jetson/main.py b/src/jetson/main.py
index 54ceee25..8c77f1dd 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -10,27 +10,28 @@
 from torch.autograd import Variable
 from torchvision import transforms
 
-from models.utils.transform import BaseTransform
-from models.utils.box_utils import decode, do_nms, postprocess
+from src.jetson.models.utils.transform import BaseTransform
+from src.jetson.models.utils.box_utils import decode, do_nms, postprocess
 
 import sys
 import os
 import inspect
 
-from AES import Encryption as AESEncryptor
+from src.jetson.AES import Encryption as AESEncryptor
 
 from threading import Thread
 import multiprocessing
 from multiprocessing import Process, Queue, Value
-from models.Retinaface.layers.functions.prior_box import PriorBox
-from models.Retinaface.data import cfg_mnet as cfg
-from models.Retinaface.data import cfg_inference as infer_params
+from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox
+from src.jetson.models.Retinaface.data import cfg_mnet as cfg
+from src.jetson.models.Retinaface.data import cfg_inference as infer_params
 
 fileCount = Value('i', 0)
-encryptRet = Queue() #Shared memory queue to allow child encryption process to return to parent
+encryptRet = Queue()  # Shared memory queue to allow child encryption process to return to parent
+
 
 class FaceDetector:
-    def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default_dev=False):
+    def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False):
         """
         Creates a FaceDetector object
         Args:
@@ -54,7 +55,6 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default
         elif ('.pth' in detector and 'blazeface' in detector):
             from models.BlazeFace.blazeface import BlazeFace
 
-
             self.net = BlazeFace(self.device)
             self.net.load_weights(detector)
             self.net.load_anchors("models/BlazeFace/anchors.npy")
@@ -66,17 +66,16 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default
         elif ('.pth' in detector and 'mobile' in detector):
             from models.Retinaface.retinaface import RetinaFace, load_model
 
-            self.net = RetinaFace(cfg=cfg, phase = 'test')
+            self.net = RetinaFace(cfg=cfg, phase='test')
             self.net = load_model(self.net, detector, True)
             self.model_name = 'retinaface'
-            self.image_shape = infer_params["image_shape"]  #(H, W)
+            self.image_shape = infer_params["image_shape"]  # (H, W)
             self.resize = infer_params["resize"]
             self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123))
             priorbox = PriorBox(cfg, image_size=self.image_shape)
             priors = priorbox.forward()
             self.prior_data = priors.data
 
-
         self.detection_threshold = detection_threshold
         if cuda and torch.cuda.is_available():
             self.device = torch.device("cuda:0")
@@ -88,7 +87,6 @@ def __init__(self, detector:str, detection_threshold=0.7, cuda=True, set_default
         self.net.to(self.device)
         self.net.eval()
 
-
     def detect(self,
                image: np.ndarray):
         """
@@ -147,8 +145,8 @@ def detect(self,
         elif (self.model_name == 'retinaface'):
             img = (self.transformer(image)[0]).transpose(2, 0, 1)
             img = torch.from_numpy(img).unsqueeze(0)
-            loc, conf, _ = self.net(img)  # forward pass: Returns bounding box location, confidence and facial landmark locations
-
+            loc, conf, _ = self.net(
+                img)  # forward pass: Returns bounding box location, confidence and facial landmark locations
 
             boxes = decode(loc.data.squeeze(0), self.prior_data, cfg['variance'])
             boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize)
@@ -161,9 +159,6 @@ def detect(self,
             return bboxes
 
 
-
-
-
 class VideoCapturer(object):
     def __init__(self, src=0):
         '''
@@ -179,7 +174,6 @@ def __init__(self, src=0):
         self.t1.daemon = True
         self.t1.start()
 
-
     def update(self):
         '''Get next frame in video stream'''
         while self.running.value:
@@ -196,6 +190,7 @@ def close(self):
         self.running.value = False
         self.t1.join()
 
+
 class Classifier:
     def __init__(self, classifier):
         '''
@@ -207,7 +202,7 @@ def __init__(self, classifier):
         self.classifier = classifier
 
     def classifyFace(self,
-                    face: np.ndarray):
+                     face: np.ndarray):
         '''
         This method initializaes the transforms and classifies the face region
         Args:
@@ -243,8 +238,8 @@ def classifyFace(self,
         return pred
 
     def classifyFrame(self,
-                    img: np.ndarray,
-                    boxes: List[Tuple[np.float64]]):
+                      img: np.ndarray,
+                      boxes: List[Tuple[np.float64]]):
         '''
         This method loops through all the bounding boxes in an image, calls classifyFace method
         to classify face region and finally draws a box around the face.
@@ -270,9 +265,9 @@ def classifyFrame(self,
 
             label.append(int(self.classifyFace(face).data))
 
-
         return label
 
+
 class Encryptor(object):
     def __init__(self):
         '''
@@ -281,7 +276,6 @@ def __init__(self):
         self.encryptor = AESEncryptor()
         self.key = self.encryptor.key
 
-
     def encryptFace(self, coordinates: List[Tuple[int]],
                     img: np.ndarray):
         '''
@@ -298,8 +292,8 @@ def encryptFace(self, coordinates: List[Tuple[int]],
 
         return encryptedImg
 
-    def encryptFrame(self, img:np.ndarray,
-                    boxes:List[Tuple[np.float64]]):
+    def encryptFrame(self, img: np.ndarray,
+                     boxes: List[Tuple[np.float64]]):
         '''
         This method takes the face coordinates, encrypts the facial region, writes encrypted image to file filesystem
         Args:
@@ -333,7 +327,7 @@ def writeImg(img, output_dir):
     global fileCount
     face_file_name = os.path.join(output_dir, f'{fileCount.value}.jpg')
 
-    #TODO: Remove this print statement after db integration
+    # TODO: Remove this print statement after db integration
     print("writing ", face_file_name)
     if args.write_imgs:
         cv2.imwrite(face_file_name, img)
@@ -370,16 +364,16 @@ def drawFrame(boxes, frame, fps):
     index = 0
     for box in boxes:
         frame = cv2.putText(frame,
-                    'label: %s' % class_names[label[index]],
-                    (int(box[0]), int(box[1]-40)),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
-                    (0, 0, 255))
+                            'label: %s' % class_names[label[index]],
+                            (int(box[0]), int(box[1] - 40)),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
+                            (0, 0, 255))
 
         frame = cv2.putText(frame,
-                'fps: %.3f' % fps,
-                (int(box[0]), int(box[1]-20)),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5, (0, 0, 255))
+                            'fps: %.3f' % fps,
+                            (int(box[0]), int(box[1] - 20)),
+                            cv2.FONT_HERSHEY_SIMPLEX,
+                            0.5, (0, 0, 255))
 
         index += 1
 
@@ -409,13 +403,13 @@ def drawFrame(boxes, frame, fps):
     encryptor = Encryptor()
 
     run_face_detection: bool = True
-    while run_face_detection: #main video detection loop that will iterate until ESC key is entered
+    while run_face_detection:  # main video detection loop that will iterate until ESC key is entered
         start_time = time.time()
 
         frame = capturer.get_frame()
         boxes = detector.detect(frame)
 
-        encryptedImg = frame.copy() #copy memory for encrypting image separate from unencrypted image
+        encryptedImg = frame.copy()  # copy memory for encrypting image separate from unencrypted image
 
         if len(boxes) != 0:
             p1 = Process(target=encryptWorker, args=(encryptor, encryptedImg, boxes, args.output_dir, args.write_imgs))
@@ -427,7 +421,7 @@ def drawFrame(boxes, frame, fps):
             fps = 1 / (time.time() - start_time)
             drawFrame(boxes, frame, fps)
 
-            #remove frame creation and drawing before deployment
+            # remove frame creation and drawing before deployment
 
             p1.join()
             if cv2.waitKey(1) == 27:

From dc6203d0875efc46c43419aa82af51d7b93d7f6d Mon Sep 17 00:00:00 2001
From: ZPBerg <31778364+ZPBerg@users.noreply.github.com>
Date: Tue, 14 Jul 2020 09:27:05 -0400
Subject: [PATCH 07/25] Update evaluator branch with master changes (#4)

* Added evaluator for detector and classifier

* Made necessary changes to main to run evaluator

* detector_type as an argument, compare to list of strings

* Retinaface works with GPU

Co-authored-by: Aditya Chakraborty <chakra17@purdue.edu>
Co-authored-by: Aditya Chakraborty <31283807+adityachakra16@users.noreply.github.com>
---
 src/jetson/main.py                   | 62 +++++++++++++++++-----------
 src/jetson/models/utils/box_utils.py |  2 +-
 2 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/src/jetson/main.py b/src/jetson/main.py
index 8c77f1dd..eda67324 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -4,6 +4,7 @@
 from typing import List, Set, Dict, Tuple, Optional
 
 import cv2
+from enum import Enum
 from PIL import Image
 import numpy as np
 import torch
@@ -28,14 +29,16 @@
 
 fileCount = Value('i', 0)
 encryptRet = Queue()  # Shared memory queue to allow child encryption process to return to parent
+DETECTOR_TYPES = ['blazeface', 'retinaface', 'ssd']
 
 
 class FaceDetector:
-    def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False):
+    def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, cuda=True, set_default_dev=False):
         """
         Creates a FaceDetector object
         Args:
             detector: A string path to a trained pth file for a ssd model trained in face detection
+            detector_type: A DetectorType describing which face detector is being used
             detection_threshold: The minimum threshold for a detection to be considered valid
             cuda: Whether or not to enable CUDA
             set_default_dev: Whether or not to set the default device for PyTorch
@@ -43,17 +46,16 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
 
         self.device = torch.device("cpu")
 
-        if ('.pth' in detector and 'ssd' in detector):
-            from models.SSD.ssd import build_ssd
+        if detector_type == 'ssd':
+            from src.jetson.models.SSD.ssd import build_ssd
 
             self.net = build_ssd('test', 300, 2)
             self.model_name = 'ssd'
             self.net.load_state_dict(torch.load(detector, map_location=self.device))
             self.transformer = BaseTransform(self.net.size, (104, 117, 123))
 
-
-        elif ('.pth' in detector and 'blazeface' in detector):
-            from models.BlazeFace.blazeface import BlazeFace
+        elif detector_type == 'blazeface':
+            from src.jetson.models.BlazeFace.blazeface import BlazeFace
 
             self.net = BlazeFace(self.device)
             self.net.load_weights(detector)
@@ -63,8 +65,8 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
             self.net.min_suppression_threshold = 0.3
             self.transformer = BaseTransform(128, None)
 
-        elif ('.pth' in detector and 'mobile' in detector):
-            from models.Retinaface.retinaface import RetinaFace, load_model
+        elif detector_type == 'retinaface':
+            from src.jetson.models.Retinaface.retinaface import RetinaFace, load_model
 
             self.net = RetinaFace(cfg=cfg, phase='test')
             self.net = load_model(self.net, detector, True)
@@ -74,7 +76,7 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
             self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123))
             priorbox = PriorBox(cfg, image_size=self.image_shape)
             priors = priorbox.forward()
-            self.prior_data = priors.data
+            self.prior_data = priors.data.to(device)
 
         self.detection_threshold = detection_threshold
         if cuda and torch.cuda.is_available():
@@ -98,7 +100,7 @@ def detect(self,
             The bounding boxes of the face(s) that were detected formatted (upper left corner(x, y) , lower right corner(x,y))
         """
 
-        if (self.model_name == 'ssd'):
+        if self.model_name == 'ssd':
             x = torch.from_numpy(self.transformer(image)[0]).permute(2, 0, 1)
             x = Variable(x.unsqueeze(0)).to(self.device)
             y = self.net(x)
@@ -109,12 +111,13 @@ def detect(self,
             while j < detections.shape[2] and detections[0, 1, j, 0] > self.detection_threshold:
                 pt = (detections[0, 1, j, 1:] * scale).cpu().numpy()
                 x1, y1, x2, y2 = pt
-                bboxes.append((x1, y1, x2, y2))
+                conf = detections[0, 1, j, 0].item()
+                bboxes.append((x1, y1, x2, y2, conf))
                 j += 1
 
             return bboxes
 
-        elif (self.model_name == 'blazeface'):
+        elif self.model_name == 'blazeface':
             img = self.transformer(image)[0].astype(np.float32)
 
             detections = self.net.predict_on_image(img)
@@ -130,6 +133,7 @@ def detect(self,
                 xmin = detections[i, 1] * image.shape[1]
                 ymax = detections[i, 2] * image.shape[0]
                 xmax = detections[i, 3] * image.shape[1]
+                conf = detections[i, 16]
 
                 img = img / 127.5 - 1.0
 
@@ -137,14 +141,15 @@ def detect(self,
                     kp_x = detections[i, 4 + k * 2] * img.shape[1]
                     kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0]
 
-                bboxes.append((xmin, ymin, xmax, ymax))
+                bboxes.append((xmin, ymin, xmax, ymax, conf))
 
             return bboxes
 
-
-        elif (self.model_name == 'retinaface'):
+        elif self.model_name == 'retinaface':
             img = (self.transformer(image)[0]).transpose(2, 0, 1)
             img = torch.from_numpy(img).unsqueeze(0)
+            img = img.to(device)
+  
             loc, conf, _ = self.net(
                 img)  # forward pass: Returns bounding box location, confidence and facial landmark locations
 
@@ -152,9 +157,7 @@ def detect(self,
             boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize)
             dets = do_nms(boxes, scores, infer_params["nms_thresh"])
 
-            bboxes = []
-            for det in dets:
-                bboxes.append(tuple(dets[0][0:4]))
+            bboxes = [tuple(det[0:5]) for det in dets]
 
             return bboxes
 
@@ -192,14 +195,16 @@ def close(self):
 
 
 class Classifier:
-    def __init__(self, classifier):
+    def __init__(self, classifier, cuda: bool):
         '''
         Performs classification of facial region into three classes - [Goggles, Glasses, Neither]
         Args:
             classifier - Trained classifier model (Currently, mobilenetv2)
+            cuda - True if Nvidia GPU is used
         '''
         self.fps = 0
         self.classifier = classifier
+        self.device = cuda
 
     def classifyFace(self,
                      face: np.ndarray):
@@ -228,7 +233,7 @@ def classifyFace(self,
         ])
         transformed_face = transform(pil_face)
         face_batch = transformed_face.unsqueeze(0)
-        device = torch.device("cuda:0" if args.cuda and torch.cuda.is_available() else "cpu")
+        device = torch.device("cuda:0" if self.device and torch.cuda.is_available() else "cpu")
         with torch.no_grad():
             face_batch = face_batch.to(device)
             labels = classifier(face_batch)
@@ -253,7 +258,7 @@ def classifyFrame(self,
 
         label = []
         for box in boxes:
-            x1, y1, x2, y2 = [int(b) for b in box]
+            x1, y1, x2, y2 = [int(b) for b in box[0:4]]
             # draw boxes within the frame
             x1 = max(0, x1)
             y1 = max(0, y1)
@@ -301,7 +306,7 @@ def encryptFrame(self, img: np.ndarray,
             boxes: facial Coordinates
         '''
         for box in boxes:
-            x1, y1, x2, y2 = [int(b) for b in box]
+            x1, y1, x2, y2 = [int(b) for b in box[0:4]]
             # draw boxes within the frame
             x1 = max(0, x1)
             y1 = max(0, y1)
@@ -383,13 +388,19 @@ def drawFrame(boxes, frame, fps):
 if __name__ == "__main__":
     warnings.filterwarnings("once")
     parser = argparse.ArgumentParser(description="Face detection")
-    parser.add_argument('--detector', '-t', type=str, required=True, help="Path to a trained ssd .pth file")
+    parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file")
+    parser.add_argument('--detector_type', '-t', type=str, required=True, help="Type of face detector. One of "
+                                                                               "blazeface, ssd, or retinaface.")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda")
     parser.add_argument('--classifier', type=str, help="Path to a trained classifier .pth file")
     parser.add_argument('--write_imgs', default=False, help='Write images to output_dir')
     parser.add_argument('--output_dir', default='encrypted_imgs', type=str, help="Where to output encrypted images")
     args = parser.parse_args()
 
+    if args.detector_type not in DETECTOR_TYPES:
+        print('Please include a valid detector type (\'blazeface\', \'ssd\', or \'retinaface\'')
+        exit(1)
+
     device = torch.device('cpu')
     if args.cuda and torch.cuda.is_available():
         device = torch.device('cuda:0')
@@ -398,8 +409,9 @@ def drawFrame(boxes, frame, fps):
     g.eval()
 
     capturer = VideoCapturer()
-    detector = FaceDetector(detector=args.detector, cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
-    classifier = Classifier(g)
+    detector = FaceDetector(detector=args.detector, detector_type=args.detector_type,
+                            cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
+    classifier = Classifier(g, args.cuda)
     encryptor = Encryptor()
 
     run_face_detection: bool = True
diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py
index bed236b3..03a5f513 100644
--- a/src/jetson/models/utils/box_utils.py
+++ b/src/jetson/models/utils/box_utils.py
@@ -376,7 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor):
     Returns boxes and confidence scores that are above confidence threshold
     """
     scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
-    boxes = (boxes * scale / resize_factor).numpy()
+    boxes = (boxes * scale / resize_factor).to('cpu').numpy()
     scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
 
     # ignore low scores

From 5eb307cd71b5540c4edbe48e4e4e13e1181a5280 Mon Sep 17 00:00:00 2001
From: ZPBerg <31778364+ZPBerg@users.noreply.github.com>
Date: Tue, 14 Jul 2020 09:27:05 -0400
Subject: [PATCH 08/25] Update evaluator branch with master changes (#4)

* Added evaluator for detector and classifier

* Made necessary changes to main to run evaluator

* detector_type as an argument, compare to list of strings

* Retinaface works with GPU

Co-authored-by: Aditya Chakraborty <chakra17@purdue.edu>
Co-authored-by: Aditya Chakraborty <31283807+adityachakra16@users.noreply.github.com>
---
 scripts/annotator.py                 |  3 +-
 scripts/evaluator.py                 | 16 ++++---
 src/jetson/main.py                   | 66 ++++++++++++++++------------
 src/jetson/models/utils/box_utils.py |  2 +-
 4 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 6377bbde..40dd34d8 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -17,7 +17,8 @@
 Save bbox detections to SEPARATE text files for evaluation by evaluator.py
 """
 
-# TODO there's gotta be a better way than saving to text files
+# TODO there's gotta be a better way than saving to 47,000+ text files
+# TODO add instructions for running annotator and evaluator
 
 CLASSES = ['Glasses/', 'Goggles/', 'Neither/']
 CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/',
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 08307468..31c98bf8 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -43,10 +43,10 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path)
             torch.set_default_tensor_type('torch.FloatTensor')
             self.device = torch.device('cpu')
 
-        if os.path.exists("eval/det_results.txt"):
-            os.remove("eval/det_results.txt")
+        if os.path.exists("det_results.txt"):
+            os.remove("det_results.txt")
 
-        self.detector = FaceDetector(detector=detector, cuda=cuda and torch.cuda.is_available(),
+        self.detector = FaceDetector(detector=detector, detector_type='retinaface', cuda=cuda and torch.cuda.is_available(),
                                      set_default_dev=True)
         self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
         self.video_filenames = self.get_video_files(input_directory)
@@ -100,7 +100,11 @@ def evaluate(self, annotation_path: str):
                 print(f"Unable to open video {self.video}")
                 continue
         self.calculate_average_class_accuracy()
-        detection_results = self.evaluate_detections(annotation_path, "eval/det_results.txt")
+
+        # ------- classification ^^^ detection vvv
+
+        # TODO why is this returning something
+        #detection_results = self.evaluate_detections(annotation_path, "det_results.txt")
 
         print(f"\n {total_videos_processed} videos processed!")
 
@@ -189,8 +193,8 @@ def infer(self):
         else:
             average_inference_time = -1  # Empty video file
 
-        # TODO make eval/det_results.txt a global variable DETECTION_FILE
-        self.record_detections("eval/det_results.txt", bboxes)
+        # TODO make det_results.txt a global variable DETECTION_FILE
+        self.record_detections("det_results.txt", bboxes)
         return inference_dict, average_inference_time
 
     def get_class_label(self):
diff --git a/src/jetson/main.py b/src/jetson/main.py
index 8c77f1dd..074d2614 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -4,6 +4,7 @@
 from typing import List, Set, Dict, Tuple, Optional
 
 import cv2
+from enum import Enum
 from PIL import Image
 import numpy as np
 import torch
@@ -23,19 +24,21 @@
 import multiprocessing
 from multiprocessing import Process, Queue, Value
 from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox
-from src.jetson.models.Retinaface.data import cfg_mnet as cfg
-from src.jetson.models.Retinaface.data import cfg_inference as infer_params
+from src.jetson.models.Retinaface.data.config import cfg_mnet as cfg
+from src.jetson.models.Retinaface.data.config import cfg_inference as infer_params
 
 fileCount = Value('i', 0)
 encryptRet = Queue()  # Shared memory queue to allow child encryption process to return to parent
+DETECTOR_TYPES = ['blazeface', 'retinaface', 'ssd']
 
 
 class FaceDetector:
-    def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_default_dev=False):
+    def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, cuda=True, set_default_dev=False):
         """
         Creates a FaceDetector object
         Args:
             detector: A string path to a trained pth file for a ssd model trained in face detection
+            detector_type: A DetectorType describing which face detector is being used
             detection_threshold: The minimum threshold for a detection to be considered valid
             cuda: Whether or not to enable CUDA
             set_default_dev: Whether or not to set the default device for PyTorch
@@ -43,17 +46,16 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
 
         self.device = torch.device("cpu")
 
-        if ('.pth' in detector and 'ssd' in detector):
-            from models.SSD.ssd import build_ssd
+        if detector_type == 'ssd':
+            from src.jetson.models.SSD.ssd import build_ssd
 
             self.net = build_ssd('test', 300, 2)
             self.model_name = 'ssd'
             self.net.load_state_dict(torch.load(detector, map_location=self.device))
             self.transformer = BaseTransform(self.net.size, (104, 117, 123))
 
-
-        elif ('.pth' in detector and 'blazeface' in detector):
-            from models.BlazeFace.blazeface import BlazeFace
+        elif detector_type == 'blazeface':
+            from src.jetson.models.BlazeFace.blazeface import BlazeFace
 
             self.net = BlazeFace(self.device)
             self.net.load_weights(detector)
@@ -63,8 +65,8 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
             self.net.min_suppression_threshold = 0.3
             self.transformer = BaseTransform(128, None)
 
-        elif ('.pth' in detector and 'mobile' in detector):
-            from models.Retinaface.retinaface import RetinaFace, load_model
+        elif detector_type == 'retinaface':
+            from src.jetson.models.Retinaface.retinaface import RetinaFace, load_model
 
             self.net = RetinaFace(cfg=cfg, phase='test')
             self.net = load_model(self.net, detector, True)
@@ -74,7 +76,7 @@ def __init__(self, detector: str, detection_threshold=0.7, cuda=True, set_defaul
             self.transformer = BaseTransform((self.image_shape[1], self.image_shape[0]), (104, 117, 123))
             priorbox = PriorBox(cfg, image_size=self.image_shape)
             priors = priorbox.forward()
-            self.prior_data = priors.data
+            self.prior_data = priors.data.to("cuda:0" if cuda else "cpu")
 
         self.detection_threshold = detection_threshold
         if cuda and torch.cuda.is_available():
@@ -98,7 +100,7 @@ def detect(self,
             The bounding boxes of the face(s) that were detected formatted (upper left corner(x, y) , lower right corner(x,y))
         """
 
-        if (self.model_name == 'ssd'):
+        if self.model_name == 'ssd':
             x = torch.from_numpy(self.transformer(image)[0]).permute(2, 0, 1)
             x = Variable(x.unsqueeze(0)).to(self.device)
             y = self.net(x)
@@ -109,12 +111,13 @@ def detect(self,
             while j < detections.shape[2] and detections[0, 1, j, 0] > self.detection_threshold:
                 pt = (detections[0, 1, j, 1:] * scale).cpu().numpy()
                 x1, y1, x2, y2 = pt
-                bboxes.append((x1, y1, x2, y2))
+                conf = detections[0, 1, j, 0].item()
+                bboxes.append((x1, y1, x2, y2, conf))
                 j += 1
 
             return bboxes
 
-        elif (self.model_name == 'blazeface'):
+        elif self.model_name == 'blazeface':
             img = self.transformer(image)[0].astype(np.float32)
 
             detections = self.net.predict_on_image(img)
@@ -130,6 +133,7 @@ def detect(self,
                 xmin = detections[i, 1] * image.shape[1]
                 ymax = detections[i, 2] * image.shape[0]
                 xmax = detections[i, 3] * image.shape[1]
+                conf = detections[i, 16]
 
                 img = img / 127.5 - 1.0
 
@@ -137,14 +141,15 @@ def detect(self,
                     kp_x = detections[i, 4 + k * 2] * img.shape[1]
                     kp_y = detections[i, 4 + k * 2 + 1] * img.shape[0]
 
-                bboxes.append((xmin, ymin, xmax, ymax))
+                bboxes.append((xmin, ymin, xmax, ymax, conf))
 
             return bboxes
 
-
-        elif (self.model_name == 'retinaface'):
+        elif self.model_name == 'retinaface':
             img = (self.transformer(image)[0]).transpose(2, 0, 1)
             img = torch.from_numpy(img).unsqueeze(0)
+            img = img.to(self.device)
+  
             loc, conf, _ = self.net(
                 img)  # forward pass: Returns bounding box location, confidence and facial landmark locations
 
@@ -152,9 +157,7 @@ def detect(self,
             boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize)
             dets = do_nms(boxes, scores, infer_params["nms_thresh"])
 
-            bboxes = []
-            for det in dets:
-                bboxes.append(tuple(dets[0][0:4]))
+            bboxes = [tuple(det[0:5]) for det in dets]
 
             return bboxes
 
@@ -192,14 +195,16 @@ def close(self):
 
 
 class Classifier:
-    def __init__(self, classifier):
+    def __init__(self, classifier, cuda: bool):
         '''
         Performs classification of facial region into three classes - [Goggles, Glasses, Neither]
         Args:
             classifier - Trained classifier model (Currently, mobilenetv2)
+            cuda - True if Nvidia GPU is used
         '''
         self.fps = 0
         self.classifier = classifier
+        self.device = cuda
 
     def classifyFace(self,
                      face: np.ndarray):
@@ -228,7 +233,7 @@ def classifyFace(self,
         ])
         transformed_face = transform(pil_face)
         face_batch = transformed_face.unsqueeze(0)
-        device = torch.device("cuda:0" if args.cuda and torch.cuda.is_available() else "cpu")
+        device = torch.device("cuda:0" if self.device and torch.cuda.is_available() else "cpu")
         with torch.no_grad():
             face_batch = face_batch.to(device)
             labels = classifier(face_batch)
@@ -253,7 +258,7 @@ def classifyFrame(self,
 
         label = []
         for box in boxes:
-            x1, y1, x2, y2 = [int(b) for b in box]
+            x1, y1, x2, y2 = [int(b) for b in box[0:4]]
             # draw boxes within the frame
             x1 = max(0, x1)
             y1 = max(0, y1)
@@ -301,7 +306,7 @@ def encryptFrame(self, img: np.ndarray,
             boxes: facial Coordinates
         '''
         for box in boxes:
-            x1, y1, x2, y2 = [int(b) for b in box]
+            x1, y1, x2, y2 = [int(b) for b in box[0:4]]
             # draw boxes within the frame
             x1 = max(0, x1)
             y1 = max(0, y1)
@@ -383,13 +388,19 @@ def drawFrame(boxes, frame, fps):
 if __name__ == "__main__":
     warnings.filterwarnings("once")
     parser = argparse.ArgumentParser(description="Face detection")
-    parser.add_argument('--detector', '-t', type=str, required=True, help="Path to a trained ssd .pth file")
+    parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file")
+    parser.add_argument('--detector_type', '-t', type=str, required=True, help="Type of face detector. One of "
+                                                                               "blazeface, ssd, or retinaface.")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable cuda")
     parser.add_argument('--classifier', type=str, help="Path to a trained classifier .pth file")
     parser.add_argument('--write_imgs', default=False, help='Write images to output_dir')
     parser.add_argument('--output_dir', default='encrypted_imgs', type=str, help="Where to output encrypted images")
     args = parser.parse_args()
 
+    if args.detector_type not in DETECTOR_TYPES:
+        print('Please include a valid detector type (\'blazeface\', \'ssd\', or \'retinaface\'')
+        exit(1)
+
     device = torch.device('cpu')
     if args.cuda and torch.cuda.is_available():
         device = torch.device('cuda:0')
@@ -398,8 +409,9 @@ def drawFrame(boxes, frame, fps):
     g.eval()
 
     capturer = VideoCapturer()
-    detector = FaceDetector(detector=args.detector, cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
-    classifier = Classifier(g)
+    detector = FaceDetector(detector=args.detector, detector_type=args.detector_type,
+                            cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
+    classifier = Classifier(g, args.cuda)
     encryptor = Encryptor()
 
     run_face_detection: bool = True
diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py
index bed236b3..03a5f513 100644
--- a/src/jetson/models/utils/box_utils.py
+++ b/src/jetson/models/utils/box_utils.py
@@ -376,7 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor):
     Returns boxes and confidence scores that are above confidence threshold
     """
     scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
-    boxes = (boxes * scale / resize_factor).numpy()
+    boxes = (boxes * scale / resize_factor).to('cpu').numpy()
     scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
 
     # ignore low scores

From 75abbfe7de34d82fb80cf0179f532028701539fd Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Wed, 15 Jul 2020 17:49:45 -0400
Subject: [PATCH 09/25] Update files, fix face_extractor getting files

---
 scripts/evaluator.py                          |  4 ++--
 scripts/face_extractor.py                     | 16 ++++++++++------
 src/jetson/main.py                            |  2 +-
 src/jetson/models/Retinaface/data/__init__.py |  2 +-
 src/jetson/models/utils/box_utils.py          |  1 +
 5 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 31c98bf8..4cac8fae 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -358,7 +358,7 @@ def main():
     evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path)
     individual_video_results = evaluator.get_evaluator_results()
 
-    with open(args.output_file, 'w') as json_file:
+    with open(args.output_file, 'w+') as json_file:
         json.dump(individual_video_results, json_file, indent=4)
 
     print(f"\n Output saved at {args.output_file}")
@@ -372,7 +372,7 @@ def main():
     parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
                         help="Path to a trained classifier .pth file")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
-    parser.add_argument('--output_file', type=str, default='eval/test1.json',
+    parser.add_argument('--output_file', type=str, default='results.json',
                         help="Name of evaluation log")
     parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
     parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation "
diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py
index a73c5afb..5197f9fe 100644
--- a/scripts/face_extractor.py
+++ b/scripts/face_extractor.py
@@ -7,11 +7,11 @@
 import numpy as np
 from tqdm import tqdm
 
-from face_detector_threaded import FaceDetector
+from src.jetson.main import FaceDetector
 
 """
 Given a folder of images or videos, run a face detector (literally a FaceDetector) on all images 
-or videos in the folder. Detects and crop all faces in the images or every 1/rate frames from the videos. 
+or videos in the folder. Detect and crop all faces in the images or every 1/rate frames from the videos. 
 Save the resulting crops as .jpgs in an output folder. 
 """
 
@@ -27,7 +27,8 @@ def get_images(input_dir):
     @return: List of image filenames.
     """
     files = [glob(f"{input_dir}/*{e}") for e in IMAGE_EXT]
-    return files[0]
+    files = [file for subfile in files for file in subfile]
+    return files
 
 
 def get_videos(input_dir):
@@ -37,16 +38,18 @@ def get_videos(input_dir):
     @return: List of video filenames.
     """
     files = [glob(f"{input_dir}/*{e}") for e in VIDEO_EXT]
-    return files[0]
+    files = [file for subfile in files for file in subfile]
+    return files
 
 
 def crop_and_save_img(frame, file_num, output_dir):
     """Run frame through FaceDetector and save the cropped face image."""
     if frame is not None and not 0:
+        print("Searching for a face")
         boxes = face_detector.detect(frame)
         for box in boxes:
             # Get individual coordinates as integers
-            x1, y1, x2, y2 = [int(b) for b in box]
+            x1, y1, x2, y2, _ = [int(b) for b in box]
             face = frame[y1:y2, x1:x2]
             if face is None or 0 in face.shape:
                 continue
@@ -90,6 +93,7 @@ def crop_faces_from_videos(output_dir):
     parser.add_argument("--input_dir", default="videos", type=str, help="Input directory containing the videos/images.")
     parser.add_argument('--output_dir', default='face_imgs', type=str, help="Output directory for the extracted faces.")
     parser.add_argument('--trained_model', default='blazeface.pth', type=str, help="Path to the face detector model.")
+    parser.add_argument('--detector_type', type=str, help='One of blazeface, ssd, retinaface')
     parser.add_argument('--images', default=False, action='store_true',
                         help='Crop faces from images instead of videos.')
     parser.add_argument('--rate', default=5, type=int, help="Crop faces from every 1/rate frames of the video.")
@@ -97,7 +101,7 @@ def crop_faces_from_videos(output_dir):
                                                                             'are all sideways, enable this.')
     args = parser.parse_args()
 
-    face_detector = FaceDetector(trained_model=args.trained_model)
+    face_detector = FaceDetector(args.trained_model, args.detector_type)
     filenames = get_images(args.input_dir) if args.images else get_videos(args.input_dir)
 
     if not os.path.isdir(args.output_dir):
diff --git a/src/jetson/main.py b/src/jetson/main.py
index 074d2614..e50c16a0 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -37,7 +37,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c
         """
         Creates a FaceDetector object
         Args:
-            detector: A string path to a trained pth file for a ssd model trained in face detection
+            detector: A string path to a trained pth file for a face detection model
             detector_type: A DetectorType describing which face detector is being used
             detection_threshold: The minimum threshold for a detection to be considered valid
             cuda: Whether or not to enable CUDA
diff --git a/src/jetson/models/Retinaface/data/__init__.py b/src/jetson/models/Retinaface/data/__init__.py
index 311ea72e..5c6eb077 100644
--- a/src/jetson/models/Retinaface/data/__init__.py
+++ b/src/jetson/models/Retinaface/data/__init__.py
@@ -1 +1 @@
-from models.Retinaface.data.config import cfg_mnet, cfg_re50, cfg_inference
+from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50, cfg_inference
diff --git a/src/jetson/models/utils/box_utils.py b/src/jetson/models/utils/box_utils.py
index 03a5f513..50e6ea77 100644
--- a/src/jetson/models/utils/box_utils.py
+++ b/src/jetson/models/utils/box_utils.py
@@ -376,6 +376,7 @@ def postprocess(boxes, conf, image_shape, detection_threshold, resize_factor):
     Returns boxes and confidence scores that are above confidence threshold
     """
     scale = torch.Tensor([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
+    scale = scale.to(boxes.device)
     boxes = (boxes * scale / resize_factor).to('cpu').numpy()
     scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
 

From bbd0e54614237b1b08535ce4726549669ab864cf Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 16 Jul 2020 14:26:58 -0400
Subject: [PATCH 10/25] Add scripts/utils for misc. helper functions.
 Auto-rotate videos.

---
 scripts/evaluator.py      | 25 +++++++++++++++++--------
 scripts/face_extractor.py | 17 +++++++++++------
 scripts/utils.py          | 32 ++++++++++++++++++++++++++++++++
 src/jetson/main.py        | 12 ++++++++----
 4 files changed, 68 insertions(+), 18 deletions(-)
 create mode 100644 scripts/utils.py

diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 4cac8fae..dc59d450 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -1,12 +1,14 @@
-import os
-import cv2
 import argparse
-import torch
+import json
+import os
 import time
 import warnings
-import json
+
+import cv2
 import numpy as np
+import torch
 
+from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector, Classifier
 
 VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
@@ -21,7 +23,7 @@
 
 
 class Evaluator():
-    def __init__(self, cuda, detector, classifier, input_directory, annotation_path):
+    def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path):
         """
         Evaluates face detection and goggle classification performance.
         Goggle Classification accuracy is given by average class accuracy and individual
@@ -46,7 +48,7 @@ def __init__(self, cuda, detector, classifier, input_directory, annotation_path)
         if os.path.exists("det_results.txt"):
             os.remove("det_results.txt")
 
-        self.detector = FaceDetector(detector=detector, detector_type='retinaface', cuda=cuda and torch.cuda.is_available(),
+        self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(),
                                      set_default_dev=True)
         self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
         self.video_filenames = self.get_video_files(input_directory)
@@ -104,6 +106,7 @@ def evaluate(self, annotation_path: str):
         # ------- classification ^^^ detection vvv
 
         # TODO why is this returning something
+        # TODO make it an optional arg to evaluate face detection
         #detection_results = self.evaluate_detections(annotation_path, "det_results.txt")
 
         print(f"\n {total_videos_processed} videos processed!")
@@ -162,10 +165,15 @@ def infer(self):
         frame_counter = 0
         start_time = time.time()
 
+        # check if the video needs to be rotated
+        rotate_code = check_rotation(self.video)
+
         while True:
             ret, img = self.cap.read()
             if not ret:
                 break
+            if rotate_code is not None:
+                correct_rotation(img, rotate_code)
             # img = cv2.resize(img, (640, 480))  #Set this to the input shape of image for faster processing. (Remember to do the same in annotator)
             frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
                 frame_counter)
@@ -355,7 +363,7 @@ def get_evaluator_results(self):
 def main():
     if not args.input_directory:
         raise Exception("Invalid input directory")
-    evaluator = Evaluator(args.cuda, args.detector, args.classifier, args.input_directory, args.annotation_path)
+    evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, args.annotation_path)
     individual_video_results = evaluator.get_evaluator_results()
 
     with open(args.output_file, 'w+') as json_file:
@@ -367,8 +375,9 @@ def main():
 if __name__ == "__main__":
     warnings.filterwarnings("once")
     parser = argparse.ArgumentParser(description="Face detection")
-    parser.add_argument('--detector', '-t', type=str, default='model_weights/blazeface.pth',
+    parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth',
                         help="Path to a trained face detector .pth file")
+    parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd")
     parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
                         help="Path to a trained classifier .pth file")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py
index 5197f9fe..b5bd6104 100644
--- a/scripts/face_extractor.py
+++ b/scripts/face_extractor.py
@@ -1,12 +1,15 @@
 import argparse
-from glob import glob
+import math
 import os
 import warnings
 
+from glob import glob
+
 import cv2
 import numpy as np
 from tqdm import tqdm
 
+from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector
 
 """
@@ -27,6 +30,7 @@ def get_images(input_dir):
     @return: List of image filenames.
     """
     files = [glob(f"{input_dir}/*{e}") for e in IMAGE_EXT]
+    # convert the 2d list into a 1d list
     files = [file for subfile in files for file in subfile]
     return files
 
@@ -38,6 +42,7 @@ def get_videos(input_dir):
     @return: List of video filenames.
     """
     files = [glob(f"{input_dir}/*{e}") for e in VIDEO_EXT]
+    # convert the 2d list into a 1d list
     files = [file for subfile in files for file in subfile]
     return files
 
@@ -45,11 +50,10 @@ def get_videos(input_dir):
 def crop_and_save_img(frame, file_num, output_dir):
     """Run frame through FaceDetector and save the cropped face image."""
     if frame is not None and not 0:
-        print("Searching for a face")
         boxes = face_detector.detect(frame)
         for box in boxes:
             # Get individual coordinates as integers
-            x1, y1, x2, y2, _ = [int(b) for b in box]
+            x1, y1, x2, y2, _ = [int(math.ceil(b)) for b in box]
             face = frame[y1:y2, x1:x2]
             if face is None or 0 in face.shape:
                 continue
@@ -72,15 +76,15 @@ def crop_faces_from_videos(output_dir):
     for video_file in filenames:
         print(f"Opening {video_file}")
         video = cv2.VideoCapture(video_file)
+        rotate_code = check_rotation(video_file)
         file_len = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 
         for frame_num in tqdm(range(file_len)):
             ret, frame = video.read()
 
             if frame_num % args.rate == 0:
-                # If the video is shot horizontally, flip it so it's in the right orientation
-                if args.horiz:
-                    frame = cv2.transpose(frame)
+                if rotate_code is not None:
+                    frame = correct_rotation(frame, rotate_code)
                 crop_and_save_img(frame, file_num, output_dir)
                 file_num += 1
 
@@ -101,6 +105,7 @@ def crop_faces_from_videos(output_dir):
                                                                             'are all sideways, enable this.')
     args = parser.parse_args()
 
+    # the FaceDetector will use CUDA if possible
     face_detector = FaceDetector(args.trained_model, args.detector_type)
     filenames = get_images(args.input_dir) if args.images else get_videos(args.input_dir)
 
diff --git a/scripts/utils.py b/scripts/utils.py
new file mode 100644
index 00000000..5c8361c3
--- /dev/null
+++ b/scripts/utils.py
@@ -0,0 +1,32 @@
+import cv2
+import ffmpeg
+
+"""
+check_rotation and correct_rotation adapted from 
+https://stackoverflow.com/questions/53097092/frame-from-video-is-upside-down-after-extracting
+to handle the fact that some videos store rotation metadata while others do not,
+and OpenCV can't tell the difference
+"""
+
+
+def check_rotation(path_video_file):
+    # this returns meta-data of the video file in form of a dictionary
+    meta_dict = ffmpeg.probe(path_video_file)
+
+    # from the dictionary, meta_dict['streams'][0]['tags']['rotate'] is the key
+    # we are looking for
+    rotate_code = None
+    if 'rotate' not in meta_dict['streams'][0]['tags'].keys():
+        return rotate_code
+    if int(meta_dict['streams'][0]['tags']['rotate']) == 90:
+        rotate_code = cv2.ROTATE_90_CLOCKWISE
+    elif int(meta_dict['streams'][0]['tags']['rotate']) == 180:
+        rotate_code = cv2.ROTATE_180
+    elif int(meta_dict['streams'][0]['tags']['rotate']) == 270:
+        rotate_code = cv2.ROTATE_90_COUNTERCLOCKWISE
+
+    return rotate_code
+
+
+def correct_rotation(frame, rotate_code):
+    return cv2.rotate(frame, rotate_code)
diff --git a/src/jetson/main.py b/src/jetson/main.py
index f271db4e..9f65cf8d 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -37,11 +37,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c
         """
         Creates a FaceDetector object
         Args:
-<<<<<<< HEAD
-            detector: A string path to a trained pth file for a face detection model
-=======
             detector: A string path to a trained pth file for a ssd model trained in face detection
->>>>>>> dc6203d0875efc46c43419aa82af51d7b93d7f6d
             detector_type: A DetectorType describing which face detector is being used
             detection_threshold: The minimum threshold for a detection to be considered valid
             cuda: Whether or not to enable CUDA
@@ -63,6 +59,7 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c
 
             self.net = BlazeFace(self.device)
             self.net.load_weights(detector)
+            # TODO load_anchors doesn't work if run from face_extractor
             self.net.load_anchors("models/BlazeFace/anchors.npy")
             self.model_name = 'blazeface'
             self.net.min_score_thresh = 0.75
@@ -161,6 +158,13 @@ def detect(self,
             boxes, scores = postprocess(boxes, conf, self.image_shape, self.detection_threshold, self.resize)
             dets = do_nms(boxes, scores, infer_params["nms_thresh"])
 
+            # scale bbox coords back to original image size
+            for det in dets:
+                det[0] *= image.shape[1] / img.shape[3]
+                det[1] *= image.shape[0] / img.shape[2]
+                det[2] *= image.shape[1] / img.shape[3]
+                det[3] *= image.shape[0] / img.shape[2]
+
             bboxes = [tuple(det[0:5]) for det in dets]
 
             return bboxes

From b372438beeade888bfd5ac9ff12bb1b08498d6b3 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Sat, 18 Jul 2020 17:05:36 -0400
Subject: [PATCH 11/25] Improve results.json output, fix load_anchors not
 finding anchors.npy

---
 scripts/evaluator.py | 106 ++++++++++++++++++++-----------------------
 src/jetson/main.py   |   5 +-
 2 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index dc59d450..28f42eb5 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -1,16 +1,18 @@
 import argparse
 import json
 import os
-import time
 import warnings
 
 import cv2
 import numpy as np
 import torch
+from tqdm import tqdm
 
 from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector, Classifier
 
+DETECTIONS_FILE = 'det_results.txt'
+CLASSIFICATION_RESULTS_FILE = 'results.json'
 VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
 """
@@ -18,12 +20,13 @@
 Videos to be evaluated should be from the TestVideos folder on the Drive.
 """
 
-# TODO - TODO TODO don't do face detection? Would have to manually label faces but we're using a
+# TODO - TODO TODO don't evaluate face detection? Would have to manually label faces but we're using a
 # TODO - SOTA face detection model that could just empirically be observed to work
+# TODO make comments with @param things
 
 
 class Evaluator():
-    def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path):
+    def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path, rate=1):
         """
         Evaluates face detection and goggle classification performance.
         Goggle Classification accuracy is given by average class accuracy and individual
@@ -36,6 +39,7 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
             classifier: A string path to a .pth weights file for a goggle classification model
             input_directory: Directory containing test videos to run Evaluator on
             annotation_path: Directory containing annotation files (output by annotator.py)
+            rate: Run detection and classification on every 1/rate frames
         """
 
         if cuda and torch.cuda.is_available():
@@ -45,8 +49,8 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
             torch.set_default_tensor_type('torch.FloatTensor')
             self.device = torch.device('cpu')
 
-        if os.path.exists("det_results.txt"):
-            os.remove("det_results.txt")
+        if os.path.exists(DETECTIONS_FILE):
+            os.remove(DETECTIONS_FILE)
 
         self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(),
                                      set_default_dev=True)
@@ -72,12 +76,13 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
         self.condition = ''
         self.cap = ''
         self.video = ''
+        self.rate = rate
         self.evaluate(annotation_path)
 
     def evaluate(self, annotation_path: str):
         """
-        Evaluates every video file in the input directory containing test videos and
-        stores results in self.results.
+        Evaluates (classification and detection) every video file in the input directory
+        containing test videos and stores results in self.results.
         To understand the format of self.results dict, check the constructor
 
         Args:
@@ -86,7 +91,7 @@ def evaluate(self, annotation_path: str):
         total_videos_processed = 0
         for video_file in self.video_filenames:
             self.video = video_file
-            print(f"Processing {self.video} ...")
+            print(f"Processing {self.video} ..., video {total_videos_processed}/{len(self.video_filenames)}")
 
             self.class_label = self.get_class_label()
             self.condition = self.get_condition()
@@ -107,7 +112,7 @@ def evaluate(self, annotation_path: str):
 
         # TODO why is this returning something
         # TODO make it an optional arg to evaluate face detection
-        #detection_results = self.evaluate_detections(annotation_path, "det_results.txt")
+        #detection_results = self.evaluate_detections(annotation_path, DETECTIONS_FILE)
 
         print(f"\n {total_videos_processed} videos processed!")
 
@@ -124,21 +129,23 @@ def calculate_average_class_accuracy(self):
 
     def record_results(self, result):
         """
-        Records all the results in the self.results dict
+        Records results of one video in the self.results dict
 
         Args:
-            result(List) - contains the classification accuracy and inference time
+            result(List) - contains the classification accuracy and inference time and of one video
         """
         self.results[self.class_label]['number_of_videos'] += 1
+        # below is just a running sum which gets divided by the number of videos at the end
         self.results[self.class_label]['average_class_accuracy'] += result[0]
         self.results[self.class_label]['individual_video_results'][self.video] = {}
         self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
-        self.results[self.class_label]['individual_video_results'][self.video]["inference_time"] = result[1]
+        self.results[self.class_label]['individual_video_results'][self.video]["num_correct"] = result[1]
+        self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2]
         self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition
 
     def record_detections(self, file, detections):
         """
-        Save detections in a file for evaluation
+        Save face detections in a file for evaluation
         Args:
             file (str): Records detections here
             detections (List): contains all the bounding boxes and confidence values
@@ -154,32 +161,27 @@ def record_detections(self, file, detections):
     def infer(self):
         """
         Performs inference on a video by using the face detection
-        and goggle classification models
+        and goggle classification models.
+        @param rate: How often to run detection (every 1/rate frames).
         It returns:
         1) inference_dict: the number of inferences for each class.
-        2) average_inference_time: a float containing the average inference time for the whole video
         """
         bboxes = []
         preds = []
         inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0}
-        frame_counter = 0
-        start_time = time.time()
 
         # check if the video needs to be rotated
         rotate_code = check_rotation(self.video)
+        video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
-        while True:
+        for frame_num in tqdm(range(video_len)):
             ret, img = self.cap.read()
-            if not ret:
-                break
-            if rotate_code is not None:
-                correct_rotation(img, rotate_code)
-            # img = cv2.resize(img, (640, 480))  #Set this to the input shape of image for faster processing. (Remember to do the same in annotator)
-            frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
-                frame_counter)
-            boxes = self.detector.detect(img)  # Also contains confidence
-            box_no_conf = []
-            if len(boxes) != 0:
+            if frame_num % self.rate == 0:
+                if rotate_code is not None:
+                    correct_rotation(img, rotate_code)
+                frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
+                    frame_num)
+                boxes = self.detector.detect(img)  # Also contains confidence
                 for box in boxes:
                     x1 = max(0, box[0])
                     y1 = max(0, box[1])
@@ -191,19 +193,12 @@ def infer(self):
                     preds.append(label.item())
                     bboxes.append([frame_id, x1, y1, x2, y2, conf])
 
-                    inference_dict["Goggles"] += preds.count(1)
-                    inference_dict["Glasses"] += preds.count(0)
-                    inference_dict["Neither"] += preds.count(2)
+        inference_dict["Goggles"] += preds.count(1)
+        inference_dict["Glasses"] += preds.count(0)
+        inference_dict["Neither"] += preds.count(2)
 
-        total_time = time.time() - start_time
-        if frame_counter > 0:
-            average_inference_time = total_time / frame_counter
-        else:
-            average_inference_time = -1  # Empty video file
-
-        # TODO make det_results.txt a global variable DETECTION_FILE
-        self.record_detections("det_results.txt", bboxes)
-        return inference_dict, average_inference_time
+        self.record_detections(DETECTIONS_FILE, bboxes)
+        return inference_dict
 
     def get_class_label(self):
         """
@@ -246,13 +241,13 @@ def evaluate_classifications(self):
         Returns the accuracy (percentage_of_correct_predictions) of the
         predictions for a video
         """
-        inferences, inference_time = self.infer()
+        inferences = self.infer()
         if sum(inferences.values()) == 0:
             percentage_of_correct_predictions = 0
         else:
             percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values())
 
-        return percentage_of_correct_predictions, inference_time
+        return percentage_of_correct_predictions, inferences[self.class_label], sum(inferences.values())
 
     def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5):
         """
@@ -356,22 +351,9 @@ def get_evaluator_results(self):
         """
         Returns the dict containing all the test results (self.results)
         """
-
         return self.results
 
 
-def main():
-    if not args.input_directory:
-        raise Exception("Invalid input directory")
-    evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory, args.annotation_path)
-    individual_video_results = evaluator.get_evaluator_results()
-
-    with open(args.output_file, 'w+') as json_file:
-        json.dump(individual_video_results, json_file, indent=4)
-
-    print(f"\n Output saved at {args.output_file}")
-
-
 if __name__ == "__main__":
     warnings.filterwarnings("once")
     parser = argparse.ArgumentParser(description="Face detection")
@@ -381,15 +363,23 @@ def main():
     parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
                         help="Path to a trained classifier .pth file")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
-    parser.add_argument('--output_file', type=str, default='results.json',
-                        help="Name of evaluation log")
     parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
     parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation "
                                                                            "files")
+    parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.')
     # TODO add store_true args for detection, evaluation (to do separately if desired)
 
     args = parser.parse_args()
 
-    main()
+    if not args.input_directory:
+        raise Exception("Invalid input directory")
+    evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory,
+                          args.annotation_path)
+    individual_video_results = evaluator.get_evaluator_results()
+
+    with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file:
+        json.dump(individual_video_results, json_file, indent=4)
+
+    print(f"\n Output saved at {args.output_file}")
 
     exit()
diff --git a/src/jetson/main.py b/src/jetson/main.py
index 9f65cf8d..c2586c8c 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -59,8 +59,8 @@ def __init__(self, detector: str, detector_type: str, detection_threshold=0.7, c
 
             self.net = BlazeFace(self.device)
             self.net.load_weights(detector)
-            # TODO load_anchors doesn't work if run from face_extractor
-            self.net.load_anchors("models/BlazeFace/anchors.npy")
+            # assume anchors.npy is in this location relative to the class definition
+            self.net.load_anchors(os.path.join(os.path.dirname(__file__), "models/BlazeFace/anchors.npy"))
             self.model_name = 'blazeface'
             self.net.min_score_thresh = 0.75
             self.net.min_suppression_threshold = 0.3
@@ -235,7 +235,6 @@ def classifyFace(self,
         # the same transforms as applied while training model
         transform = transforms.Compose([
             transforms.Resize(224),
-            transforms.RandomGrayscale(1),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         ])

From 778c9a2bd9ae6b9664c8936db9aca90c0c69154e Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Tue, 21 Jul 2020 10:03:53 -0400
Subject: [PATCH 12/25] Documenting and refactoring

---
 scripts/annotator.py |   2 +-
 scripts/evaluator.py | 251 +++++++++++++++++++++----------------------
 scripts/utils.py     |   7 +-
 3 files changed, 130 insertions(+), 130 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 40dd34d8..3b8ed341 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -17,7 +17,7 @@
 Save bbox detections to SEPARATE text files for evaluation by evaluator.py
 """
 
-# TODO there's gotta be a better way than saving to 47,000+ text files
+# TODO output large json file or something
 # TODO add instructions for running annotator and evaluator
 
 CLASSES = ['Glasses/', 'Goggles/', 'Neither/']
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 28f42eb5..ef8f730f 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -6,8 +6,10 @@
 import cv2
 import numpy as np
 import torch
+import torchvision
 from tqdm import tqdm
 
+from scripts.goggle_classifier import get_model
 from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector, Classifier
 
@@ -54,7 +56,11 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
 
         self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(),
                                      set_default_dev=True)
-        self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
+        # TODO check state_dict vs. not
+        model = get_model()
+        model.load_state_dict(torch.load(classifier, map_location=self.device))
+        self.classifier = Classifier(model, self.device)
+        #self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
         self.video_filenames = self.get_video_files(input_directory)
         self.results = {'Goggles':
                             {'average_class_accuracy': 0.0,
@@ -76,6 +82,7 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
         self.condition = ''
         self.cap = ''
         self.video = ''
+        self.video_len = 0
         self.rate = rate
         self.evaluate(annotation_path)
 
@@ -102,10 +109,10 @@ def evaluate(self, annotation_path: str):
                 self.record_results(classification_result)
                 total_videos_processed += 1
                 print(f"{self.video} : Done")
-
             else:
                 print(f"Unable to open video {self.video}")
                 continue
+
         self.calculate_average_class_accuracy()
 
         # ------- classification ^^^ detection vvv
@@ -116,10 +123,109 @@ def evaluate(self, annotation_path: str):
 
         print(f"\n {total_videos_processed} videos processed!")
 
+    def evaluate_classifications(self):
+        """
+        Returns the accuracy (percentage_of_correct_predictions) of the
+        predictions for a video
+        """
+        inferences = self.infer()
+        if sum(inferences.values()) == 0:
+            percentage_of_correct_predictions = 0
+        else:
+            percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values())
+
+        return percentage_of_correct_predictions, inferences, sum(inferences.values())
+
+    def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file, overlap_threshold=0.5):
+        """
+        Calculates the recall and precision of face detection for a video.
+        TODO explain what that means... seems like overlap of x and y coords? I.e. IoU?
+
+        @param ground_truth_detections_file: file containing actual face detections (created by annotator.py)
+        @param predicted_detections_file: file containing predicted face detections
+        @param overlap_threshold: IoU greater than threshold counts as correct, less than is incorrect
+        """
+
+        with open(ground_truth_detections_file) as detect_file:
+            ground_truth_detections = json.load(detect_file)
+
+        with open(predicted_detections_file, 'r') as prediction_file:
+            predicted_detections = json.load(prediction_file)
+
+        # TODO fix below based on detections format
+        total_ground_truths = 0
+        for frame_id in ground_truth_detections:
+            total_ground_truths += len(ground_truth_detections[frame_id])
+
+        # TODO ugly parsing and such here. Need to debug it. ==1 means...?
+        if any(predicted_detections) == 1:
+            splitlines = [x.strip().split('|') for x in predicted_detections]
+            image_ids = [x[0] for x in splitlines]
+            confidence = np.array([float(x[5]) for x in splitlines])
+            bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines])
+
+            # sort by confidence
+            sorted_ind = np.argsort(-confidence)
+            sorted_scores = np.sort(-confidence)
+            bboxes = bboxes[sorted_ind, :]
+            image_ids = [image_ids[x] for x in sorted_ind]
+
+            nd = len(image_ids)
+            true_pos = np.zeros(nd)
+            false_pos = np.zeros(nd)
+
+            # TODO for frame in frames?
+            for d in range(nd):
+                try:
+                    bbox = bboxes[d, :].astype(float)
+                    max_overlap = -np.inf
+                    bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32)
+                    if bbox_ground_truth_detections.size > 0:
+                        # TODO max and min variable names are backwards?
+                        ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0])
+                        iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1])
+                        ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2])
+                        iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3])
+                        iw = np.maximum(ixmax - ixmin, 0.)
+                        ih = np.maximum(iymax - iymin, 0.)
+                        # TODO debug. inters = intersection? uni = union? Overlaps is actual value?
+                        # TODO import IoU from box_utils should work
+                        inters = iw * ih
+                        uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) +
+                               (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) *
+                               (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters)
+                        overlaps = inters / uni
+                        max_overlap = np.max(overlaps)
+                        # jmax = np.argmax(overlaps)
+
+                    if max_overlap > overlap_threshold:
+                        true_pos[d] = 1.
+                    else:
+                        false_pos[d] = 1.
+
+                except KeyError:
+                    continue
+
+            print("Total ground truths: ", total_ground_truths)
+            false_pos = np.cumsum(false_pos)
+            true_pos = np.cumsum(true_pos)
+            recall = true_pos / float(total_ground_truths)
+            # avoid divide by zero in case the first detection matches a difficult
+            # ground truth
+            precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
+        else:
+            recall = -1.
+            precision = -1.
+            ap = -1.
+
+        print("Precision: ", precision)
+        print("Recall: ", recall)
+
+        return precision[len(precision)], recall[len(recall)]  # final precision, recall
+
     def calculate_average_class_accuracy(self):
         """
-        Calculates the average class accuracy for each class and stores it in the
-        self.results dict.
+        Calculates the average class accuracy for each class and stores it in self.results
         """
         for class_label in self.results:
             if self.results[class_label]['number_of_videos'] > 0:
@@ -131,21 +237,25 @@ def record_results(self, result):
         """
         Records results of one video in the self.results dict
 
-        Args:
-            result(List) - contains the classification accuracy and inference time and of one video
+        @param result(List) - contains the classification accuracy,
+        number of predictions for each label, number of detections
         """
         self.results[self.class_label]['number_of_videos'] += 1
         # below is just a running sum which gets divided by the number of videos at the end
         self.results[self.class_label]['average_class_accuracy'] += result[0]
         self.results[self.class_label]['individual_video_results'][self.video] = {}
         self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
-        self.results[self.class_label]['individual_video_results'][self.video]["num_correct"] = result[1]
+        self.results[self.class_label]['individual_video_results'][self.video]["glasses"] = result[1]['Glasses']
+        self.results[self.class_label]['individual_video_results'][self.video]["goggles"] = result[1]['Goggles']
+        self.results[self.class_label]['individual_video_results'][self.video]["neither"] = result[1]['Neither']
         self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2]
+        self.results[self.class_label]['individual_video_results'][self.video]["num_frames"] = self.video_len
         self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition
 
     def record_detections(self, file, detections):
         """
         Save face detections in a file for evaluation
+        TODO improve how this is stored
         Args:
             file (str): Records detections here
             detections (List): contains all the bounding boxes and confidence values
@@ -160,7 +270,7 @@ def record_detections(self, file, detections):
 
     def infer(self):
         """
-        Performs inference on a video by using the face detection
+        Performs inference on a video using the face detection
         and goggle classification models.
         @param rate: How often to run detection (every 1/rate frames).
         It returns:
@@ -172,11 +282,11 @@ def infer(self):
 
         # check if the video needs to be rotated
         rotate_code = check_rotation(self.video)
-        video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        self.video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
-        for frame_num in tqdm(range(video_len)):
+        for frame_num in tqdm(range(self.video_len)):
             ret, img = self.cap.read()
-            if frame_num % self.rate == 0:
+            if frame_num % self.rate == 0 and img is not None:
                 if rotate_code is not None:
                     correct_rotation(img, rotate_code)
                 frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
@@ -193,8 +303,8 @@ def infer(self):
                     preds.append(label.item())
                     bboxes.append([frame_id, x1, y1, x2, y2, conf])
 
-        inference_dict["Goggles"] += preds.count(1)
         inference_dict["Glasses"] += preds.count(0)
+        inference_dict["Goggles"] += preds.count(1)
         inference_dict["Neither"] += preds.count(2)
 
         self.record_detections(DETECTIONS_FILE, bboxes)
@@ -219,121 +329,6 @@ def get_condition(self):
         """
         return self.video.split('/')[-2]
 
-    def get_ground_truth_detections(self, directory):
-        """
-        Get ground truth detection labels (from annotation file)
-        """
-        ground_truths = {}
-
-        for file in os.listdir(directory):
-            f = open(directory + file, "r")
-            key = file.strip('.txt')
-            content = f.readlines()
-            f.close()
-
-            content = [list(map(float, x.strip(' \n').split(' '))) for x in content]
-            ground_truths[key] = content
-
-        return ground_truths
-
-    def evaluate_classifications(self):
-        """
-        Returns the accuracy (percentage_of_correct_predictions) of the
-        predictions for a video
-        """
-        inferences = self.infer()
-        if sum(inferences.values()) == 0:
-            percentage_of_correct_predictions = 0
-        else:
-            percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values())
-
-        return percentage_of_correct_predictions, inferences[self.class_label], sum(inferences.values())
-
-    def evaluate_detections(self, annotations_dir, detection_dir, overlap_threshold=0.5):
-        """
-        Calculates the recall and precision of face detection for a video.
-        TODO explain what that means... seems like overlap of x and y coords? I.e. IoU?
-        
-        @param annotations_dir: directory containing annotation files (created by annotator.py)
-        @param detection_dir: directory of predicted detections TODO ???
-        @param overlap_threshold: greater than threshold counts as correct, less than is incorrect
-        """
-
-        ground_truth_detections = self.get_ground_truth_detections(annotations_dir)
-        with open(detection_dir, 'r') as f:
-            # TODO verify variable name accurate
-            predicted_detections = f.readlines()
-
-        total_ground_truths = 0
-        for frame_id in ground_truth_detections:
-            total_ground_truths += len(ground_truth_detections[frame_id])
-
-        # TODO ugly parsing and such here. Need to debug it. ==1 means...?
-        if any(predicted_detections) == 1:
-            splitlines = [x.strip().split('|') for x in predicted_detections]
-            image_ids = [x[0] for x in splitlines]
-            confidence = np.array([float(x[5]) for x in splitlines])
-            bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines])
-
-            # sort by confidence
-            sorted_ind = np.argsort(-confidence)
-            sorted_scores = np.sort(-confidence)
-            bboxes = bboxes[sorted_ind, :]
-            image_ids = [image_ids[x] for x in sorted_ind]
-
-            nd = len(image_ids)
-            true_pos = np.zeros(nd)
-            false_pos = np.zeros(nd)
-
-            # TODO for frame in frames?
-            for d in range(nd):
-                try:
-                    bbox = bboxes[d, :].astype(float)
-                    max_overlap = -np.inf
-                    bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32)
-                    if bbox_ground_truth_detections.size > 0:
-                        # TODO max and min variable names are backwards?
-                        ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0])
-                        iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1])
-                        ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2])
-                        iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3])
-                        iw = np.maximum(ixmax - ixmin, 0.)
-                        ih = np.maximum(iymax - iymin, 0.)
-                        # TODO debug. inters = intersection? uni = union? Overlaps is actual value?
-                        # TODO import IoU from box_utils should work
-                        inters = iw * ih
-                        uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) +
-                               (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) *
-                               (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters)
-                        overlaps = inters / uni
-                        max_overlap = np.max(overlaps)
-                        # jmax = np.argmax(overlaps)
-
-                    if max_overlap > overlap_threshold:
-                        true_pos[d] = 1.
-                    else:
-                        false_pos[d] = 1.
-
-                except KeyError:
-                    continue
-
-            print("Total ground truths: ", total_ground_truths)
-            false_pos = np.cumsum(false_pos)
-            true_pos = np.cumsum(true_pos)
-            recall = true_pos / float(total_ground_truths)
-            # avoid divide by zero in case the first detection matches a difficult
-            # ground truth
-            precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
-        else:
-            recall = -1.
-            precision = -1.
-            ap = -1.
-
-        print("Precision: ", precision)
-        print("Recall: ", recall)
-
-        return precision[len(precision)], recall[len(recall)]  # final precision, recall
-
     def get_video_files(self, input_directory: str):
         """
         Gets all the video files in the input directory
@@ -380,6 +375,6 @@ def get_evaluator_results(self):
     with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file:
         json.dump(individual_video_results, json_file, indent=4)
 
-    print(f"\n Output saved at {args.output_file}")
+    print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}")
 
     exit()
diff --git a/scripts/utils.py b/scripts/utils.py
index 5c8361c3..3c8d5336 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -1,11 +1,16 @@
 import cv2
 import ffmpeg
 
+"""
+Miscellaneous utility functions that apply to multiple scripts.
+"""
+
+
 """
 check_rotation and correct_rotation adapted from 
 https://stackoverflow.com/questions/53097092/frame-from-video-is-upside-down-after-extracting
 to handle the fact that some videos store rotation metadata while others do not,
-and OpenCV can't tell the difference
+and OpenCV can't tell the difference.
 """
 
 

From 275dea45e393e5fd808163bdfa3d1a28e43adcf6 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Wed, 22 Jul 2020 19:19:17 -0400
Subject: [PATCH 13/25] Finish annotator saving to csv. check_rotation only
 rotates .MOV files

---
 scripts/annotator.py      | 221 ++++++++------------------------------
 scripts/evaluator.py      |   1 +
 scripts/face_extractor.py |   2 +-
 scripts/utils.py          |   8 +-
 4 files changed, 53 insertions(+), 179 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 3b8ed341..34e489c1 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -1,65 +1,25 @@
 from __future__ import print_function
-import os
+
 import argparse
-import torch
-import torch.backends.cudnn as cudnn
-import numpy as np
-from src.jetson.models.Retinaface.data.config import cfg_mnet, cfg_re50
-from src.jetson.models.Retinaface.layers.functions.prior_box import PriorBox
-from src.jetson.models.utils.box_utils import nms_numpy, decode_landm, decode
+import csv
+import os
+
 import cv2
-from src.jetson.models.Retinaface.retinaface import RetinaFace
-import time
-import json
+import torch
+from tqdm import tqdm
+
+from src.jetson.main import FaceDetector
+from scripts.utils import check_rotation, correct_rotation
 
 """
-Run the face detector model on TestVideos (on the Drive, also args.input_directory).
-Save bbox detections to SEPARATE text files for evaluation by evaluator.py
+Run the face detector model on a folder of videos (most recently used on TestVideos from the Drive).
+Save bbox detections to a csv file to be compared in evaluator.py.
+An earlier version of this script was used to compare Retinaface with
+a Mobilenet backbone versus a Resnet backbone; comparison of object
+detectors would be its most applicable use.
 """
 
-# TODO output large json file or something
-# TODO add instructions for running annotator and evaluator
-
-CLASSES = ['Glasses/', 'Goggles/', 'Neither/']
-CONDITIONS = ['Ideal/', 'Low_lighting/', 'Occlusion_bottom/', 'Occlusion_left_right/', 'Pose_45_degrees_down/',
-              'Pose_45_degrees_up/',
-              'Pose_looking_left/', 'Pose_looking_right/', 'Scale_3-5m/', 'Scale_<3m/', 'Scale_>5m/']
-
-
-def check_keys(model, pretrained_state_dict):
-    ckpt_keys = set(pretrained_state_dict.keys())
-    model_keys = set(model.state_dict().keys())
-    used_pretrained_keys = model_keys & ckpt_keys
-    unused_pretrained_keys = ckpt_keys - model_keys
-    missing_keys = model_keys - ckpt_keys
-    print('Missing keys: {}'.format(len(missing_keys)))
-    print('Unused checkpoint keys: {}'.format(len(unused_pretrained_keys)))
-    print('Used keys: {}'.format(len(used_pretrained_keys)))
-    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
-    return True
-
-
-def remove_prefix(state_dict, prefix):
-    """ Old style model is stored with all names of parameters sharing common prefix 'module.' """
-    print('remove prefix \'{}\''.format(prefix))
-    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
-    return {f(key): value for key, value in state_dict.items()}
-
-
-def load_model(model, pretrained_path, load_to_cuda):
-    print('Loading pretrained model from {}'.format(pretrained_path))
-    if not load_to_cuda:
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
-    else:
-        device = torch.cuda.current_device()
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
-    if "state_dict" in pretrained_dict.keys():
-        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
-    else:
-        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
-    check_keys(model, pretrained_dict)
-    model.load_state_dict(pretrained_dict, strict=False)
-    return model
+DETECTIONS_FILE = 'detection_results.csv'
 
 
 def create_directory(root_directory):
@@ -67,149 +27,60 @@ def create_directory(root_directory):
         os.mkdir(root_directory)
 
 
-def get_storage_location(output_directory, video_filename, input_directory):
-    # TODO ugly filename strip
-    save_dir = os.path.join(output_directory, video_filename.strip(input_directory)
-                            .strip('.mp4').strip('.mov').strip('.MOV').strip('.avi').split('/')[-1] + '_')
-
-    return save_dir
-
-
 def get_videos(input_directory):
     filenames = []
     for dirName, subdirList, fileList in os.walk(input_directory):
         for filename in fileList:
             ext = '.' + filename.split('.')[-1]
-            if ext in ['.mov', '.mp4', '.avi', '.MOV']:
+            if ext in ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']:
                 filenames.append(dirName + '/' + filename)
 
     return filenames
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Retinaface')
-
-    parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
-                        type=str, help='Trained face detector state_dict path')
-    parser.add_argument('--network', default='resnet50', help='Backbone network. mobile0.25 or resnet50')
-    # TODO make CUDA arg instead
+    parser = argparse.ArgumentParser(description='Save face detection results')
+    parser.add_argument('--detector', '-d', type=str, required=True, help="Path to a trained face detector .pth file")
+    parser.add_argument('--detector_type', '-t', type=str, required=True, help="Type of face detector. One of "
+                                                                               "blazeface, ssd, or retinaface.")
     parser.add_argument('--cuda', '-c', action="store_true", default=False, help='Use CUDA')
-    parser.add_argument('--confidence_threshold', default=0.5, type=float, help='Bounding box IoU required to count as '
-                                                                                'correct')
-    parser.add_argument('--top_k', default=1000, type=int, help='top_k')
-    parser.add_argument('--nms_threshold', default=0.05, type=float, help='nms_threshold')
-    parser.add_argument('--keep_top_k', default=250, type=int, help='keep_top_k')
-    # TODO not currently used
-    parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
-    parser.add_argument('--output_directory', default='ground_truth_detections_lowlight/', type=str,
-                        help='directory to store detected labels')
-    parser.add_argument('--input_directory', default='test_videos/', type=str,
+    parser.add_argument('--input_directory', '-i', default='test_videos/', type=str,
                         help='directory where test videos are located')
+    parser.add_argument('--output_directory', '-o', default='ground_truth_detections_lowlight/', type=str,
+                        help='directory to store detected labels')
 
     args = parser.parse_args()
 
+    device = torch.device('cuda:0') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
+
     create_directory(args.output_directory)
 
     torch.set_grad_enabled(False)
-    cfg = None
-    if args.network == "mobile0.25":
-        cfg = cfg_mnet
-    elif args.network == "resnet50":
-        cfg = cfg_re50
-
-    # load the network
-    net = RetinaFace(cfg=cfg, phase='test')
-
-    # load the model weights # TODO rename method load_model
-    net = load_model(net, args.trained_model, args.cuda)
-    net.eval()
-    print('Finished loading model!')
-    print(net)
-    cudnn.benchmark = True
-    device = torch.device("cuda:0" if args.cuda else "cpu")
-    net = net.to(device)
-
-    resize = 0.4
+
+    # load the face detector
+    detector = FaceDetector(detector=args.detector, detector_type=args.detector_type,
+                            cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
 
     video_files = get_videos(args.input_directory)
 
     for video in video_files:
-        cap = cv2.VideoCapture(video)
-        storage_location = get_storage_location(args.output_directory, video, args.input_directory)
-        create_directory(storage_location)
         print("Video: ", video)
-
-        # testing begin
-        if cap.isOpened():
-            frame_number = 0
-            while True:
-                ret, img_raw = cap.read()
-                if not ret:
-                    break
-                img = np.float32(img_raw)
-                img = cv2.resize(img, (int(img.shape[1] * resize), int(img.shape[0] * resize)))
-
-                # TODO does this vvv code appear in Retinaface/ ? Or possibly in main.py
-
-                im_height, im_width, _ = img.shape
-                scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
-                img -= (104, 117, 123)
-                img = img.transpose(2, 0, 1)
-                img = torch.from_numpy(img).unsqueeze(0)
-                img = img.to(device)
-                scale = scale.to(device)
-
-                tic = time.time()
-                loc, conf, landms = net(img)  # forward pass
-                # print('net forward time: {:.4f}'.format(time.time() - tic))
-
-                priorbox = PriorBox(cfg, image_size=(im_height, im_width))
-                priors = priorbox.forward()
-                priors = priors.to(device)
-                prior_data = priors.data
-                boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
-                boxes = boxes * scale / resize
-                boxes = boxes.cpu().numpy()
-                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
-                landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
-                scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
-                                       img.shape[3], img.shape[2], img.shape[3], img.shape[2],
-                                       img.shape[3], img.shape[2]])
-                scale1 = scale1.to(device)
-                landms = landms * scale1 / resize
-                landms = landms.cpu().numpy()
-
-                # ignore low scores
-                inds = np.where(scores > args.confidence_threshold)[0]
-                boxes = boxes[inds]
-                landms = landms[inds]
-                scores = scores[inds]
-
-                # keep top-K before NMS
-                order = scores.argsort()[::-1][:args.top_k]
-                boxes = boxes[order]
-                landms = landms[order]
-                scores = scores[order]
-
-                # do NMS
-                dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
-                keep = nms_numpy(dets, args.nms_threshold)
-                dets = dets[keep, :]
-                landms = landms[keep]
-
-                # keep top-K faster NMS
-                dets = dets[:args.keep_top_k, :]
-                landms = landms[:args.keep_top_k, :]
-
-                # dets = np.concatenate((dets, landms), axis=1)
-                output_file = os.path.join(storage_location, 'frame' + str(frame_number) + '.txt')
-                f = open(output_file, "w")
-                for detection in dets:
-                    for coord in detection:
-                        f.write(str(coord) + " ")
-                    f.write("\n")
-                f.close()
-
-                frame_number += 1
+        cap = cv2.VideoCapture(video)
+        rotate_code = check_rotation(video)
+        file_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        detections = []
+
+        for frame_num in tqdm(range(file_len)):
+            _, frame = cap.read()
+            if rotate_code is not None:
+                frame = correct_rotation(frame, rotate_code)
+            boxes = detector.detect(frame)
+            detections.append([video, frame_num, boxes])
+
+        # save detections to csv one video at a time
+        with open(DETECTIONS_FILE, "a") as f:
+            writer = csv.writer(f)
+            writer.writerows(detections)
 
     exit(0)
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index ef8f730f..d751e882 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -25,6 +25,7 @@
 # TODO - TODO TODO don't evaluate face detection? Would have to manually label faces but we're using a
 # TODO - SOTA face detection model that could just empirically be observed to work
 # TODO make comments with @param things
+# TODO load detections as csv file
 
 
 class Evaluator():
diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py
index b5bd6104..644a483b 100644
--- a/scripts/face_extractor.py
+++ b/scripts/face_extractor.py
@@ -20,7 +20,7 @@
 
 warnings.filterwarnings('once')
 IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG']
-VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV']
+VIDEO_EXT = ['.mp4', '.MP4', '.mov', '.MOV', '.avi', '.AVI']
 
 
 def get_images(input_dir):
diff --git a/scripts/utils.py b/scripts/utils.py
index 3c8d5336..bb1ddb16 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -14,15 +14,17 @@
 """
 
 
-def check_rotation(path_video_file):
+def check_rotation(path_video_file: str):
+    # only .mov files (any letter case) need to be rotated; skip everything else
+    if not path_video_file.lower().endswith('.mov'):
+        return None
+
     # this returns meta-data of the video file in form of a dictionary
     meta_dict = ffmpeg.probe(path_video_file)
 
     # from the dictionary, meta_dict['streams'][0]['tags']['rotate'] is the key
     # we are looking for
     rotate_code = None
-    if 'rotate' not in meta_dict['streams'][0]['tags'].keys():
-        return rotate_code
     if int(meta_dict['streams'][0]['tags']['rotate']) == 90:
         rotate_code = cv2.ROTATE_90_CLOCKWISE
     elif int(meta_dict['streams'][0]['tags']['rotate']) == 180:

From 899cd1ccda83c93fb6699f582147c15dd1b5edfd Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 23 Jul 2020 09:35:09 -0400
Subject: [PATCH 14/25] Change csv storage format

---
 scripts/annotator.py |   8 +-
 scripts/evaluator.py | 256 ++++++++++++++++++++++---------------------
 2 files changed, 138 insertions(+), 126 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 34e489c1..bf51b9f1 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -76,7 +76,13 @@ def get_videos(input_directory):
             if rotate_code is not None:
                 frame = correct_rotation(frame, rotate_code)
             boxes = detector.detect(frame)
-            detections.append([video, frame_num, boxes])
+            detection = [video, frame_num]
+
+            # each box is one set of face coords
+            for box in boxes:
+                for b in box:
+                    detection.append(b)
+            detections.append(detection)
 
         # save detections to csv one video at a time
         with open(DETECTIONS_FILE, "a") as f:
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index d751e882..3ae3f961 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -1,4 +1,5 @@
 import argparse
+import csv
 import json
 import os
 import warnings
@@ -12,8 +13,9 @@
 from scripts.goggle_classifier import get_model
 from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector, Classifier
+from src.jetson.models.utils.box_utils import matrix_iou
 
-DETECTIONS_FILE = 'det_results.txt'
+PRED_DETECTIONS_FILE = 'detection_predictions.txt'
 CLASSIFICATION_RESULTS_FILE = 'results.json'
 VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
@@ -22,27 +24,25 @@
 Videos to be evaluated should be from the TestVideos folder on the Drive.
 """
 
-# TODO - TODO TODO don't evaluate face detection? Would have to manually label faces but we're using a
-# TODO - SOTA face detection model that could just empirically be observed to work
+
 # TODO make comments with @param things
-# TODO load detections as csv file
 
 
 class Evaluator():
-    def __init__(self, cuda, detector, detector_type, classifier, input_directory, annotation_path, rate=1):
+    def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file):
         """
         Evaluates face detection and goggle classification performance.
         Goggle Classification accuracy is given by average class accuracy and individual
         video accuracy.
         Face detection accuracy is given by precision and recall values.
 
-        Args:
-            cuda: A bool value that specifies if cuda shall be used
-            detector: A string path to a .pth weights file for a face detection model
-            classifier: A string path to a .pth weights file for a goggle classification model
-            input_directory: Directory containing test videos to run Evaluator on
-            annotation_path: Directory containing annotation files (output by annotator.py)
-            rate: Run detection and classification on every 1/rate frames
+        @param cuda: A bool value that specifies if cuda shall be used
+        @param detector: A string path to a .pth weights file for a face detection model
+        @param detector_type: One of 'blazeface', 'ssd', 'retinaface'.
+        @param classifier: A string path to a .pth weights file for a goggle classification model
+        @param input_directory: Directory containing test videos to run Evaluator on
+        @param rate: Run detection and classification on every 1/rate frames
+        @param det_file: CSV generated by annotator.py containing detection results
         """
 
         if cuda and torch.cuda.is_available():
@@ -52,16 +52,21 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
             torch.set_default_tensor_type('torch.FloatTensor')
             self.device = torch.device('cpu')
 
-        if os.path.exists(DETECTIONS_FILE):
-            os.remove(DETECTIONS_FILE)
+        if os.path.exists(PRED_DETECTIONS_FILE):
+            os.remove(PRED_DETECTIONS_FILE)
+
+        self.detector = FaceDetector(detector=detector, detector_type=detector_type,
+                                     cuda=cuda and torch.cuda.is_available(), set_default_dev=True)
+
+        weights = torch.load(classifier, map_location=self.device)
+        if isinstance(weights, dict):
+            # if the .pth is just a state_dict, we need to
+            # load the model from goggle_classifier.py
+            model = get_model()
+            model.load_state_dict(weights)
+            weights = model
 
-        self.detector = FaceDetector(detector=detector, detector_type=detector_type, cuda=cuda and torch.cuda.is_available(),
-                                     set_default_dev=True)
-        # TODO check state_dict vs. not
-        model = get_model()
-        model.load_state_dict(torch.load(classifier, map_location=self.device))
-        self.classifier = Classifier(model, self.device)
-        #self.classifier = Classifier(torch.load(classifier, map_location=self.device), self.device)
+        self.classifier = Classifier(weights, cuda)
         self.video_filenames = self.get_video_files(input_directory)
         self.results = {'Goggles':
                             {'average_class_accuracy': 0.0,
@@ -85,16 +90,15 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, a
         self.video = ''
         self.video_len = 0
         self.rate = rate
-        self.evaluate(annotation_path)
+        self.det_file = det_file
+        self.evaluate()
 
-    def evaluate(self, annotation_path: str):
+    def evaluate(self):
         """
         Evaluates (classification and detection) every video file in the input directory
         containing test videos and stores results in self.results.
         To understand the format of self.results dict, check the constructor
 
-        Args:
-            annotation_path - Directory containing all the annotations of face detections
         """
         total_videos_processed = 0
         for video_file in self.video_filenames:
@@ -106,8 +110,7 @@ def evaluate(self, annotation_path: str):
             self.cap = cv2.VideoCapture(self.video)
 
             if self.cap.isOpened():
-                classification_result = self.evaluate_classifications()  # Also contains boxes
-                self.record_results(classification_result)
+                self.evaluate_classifications()  # Also contains boxes
                 total_videos_processed += 1
                 print(f"{self.video} : Done")
             else:
@@ -116,18 +119,15 @@ def evaluate(self, annotation_path: str):
 
         self.calculate_average_class_accuracy()
 
-        # ------- classification ^^^ detection vvv
-
         # TODO why is this returning something
-        # TODO make it an optional arg to evaluate face detection
-        #detection_results = self.evaluate_detections(annotation_path, DETECTIONS_FILE)
+        if self.det_file is not None:
+            detection_results = self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE)
 
         print(f"\n {total_videos_processed} videos processed!")
 
     def evaluate_classifications(self):
         """
-        Returns the accuracy (percentage_of_correct_predictions) of the
-        predictions for a video
+        Run classification on one video, save classification results
         """
         inferences = self.infer()
         if sum(inferences.values()) == 0:
@@ -135,31 +135,35 @@ def evaluate_classifications(self):
         else:
             percentage_of_correct_predictions = inferences[self.class_label] / sum(inferences.values())
 
-        return percentage_of_correct_predictions, inferences, sum(inferences.values())
+        self.record_results((percentage_of_correct_predictions, inferences, sum(inferences.values())))
 
-    def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file, overlap_threshold=0.5):
+    def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file):
         """
         Calculates the recall and precision of face detection for a video.
         TODO explain what that means... seems like overlap of x and y coords? I.e. IoU?
 
         @param ground_truth_detections_file: file containing actual face detections (created by annotator.py)
         @param predicted_detections_file: file containing predicted face detections
-        @param overlap_threshold: IoU greater than threshold counts as correct, less than is incorrect
         """
 
-        with open(ground_truth_detections_file) as detect_file:
-            ground_truth_detections = json.load(detect_file)
+        ground_truth_detections = []
+        predicted_detections = []
+        with open(ground_truth_detections_file, newline='') as detect_file:
+            reader = csv.reader(detect_file)
+            for row in reader:
+                ground_truth_detections.append(row)
 
-        with open(predicted_detections_file, 'r') as prediction_file:
-            predicted_detections = json.load(prediction_file)
+        with open(predicted_detections_file, newline='') as prediction_file:
+            reader = csv.reader(prediction_file)
+            for row in reader:
+                predicted_detections.append(row)
 
-        # TODO fix below based on detections format
-        total_ground_truths = 0
-        for frame_id in ground_truth_detections:
-            total_ground_truths += len(ground_truth_detections[frame_id])
+        total_ground_truths = len(ground_truth_detections)
+        true_pos = 0
+        false_pos = 0
 
-        # TODO ugly parsing and such here. Need to debug it. ==1 means...?
-        if any(predicted_detections) == 1:
+        for d in predicted_detections:
+            """            
             splitlines = [x.strip().split('|') for x in predicted_detections]
             image_ids = [x[0] for x in splitlines]
             confidence = np.array([float(x[5]) for x in splitlines])
@@ -170,12 +174,9 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections
             sorted_scores = np.sort(-confidence)
             bboxes = bboxes[sorted_ind, :]
             image_ids = [image_ids[x] for x in sorted_ind]
+            """
 
-            nd = len(image_ids)
-            true_pos = np.zeros(nd)
-            false_pos = np.zeros(nd)
-
-            # TODO for frame in frames?
+            """# TODO for frame in frames?
             for d in range(nd):
                 try:
                     bbox = bboxes[d, :].astype(float)
@@ -199,89 +200,58 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections
                         max_overlap = np.max(overlaps)
                         # jmax = np.argmax(overlaps)
 
-                    if max_overlap > overlap_threshold:
-                        true_pos[d] = 1.
+                    if max_overlap > 0.5:
+                        true_pos += 1.
                     else:
-                        false_pos[d] = 1.
-
-                except KeyError:
-                    continue
+                        false_pos += 1.
+            """
+
+            # only look at frames where a face was detected
+            if len(d) > 2:
+                ground_truth_bboxes = None
+                pred_bboxes = d[2:6]
+
+                # get matching frame detection from the ground_truth
+                for video_name, frame_num, _ in ground_truth_detections:
+                    if video_name == d[0] and frame_num == d[1]:
+                        # if the ground truth also detected a face in this frame
+                        if len(_) > 0:
+                            ground_truth_bboxes = _
+                        break
+
+                if ground_truth_bboxes is not None:
+                    # 0.5 IoU is commonly used to compare bounding boxes
+                    if matrix_iou(np.asarray(pred_bboxes), np.asarray(ground_truth_bboxes)) > 0.5:
+                        true_pos += 1
+                    else:
+                        false_pos += 1
+                else:
+                    # ground truth did not detect a face, but the prediction did
+                    false_pos += 1
 
             print("Total ground truths: ", total_ground_truths)
-            false_pos = np.cumsum(false_pos)
-            true_pos = np.cumsum(true_pos)
+
             recall = true_pos / float(total_ground_truths)
-            # avoid divide by zero in case the first detection matches a difficult
-            # ground truth
+            # avoid divide by zero in case the first detection matches a difficult ground truth
             precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
-        else:
-            recall = -1.
-            precision = -1.
-            ap = -1.
 
         print("Precision: ", precision)
         print("Recall: ", recall)
-
+        # TODO difference between ^ and v
         return precision[len(precision)], recall[len(recall)]  # final precision, recall
 
-    def calculate_average_class_accuracy(self):
-        """
-        Calculates the average class accuracy for each class and stores it in self.results
-        """
-        for class_label in self.results:
-            if self.results[class_label]['number_of_videos'] > 0:
-                self.results[class_label]['average_class_accuracy'] = self.results[class_label][
-                                                                          'average_class_accuracy'] / \
-                                                                      self.results[class_label]['number_of_videos']
-
-    def record_results(self, result):
-        """
-        Records results of one video in the self.results dict
-
-        @param result(List) - contains the classification accuracy,
-        number of predictions for each label, number of detections
-        """
-        self.results[self.class_label]['number_of_videos'] += 1
-        # below is just a running sum which gets divided by the number of videos at the end
-        self.results[self.class_label]['average_class_accuracy'] += result[0]
-        self.results[self.class_label]['individual_video_results'][self.video] = {}
-        self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
-        self.results[self.class_label]['individual_video_results'][self.video]["glasses"] = result[1]['Glasses']
-        self.results[self.class_label]['individual_video_results'][self.video]["goggles"] = result[1]['Goggles']
-        self.results[self.class_label]['individual_video_results'][self.video]["neither"] = result[1]['Neither']
-        self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2]
-        self.results[self.class_label]['individual_video_results'][self.video]["num_frames"] = self.video_len
-        self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition
-
-    def record_detections(self, file, detections):
-        """
-        Save face detections in a file for evaluation
-        TODO improve how this is stored
-        Args:
-            file (str): Records detections here
-            detections (List): contains all the bounding boxes and confidence values
-        """
-        f = open(file, "a+")
-        for detection in detections:
-            for element in detection:
-                f.write(str(element))
-                f.write("|")
-            f.write("\n")
-        f.close()
-
     def infer(self):
         """
         Performs inference on a video using the face detection
         and goggle classification models.
-        @param rate: How often to run detection (every 1/rate frames).
-        It returns:
-        1) inference_dict: the number of inferences for each class.
+        Also saves the face detections if they're going to be compared later
+
+        @return inference_dict: the number of inferences for each class
         """
-        bboxes = []
+        detections = []
         preds = []
         inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0}
 
-        # check if the video needs to be rotated
         rotate_code = check_rotation(self.video)
         self.video_len = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
@@ -290,9 +260,8 @@ def infer(self):
             if frame_num % self.rate == 0 and img is not None:
                 if rotate_code is not None:
                     correct_rotation(img, rotate_code)
-                frame_id = self.video.strip('.avi').strip('.mp4').strip('.MOV').strip('.mov').split('/')[-1] + "_" + str(
-                    frame_num)
                 boxes = self.detector.detect(img)  # Also contains confidence
+                detection = [self.video, frame_num]
                 for box in boxes:
                     x1 = max(0, box[0])
                     y1 = max(0, box[1])
@@ -302,15 +271,52 @@ def infer(self):
                     face = img[int(y1):int(y2), int(x1):int(x2), :]
                     label = self.classifier.classifyFace(face)
                     preds.append(label.item())
-                    bboxes.append([frame_id, x1, y1, x2, y2, conf])
+                    detection.append(x1, y1, x2, y2)
+                detections.append(detection)
 
         inference_dict["Glasses"] += preds.count(0)
         inference_dict["Goggles"] += preds.count(1)
         inference_dict["Neither"] += preds.count(2)
 
-        self.record_detections(DETECTIONS_FILE, bboxes)
+        # save the detections for comparison later
+        if self.det_file is not None:
+            with open(PRED_DETECTIONS_FILE, "a") as f:
+                writer = csv.writer(f)
+                writer.writerows(detections)
+
         return inference_dict
 
+    def calculate_average_class_accuracy(self):
+        """
+        Calculates the average class accuracy for each class and stores it in self.results
+        """
+        for class_label in self.results:
+            if self.results[class_label]['number_of_videos'] > 0:
+                self.results[class_label]['average_class_accuracy'] = self.results[class_label][
+                                                                          'average_class_accuracy'] / \
+                                                                      self.results[class_label]['number_of_videos']
+
+    def record_results(self, result):
+        """
+        Records results of one video in the self.results dict.
+        All of this information is necessary for getting detailed face detection results
+        and creating classifier confusion matrices.
+
+        @param result(List) - contains the classification accuracy,
+        number of predictions for each label, number of detections
+        """
+        self.results[self.class_label]['number_of_videos'] += 1
+        # average_class_accuracy is a running sum which gets divided by the number of videos at the end
+        self.results[self.class_label]['average_class_accuracy'] += result[0]
+        self.results[self.class_label]['individual_video_results'][self.video] = {}
+        self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
+        self.results[self.class_label]['individual_video_results'][self.video]["glasses"] = result[1]['Glasses']
+        self.results[self.class_label]['individual_video_results'][self.video]["goggles"] = result[1]['Goggles']
+        self.results[self.class_label]['individual_video_results'][self.video]["neither"] = result[1]['Neither']
+        self.results[self.class_label]['individual_video_results'][self.video]["num_detections"] = result[2]
+        self.results[self.class_label]['individual_video_results'][self.video]["num_frames"] = self.video_len
+        self.results[self.class_label]['individual_video_results'][self.video]["condition"] = self.condition
+
     def get_class_label(self):
         """
         Get class label [Goggles / Glasses / Neither] that the image belongs to
@@ -360,17 +366,17 @@ def get_evaluator_results(self):
                         help="Path to a trained classifier .pth file")
     parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
     parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
-    parser.add_argument('--annotation_path', type=str, required=True, help="Path to a directory containing annotation "
-                                                                           "files")
+    parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py."
+                                                           "If given, the detections will be compared.")
     parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.')
-    # TODO add store_true args for detection, evaluation (to do separately if desired)
 
     args = parser.parse_args()
 
     if not args.input_directory:
         raise Exception("Invalid input directory")
-    evaluator = Evaluator(args.cuda, args.detector, args.detector_type, args.classifier, args.input_directory,
-                          args.annotation_path)
+
+    evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier, args.input_directory,
+                          args.rate, args.detection_file)
     individual_video_results = evaluator.get_evaluator_results()
 
     with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file:

From 0f5ded333858b25221e9fcfd350a629780dc5b5b Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 23 Jul 2020 16:34:55 -0400
Subject: [PATCH 15/25] Add IoU calculation

---
 scripts/evaluator.py | 88 ++++++++++----------------------------------
 scripts/utils.py     | 32 ++++++++++++++++
 2 files changed, 51 insertions(+), 69 deletions(-)

diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 3ae3f961..9f927575 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -7,27 +7,23 @@
 import cv2
 import numpy as np
 import torch
-import torchvision
 from tqdm import tqdm
 
 from scripts.goggle_classifier import get_model
-from scripts.utils import check_rotation, correct_rotation
 from src.jetson.main import FaceDetector, Classifier
-from src.jetson.models.utils.box_utils import matrix_iou
+from scripts.utils import check_rotation, correct_rotation, bbox_iou
 
-PRED_DETECTIONS_FILE = 'detection_predictions.txt'
+PRED_DETECTIONS_FILE = 'detection_predictions.csv'
 CLASSIFICATION_RESULTS_FILE = 'results.json'
 VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
 """
-Use this script with annotator.py. 
+Evaluate classification and (optionally) face detection ability on a set of videos.
 Videos to be evaluated should be from the TestVideos folder on the Drive.
+To compare face detection models, run annotator.py first.
 """
 
 
-# TODO make comments with @param things
-
-
 class Evaluator():
     def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file):
         """
@@ -98,7 +94,6 @@ def evaluate(self):
         Evaluates (classification and detection) every video file in the input directory
         containing test videos and stores results in self.results.
         To understand the format of self.results dict, check the constructor
-
         """
         total_videos_processed = 0
         for video_file in self.video_filenames:
@@ -119,9 +114,8 @@ def evaluate(self):
 
         self.calculate_average_class_accuracy()
 
-        # TODO why is this returning something
         if self.det_file is not None:
-            detection_results = self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE)
+            self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE)
 
         print(f"\n {total_videos_processed} videos processed!")
 
@@ -140,7 +134,7 @@ def evaluate_classifications(self):
     def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file):
         """
         Calculates the recall and precision of face detection for a video.
-        TODO explain what that means... seems like overlap of x and y coords? I.e. IoU?
+        Defined by 0.5 IoU or greater with ground truth bounding box.
 
         @param ground_truth_detections_file: file containing actual face detections (created by annotator.py)
         @param predicted_detections_file: file containing predicted face detections
@@ -158,70 +152,26 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections
             for row in reader:
                 predicted_detections.append(row)
 
-        total_ground_truths = len(ground_truth_detections)
         true_pos = 0
         false_pos = 0
 
         for d in predicted_detections:
-            """            
-            splitlines = [x.strip().split('|') for x in predicted_detections]
-            image_ids = [x[0] for x in splitlines]
-            confidence = np.array([float(x[5]) for x in splitlines])
-            bboxes = np.array([[float(z) for z in x[1:5]] for x in splitlines])
-
-            # sort by confidence
-            sorted_ind = np.argsort(-confidence)
-            sorted_scores = np.sort(-confidence)
-            bboxes = bboxes[sorted_ind, :]
-            image_ids = [image_ids[x] for x in sorted_ind]
-            """
-
-            """# TODO for frame in frames?
-            for d in range(nd):
-                try:
-                    bbox = bboxes[d, :].astype(float)
-                    max_overlap = -np.inf
-                    bbox_ground_truth_detections = np.asarray(ground_truth_detections[image_ids[d]], dtype=np.float32)
-                    if bbox_ground_truth_detections.size > 0:
-                        # TODO max and min variable names are backwards?
-                        ixmin = np.maximum(bbox_ground_truth_detections[:, 0], bbox[0])
-                        iymin = np.maximum(bbox_ground_truth_detections[:, 1], bbox[1])
-                        ixmax = np.minimum(bbox_ground_truth_detections[:, 2], bbox[2])
-                        iymax = np.minimum(bbox_ground_truth_detections[:, 3], bbox[3])
-                        iw = np.maximum(ixmax - ixmin, 0.)
-                        ih = np.maximum(iymax - iymin, 0.)
-                        # TODO debug. inters = intersection? uni = union? Overlaps is actual value?
-                        # TODO import IoU from box_utils should work
-                        inters = iw * ih
-                        uni = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) +
-                               (bbox_ground_truth_detections[:, 2] - bbox_ground_truth_detections[:, 0]) *
-                               (bbox_ground_truth_detections[:, 3] - bbox_ground_truth_detections[:, 1]) - inters)
-                        overlaps = inters / uni
-                        max_overlap = np.max(overlaps)
-                        # jmax = np.argmax(overlaps)
-
-                    if max_overlap > 0.5:
-                        true_pos += 1.
-                    else:
-                        false_pos += 1.
-            """
-
             # only look at frames where a face was detected
             if len(d) > 2:
                 ground_truth_bboxes = None
                 pred_bboxes = d[2:6]
 
                 # get matching frame detection from the ground_truth
-                for video_name, frame_num, _ in ground_truth_detections:
-                    if video_name == d[0] and frame_num == d[1]:
-                        # if the ground truth also detected a face in this frame
-                        if len(_) > 0:
-                            ground_truth_bboxes = _
+                for detection in ground_truth_detections:
+                    if detection[0] == d[0] and detection[1] == d[1]:
+                        if len(detection) > 2:
+                            # if the ground truth also detected a face in this frame
+                            ground_truth_bboxes = detection[2:6]
                         break
 
                 if ground_truth_bboxes is not None:
                     # 0.5 IoU is commonly used to compare bounding boxes
-                    if matrix_iou(np.asarray(pred_bboxes), np.asarray(ground_truth_bboxes)) > 0.5:
+                    if bbox_iou(pred_bboxes, ground_truth_bboxes) > 0.5:
                         true_pos += 1
                     else:
                         false_pos += 1
@@ -229,16 +179,16 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections
                     # ground truth did not detect a face, but the prediction did
                     false_pos += 1
 
-            print("Total ground truths: ", total_ground_truths)
+        total_ground_truths = len(ground_truth_detections)
+        print("Total ground truths: ", total_ground_truths)
 
-            recall = true_pos / float(total_ground_truths)
-            # avoid divide by zero in case the first detection matches a difficult ground truth
-            precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
+        recall = true_pos / float(total_ground_truths)
+        # avoid divide by zero in case the first detection matches a difficult ground truth
+        precision = true_pos / np.maximum(true_pos + false_pos, np.finfo(np.float64).eps)
 
         print("Precision: ", precision)
         print("Recall: ", recall)
-        # TODO difference between ^ and v
-        return precision[len(precision)], recall[len(recall)]  # final precision, recall
+        return precision, recall
 
     def infer(self):
         """
@@ -271,7 +221,7 @@ def infer(self):
                     face = img[int(y1):int(y2), int(x1):int(x2), :]
                     label = self.classifier.classifyFace(face)
                     preds.append(label.item())
-                    detection.append(x1, y1, x2, y2)
+                    detection.extend([x1, y1, x2, y2, conf])
                 detections.append(detection)
 
         inference_dict["Glasses"] += preds.count(0)
diff --git a/scripts/utils.py b/scripts/utils.py
index bb1ddb16..708a88a9 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -37,3 +37,35 @@ def check_rotation(path_video_file: str):
 
 def correct_rotation(frame, rotate_code):
     return cv2.rotate(frame, rotate_code)
+
+
+def bbox_iou(boxA, boxB):
+    """
+    Calculate IoU (Intersection over Union) of two bounding boxes.
+    @param boxA: the top left and bottom right coords of the box
+    as a list [xmin, ymin, xmax, ymax]; numbers or numeric strings (csv rows)
+    @param boxB: the other box, same format as boxA.
+    It doesn't matter which one is the ground truth bounding box.
+    @return: the IoU as a float. Coordinates are treated as inclusive pixel
+    indices, which is why every width/height below has +1 added.
+    """
+
+    # convert into new lists so the caller's boxes are never mutated
+    boxA = [float(v) for v in boxA]
+    boxB = [float(v) for v in boxB]
+
+    # determine the (x, y)-coordinates of the intersection rectangle
+    xA = max(boxA[0], boxB[0])
+    yA = max(boxA[1], boxB[1])
+    xB = min(boxA[2], boxB[2])
+    yB = min(boxA[3], boxB[3])
+
+    # compute the area of intersection rectangle (0 when the boxes are disjoint)
+    inter_area = max(0, xB - xA + 1) * max(0, yB - yA + 1)
+    # compute the area of both the prediction and ground-truth
+    # rectangles
+    boxA_area = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
+    boxB_area = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
+
+    iou = inter_area / float(boxA_area + boxB_area - inter_area)
+    return iou
\ No newline at end of file

From 1f0f2a552ffe3202e041e80b5ed830e8cf0681f8 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 23 Jul 2020 17:00:35 -0400
Subject: [PATCH 16/25] Update imports

---
 scripts/annotator.py      | 2 +-
 scripts/evaluator.py      | 3 ++-
 scripts/face_extractor.py | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/annotator.py b/scripts/annotator.py
index bf51b9f1..67cb4afe 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -8,7 +8,7 @@
 import torch
 from tqdm import tqdm
 
-from src.jetson.main import FaceDetector
+from src.jetson.face_detector import FaceDetector
 from scripts.utils import check_rotation, correct_rotation
 
 """
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 9f927575..4a896cbb 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -10,7 +10,8 @@
 from tqdm import tqdm
 
 from scripts.goggle_classifier import get_model
-from src.jetson.main import FaceDetector, Classifier
+from src.jetson.face_detector import FaceDetector
+from src.jetson.classifier import Classifier
 from scripts.utils import check_rotation, correct_rotation, bbox_iou
 
 PRED_DETECTIONS_FILE = 'detection_predictions.csv'
diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py
index 644a483b..95d50615 100644
--- a/scripts/face_extractor.py
+++ b/scripts/face_extractor.py
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 
 from scripts.utils import check_rotation, correct_rotation
-from src.jetson.main import FaceDetector
+from src.jetson.face_detector import FaceDetector
 
 """
 Given a folder of images or videos, run a face detector (literally a FaceDetector) on all images 

From 2170dfb9abe82fd6ef47fdbe921c3cd5e37255c1 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 23 Jul 2020 19:21:58 -0400
Subject: [PATCH 17/25] Update video_capturer device

---
 src/jetson/main.py           | 3 ---
 src/jetson/video_capturer.py | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/jetson/main.py b/src/jetson/main.py
index e76be8f7..9101c6b6 100644
--- a/src/jetson/main.py
+++ b/src/jetson/main.py
@@ -1,4 +1,3 @@
-import argparse
 import os
 import time
 import datetime
@@ -8,8 +7,6 @@
 
 import cv2
 import torch
-from torch.autograd import Variable
-from torchvision import transforms
 
 from src.jetson.face_detector import FaceDetector 
 from src.jetson.video_capturer import VideoCapturer
diff --git a/src/jetson/video_capturer.py b/src/jetson/video_capturer.py
index 04319250..9a94b267 100644
--- a/src/jetson/video_capturer.py
+++ b/src/jetson/video_capturer.py
@@ -32,7 +32,7 @@ def gstreamer_pipeline(
 
 
 class VideoCapturer(object):
-    def __init__(self, gstreamer, dev=1):
+    def __init__(self, gstreamer, dev=0):
         """
         This class captures videos using open-cv's VideoCapture object
         Args:

From 1205013006cd948918e65de04298e6af2c45f8d7 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Tue, 28 Jul 2020 10:18:38 -0400
Subject: [PATCH 18/25] Fix .mov mistake

---
 scripts/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/utils.py b/scripts/utils.py
index 708a88a9..657666e3 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -16,7 +16,7 @@
 
 def check_rotation(path_video_file: str):
     # only .mov files need to be rotated
-    if path_video_file.split('.')[-1] != '.MOV' or '.mov':
+    if path_video_file.split('.')[-1].upper() != 'MOV':
         return None
 
     # this returns meta-data of the video file in form of a dictionary

From f12531253c7c8030c50513147c185e320b5aba9d Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Tue, 28 Jul 2020 12:39:42 -0400
Subject: [PATCH 19/25] collect_images: filter query by today's date, update imports

---
 scripts/collect_images.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 04ba2467..51e0cb9d 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -2,14 +2,14 @@
 import datetime
 import json
 
-from src.jetson.db.db_connection import sql_cursor
-
-# from wherever import email method
+from src.db.db_connection import sql_cursor
+from scripts.automatic_notification import send_email
 
 """
 Put this file one folder up from the stored images.
-Eg. on the HELPS machine: if /local/b/embedvis/imgs contains images, 
+Eg. on ee220clnx1: if /local/b/embedvis/imgs contains images, 
 this file's path should be /local/b/embedvis/collect_images.py
+Set up a cron job to run this script daily.
 
 Collect images of non-goggle detections from the database.
 Upload images and metadata to Google Drive.
@@ -31,6 +31,7 @@ def get_metadata():
     """
 
     metadata = []
+    current_date = (datetime.date.today(),)
 
     # make sql connection
     # execute query
@@ -38,13 +39,13 @@ def get_metadata():
         try:
             cursor.execute('USE goggles')
             cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, '
-                           'i.image_name, i.init_vector from bbox AS b, image as i where '
-                           'b.image_name=i.image_name and b.goggles=False')
+                           'b.init_vector, i.image_name, i.image_date from BBOX AS b, IMAGE as i where '
+                           'b.image_name=i.image_name and i.image_date=? and b.goggles=False', current_date)
 
-            for (image_name, x_min, y_min, x_max, y_max, image_name, init_vector) in cursor:
+            for (image_name, x_min, y_min, x_max, y_max, init_vector, image_name, image_date) in cursor:
                 metadata.append({'image_name': image_name,
                                  'x_min': float(x_min),  # JSON cannot serialize Decimals.
-                                 'y_min': float(y_min),  # If there is a better way to do this, someone let me know.
+                                 'y_min': float(y_min),  # If there is a better way to do this, let me know.
                                  'x_max': float(x_max),
                                  'y_max': float(y_max),
                                  'init_vector': init_vector
@@ -75,15 +76,13 @@ def upload_files(metadata, dir):
     # subprocess rclone copy METADATA_FILE [Drive name]:
 
 
-# TODO call Seoyoung's method to email
+# TODO call Seoyoung's method to email???
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Collect images.')
     parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
     args = parser.parse_args()
 
-    current_date = datetime.datetime.now().strftime("%m-%d-%Y")
-
     # call the methods
     metadata = get_metadata()
     upload_files(metadata, args.directory)

From 865dba9290d575e69bb5cfdafad34252798ee8c2 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Mon, 3 Aug 2020 17:02:12 -0400
Subject: [PATCH 20/25] Split evaluation script and Evaluator class

---
 scripts/annotator.py  |  3 +-
 scripts/evaluation.py | 47 +++++++++++++++++++++++
 scripts/evaluator.py  | 86 ++++++++++++++++---------------------------
 3 files changed, 80 insertions(+), 56 deletions(-)
 create mode 100644 scripts/evaluation.py

diff --git a/scripts/annotator.py b/scripts/annotator.py
index 67cb4afe..dbbf9382 100644
--- a/scripts/annotator.py
+++ b/scripts/annotator.py
@@ -13,7 +13,7 @@
 
 """
 Run the face detector model on a folder of videos (most recently used on TestVideos from the Drive).
-Save bbox detections to a csv file to be compared in evaluator.py.
+Save bbox detections to a csv file to be compared in evaluation.py
 An earlier version of this script was used to compare Retinaface with
 a Mobilenet backbone versus a Resnet backbone; comparison of object
 detectors would be its most applicable use.
@@ -57,7 +57,6 @@ def get_videos(input_directory):
 
     torch.set_grad_enabled(False)
 
-    # load the face detector
     detector = FaceDetector(detector=args.detector, detector_type=args.detector_type,
                             cuda=args.cuda and torch.cuda.is_available(), set_default_dev=True)
 
diff --git a/scripts/evaluation.py b/scripts/evaluation.py
new file mode 100644
index 00000000..da7d3e6b
--- /dev/null
+++ b/scripts/evaluation.py
@@ -0,0 +1,47 @@
+import argparse
+import json
+import warnings
+
+import torch
+
+from scripts.evaluator import Evaluator
+
+PRED_DETECTIONS_FILE = 'detection_predictions.csv'
+CLASSIFICATION_RESULTS_FILE = 'results.json'
+
+"""
+Evaluate classification and (optionally) compare face detection models on a set of videos.
+Videos to be evaluated should be from the TestVideos folder on the Drive 
+to get correct labels and conditions.
+To compare face detection models, run annotator.py first.
+"""
+
+if __name__ == "__main__":
+    warnings.filterwarnings("once")
+    parser = argparse.ArgumentParser(description="Face detection")
+    parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth',
+                        help="Path to a trained face detector .pth file")
+    parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd")
+    parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
+                        help="Path to a trained classifier .pth file")
+    parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
+    parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
+    parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py."
+                                                           "If given, the detections will be compared.")
+    parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.')
+
+    args = parser.parse_args()
+
+    if not args.input_directory:
+        raise Exception("Invalid input directory")
+
+    evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier,
+                          args.input_directory, args.rate, args.detection_file, PRED_DETECTIONS_FILE)
+    individual_video_results = evaluator.get_evaluator_results()
+
+    with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file:
+        json.dump(individual_video_results, json_file, indent=4)
+
+    print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}")
+
+    exit()
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 4a896cbb..8c8aba74 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -14,23 +14,17 @@
 from src.jetson.classifier import Classifier
 from scripts.utils import check_rotation, correct_rotation, bbox_iou
 
-PRED_DETECTIONS_FILE = 'detection_predictions.csv'
-CLASSIFICATION_RESULTS_FILE = 'results.json'
-VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
-"""
-Evaluate classification and (optionally) face detection ability on a set of videos.
-Videos to be evaluated should be from the TestVideos folder on the Drive.
-To compare face detection models, run annotator.py first.
-"""
+VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
 
 class Evaluator():
-    def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate, det_file):
+    def __init__(self, cuda, detector, detector_type, classifier, input_directory, rate,
+                 comparison_dets_file, self_dets_file):
         """
         Evaluates face detection and goggle classification performance.
-        Goggle Classification accuracy is given by average class accuracy and individual
-        video accuracy.
+        Goggle Classification accuracy is given by average class accuracy and 
+        accuracy for each individual video.
         Face detection accuracy is given by precision and recall values.
 
         @param cuda: A bool value that specifies if cuda shall be used
@@ -39,7 +33,9 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r
         @param classifier: A string path to a .pth weights file for a goggle classification model
         @param input_directory: Directory containing test videos to run Evaluator on
         @param rate: Run detection and classification on every 1/rate frames
-        @param det_file: CSV generated by annotator.py containing detection results
+        @param comparison_dets_file: CSV generated by annotator.py containing detection results
+         of another detection model (to be compared)
+        @param self_dets_file: CSV generated by this class containing detections by self.detector
         """
 
         if cuda and torch.cuda.is_available():
@@ -49,9 +45,6 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r
             torch.set_default_tensor_type('torch.FloatTensor')
             self.device = torch.device('cpu')
 
-        if os.path.exists(PRED_DETECTIONS_FILE):
-            os.remove(PRED_DETECTIONS_FILE)
-
         self.detector = FaceDetector(detector=detector, detector_type=detector_type,
                                      cuda=cuda and torch.cuda.is_available(), set_default_dev=True)
 
@@ -87,15 +80,20 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r
         self.video = ''
         self.video_len = 0
         self.rate = rate
-        self.det_file = det_file
+        self.comparison_dets_file = comparison_dets_file
         self.evaluate()
+        self.self_dets_file = self_dets_file
+
+        if os.path.exists(self.self_dets_file):
+            os.remove(self.self_dets_file)
 
     def evaluate(self):
         """
         Evaluates (classification and detection) every video file in the input directory
         containing test videos and stores results in self.results.
-        To understand the format of self.results dict, check the constructor
+        To understand the format of the self.results dict, check the constructor
         """
+
         total_videos_processed = 0
         for video_file in self.video_filenames:
             self.video = video_file
@@ -115,8 +113,8 @@ def evaluate(self):
 
         self.calculate_average_class_accuracy()
 
-        if self.det_file is not None:
-            self.evaluate_detections(self.det_file, PRED_DETECTIONS_FILE)
+        if self.comparison_dets_file is not None:
+            self.evaluate_detections(self.comparison_dets_file, self.self_dets_file)
 
         print(f"\n {total_videos_processed} videos processed!")
 
@@ -124,6 +122,7 @@ def evaluate_classifications(self):
         """
         Run classification on one video, save classification results
         """
+
         inferences = self.infer()
         if sum(inferences.values()) == 0:
             percentage_of_correct_predictions = 0
@@ -135,10 +134,10 @@ def evaluate_classifications(self):
     def evaluate_detections(self, ground_truth_detections_file, predicted_detections_file):
         """
         Calculates the recall and precision of face detection for a video.
-        Defined by 0.5 IoU or greater with ground truth bounding box.
+        A "correct" detection is defined by 0.5 IoU or greater with the bounding box of the comparison detections.
 
-        @param ground_truth_detections_file: file containing actual face detections (created by annotator.py)
-        @param predicted_detections_file: file containing predicted face detections
+        @param ground_truth_detections_file: file containing detections to be compared (created by annotator.py)
+        @param predicted_detections_file: file containing detections by self.detector
         """
 
         ground_truth_detections = []
@@ -199,6 +198,7 @@ def infer(self):
 
         @return inference_dict: the number of inferences for each class
         """
+
         detections = []
         preds = []
         inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0}
@@ -230,8 +230,8 @@ def infer(self):
         inference_dict["Neither"] += preds.count(2)
 
         # save the detections for comparison later
-        if self.det_file is not None:
-            with open(PRED_DETECTIONS_FILE, "a") as f:
+        if self.comparison_dets_file is not None:
+            with open(self.self_dets_file, "a") as f:
                 writer = csv.writer(f)
                 writer.writerows(detections)
 
@@ -241,6 +241,7 @@ def calculate_average_class_accuracy(self):
         """
         Calculates the average class accuracy for each class and stores it in self.results
         """
+
         for class_label in self.results:
             if self.results[class_label]['number_of_videos'] > 0:
                 self.results[class_label]['average_class_accuracy'] = self.results[class_label][
@@ -254,10 +255,12 @@ def record_results(self, result):
         and creating classifier confusion matrices.
 
         @param result(List) - contains the classification accuracy,
-        number of predictions for each label, number of detections
+        number of predictions for each label, number of detections (see evaluate_classifications)
         """
+
         self.results[self.class_label]['number_of_videos'] += 1
         # average_class_accuracy is a running sum which gets divided by the number of videos at the end
+        # see calculate_average_class_accuracy
         self.results[self.class_label]['average_class_accuracy'] += result[0]
         self.results[self.class_label]['individual_video_results'][self.video] = {}
         self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
@@ -272,6 +275,7 @@ def get_class_label(self):
         """
         Get class label [Goggles / Glasses / Neither] that the image belongs to
         """
+
         if '/Goggles/' in self.video or '/goggles/' in self.video:
             class_label = 'Goggles'
         elif '/Glasses/' in self.video or '/glasses/' in self.video:
@@ -285,12 +289,14 @@ def get_condition(self):
         """
         Get condition [Ideal, low_lighting etc. ] that the image belongs to
         """
+
         return self.video.split('/')[-2]
 
     def get_video_files(self, input_directory: str):
         """
         Gets all the video files in the input directory
         """
+
         filenames = []
         for dirName, subdirList, fileList in os.walk(input_directory):
             for filename in fileList:
@@ -304,35 +310,7 @@ def get_evaluator_results(self):
         """
         Returns the dict containing all the test results (self.results)
         """
-        return self.results
-
-
-if __name__ == "__main__":
-    warnings.filterwarnings("once")
-    parser = argparse.ArgumentParser(description="Face detection")
-    parser.add_argument('--detector', '-d', type=str, default='model_weights/blazeface.pth',
-                        help="Path to a trained face detector .pth file")
-    parser.add_argument('--detector_type', '-t', type=str, help="One of blazeface, retinaface, ssd")
-    parser.add_argument('--classifier', default='model_weights/ensemble_100epochs.pth', type=str,
-                        help="Path to a trained classifier .pth file")
-    parser.add_argument('--cuda', '-c', default=False, action='store_true', help="Enable CUDA")
-    parser.add_argument('--input_directory', type=str, required=True, help="Path to a directory containing video files")
-    parser.add_argument('--detection_file', type=str, help="Path to the detections csv output by annotator.py."
-                                                           "If given, the detections will be compared.")
-    parser.add_argument('--rate', '-r', type=int, default=1, help='Run detection on every 1/rate frames.')
 
-    args = parser.parse_args()
-
-    if not args.input_directory:
-        raise Exception("Invalid input directory")
-
-    evaluator = Evaluator(args.cuda and torch.cuda.is_available(), args.detector, args.detector_type, args.classifier, args.input_directory,
-                          args.rate, args.detection_file)
-    individual_video_results = evaluator.get_evaluator_results()
-
-    with open(CLASSIFICATION_RESULTS_FILE, 'w+') as json_file:
-        json.dump(individual_video_results, json_file, indent=4)
+        return self.results
 
-    print(f"\n Output saved at {CLASSIFICATION_RESULTS_FILE}")
 
-    exit()

From f400f7685949f694d8f3137488d81bb4a568ebf0 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Mon, 3 Aug 2020 17:17:55 -0400
Subject: [PATCH 21/25] Add constants file

---
 scripts/constants.py      |  5 +++++
 scripts/evaluator.py      | 26 ++++----------------------
 scripts/face_extractor.py |  4 ++--
 3 files changed, 11 insertions(+), 24 deletions(-)
 create mode 100644 scripts/constants.py

diff --git a/scripts/constants.py b/scripts/constants.py
new file mode 100644
index 00000000..9987a69d
--- /dev/null
+++ b/scripts/constants.py
@@ -0,0 +1,5 @@
+# constants that may be used in multiple files
+
+# supported image and video file extensions
+IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG']
+VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV', '.avi', '.AVI']
\ No newline at end of file
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
index 8c8aba74..de558191 100644
--- a/scripts/evaluator.py
+++ b/scripts/evaluator.py
@@ -1,8 +1,5 @@
-import argparse
 import csv
-import json
 import os
-import warnings
 
 import cv2
 import numpy as np
@@ -10,12 +7,10 @@
 from tqdm import tqdm
 
 from scripts.goggle_classifier import get_model
+from scripts.constants import VIDEO_EXT
+from scripts.utils import check_rotation, correct_rotation, bbox_iou
 from src.jetson.face_detector import FaceDetector
 from src.jetson.classifier import Classifier
-from scripts.utils import check_rotation, correct_rotation, bbox_iou
-
-
-VIDEO_EXT = ['.mov', '.mp4', '.avi', '.MOV', '.MP4', '.AVI']
 
 
 class Evaluator():
@@ -37,7 +32,6 @@ def __init__(self, cuda, detector, detector_type, classifier, input_directory, r
          of another detection model (to be compared)
         @param self_dets_file: CSV generated by this class containing detections by self.detector
         """
-
         if cuda and torch.cuda.is_available():
             torch.set_default_tensor_type('torch.cuda.FloatTensor')
             self.device = torch.device('cuda:0')
@@ -93,7 +87,6 @@ def evaluate(self):
         containing test videos and stores results in self.results.
         To understand the format of the self.results dict, check the constructor
         """
-
         total_videos_processed = 0
         for video_file in self.video_filenames:
             self.video = video_file
@@ -122,7 +115,6 @@ def evaluate_classifications(self):
         """
         Run classification on one video, save classification results
         """
-
         inferences = self.infer()
         if sum(inferences.values()) == 0:
             percentage_of_correct_predictions = 0
@@ -139,7 +131,6 @@ def evaluate_detections(self, ground_truth_detections_file, predicted_detections
         @param ground_truth_detections_file: file containing detections to be compared (created by annotator.py)
         @param predicted_detections_file: file containing detections by self.detector
         """
-
         ground_truth_detections = []
         predicted_detections = []
         with open(ground_truth_detections_file, newline='') as detect_file:
@@ -198,7 +189,6 @@ def infer(self):
 
         @return inference_dict: the number of inferences for each class
         """
-
         detections = []
         preds = []
         inference_dict = {"Goggles": 0, "Glasses": 0, "Neither": 0}
@@ -241,7 +231,6 @@ def calculate_average_class_accuracy(self):
         """
         Calculates the average class accuracy for each class and stores it in self.results
         """
-
         for class_label in self.results:
             if self.results[class_label]['number_of_videos'] > 0:
                 self.results[class_label]['average_class_accuracy'] = self.results[class_label][
@@ -257,10 +246,9 @@ def record_results(self, result):
         @param result(List) - contains the classification accuracy,
         number of predictions for each label, number of detections (see evaluate_classifications)
         """
-
         self.results[self.class_label]['number_of_videos'] += 1
-        # average_class_accuracy is a running sum which gets divided by the number of videos at the end
-        # see calculate_average_class_accuracy
+        # average_class_accuracy is a running sum which gets divided by the number of videos after evaluating all videos
+        # (see calculate_average_class_accuracy)
         self.results[self.class_label]['average_class_accuracy'] += result[0]
         self.results[self.class_label]['individual_video_results'][self.video] = {}
         self.results[self.class_label]['individual_video_results'][self.video]["accuracy"] = result[0]
@@ -275,7 +263,6 @@ def get_class_label(self):
         """
         Get class label [Goggles / Glasses / Neither] that the image belongs to
         """
-
         if '/Goggles/' in self.video or '/goggles/' in self.video:
             class_label = 'Goggles'
         elif '/Glasses/' in self.video or '/glasses/' in self.video:
@@ -289,14 +276,12 @@ def get_condition(self):
         """
         Get condition [Ideal, low_lighting etc. ] that the image belongs to
         """
-
         return self.video.split('/')[-2]
 
     def get_video_files(self, input_directory: str):
         """
         Gets all the video files in the input directory
         """
-
         filenames = []
         for dirName, subdirList, fileList in os.walk(input_directory):
             for filename in fileList:
@@ -310,7 +295,4 @@ def get_evaluator_results(self):
         """
         Returns the dict containing all the test results (self.results)
         """
-
         return self.results
-
-
diff --git a/scripts/face_extractor.py b/scripts/face_extractor.py
index 95d50615..1477dcbd 100644
--- a/scripts/face_extractor.py
+++ b/scripts/face_extractor.py
@@ -9,6 +9,7 @@
 import numpy as np
 from tqdm import tqdm
 
+from scripts.constants import IMAGE_EXT, VIDEO_EXT
 from scripts.utils import check_rotation, correct_rotation
 from src.jetson.face_detector import FaceDetector
 
@@ -19,8 +20,7 @@
 """
 
 warnings.filterwarnings('once')
-IMAGE_EXT = ['.jpg', '.JPG', '.png', '.PNG']
-VIDEO_EXT = ['.mp4', '.MP4', 'mov', '.MOV', '.avi', '.AVI']
+
 
 
 def get_images(input_dir):

From eca9ab468c22404ae943877e1301a731495167ea Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Wed, 5 Aug 2020 20:24:29 -0400
Subject: [PATCH 22/25] Update ftp transfer location. Make rclone to Drive work
 on remote server (HELPS machine).

---
 scripts/collect_images.py | 58 ++++++++++++++++++++++++++-------------
 scripts/prepare_images.py |  3 ++
 src/db/data_insertion.py  |  7 +++--
 src/db/db_connection.py   |  4 ---
 4 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 51e0cb9d..2f40b940 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -1,25 +1,26 @@
 import argparse
 import datetime
 import json
+import os
+import subprocess
 
 from src.db.db_connection import sql_cursor
-from scripts.automatic_notification import send_email
 
 """
 Put this file one folder up from the stored images.
-Eg. on ee220clnx1: if /local/b/embedvis/imgs contains images, 
+Eg. on ee220clnx1: if /local/b/embedvis/Nano_Images contains images, 
 this file's path should be /local/b/embedvis/collect_images.py
 Set up a cron job to run this script daily.
+rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/
 
 Collect images of non-goggle detections from the database.
 Upload images and metadata to Google Drive.
-Email end-user with the Drive link.
 """
 
-METADATA_FILE = 'metadata.json'
+METADATA_FILE = os.path.join(os.path.dirname(__file__), 'metadata.json')
+TODAY = datetime.datetime.today().strftime('%Y-%m-%d')
 
 
-# TODO rename method
 def get_metadata():
     """
     Get image filenames and other relevant metadata from the database.
@@ -31,7 +32,12 @@ def get_metadata():
     """
 
     metadata = []
-    current_date = (datetime.date.today(),)
+    #current_date = (datetime.date.today(),)
+
+    # for testing
+    date = datetime.datetime(2020, 7, 23)
+    current_date = (date,)
+    # for testing
 
     # make sql connection
     # execute query
@@ -39,10 +45,10 @@ def get_metadata():
         try:
             cursor.execute('USE goggles')
             cursor.execute('SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max, '
-                           'b.init_vector, i.image_name, i.image_date from BBOX AS b, IMAGE as i where '
-                           'b.image_name=i.image_name and i.image_date=? and b.goggles=False', current_date)
+                           'b.init_vector, b.goggles from BBOX AS b, IMAGE as i where '
+                           'b.image_name=i.image_name and i.image_date=%s and b.goggles=False', current_date)
 
-            for (image_name, x_min, y_min, x_max, y_max, init_vector, image_name, image_date) in cursor:
+            for (image_name, x_min, y_min, x_max, y_max, init_vector, goggles) in cursor:
                 metadata.append({'image_name': image_name,
                                  'x_min': float(x_min),  # JSON cannot serialize Decimals.
                                  'y_min': float(y_min),  # If there is a better way to do this, let me know.
@@ -58,31 +64,45 @@ def get_metadata():
     return metadata
 
 
-# TODO make folder with date to contain images and metadata file
-def upload_files(metadata, dir):
+def upload_files(metadata, dir, rclone_path, remote_name):
     """
     For each filename returned by get_metadata, upload image
     to Drive. Upload the day's metadata file.
     @param metadata: the list of dictionaries returned by get_metadata
     @param dir: the folder containing the images to upload
+    @param rclone_path: path to rclone installation. Must be an absolute path if on the HELPS machine.
+    @param remote_name: name of remote location in rclone
     """
 
-    for image in metadata:
-        # upload image using rclone
-        # subprocess rclone copy os.path.join(dir, image['image_name']) [Drive name]
-        pass
+    # prevent sending the same image twice (if two faces are detected)
+    images = []
 
-    # upload metadata json file to Drive
-    # subprocess rclone copy METADATA_FILE [Drive name]:
+    # send images to the Drive
+    for image in metadata:
+        if image not in images:
+            images.append(image)
+            image_path = os.path.join(os.path.dirname(__file__), dir, image['image_name'])
+            subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)])
 
+    # upload metadata json to the Drive
+    subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)])
 
-# TODO call Seoyoung's method to email???
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Collect images.')
     parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
+    parser.add_argument('--rclone_path', '-r', type=str, default='rclone', help='Path to rclone installation. If not '
+                                                                                'on the HELPS machine, the default '
+                                                                                'should work (if you have rclone '
+                                                                                'installed).')
+    parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according '
+                                                                                  'to rclone (default is the Drive '
+                                                                                  'name on the HELPS machine). Don\'t '
+                                                                                  'include the semicolon.')
     args = parser.parse_args()
 
     # call the methods
     metadata = get_metadata()
-    upload_files(metadata, args.directory)
+    upload_files(metadata, args.directory, args.rclone_path, args.remote_name)
+
+    exit(0)
diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py
index 4d78b22e..eaa46bca 100644
--- a/scripts/prepare_images.py
+++ b/scripts/prepare_images.py
@@ -5,8 +5,11 @@
 """
 After having run collect_images, decrypt the associated images
 (if necessary) and combine images together into a short video (using metadata).
+
+This file is assumed to be on the end user's machine.
 """
 
+# the metadata file generated by collect_images
 METADATA_FILE = 'metadata.json'
 
 
diff --git a/src/db/data_insertion.py b/src/db/data_insertion.py
index f6b3b6d9..4f7ab1c4 100644
--- a/src/db/data_insertion.py
+++ b/src/db/data_insertion.py
@@ -3,6 +3,9 @@
 from decimal import Decimal
 import datetime
 
+# location where the images will be stored (on the HELPS machine)
+IMAGE_DIR = '/local/b/embedvis/Nano_Images'
+
 
 def data_insert(image_name: str, image_date: datetime, image_time: datetime, init_vecs: list, bboxes: list, input_dir: str, labels: list):
     """Transfer image to remote storage then inserts image metadata and bounding boxes data in database
@@ -18,8 +21,8 @@ def data_insert(image_name: str, image_date: datetime, image_time: datetime, ini
     """
 
     # Below ftp transfer has been commented out for testing purposes
-    #with ftp_transfer() as transfer:
-        #transfer(input_dir, './Documents', image_name)
+    with ftp_transfer() as transfer:
+        transfer(input_dir, IMAGE_DIR, image_name)
 
     sql_insert(IMAGE(image_name, image_date, image_time))
 
diff --git a/src/db/db_connection.py b/src/db/db_connection.py
index d7241ff2..d719104c 100644
--- a/src/db/db_connection.py
+++ b/src/db/db_connection.py
@@ -1,13 +1,9 @@
-import datetime
-
 import mysql.connector
-import datetime
 
 from src.db.config import get_config
 from contextlib import contextmanager, closing
 import datetime
 
-from src.jetson.db.config import get_config
 
 class Table:
     def __init__(self):

From 310b7472be4af2c993287b998d9019f78b6937f0 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Wed, 12 Aug 2020 18:50:14 -0400
Subject: [PATCH 23/25] Hard-code the rclone path. Rename prepare_images to
 decrypt_images (decryption is all it does now).

---
 scripts/collect_images.py | 49 ++++++++++++++++--------------------
 scripts/decrypt_images.py | 42 +++++++++++++++++++++++++++++++
 scripts/prepare_images.py | 53 ---------------------------------------
 3 files changed, 64 insertions(+), 80 deletions(-)
 create mode 100644 scripts/decrypt_images.py
 delete mode 100644 scripts/prepare_images.py

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 2f40b940..6078a620 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -7,35 +7,35 @@
 from src.db.db_connection import sql_cursor
 
 """
-Put this file one folder up from the stored images.
-Eg. on ee220clnx1: if /local/b/embedvis/Nano_Images contains images, 
-this file's path should be /local/b/embedvis/collect_images.py
-Set up a cron job to run this script daily.
+This script should be set up with a cron job to run daily.
 rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/
 
 Collect images of non-goggle detections from the database.
 Upload images and metadata to Google Drive.
 """
 
-METADATA_FILE = os.path.join(os.path.dirname(__file__), 'metadata.json')
+METADATA_FILE = 'metadata.json'
+# rclone on ee220clnx1 is an earlier version that doesn't support copying to shared folders
+RCLONE_PATH = '/home/shay/a/bergz/rclone-v1.52.2-linux-amd64/rclone'
 TODAY = datetime.datetime.today().strftime('%Y-%m-%d')
 
 
 def get_metadata():
     """
     Get image filenames and other relevant metadata from the database.
-    @return: A list of dictionaries with the metadata for each image TODO describe the metadata
+    Save metadata to a file for future decryption.
+    @return: A list of dictionaries with the metadata for each image
 
-    Query:
-    SELECT b.image_name, b.X_Min, b.Y_Min, b.X_Max, b.Y_Max,
-    i.image_name, i.init_vector from bbox AS b, image as i where b.image_name=i.image_name and b.goggles=False
+    Example list: [
+    {'image_name': "0.jpg", 'x_min': 0.0, 'y_min': 0.0, 'x_max': 100.0, 'y_max': 100.0, 'init_vector': "example"}
+    {'image_name': "1.jpg", 'x_min': 25.0, 'y_min': 25.0, 'x_max': 120.0, 'y_max': 140.0, 'init_vector': "example2"}]
     """
 
     metadata = []
-    #current_date = (datetime.date.today(),)
+    # current_date = (datetime.date.today(),)
 
     # for testing
-    date = datetime.datetime(2020, 7, 23)
+    date = datetime.datetime(2020, 8, 10)
     current_date = (date,)
     # for testing
 
@@ -64,45 +64,40 @@ def get_metadata():
     return metadata
 
 
-def upload_files(metadata, dir, rclone_path, remote_name):
+def upload_files(metadata, dir, remote_name):
     """
     For each filename returned by get_metadata, upload image
     to Drive. Upload the day's metadata file.
     @param metadata: the list of dictionaries returned by get_metadata
     @param dir: the folder containing the images to upload
-    @param rclone_path: path to rclone installation. Must be an absolute path if on the HELPS machine.
     @param remote_name: name of remote location in rclone
     """
 
-    # prevent sending the same image twice (if two faces are detected)
     images = []
 
     # send images to the Drive
     for image in metadata:
+        # prevent sending the same image twice (if two faces are detected)
         if image not in images:
             images.append(image)
-            image_path = os.path.join(os.path.dirname(__file__), dir, image['image_name'])
-            subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)])
+            image_path = os.path.join(dir, image['image_name'])
+            subprocess.run([RCLONE_PATH, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)])
 
-    # upload metadata json to the Drive
-    subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)])
+    # upload metadata.json to the Drive
+    subprocess.run([RCLONE_PATH, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)])
+    os.remove(METADATA_FILE)
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Collect images.')
     parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
-    parser.add_argument('--rclone_path', '-r', type=str, default='rclone', help='Path to rclone installation. If not '
-                                                                                'on the HELPS machine, the default '
-                                                                                'should work (if you have rclone '
-                                                                                'installed).')
     parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according '
-                                                                                  'to rclone (default is the Drive '
-                                                                                  'name on the HELPS machine). Don\'t '
-                                                                                  'include the semicolon.')
+                                                                                 'to rclone (default is the Drive '
+                                                                                 'name on ee220clnx1). Don\'t '
+                                                                                 'include the semicolon.')
     args = parser.parse_args()
 
-    # call the methods
     metadata = get_metadata()
-    upload_files(metadata, args.directory, args.rclone_path, args.remote_name)
+    upload_files(metadata, args.directory, args.remote_name)
 
     exit(0)
diff --git a/scripts/decrypt_images.py b/scripts/decrypt_images.py
new file mode 100644
index 00000000..f81e6cf4
--- /dev/null
+++ b/scripts/decrypt_images.py
@@ -0,0 +1,42 @@
+import argparse
+import getpass
+import json
+import os
+
+from src.jetson.AES import Encryption
+
+"""
+After collect_images has run and the output folder has been downloaded,
+decrypt the associated images.
+This file is assumed to be on the end user's machine.
+"""
+
+# the metadata file generated by collect_images
+METADATA_FILE = 'metadata.json'
+
+
+def decrypt_images(dir):
+    # ask for decryption key (getpass prompts on the terminal)
+    decrypt_key = getpass.getpass('Decryption password: ')
+
+    # TODO: derive the real key from the password (PBKDF2 or similar); decrypt_key is not used yet
+
+    # NOTE(review): this binds the imported Encryption object itself without calling it; probably changes once Jason finishes
+    decryptor = Encryption
+
+    with open(os.path.join(dir, METADATA_FILE)) as meta_file:
+        metadata = json.load(meta_file)
+        # use face coords from each metadata entry to find where to decrypt in the video frame
+        for image in metadata:
+            # TODO handle multiple faces in one frame. append to coords list
+            coords = [(image['x_min'], image['y_min'], image['x_max'], image['y_max'])]
+            init_vector = image['init_vector']
+            # TODO: decrypt and overwrite the encrypted image; coords and init_vector are not used yet
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Decrypt images.')  # first positional arg is prog, not description
+    parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be decrypted.')
+    args = parser.parse_args()
+
+    decrypt_images(args.directory)
diff --git a/scripts/prepare_images.py b/scripts/prepare_images.py
deleted file mode 100644
index eaa46bca..00000000
--- a/scripts/prepare_images.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import argparse
-import json
-import os
-
-"""
-After having run collect_images, decrypt the associated images
-(if necessary) and combine images together into a short video (using metadata).
-
-This file is assumed to be on the end user's machine.
-"""
-
-# the metadata file generated by collect_images
-METADATA_FILE = 'metadata.json'
-
-
-def decrypt_images(dir):
-    # ask for decryption key
-    with open(os.path.join(dir, METADATA_FILE)) as meta_file:
-        metadata = json.load(meta_file)
-        # use face coords to find where to decrypt in video frame
-        # decrypt
-        pass
-
-
-def make_videos(dir):
-    # use a heuristic (such as images within 5 seconds of each other)
-    # to combine similar images into one video for easier viewing
-    with open(os.path.join(dir, METADATA_FILE)) as meta_file:
-        metadata = json.load(meta_file)
-        # for each image, if within 5 seconds of the previous one,
-        # concatenate them and make them into a video
-        pass
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser('Combine images into a short video for easier viewing.'
-                                     'Decrypt if needed.')
-    parser.add_argument('--directory', '-d', type=str, required=True, help='Folder of images to be prepared.')
-    parser.add_argument('--decrypt', default=False, action='store_true', help='Decrypt faces in the images.')
-    parser.add_argument('--make_videos', '-m', default=False, action='store_true',
-                        help='Combine frames from the same time period into a single video.')
-
-    args = parser.parse_args()
-
-    if not args.decrypt and not args.make_videos:
-        print('No options selected. Please select at least one of --decrypt or --make_videos.')
-        exit(0)
-
-    if args.decrypt:
-        decrypt_images(args.directory)
-
-    if args.make_videos():
-        make_videos(args.directory)

From f8ef93b5a49ebcb05133736dbb5b9779592c7039 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Wed, 12 Aug 2020 19:00:18 -0400
Subject: [PATCH 24/25] Remove testing date

---
 scripts/collect_images.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 6078a620..01b3cdc0 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -32,12 +32,7 @@ def get_metadata():
     """
 
     metadata = []
-    # current_date = (datetime.date.today(),)
-
-    # for testing
-    date = datetime.datetime(2020, 8, 10)
-    current_date = (date,)
-    # for testing
+    current_date = (datetime.date.today(),)
 
     # make sql connection
     # execute query

From 50c10c9805507d92582a4efd1684d15cd2f00529 Mon Sep 17 00:00:00 2001
From: ZPBerg <bergz@purdue.edu>
Date: Thu, 13 Aug 2020 12:14:50 -0400
Subject: [PATCH 25/25] Changed rclone instructions.

---
 scripts/collect_images.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/scripts/collect_images.py b/scripts/collect_images.py
index 01b3cdc0..572ba299 100644
--- a/scripts/collect_images.py
+++ b/scripts/collect_images.py
@@ -8,16 +8,14 @@
 
 """
 This script should be set up with a cron job to run daily.
-rclone should be set up, in our case pointing to a Google Drive folder: https://rclone.org/drive/
+Each user will have to set up their rclone config, in our case pointing to a Google Drive folder: https://rclone.org/drive/
+This can be done with /local/b/embedvis/rclone-v1.52.2-linux-amd64/rclone config
 
 Collect images of non-goggle detections from the database.
 Upload images and metadata to Google Drive.
 """
 
 METADATA_FILE = 'metadata.json'
-# rclone on ee220clnx1 is an earlier version that doesn't support copying to shared folders
-RCLONE_PATH = '/home/shay/a/bergz/rclone-v1.52.2-linux-amd64/rclone'
-TODAY = datetime.datetime.today().strftime('%Y-%m-%d')
 
 
 def get_metadata():
@@ -45,8 +43,8 @@ def get_metadata():
 
             for (image_name, x_min, y_min, x_max, y_max, init_vector, goggles) in cursor:
                 metadata.append({'image_name': image_name,
-                                 'x_min': float(x_min),  # JSON cannot serialize Decimals.
-                                 'y_min': float(y_min),  # If there is a better way to do this, let me know.
+                                 'x_min': float(x_min),
+                                 'y_min': float(y_min),
                                  'x_max': float(x_max),
                                  'y_max': float(y_max),
                                  'init_vector': init_vector
@@ -59,7 +57,7 @@ def get_metadata():
     return metadata
 
 
-def upload_files(metadata, dir, remote_name):
+def upload_files(metadata, dir, rclone_path, remote_name):
     """
     For each filename returned by get_metadata, upload image
     to Drive. Upload the day's metadata file.
@@ -69,6 +67,7 @@ def upload_files(metadata, dir, remote_name):
     """
 
     images = []
+    today = datetime.datetime.today().strftime('%Y-%m-%d')
 
     # send images to the Drive
     for image in metadata:
@@ -76,23 +75,24 @@ def upload_files(metadata, dir, remote_name):
         if image not in images:
             images.append(image)
             image_path = os.path.join(dir, image['image_name'])
-            subprocess.run([RCLONE_PATH, 'copy', image_path, '{}:{}'.format(remote_name, TODAY)])
+            subprocess.run([rclone_path, 'copy', image_path, '{}:{}'.format(remote_name, today)])
 
     # upload metadata.json to the Drive
-    subprocess.run([RCLONE_PATH, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, TODAY)])
+    subprocess.run([rclone_path, 'copy', METADATA_FILE, '{}:{}'.format(remote_name, today)])
     os.remove(METADATA_FILE)
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Collect images.')
     parser.add_argument('--directory', '-d', type=str, required=True, help='Folder containing images to upload')
-    parser.add_argument('--remote_name', type=str, default='EmbedVisDrive', help='Name of remote location according '
-                                                                                 'to rclone (default is the Drive '
-                                                                                 'name on ee220clnx1). Don\'t '
-                                                                                 'include the semicolon.')
+    parser.add_argument('--rclone_path', '-r', type=str, default='/local/b/embedvis/rclone-v1.52.2-linux-amd64/rclone',
+                        help='Location of rclone binary. Default version on ee220clnx1 doesn\'t support copying to '
+                             'shared folders.')
+    parser.add_argument('--remote_name', type=str, required=True,  # no default any more, so the flag is mandatory
+                        help='Name of remote location according to rclone config. You must create your own config.')
     args = parser.parse_args()
 
     metadata = get_metadata()
-    upload_files(metadata, args.directory, args.remote_name)
+    upload_files(metadata, args.directory, args.rclone_path, args.remote_name)
 
     exit(0)