-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: dataset.py
More file actions
105 lines (90 loc) · 4.17 KB
/
dataset.py
File metadata and controls
105 lines (90 loc) · 4.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from PIL import Image
import matplotlib.patches as patches
import os
import cv2
from torchvision.io import read_image
import torch
from torchvision import transforms
from torch.utils.data import Dataset
from torchvision import tv_tensors
# Default YOLO-style dataset layout: images/ and labels/ hold files with
# matching stems (e.g. images/001.jpg <-> labels/001.txt).
DEFAULT_IMG_PATH = "data/train/images"
DEFAULT_ANNOTATION_PATH = "data/train/labels"
def parse_path(img_path, annotation_path):
    """Pair every image file in *img_path* with its label file path.

    Parameters:
        img_path: directory containing image files.
        annotation_path: directory containing one ``<stem>.txt`` label file
            per image (files are not checked for existence here).

    Returns:
        (path_images, path_annotations): two lists of equal length whose
        i-th entries refer to the same sample.
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
    # A single filter drives both lists, so they are always the same length
    # and positionally aligned.  (The previous version filtered images with a
    # `".jpg" in name` substring test but stems with the extension set, which
    # diverged as soon as a .png/.jpeg file was present.)
    image_names = [
        name for name in os.listdir(img_path)
        if os.path.splitext(name)[1].lower() in image_extensions
    ]
    path_images = [f"{img_path}/{name}" for name in image_names]
    path_annotations = [
        f"{annotation_path}/{os.path.splitext(name)[0]}.txt"
        for name in image_names
    ]
    return path_images, path_annotations
def parse_annotation(annotation_path, img_dim = None, yolo_format=False):
    """Parse one label file into corner-format boxes and class labels.

    Parameters:
        annotation_path: path to a whitespace-separated label file with one
            box per line: ``class x y x y`` (corner format) or
            ``class xc yc w h`` (normalized YOLO format).
        img_dim: ``(height, width)`` of the image, as produced by
            ``img.shape[:2]``; required when ``yolo_format`` is True.
        yolo_format: when True, lines are normalized YOLO boxes that are
            denormalized to pixel coordinates.

    Returns:
        (boxes, labels): boxes as ``[x_min, y_min, x_max, y_max]`` lists,
        labels as ints (always 0 — single-class dataset, see below).
    """
    boxes = []
    labels = []
    with open(annotation_path, 'r') as file:
        for line in file:
            data = line.strip().split()
            if not data:
                continue  # tolerate blank lines at end of file
            # class_label = int(data[0])
            # Single-class dataset: every annotation is forced to class 0.
            class_label = 0
            if yolo_format:
                # img_dim follows cv2's (height, width) convention, so x
                # values scale by width (index 1) and y by height (index 0).
                # (The previous version had the axes swapped, mis-scaling
                # every box on non-square images.)
                img_h, img_w = img_dim[0], img_dim[1]
                x_center = float(data[1]) * img_w
                y_center = float(data[2]) * img_h
                width = float(data[3]) * img_w
                height = float(data[4]) * img_h
                x_min = x_center - (width / 2)
                y_min = y_center - (height / 2)
                x_max = x_center + (width / 2)
                y_max = y_center + (height / 2)
            else:
                # Data is already in pixel [x_min, y_min, x_max, y_max] format.
                x_min = float(data[1])
                y_min = float(data[2])
                x_max = float(data[3])
                y_max = float(data[4])
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(class_label)
    return boxes, labels
class BoarDataset(Dataset):
    """Object-detection dataset for boar images with YOLO-style label files.

    All images and annotations are loaded eagerly into memory in ``__init__``
    (fine for small datasets; a large dataset would need lazy loading).
    ``__getitem__`` returns ``{"image": tensor, "target": {"boxes", "labels"}}``
    in the format torchvision detection models consume.
    """

    def __init__(self, img_path=DEFAULT_IMG_PATH, annotation_path=DEFAULT_ANNOTATION_PATH, transform=None, yolo_format=False, augmentations=None):
        """Load every image/annotation pair found under the given directories.

        Parameters:
            img_path: directory of image files.
            annotation_path: directory of per-image ``.txt`` label files.
            transform: callable applied to the image after augmentation;
                defaults to ``transforms.ToTensor()``.
            yolo_format: forwarded to ``parse_annotation`` (normalized boxes).
            augmentations: optional callable taking ``image=, bboxes=, labels=``
                keywords and returning a dict with the same keys
                (albumentations-style interface — TODO confirm against caller).
        """
        self.annotation_path = annotation_path
        self.img_path = img_path
        self.transform = transform or transforms.Compose([
            transforms.ToTensor(),
        ])
        self.augmentations = augmentations
        self.yolo_format = yolo_format
        self.imgs, self.boxes, self.labels = self.get_data()

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img = self.imgs[idx]
        bboxes = self.boxes[idx]
        labels = self.labels[idx]
        if self.augmentations:
            transformed = self.augmentations(image=img, bboxes=bboxes, labels=labels)
            img = transformed['image']
            bboxes = transformed['bboxes']
            labels = transformed['labels']
        # An empty annotation file (or augmentations dropping every box) must
        # still yield a (0, 4) float tensor: torch.Tensor([]) would produce
        # shape (0,), which detection models reject.
        if len(bboxes):
            bboxes_tensor = torch.Tensor(bboxes)
        else:
            bboxes_tensor = torch.zeros((0, 4), dtype=torch.float32)
        labels_tensor = torch.tensor(labels, dtype=torch.int64)
        img = self.transform(img)
        target = {"boxes": bboxes_tensor, "labels": labels_tensor}
        return {"image": img, "target": target}

    def get_data(self):
        """Eagerly read every image (BGR ndarray via cv2) and its boxes/labels.

        Returns three parallel lists: images, per-image box lists, per-image
        label lists.
        """
        imgs = []
        classes = []
        boxes = []
        path_images, path_annotations = parse_path(img_path=self.img_path, annotation_path=self.annotation_path)
        for img_path, annotation_path in zip(path_images, path_annotations):
            img = cv2.imread(img_path)
            # cv2 shape is (height, width, channels); parse_annotation
            # receives (height, width) for YOLO denormalization.
            img_dim = img.shape[:2]
            boxes_array, labels_array = parse_annotation(annotation_path, img_dim, yolo_format=self.yolo_format)
            imgs.append(img)
            boxes.append(boxes_array)
            classes.append(labels_array)
        return imgs, boxes, classes