DataLoader
The datasets linked below were used, and the code below serves as the DataLoader that feeds data into the training stage. Because the referenced examples generally rely on the HuggingFace Datasets library, this transformation was written to convert an arbitrary segmentation dataset into the `datasets` format.
class WaterDatasetLoader:
    """Discover image/annotation pairs on disk and build HuggingFace Datasets.

    Walks ``dataset_root`` for ``.png`` images located under ``image_subfolder``
    and pairs each one with the same-named file under ``annotation_subfolder``.
    Images without a matching annotation are skipped with a warning.
    """

    def __init__(self, dataset_root, image_subfolder, annotation_subfolder):
        self.dataset_root = dataset_root
        self.image_subfolder = image_subfolder
        self.annotation_subfolder = annotation_subfolder
        self.image_paths = []
        self.annotation_paths = []
        self.load_paths()

    def load_paths(self):
        """Populate ``image_paths``/``annotation_paths`` with matched pairs."""
        for root, _dirs, files in os.walk(self.dataset_root):
            if self.image_subfolder not in root:
                continue
            for filename in files:
                if not filename.endswith(".png"):
                    continue
                image_path = os.path.join(root, filename)
                # Mirror the image path into the annotation tree; the file name
                # itself is unchanged. (The original code also did a no-op
                # ``.replace(".png", ".png")`` here, which has been removed.)
                annotation_path = image_path.replace(
                    self.image_subfolder, self.annotation_subfolder
                )
                if os.path.exists(annotation_path):
                    self.image_paths.append(image_path)
                    self.annotation_paths.append(annotation_path)
                else:
                    print(f"Warning: Annotation file not found for image {image_path}")
        # Paths are appended in pairs above, so this should never trigger;
        # kept as a cheap sanity check.
        if len(self.image_paths) != len(self.annotation_paths):
            print("Warning: Mismatch between the number of images and annotations.")

    @staticmethod
    def load_and_preprocess_image(image_path, target_size=(256, 256), dtype=np.uint8):
        """Load an image, resize it to ``target_size``, return a ``dtype`` array."""
        img = Image.open(image_path)
        img = img.resize(target_size)
        return np.array(img, dtype=dtype)

    @staticmethod
    def load_and_preprocess_annotation(annotation_path, target_size=(256, 256), dtype=np.uint8):
        """Load a mask, resize it, and scale its 0-255 values into [0, 1].

        NOTE(review): the division produces floats; ``create_dataset`` then
        casts to uint8, which floors every value below 255 to 0. That is
        correct for strictly binary {0, 255} masks but lossy otherwise —
        confirm the annotations are binary.
        """
        ann = Image.open(annotation_path)
        ann = ann.resize(target_size)
        return np.array(ann, dtype=dtype) / 255

    def create_dataset(self):
        """Return ``(train_dataset, test_dataset)`` as HuggingFace Datasets.

        Performs a deterministic 80/20 split (``random_state=42``). Images are
        stored as RGB PIL images; labels keep only channel 0 as mode-'L' masks.
        """
        images = np.array(
            [self.load_and_preprocess_image(p) for p in self.image_paths],
            dtype=np.uint8,
        )
        annotations = np.array(
            [self.load_and_preprocess_annotation(p) for p in self.annotation_paths],
            dtype=np.uint8,
        )
        images_train, images_test, annotations_train, annotations_test = train_test_split(
            images, annotations, test_size=0.2, random_state=42
        )
        train_dataset_dict = {
            "image": [Image.fromarray(img, 'RGB') for img in images_train],
            "label": [Image.fromarray(ann, 'L') for ann in annotations_train[:, :, :, 0]],
        }
        test_dataset_dict = {
            "image": [Image.fromarray(img, 'RGB') for img in images_test],
            "label": [Image.fromarray(ann, 'L') for ann in annotations_test[:, :, :, 0]],
        }
        train_dataset = Dataset.from_dict(train_dataset_dict)
        test_dataset = Dataset.from_dict(test_dataset_dict)
        return train_dataset, test_dataset
The code snippet below was adapted from the notebook referenced in the Links section.
class SAMDataset(TorchDataset):
    """PyTorch dataset that turns HuggingFace records into SAM-ready inputs.

    Each item is run through the SAM processor together with a bounding-box
    prompt derived from the ground-truth mask; the raw mask is attached under
    the ``"ground_truth_mask"`` key for loss computation.
    """

    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        record = self.dataset[idx]
        mask = np.array(record["label"])
        # Derive the bounding-box prompt from the ground-truth mask.
        box_prompt = get_bounding_box(mask)
        encoded = self.processor(
            record["image"], input_boxes=[[box_prompt]], return_tensors="pt"
        )
        # The processor adds a batch dimension; strip it so the DataLoader
        # can re-batch items itself.
        encoded = {key: tensor.squeeze(0) for key, tensor in encoded.items()}
        encoded["ground_truth_mask"] = mask
        return encoded
Here, we briefly describe the dataset and the pre-trained SAM processor used to prepare inputs for the Segment Anything Model (SAM). A bounding-box prompt is computed from the ground-truth mask in the label, and the image is then paired with that prompt and passed to the model. This procedure is required for fine-tuning the mask decoder. However, as shown in the Test section, no ground-truth mask is available at inference time; and as the Finetune section demonstrates, without a ground-truth mask or some other guiding prompt in the input, an efficient result cannot be obtained.
Links
-
[https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SAM/Fine_tune_SAM_(segment_anything)_on_a_custom_dataset.ipynb](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SAM/Fine_tune_SAM_(segment_anything)_on_a_custom_dataset.ipynb)