How to setup anomalib.data.image.folder.Folder datamodule against MVTec dataset #2246
Replies: 2 comments 1 reply
-
I think you should move the 'good' folder to another directory, and then set |
Beta Was this translation helpful? Give feedback.
-
Hi @syaifulnizamyahya and @JinYuannn, I've created this PR to introduce a bunch of updates to the dataset capabilities. If you want to use the CSV datamodule with MVTec, you could do something like this:
from pathlib import Path
import pandas as pd
def create_mvtec_csv(
root_dir: str | Path = "./datasets/MVTec",
output_dir: str | Path = "./csv/mvtec_csv",
duplicate_val: bool = True,
) -> None:
root_path = Path(root_dir)
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
for category_path in root_path.iterdir():
if not category_path.is_dir():
continue
category = category_path.name
data = []
defect_index = {}
current_index = 0
# Process training data (all normal samples)
train_good_path = category_path / "train" / "good"
if train_good_path.exists():
for img_path in train_good_path.glob("*.png"):
data.append(
{
"image_path": str(img_path),
"label": "normal",
"label_index": 0,
"mask_path": "",
"split": "train",
},
)
# Process test data
test_path = category_path / "test"
if test_path.exists():
for defect_path in test_path.iterdir():
if defect_path.is_dir():
defect_type = defect_path.name
if defect_type != "good" and defect_type not in defect_index:
current_index += 1
defect_index[defect_type] = current_index
for img_path in defect_path.glob("*.png"):
mask_path = (
category_path / "ground_truth" / defect_type / img_path.name.replace(".png", "_mask.png")
)
sample = {
"image_path": str(img_path),
"label": "normal" if defect_type == "good" else "abnormal",
"label_index": 0 if defect_type == "good" else defect_index[defect_type],
"mask_path": str(mask_path) if mask_path.exists() else "",
"split": "test",
}
# Add the sample to test set
data.append(sample)
# Duplicate the sample for validation set
if duplicate_val:
val_sample = sample.copy()
val_sample["split"] = "val"
data.append(val_sample)
df = pd.DataFrame(data)
output_file = output_path / f"{category}.csv"
df.to_csv(output_file, index=False)
# This will create the MVTec csv files.
create_mvtec_csv()
Now you can use the CSV datamodule. Let's say you want to use the predefined train and test sets, with the validation set duplicated from the test set (note that MVTec does not have a validation set):
from anomalib.data import CSV
# Configure the CSV datamodule for the "bottle" category, requesting the
# predefined test and validation splits.
csv_options = {
    "name": "bottle",
    "csv_path": "./csv/mvtec_csv/bottle.csv",
    "test_split_mode": "predefined",
    "val_split_mode": "predefined",
}
datamodule = CSV(**csv_options)
datamodule.setup()

# Check the samples
for subset_name in ("train_data", "val_data"):
    print(getattr(datamodule, subset_name).samples)
print(datamodule.test_data.samples)
If you want to use a val/test split rather than duplication, you could remove |
Beta Was this translation helpful? Give feedback.
-
I want to know how to use the Folder datamodule on the MVTec dataset.
There will be no change to the MVTec dataset folder structure. I want to configure the Folder datamodule so that it behaves exactly the same as anomalib.data.image.mvtec.MVTec
Error received
ValueError: NumPy boolean array indexing assignment cannot assign 63 input values to the 83 output values where the mask is true
I believe this is caused by the 'good' subfolder under the d:\Dev\openvino\datasets\MVTec\bottle\test directory, whereas in d:/Dev/openvino/datasets/MVTec/bottle/ground_truth there is no 'good' subfolder.
In other words, the abnormal directory has an extra 'good' folder compared to the mask directory.
Is there a way to fix this other than deleting the 'good' subfolder in the test directory?
Beta Was this translation helpful? Give feedback.
All reactions