detectron2 maskrcnn检测道路水坑
之前的文章中曾介绍过使用 matterport MASKRCNN 检测道路水坑,为自动驾驶机器人提供道路环境模型。但是检测出来的水坑边缘分界不是特别理想,水坑的边界并没有被完全准确地框出来;对于精度要求比较高的场合,一旦行驶错误,机器人便会陷入水坑中无法自拔。文章在这:
http://blog.cvosrobot.com/?post=656
接下来笔者在入门detectron2后打算使用同样的样本来训练MASKRCNN 的模型,比对一下效果。
detectron2 的效果如上图 ,之前maskrcnn的效果如下图
本文章内容结构
- 自定义数据集
- 使用VIA标注
- 训练
- 预测
- 自定义数据集
数据集依旧从网上搜罗而来,并通过颠倒、镜像、旋转等方式扩充到72张训练图片加10张验证图片。
保存到 roadWater_datasets下的train 文件夹和val文件夹。
- 使用VIA标注
via标注使用最新的via工具,https://www.robots.ox.ac.uk/~vgg/software/via/via_demo.html
以前的1.6版本不能适用于detectron2。标注的样子:
将验证集和训练集所有的图片都标注完毕,生成json文件放到相应的文件夹下。
本工程下使用 vis_roadwater.py 来查看数据是否标注正确,以及是否能够正确解读数据集。
如果出现标注效果则说明数据集正确
训练
使用个人电脑进行训练,迭代500次大约需要5分钟,这真的出乎我的意料:还记得之前用 MATTERPORT MASKRCNN 训练同样的数据需要3个小时。
训练的文件
train_water.py
"""Fine-tune a COCO-pretrained Mask R-CNN (detectron2) on the road-puddle data.

Expects ``roadWater_datasets/train`` and ``roadWater_datasets/val``, each
holding the images together with a VIA export named ``via_export_json.json``.
Training output (including ``model_final.pth``) goes to the ``logs`` folder.
"""
import json
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision

import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.structures import BoxMode
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer

# Uncomment to check the PyTorch / CUDA install, e.g. "1.5.0+cu101 True".
# print(torch.__version__, torch.cuda.is_available())
# setup_logger()


def get_dicts(img_dir):
    """Convert the VIA export found in *img_dir* to detectron2 dataset dicts.

    Args:
        img_dir: Folder containing the images and ``via_export_json.json``.

    Returns:
        A list with one record per image listed in the JSON. Each record has
        ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``; every region becomes one polygon instance of
        category 0 with an absolute XYXY bounding box.

    Raises:
        FileNotFoundError: If an image named in the JSON cannot be read.
    """
    json_file = os.path.join(img_dir, "via_export_json.json")
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(json_file, encoding="utf-8") as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable, so fail here with a message that names the file.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(f"cannot read image: {filename}")
        height, width = image.shape[:2]

        objs = []
        for region in v["regions"]:
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Flat [x0, y0, x1, y1, ...] list, each vertex shifted by 0.5
            # (presumably to address pixel centres -- confirm if it matters).
            poly = [c for x, y in zip(px, py) for c in (x + 0.5, y + 0.5)]
            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts


path = "roadWater_datasets"  # root folder holding the train/ and val/ splits

for d in ["train", "val"]:
    # ``d=d`` binds the split name at definition time; without it both
    # lambdas would read the loop variable's final value ("val").
    DatasetCatalog.register("BLOCK_" + d, lambda d=d: get_dicts(path + "/" + d))
    MetadataCatalog.get("BLOCK_" + d).set(thing_classes=["water"])


def main():
    """Configure and run the Mask R-CNN fine-tuning job."""
    cfg = get_cfg()
    # Checkpoints and logs are written here.
    cfg.OUTPUT_DIR = "logs"
    # Start from the stock Mask R-CNN R50-FPN configuration.
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"))
    cfg.DATASETS.TRAIN = ("BLOCK_train",)  # our training dataset
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2  # number of parallel data loading workers
    # Initialise from the COCO-pretrained weights.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml")
    cfg.SOLVER.IMS_PER_BATCH = 2  # in 1 iteration the model sees 2 images
    cfg.SOLVER.BASE_LR = 0.00025  # learning rate
    cfg.SOLVER.MAX_ITER = 500  # number of iterations
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # proposals sampled per image for training
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single class: "water"

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()


if __name__ == '__main__':
    main()
训练完成后会在 cfg.OUTPUT_DIR 配置的路径下生成 model_final.pth 权重文件。
- 预测
使用上述的权重文件结合网络模型对图片进行预测。
inference_water.py
"""Run the fine-tuned road-puddle Mask R-CNN on a few validation images.

Loads ``logs/model_final.pth``, predicts on up to three randomly chosen
images from ``roadWater_datasets/val`` and shows each result in an OpenCV
window (press any key to advance).
"""
import json
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision

import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.structures import BoxMode
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import ColorMode
from detectron2.utils.visualizer import Visualizer

# Uncomment to check the PyTorch / CUDA install, e.g. "1.5.0+cu101 True".
# print(torch.__version__, torch.cuda.is_available())
# setup_logger()


def get_dicts(img_dir):
    """Convert the VIA export found in *img_dir* to detectron2 dataset dicts.

    Args:
        img_dir: Folder containing the images and ``via_export_json.json``.

    Returns:
        A list with one record per image listed in the JSON. Each record has
        ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``; every region becomes one polygon instance of
        category 0 with an absolute XYXY bounding box.

    Raises:
        FileNotFoundError: If an image named in the JSON cannot be read.
    """
    json_file = os.path.join(img_dir, "via_export_json.json")
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(json_file, encoding="utf-8") as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable, so fail here with a message that names the file.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(f"cannot read image: {filename}")
        height, width = image.shape[:2]

        objs = []
        for region in v["regions"]:
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Flat [x0, y0, x1, y1, ...] list, each vertex shifted by 0.5
            # (presumably to address pixel centres -- confirm if it matters).
            poly = [c for x, y in zip(px, py) for c in (x + 0.5, y + 0.5)]
            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts


path = "roadWater_datasets"  # root folder holding the train/ and val/ splits

# The datasets are not registered in this script, so the metadata entry would
# otherwise be empty and the visualizer could not print the class name.
MetadataCatalog.get("BLOCK_train").set(thing_classes=["water"])


def main():
    """Load the trained weights and display predictions on validation images."""
    cfg = get_cfg()
    # Folder the training script wrote its checkpoints to.
    cfg.OUTPUT_DIR = "logs"

    # Inference must use the same base config as training. Prefer the
    # in-repo copy; fall back to the packaged model-zoo config when this
    # script is not run from inside the detectron2 source tree.
    config_file = "../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
    if not os.path.exists(config_file):
        config_file = model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml")
    cfg.merge_from_file(config_file)

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6  # set a custom testing threshold
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single class: "water"
    cfg.DATASETS.TEST = ("BLOCK_val", )
    predictor = DefaultPredictor(cfg)

    metadata = MetadataCatalog.get("BLOCK_train")
    dataset_dicts = get_dicts(path + "/" + "val")
    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(3, len(dataset_dicts))
    for d in random.sample(dataset_dicts, sample_size):
        img = cv2.imread(d["file_name"])
        outputs = predictor(img)
        # OpenCV loads BGR; [:, :, ::-1] flips the channel order for the
        # Visualizer and back again for cv2.imshow.
        v = Visualizer(img[:, :, ::-1],
                       metadata=metadata,
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE_BW)
        v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        res = v.get_image()[:, :, ::-1]
        cv2.imshow("res", res)
        cv2.waitKey(0)


if __name__ == '__main__':
    main()
最新评论