detectron2 maskrcnn检测道路水坑

小v 2021-8-5 2401 1

之前的文章中曾介绍过使用 matterport MASKRCNN 检测道路水坑，为自动驾驶机器人提供道路环境模型。但是检测出来的水坑边缘分界不是特别理想，水坑的边界并没有被完全准确地框出来，对于精度要求比较高的场合，一旦行驶错误便会进入水坑中无法自拔。文章在这：

http://blog.cvosrobot.com/?post=656

接下来笔者在入门detectron2后打算使用同样的样本来训练MASKRCNN 的模型，比对一下效果。

detectron2 的效果如上图，之前maskrcnn的效果如下图

本文章内容结构

自定义数据集
使用VIA标注
训练
预测

自定义数据集

数据集依旧从网上搜罗而来，并通过颠倒、镜像、旋转等方式扩充到72张训练集图片加10张验证集图片。

保存到 roadWater_datasets下的train 文件夹和val文件夹。

使用VIA标注

via标注使用最新的via工具，https://www.robots.ox.ac.uk/~vgg/software/via/via_demo.html

以前的1.6版本不能适用于detectron2。标注的样子：

将验证集和训练集所有的图片都标注完毕，生成json文件放到相应的文件夹下。

本工程下使用 vis_roadwater.py 来查看数据是否标注正确，以及是否能够正确解读数据集。

如果出现标注效果则说明数据集正确

训练

使用个人电脑进行训练，迭代500次大约需要5分钟，这个是真的出乎我的意料，还记得在 MATTERPORT MASKRCNN 上需要训练3个小时。

训练的文件

train_water.py

import torch, torchvision
import detectron2
from detectron2.utils.logger import setup_logger
import os
import numpy as np
import json
import cv2
import matplotlib.pyplot as plt

from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
import random
from detectron2.utils.visualizer import Visualizer

from detectron2.engine import DefaultTrainer 
from detectron2.config import get_cfg 
from detectron2 import model_zoo 
from detectron2.engine import DefaultPredictor
#print(torch.__version__, torch.cuda.is_available()) # 1.5.0+cu101 True
#setup_logger()

def get_dicts(img_dir):
    """Load VIA polygon annotations from ``img_dir`` as detectron2 dataset dicts.

    Reads ``via_export_json.json`` inside ``img_dir`` and builds one record
    per image entry of that file. Every region is turned into a single
    polygon instance of category 0 with an absolute XYXY bounding box.

    Args:
        img_dir: Directory containing the images and ``via_export_json.json``.

    Returns:
        list[dict]: One dict per image with the keys ``file_name``,
        ``image_id``, ``height``, ``width`` and ``annotations``.

    Raises:
        FileNotFoundError: If an image listed in the JSON cannot be read.
    """
    json_file = os.path.join(img_dir, "via_export_json.json")
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(json_file, encoding="utf-8") as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(
                "Cannot read image referenced by {}: {}".format(json_file, filename)
            )
        height, width = image.shape[:2]

        record = {
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
        }

        # "regions" is a list in VIA 2.x exports and a dict keyed by region
        # index in VIA 1.x exports; accept both layouts.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Flatten to [x0, y0, x1, y1, ...]; the +0.5 offset presumably
            # moves integer pixel indices to pixel centres.
            poly = [
                coord
                for x, y in zip(px, py)
                for coord in (x + 0.5, y + 0.5)
            ]
            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
                "iscrowd": 0,
            })
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

path = "roadWater_datasets"  # root folder holding the train/ and val/ image folders

# Register each split under the name "BLOCK_<split>" and attach the single
# class name to its metadata. The split is bound as a lambda default so each
# loader keeps its own value instead of the loop's last one.
for split in ("train", "val"):
    dataset_name = "BLOCK_" + split
    DatasetCatalog.register(
        dataset_name,
        lambda split=split: get_dicts(path + "/" + split),
    )
    MetadataCatalog.get(dataset_name).set(thing_classes=["water"])

if __name__ == '__main__':
    # Model-zoo entry that supplies both the architecture config and the
    # pretrained starting weights.
    zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"

    cfg = get_cfg()
    cfg.OUTPUT_DIR = "logs"  # checkpoints and logs are written here
    cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

    # Model: start from the zoo checkpoint, predict a single class.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # proposals sampled per image

    # Data: train on the registered training split, no evaluation set.
    cfg.DATASETS.TRAIN = ("BLOCK_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2

    # Solver: 2 images per iteration, 500 iterations in total.
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 500

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)  # do not resume from an earlier run
    trainer.train()

训练完成后会在 cfg.OUTPUT_DIR 配置的路径下生成 model_final.pth 权重文件。

预测

使用上述的权重文件结合网络模型对图片进行预测。

inference_water.py

import torch, torchvision
import detectron2
from detectron2.utils.logger import setup_logger
import os
import numpy as np
import json
import cv2
import matplotlib.pyplot as plt

from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
import random
from detectron2.utils.visualizer import Visualizer

from detectron2.engine import DefaultTrainer 
from detectron2.config import get_cfg 
from detectron2 import model_zoo 
from detectron2.engine import DefaultPredictor

from detectron2.utils.visualizer import ColorMode

#print(torch.__version__, torch.cuda.is_available()) # 1.5.0+cu101 True
#setup_logger()

def get_dicts(img_dir):
    """Load VIA polygon annotations from ``img_dir`` as detectron2 dataset dicts.

    Reads ``via_export_json.json`` inside ``img_dir`` and builds one record
    per image entry of that file. Every region is turned into a single
    polygon instance of category 0 with an absolute XYXY bounding box.

    Args:
        img_dir: Directory containing the images and ``via_export_json.json``.

    Returns:
        list[dict]: One dict per image with the keys ``file_name``,
        ``image_id``, ``height``, ``width`` and ``annotations``.

    Raises:
        FileNotFoundError: If an image listed in the JSON cannot be read.
    """
    json_file = os.path.join(img_dir, "via_export_json.json")
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(json_file, encoding="utf-8") as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(
                "Cannot read image referenced by {}: {}".format(json_file, filename)
            )
        height, width = image.shape[:2]

        record = {
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
        }

        # "regions" is a list in VIA 2.x exports and a dict keyed by region
        # index in VIA 1.x exports; accept both layouts.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]
            # Flatten to [x0, y0, x1, y1, ...]; the +0.5 offset presumably
            # moves integer pixel indices to pixel centres.
            poly = [
                coord
                for x, y in zip(px, py)
                for coord in (x + 0.5, y + 0.5)
            ]
            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
                "iscrowd": 0,
            })
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

path = "roadWater_datasets" # path to your image folder

# Register both splits in this script as well. The code below looks up the
# "BLOCK_val" and "BLOCK_train" entries; without this registration their
# metadata carries no thing_classes, so predictions are drawn without the
# "water" class name. Registration is lazy: get_dicts only runs when a
# dataset is actually requested from the catalog.
for d in ["train", "val"]:
    DatasetCatalog.register("BLOCK_" + d, lambda d=d: get_dicts(path + "/" + d))
    MetadataCatalog.get("BLOCK_" + d).set(thing_classes=["water"])

if __name__ == '__main__':
    # Rebuild the Mask R-CNN config for inference.
    cfg = get_cfg()
    # Directory in which model_final.pth is looked up below.
    cfg.OUTPUT_DIR = "logs"
    # Resolve the config through the model zoo rather than a path relative
    # to the current working directory, so the script runs from any folder.
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"))
    # Inference must use the same model settings as training; only the
    # weights and the test-time options are changed here.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained

    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6   # set a custom testing threshold
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single class ("water")
    cfg.DATASETS.TEST = ("BLOCK_val", )
    predictor = DefaultPredictor(cfg)

    dataset_dicts = get_dicts(path + "/" + "val")
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the preview count at the size of the validation set.
    sample_count = min(3, len(dataset_dicts))
    for d in random.sample(dataset_dicts, sample_count):
        img = cv2.imread(d["file_name"])
        outputs = predictor(img)
        # The Visualizer is given the channel-reversed image; the result is
        # reversed back before it is handed to cv2.imshow.
        v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("BLOCK_train"), scale=0.8, instance_mode=ColorMode.IMAGE_BW)
        v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        res = v.get_image()[:, :, ::-1]
        cv2.imshow("res", res)
        cv2.waitKey(0)  # wait for a key press before showing the next image
    # Close the preview window once all samples have been shown.
    cv2.destroyAllWindows()

您需要先登录才能查看隐藏内容

视频图像处理