Official documentation: https://mmdetection.readthedocs.io/zh-cn/latest/api.html#mmdet-structures
- ``gt_instances`` (InstanceData): Ground truth of instance annotations, e.g.:

      <InstanceData(
          META INFORMATION
          DATA FIELDS
          bboxes: tensor([], size=(0, 4))
          labels: tensor([], dtype=torch.int64)
          masks: BitmapMasks(num_masks=0, height=3456, width=4608)
      ) at 0x7fc26252dbb0>

- ``pred_instances`` (InstanceData): Instances of detection predictions, e.g.:

      <InstanceData(
          META INFORMATION
          DATA FIELDS
          labels: tensor([0])
          bboxes: tensor([[ 687.5951, 1478.1307, 2670.1741, 2215.1914]])
          scores: tensor([0.9946])
          masks: tensor([[[False, False, False,  ..., False, False, False],
                          [False, False, False,  ..., False, False, False],
                          [False, False, False,  ..., False, False, False],
                          ...,
                          [False, False, False,  ..., False, False, False],
                          [False, False, False,  ..., False, False, False],
                          [False, False, False,  ..., False, False, False]]])
      ) at 0x7fc25f2c3d00>

- ``ignored_instances`` (InstanceData): Instances to be ignored during training/testing, e.g.:

      <InstanceData(
          META INFORMATION
          DATA FIELDS
          bboxes: tensor([], size=(0, 4))
          labels: tensor([], dtype=torch.int64)
          masks: BitmapMasks(num_masks=0, height=3456, width=4608)
      ) at 0x7fc25f2cb610>

- ``gt_panoptic_seg`` (PixelData): Ground truth of panoptic segmentation
- ``pred_panoptic_seg`` (PixelData): Prediction of panoptic segmentation
- ``gt_sem_seg`` (PixelData): Ground truth of semantic segmentation
- ``pred_sem_seg`` (PixelData): Prediction of semantic segmentation
In this post I mainly work with ``pred_instances``. The underlying container types come from mmengine:
from mmengine.structures import InstanceData, PixelData
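To get a feel for the structure, here is a minimal sketch (the field values are invented for illustration) that builds a DetDataSample by hand and reads the fields back, the same way you would on a real inference result:
import torch
from mmdet.structures import DetDataSample
from mmengine.structures import InstanceData

# assemble a DetDataSample manually; the values below are made up
data_sample = DetDataSample()
pred_instances = InstanceData()
pred_instances.bboxes = torch.tensor([[687.6, 1478.1, 2670.2, 2215.2]])  # xyxy boxes
pred_instances.labels = torch.tensor([0])
pred_instances.scores = torch.tensor([0.9946])
data_sample.pred_instances = pred_instances

# fields are accessed as attributes, exactly as on inference output
print(data_sample.pred_instances.bboxes.shape)  # torch.Size([1, 4])
print(data_sample.pred_instances.scores)        # tensor([0.9946])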
Question: once the masks are obtained, how do we compute with them?
For example, cutting the object out of the image according to its mask.
The program below obtains the masks; ``epoch_24.pth`` is a model trained beforehand.
from mmdet.apis import init_detector, inference_detector
import mmcv
import matplotlib.pyplot as plt
from mmdet.registry import VISUALIZERS

config_file_retrain = 'configs/radish/mask_rcnn_r50_fpn_2x_coco_radish.py'
checkpoint_file_retrain = 'work_dirs/mask_rcnn_r50_fpn_2x_coco_radish/epoch_24.pth'
model = init_detector(config_file_retrain, checkpoint_file_retrain, device='cpu')
# test images tried during debugging; only the last assignment takes effect
# image_path = './data/radish/20231121145620_1-1.jpg'
# image_path = './data/radish/20231220/20231202115005_1-1.jpg'
# image_path = './data/radish/20231220/20231129161723_1-1.jpg'
# image_path = './data/radish/test2012/2012120076_20201212094819.png'
# image_path = './data/radish/test2012/2012120261_20201212151200.png'
# image_path = './data/radish/test2012/2012140043_20201214085421.png'
# image_path = './data/radish/test2012/2012120158_20201212111739.png'
# image_path = './data/radish/test2012/2012120305_20201212154238.png'
# image_path = './data/radish/20231220/20231129161409_1-1.jpg'
image_path = './data/radish/test2012/2012120179_20201212133724.png'
img = mmcv.imread(image_path, channel_order='rgb')
plt.imshow(img)
result_retrain = inference_detector(model, img)
# init the visualizer (execute this block only once)
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# the dataset_meta is loaded from the checkpoint and
# then passed to the model in init_detector;
# set the visualizer metadata when you change datasets
visualizer.dataset_meta = model.dataset_meta
# print(result_retrain)
# show the results
visualizer.add_datasample(
    'result',
    img,
    data_sample=result_retrain,
    draw_gt=False,
    wait_time=0,
)
visualizer.show()
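If you run headless (e.g. on a server), ``add_datasample`` can also write the rendering to disk instead of opening a window; a small sketch, with an arbitrary output path of my choosing:
# save the rendered prediction to a file instead of showing it
visualizer.add_datasample(
    'result',
    img,
    data_sample=result_retrain,
    draw_gt=False,
    out_file='outputs/result_vis.jpg',  # hypothetical path
)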
Processing the masks
import cv2
import numpy as np
masks = result_retrain.pred_instances.masks.detach().cpu().numpy()
# print(masks[0])
height = masks[0].shape[0]
width = masks[0].shape[1]
# print(height)
# print(width)
# print(result_retrain.pred_instances.masks[0].cpu().numpy())
from mmdet.structures.mask import encode_mask_results, mask2bbox
# RLE-encode the binary masks (COCO-style) and derive tight bboxes from them
encode_masks = encode_mask_results(result_retrain.pred_instances.masks)
# print(encode_masks)
bboxes = mask2bbox(result_retrain.pred_instances.masks.cpu()).numpy().tolist()
# mask2bbox returns boxes as (x0, y0, x1, y1), not (x, y, w, h)
bboxes = bboxes[0]
print(bboxes)
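This already answers part of the "how to compute" question above: the boolean mask array supports simple measurements directly. A small sketch (reusing the ``masks`` array extracted above) for pixel area and centroid:
mask0 = masks[0]               # (H, W) boolean array
area_px = mask0.sum()          # object area in pixels
ys, xs = np.nonzero(mask0)     # row/column indices of all mask pixels
cy, cx = ys.mean(), xs.mean()  # mask centroid
print(f'area: {area_px} px, centroid: ({cx:.1f}, {cy:.1f})')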
image_path = './data/radish/test2012/2012120179_20201212133724.png'
img = mmcv.imread(image_path, channel_order='rgb')  # reload a clean RGB copy
# draw the bbox; the image was loaded as RGB, so red is (255, 0, 0)
color = (255, 0, 0)  # red in RGB channel order
thickness = 2  # line thickness of 2 px
# mask2bbox gives (x0, y0, x1, y1), so the corners can be used directly
x0, y0 = int(bboxes[0]), int(bboxes[1])
x1 = int(bboxes[2] - 1)
y1 = int(bboxes[3] - 1)
print(f'x0:{x0}, y0:{y0}, x1:{x1}, y1:{y1}')
start_point, end_point = (x0, y0), (x1, y1)
print(start_point)
print(end_point)
mask_bboxs = cv2.rectangle(img, start_point, end_point, color, thickness)
plt.imshow(img)
# crop the bbox region out of the image
patch_bboxes = np.array(bboxes)
patch = mmcv.imcrop(img, patch_bboxes)
plt.imshow(patch)
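If later steps need the mask at the same crop, the same bbox can be applied to the binary mask; a short sketch (``mmcv.imcrop`` also handles single-channel arrays):
# crop the binary mask with the same bbox so it stays aligned with `patch`
mask_patch = mmcv.imcrop(masks[0].astype(np.uint8), patch_bboxes)
print(patch.shape[:2], mask_patch.shape[:2])  # spatial sizes should match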
mask = masks[0]
# zero out everything outside the mask;
# the image is RGB here (channel 0 = R), so name the channels accordingly
masked_r = img[:, :, 0] * mask
masked_g = img[:, :, 1] * mask
masked_b = img[:, :, 2] * mask
masked = np.concatenate([masked_r[:, :, None], masked_g[:, :, None], masked_b[:, :, None]], axis=2)
un_mask = 1 - mask
frame_r = img[:, :, 0] * un_mask
frame_g = img[:, :, 1] * un_mask
frame_b = img[:, :, 2] * un_mask
img = np.concatenate([frame_r[:, :, None], frame_g[:, :, None], frame_b[:, :, None]], axis=2).astype(np.uint8)
# turn the background gray while keeping 3 channels (the image is RGB, so use rgb2gray)
img = mmcv.rgb2gray(img, keepdim=True)
img = np.concatenate([img, img, img], axis=2)
# img += masked
plt.imshow(masked)
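The per-channel multiplications above can also be collapsed into a single broadcasted operation; an equivalent sketch on a fresh copy of the image:
# reload a clean RGB copy and apply the mask in one np.where call
img_clean = mmcv.imread(image_path, channel_order='rgb')
masked_alt = np.where(mask[:, :, None], img_clean, 0).astype(np.uint8)
plt.imshow(masked_alt)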
# rotation
# show the results
# visualizer.add_datasample(
# 'result',
# img,
# data_sample=result_retrain,
# draw_gt=False,
# wait_time=0,
# )
# visualizer.show()
The processed result is shown above.
Subsequent processing can then be performed on the isolated image.
This concludes the walkthrough of the ``mmdet.structures.DetDataSample`` data structure.