PointNet++ Object Classification Code
import torch.nn as nn
import torch.nn.functional as F
from pointnet_util import PointNetSetAbstraction
class get_model(nn.Module):
def __init__(self,num_class,normal_channel=True):
super(get_model, self).__init__()
in_channel = 6 if normal_channel else 3
self.normal_channel = normal_channel
# 512 = points sampled in farthest point sampling
# 0.2 = search radius in local region
# 32 = how many points in each local region
# [64,64,128] = output size for MLP on each point
# 3 = 3-dim coordinates
self.sa1 = PointNetSetAbstraction(npoint=512, radius=0.2, nsample=32, in_channel=in_channel, mlp=[64, 64, 128], group_all=False)
self.sa2 = PointNetSetAbstraction(npoint=128, radius=0.4, nsample=64, in_channel=128 + 3, mlp=[128, 128, 256], group_all=False)
self.sa3 = PointNetSetAbstraction(npoint=None, radius=None, nsample=None, in_channel=256 + 3, mlp=[256, 512, 1024], group_all=True)
# fc1 input:1024
self.fc1 = nn.Linear(1024, 512)
self.bn1 = nn.BatchNorm1d(512)
self.drop1 = nn.Dropout(0.4)
# fc2 input:512
self.fc2 = nn.Linear(512, 256)
self.bn2 = nn.BatchNorm1d(256)
self.drop2 = nn.Dropout(0.4)
# fc3 input:256
self.fc3 = nn.Linear(256, num_class)
def forward(self, xyz):
B, _, _ = xyz.shape
if self.normal_channel:
norm = xyz[:, 3:, :]
xyz = xyz[:, :3, :]
else:
norm = None
# l1_points: feature output of sa1
l1_xyz, l1_points = self.sa1(xyz, norm)
# l2_points: feature output of sa2
l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
# l3_points: feature output of sa3
l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)
x = l3_points.view(B, 1024)
x = self.drop1(F.relu(self.bn1(self.fc1(x))))
x = self.drop2(F.relu(self.bn2(self.fc2(x))))
x = self.fc3(x)
x = F.log_softmax(x, -1) # compute log-probabilities
return x, l3_points
class get_loss(nn.Module):
def __init__(self):
super(get_loss, self).__init__()
def forward(self, pred, target, trans_feat):
# NLLLoss takes a vector of log-probabilities and a target label; it does not compute the log-probabilities itself,
# so the last layer of the network should be log_softmax.
# nn.CrossEntropyLoss() is the same as NLLLoss() except that it applies the (log-)softmax for you (see the sketch after this class).
total_loss = F.nll_loss(pred, target)
return total_loss
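A minimal sketch of the conventions above, assuming random tensors instead of real ModelNet40 samples: it runs one forward pass of get_model and checks that F.nll_loss on log_softmax outputs matches F.cross_entropy on raw logits.
import torch
import torch.nn.functional as F

if __name__ == '__main__':
    model = get_model(num_class=40, normal_channel=True).eval()  # eval(): fixed BatchNorm stats for this toy batch
    xyz = torch.rand(8, 6, 1024)                  # (B, xyz + normals, N)
    with torch.no_grad():
        log_probs, l3_points = model(xyz)         # log_probs: (8, 40) log-probabilities
    target = torch.randint(0, 40, (8,))
    print(F.nll_loss(log_probs, target))          # what get_loss computes
    # the equivalence noted in the comments above: log_softmax + NLLLoss == CrossEntropyLoss on logits
    logits = torch.randn(8, 40)
    assert torch.allclose(F.nll_loss(F.log_softmax(logits, dim=-1), target),
                          F.cross_entropy(logits, target))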
PointNet++ Part Segmentation Code
import torch.nn as nn
import torch
import torch.nn.functional as F
from models.pointnet_util import PointNetSetAbstraction,PointNetFeaturePropagation
class get_model(nn.Module):
def __init__(self, num_classes, normal_channel=False): # num_part = 50
super(get_model, self).__init__()
if normal_channel:
additional_channel = 3
else:
additional_channel = 0
self.normal_channel = normal_channel
# Set Abstraction layers
self.sa1 = PointNetSetAbstraction(npoint=512, radius=0.2, nsample=32, in_channel=6+additional_channel, mlp=[64, 64, 128], group_all=False)
self.sa2 = PointNetSetAbstraction(npoint=128, radius=0.4, nsample=64, in_channel=128 + 3, mlp=[128, 128, 256], group_all=False)
self.sa3 = PointNetSetAbstraction(npoint=None, radius=None, nsample=None, in_channel=256 + 3, mlp=[256, 512, 1024], group_all=True)
# Feature Propagation layers
self.fp3 = PointNetFeaturePropagation(in_channel=1280, mlp=[256, 256])
self.fp2 = PointNetFeaturePropagation(in_channel=384, mlp=[256, 128])
self.fp1 = PointNetFeaturePropagation(in_channel=128+16+6+additional_channel, mlp=[128, 128, 128])
self.conv1 = nn.Conv1d(128, 128, 1)
self.bn1 = nn.BatchNorm1d(128)
self.drop1 = nn.Dropout(0.5)
self.conv2 = nn.Conv1d(128, num_classes, 1)
def forward(self, xyz, cls_label):
# Set Abstraction layers
B,C,N = xyz.shape
if self.normal_channel:
l0_points = xyz
l0_xyz = xyz[:,:3,:]
else:
l0_points = xyz
l0_xyz = xyz
l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)
# Feature Propagation layers
l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
cls_label_one_hot = cls_label.view(B,16,1).repeat(1,1,N)
l0_points = self.fp1(l0_xyz, l1_xyz, torch.cat([cls_label_one_hot,l0_xyz,l0_points],1), l1_points)
# FC layers
feat = F.relu(self.bn1(self.conv1(l0_points)))
x = self.drop1(feat)
x = self.conv2(x)
x = F.log_softmax(x, dim=1)
x = x.permute(0, 2, 1)
return x, l3_points
class get_loss(nn.Module):
def __init__(self):
super(get_loss, self).__init__()
def forward(self, pred, target, trans_feat):
total_loss = F.nll_loss(pred, target)
return total_loss
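A quick shape check with random tensors (not real ShapeNet data): the part-segmentation model takes points of shape (B, C, N) plus a (B, 16) one-hot object-class label and returns per-point log-probabilities of shape (B, N, num_part).
import torch

if __name__ == '__main__':
    num_part = 50
    model = get_model(num_part, normal_channel=False).eval()
    xyz = torch.rand(4, 3, 2048)                             # (B, 3, N): xyz only, no normals
    cls_label = torch.eye(16)[torch.randint(0, 16, (4,))]    # (B, 16) one-hot object category
    with torch.no_grad():
        seg_pred, l3_points = model(xyz, cls_label)
    print(seg_pred.shape)                                     # torch.Size([4, 2048, 50]) after the final permute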
PointNet++ Semantic Segmentation Code
import torch.nn as nn
import torch.nn.functional as F
from models.pointnet_util import PointNetSetAbstraction,PointNetFeaturePropagation
class get_model(nn.Module):
def __init__(self, num_classes):
super(get_model, self).__init__()
# Set Abstraction layers
self.sa1 = PointNetSetAbstraction(1024, 0.1, 32, 9 + 3, [32, 32, 64], False)
self.sa2 = PointNetSetAbstraction(256, 0.2, 32, 64 + 3, [64, 64, 128], False)
self.sa3 = PointNetSetAbstraction(64, 0.4, 32, 128 + 3, [128, 128, 256], False)
self.sa4 = PointNetSetAbstraction(16, 0.8, 32, 256 + 3, [256, 256, 512], False)
# Feature Propagation layers
self.fp4 = PointNetFeaturePropagation(768, [256, 256])
self.fp3 = PointNetFeaturePropagation(384, [256, 256])
self.fp2 = PointNetFeaturePropagation(320, [256, 128])
self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128])
self.conv1 = nn.Conv1d(128, 128, 1)
self.bn1 = nn.BatchNorm1d(128)
self.drop1 = nn.Dropout(0.5)
self.conv2 = nn.Conv1d(128, num_classes, 1)
def forward(self, xyz):
l0_points = xyz
l0_xyz = xyz[:,:3,:]
l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)
l4_xyz, l4_points = self.sa4(l3_xyz, l3_points)
l3_points = self.fp4(l3_xyz, l4_xyz, l3_points, l4_points)
l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
l0_points = self.fp1(l0_xyz, l1_xyz, None, l1_points)
x = self.drop1(F.relu(self.bn1(self.conv1(l0_points))))
x = self.conv2(x)
x = F.log_softmax(x, dim=1)
x = x.permute(0, 2, 1)
return x, l4_points
class get_loss(nn.Module):
def __init__(self):
super(get_loss, self).__init__()
def forward(self, pred, target, trans_feat, weight):
total_loss = F.nll_loss(pred, target, weight=weight)
return total_loss
if __name__ == '__main__':
import torch
model = get_model(13)
xyz = torch.rand(6, 9, 2048)
(model(xyz))
Object Classification DataLoader
import numpy as np
import warnings
import os
from torch.utils.data import Dataset
warnings.filterwarnings('ignore')
# ModelNet40: used to train shape classification over 40 categories. The training set has 9,843 point clouds and the test set has 2,468.
# Normalize the point cloud: center it at the centroid and scale it to unit radius.
def pc_normalize(pc):
centroid = np.mean(pc, axis=0)
pc = pc - centroid
m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
pc = pc / m
return pc
# farthest_point_sample performs farthest point sampling:
# it draws npoint points from the input cloud such that the selected points are spread as far apart as possible.
# This version returns the sampled points themselves, indexed out of the original cloud
# (a short usage sketch follows the function).
def farthest_point_sample(point, npoint):
"""
Input:
point: pointcloud data, [N, D]
npoint: number of samples
Return:
point: sampled pointcloud, [npoint, D]
"""
N, D = point.shape
xyz = point[:,:3]
centroids = np.zeros((npoint,))
distance = np.ones((N,)) * 1e10
farthest = np.random.randint(0, N)
for i in range(npoint):
centroids[i] = farthest
centroid = xyz[farthest, :]
dist = np.sum((xyz - centroid) ** 2, -1)
mask = dist < distance
distance[mask] = dist[mask]
farthest = np.argmax(distance, -1)
point = point[centroids.astype(np.int32)]
return point
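A small, illustrative call on random data (shapes are arbitrary): keep 128 well-spread points out of a 1024-point cloud.
import numpy as np
cloud = np.random.rand(1024, 6)              # xyz + normals
sampled = farthest_point_sample(cloud, 128)
print(sampled.shape)                          # (128, 6): the sampled points, not indices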
class ModelNetDataLoader(Dataset):
def __init__(self, root, npoint=1024, split='train', uniform=False, normal_channel=True, cache_size=15000):
self.root = root
self.npoints = npoint
self.uniform = uniform
self.catfile = os.path.join(self.root, 'modelnet40_shape_names.txt')
self.cat = [line.rstrip() for line in open(self.catfile)]
self.classes = dict(zip(self.cat, range(len(self.cat))))
self.normal_channel = normal_channel
shape_ids = {}
# rstrip() strips the given trailing characters from a string (whitespace by default)
shape_ids['train'] = [line.rstrip() for line in open(os.path.join(self.root, 'modelnet40_train.txt'))]
shape_ids['test'] = [line.rstrip() for line in open(os.path.join(self.root, 'modelnet40_test.txt'))]
assert (split == 'train' or split == 'test')
shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids[split]]
# list of (shape_name, shape_txt_file_path) tuple
self.datapath = [(shape_names[i], os.path.join(self.root, shape_names[i], shape_ids[split][i]) + '.txt') for i
in range(len(shape_ids[split]))]
print('The size of %s data is %d'%(split,len(self.datapath)))
self.cache_size = cache_size # how many data points to cache in memory
self.cache = {} # from index to (point_set, cls) tuple
def __len__(self):
return len(self.datapath)
def _get_item(self, index):
if index in self.cache:
point_set, cls = self.cache[index]
else:
fn = self.datapath[index]
cls = self.classes[self.datapath[index][0]]
cls = np.array([cls]).astype(np.int32)
point_set = np.loadtxt(fn[1], delimiter=',').astype(np.float32)
# sample npoints points from each shape to feed to the network
if self.uniform:
point_set = farthest_point_sample(point_set, self.npoints)
else:
point_set = point_set[0:self.npoints,:]
point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])
if not self.normal_channel:
point_set = point_set[:, 0:3]
if len(self.cache) < self.cache_size:
self.cache[index] = (point_set, cls)
return point_set, cls
def __getitem__(self, index):
return self._get_item(index)
if __name__ == '__main__':
import torch
data = ModelNetDataLoader('/data/modelnet40_normal_resampled/',split='train', uniform=False, normal_channel=True,)
DataLoader = torch.utils.data.DataLoader(data, batch_size=12, shuffle=True)
for point,label in DataLoader:
print(point.shape)
print(label.shape)
Part Segmentation DataLoader
# *_*coding:utf-8 *_*
import os
import json
import warnings
import numpy as np
from torch.utils.data import Dataset
warnings.filterwarnings('ignore')
# ShapeNet: used to train part segmentation.
# The training set has 14,007 point clouds and the test set has 2,874.
def pc_normalize(pc):
centroid = np.mean(pc, axis=0)
pc = pc - centroid
m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
pc = pc / m
return pc
class PartNormalDataset(Dataset):
def __init__(self,root = './data/shapenetcore_partanno_segmentation_benchmark_v0_normal', npoints=2500, split='train', class_choice=None, normal_channel=False):
self.npoints = npoints
self.root = root
self.catfile = os.path.join(self.root, 'synsetoffset2category.txt')
self.cat = {}
self.normal_channel = normal_channel
with open(self.catfile, 'r') as f:
for line in f:
ls = line.strip().split()
self.cat[ls[0]] = ls[1]
self.cat = {k: v for k, v in self.cat.items()}
self.classes_original = dict(zip(self.cat, range(len(self.cat))))
if not class_choice is None:
self.cat = {k:v for k,v in self.cat.items() if k in class_choice}
# print(self.cat)
self.meta = {}
with open(os.path.join(self.root, 'train_test_split', 'shuffled_train_file_list.json'), 'r') as f:
train_ids = set([str(d.split('/')[2]) for d in json.load(f)])
with open(os.path.join(self.root, 'train_test_split', 'shuffled_val_file_list.json'), 'r') as f:
val_ids = set([str(d.split('/')[2]) for d in json.load(f)])
with open(os.path.join(self.root, 'train_test_split', 'shuffled_test_file_list.json'), 'r') as f:
test_ids = set([str(d.split('/')[2]) for d in json.load(f)])
for item in self.cat:
# print('category', item)
self.meta[item] = []
dir_point = os.path.join(self.root, self.cat[item])
fns = sorted(os.listdir(dir_point))
# print(fns[0][0:-4])
if split == 'trainval':
fns = [fn for fn in fns if ((fn[0:-4] in train_ids) or (fn[0:-4] in val_ids))]
elif split == 'train':
fns = [fn for fn in fns if fn[0:-4] in train_ids]
elif split == 'val':
fns = [fn for fn in fns if fn[0:-4] in val_ids]
elif split == 'test':
fns = [fn for fn in fns if fn[0:-4] in test_ids]
else:
print('Unknown split: %s. Exiting..' % (split))
exit(-1)
# print(os.path.basename(fns))
for fn in fns:
token = (os.path.splitext(os.path.basename(fn))[0])
self.meta[item].append(os.path.join(dir_point, token + '.txt'))
self.datapath = []
for item in self.cat:
for fn in self.meta[item]:
self.datapath.append((item, fn))
self.classes = {}
for i in self.cat.keys():
self.classes[i] = self.classes_original[i]
# Mapping from category ('Chair') to a list of int [10,11,12,13] as segmentation labels
self.seg_classes = {'Earphone': [16, 17, 18], 'Motorbike': [30, 31, 32, 33, 34, 35], 'Rocket': [41, 42, 43],
'Car': [8, 9, 10, 11], 'Laptop': [28, 29], 'Cap': [6, 7], 'Skateboard': [44, 45, 46],
'Mug': [36, 37], 'Guitar': [19, 20, 21], 'Bag': [4, 5], 'Lamp': [24, 25, 26, 27],
'Table': [47, 48, 49], 'Airplane': [0, 1, 2, 3], 'Pistol': [38, 39, 40],
'Chair': [12, 13, 14, 15], 'Knife': [22, 23]}
# for cat in sorted(self.seg_classes.keys()):
# print(cat, self.seg_classes[cat])
self.cache = {} # from index to (point_set, cls, seg) tuple
self.cache_size = 20000
def __getitem__(self, index):
if index in self.cache:
point_set, cls, seg = self.cache[index]
else:
fn = self.datapath[index]
cat = self.datapath[index][0]
cls = self.classes[cat]
cls = np.array([cls]).astype(np.int32)
data = np.loadtxt(fn[1]).astype(np.float32)
if not self.normal_channel:
point_set = data[:, 0:3]
else:
point_set = data[:, 0:6]
seg = data[:, -1].astype(np.int32)
if len(self.cache) < self.cache_size:
self.cache[index] = (point_set, cls, seg)
point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])
choice = np.random.choice(len(seg), self.npoints, replace=True)
# resample
point_set = point_set[choice, :]
seg = seg[choice]
return point_set, cls, seg
def __len__(self):
return len(self.datapath)
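For symmetry with the other loaders, a minimal usage sketch; it assumes the ShapeNet part data has been downloaded to the default root used above.
if __name__ == '__main__':
    import torch
    dataset = PartNormalDataset(root='./data/shapenetcore_partanno_segmentation_benchmark_v0_normal',
                                npoints=2048, split='trainval', normal_channel=False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=12, shuffle=True)
    for points, cls, seg in loader:
        print(points.shape, cls.shape, seg.shape)  # (12, 2048, 3) (12, 1) (12, 2048)
        break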
Semantic Segmentation DataLoader
import os
import numpy as np
from torch.utils.data import Dataset
# S3DIS: used to train semantic segmentation. It is divided into 6 areas, each containing several room scenes.
# Each room is cut into 1 m x 1 m blocks, and 4096 points are sampled from each block.
class S3DISDataset(Dataset):
def __init__(self, split='train', data_root='trainval_fullarea', num_point=4096, test_area=5, block_size=1.0, sample_rate=1.0, transform=None):
super().__init__()
self.num_point = num_point
self.block_size = block_size
self.transform = transform
rooms = sorted(os.listdir(data_root))
rooms = [room for room in rooms if 'Area_' in room]
if split == 'train':
rooms_split = [room for room in rooms if not 'Area_{}'.format(test_area) in room]
else:
rooms_split = [room for room in rooms if 'Area_{}'.format(test_area) in room]
self.room_points, self.room_labels = [], []
self.room_coord_min, self.room_coord_max = [], []
num_point_all = []
labelweights = np.zeros(13)
for room_name in rooms_split:
room_path = os.path.join(data_root, room_name)
room_data = np.load(room_path) # xyzrgbl, N*7
points, labels = room_data[:, 0:6], room_data[:, 6] # xyzrgb, N*6; l, N
tmp, _ = np.histogram(labels, range(14))
labelweights += tmp
coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]
self.room_points.append(points), self.room_labels.append(labels)
self.room_coord_min.append(coord_min), self.room_coord_max.append(coord_max)
num_point_all.append(labels.size)
labelweights = labelweights.astype(np.float32)
labelweights = labelweights / np.sum(labelweights)
self.labelweights = np.power(np.amax(labelweights) / labelweights, 1 / 3.0)
print(self.labelweights)
sample_prob = num_point_all / np.sum(num_point_all)
num_iter = int(np.sum(num_point_all) * sample_rate / num_point)
room_idxs = []
for index in range(len(rooms_split)):
room_idxs.extend([index] * int(round(sample_prob[index] * num_iter)))
self.room_idxs = np.array(room_idxs)
print("Totally {} samples in {} set.".format(len(self.room_idxs), split))
def __getitem__(self, idx):
room_idx = self.room_idxs[idx]
points = self.room_points[room_idx] # N * 6
labels = self.room_labels[room_idx] # N
N_points = points.shape[0]
while (True):
center = points[np.random.choice(N_points)][:3]
block_min = center - [self.block_size / 2.0, self.block_size / 2.0, 0]
block_max = center + [self.block_size / 2.0, self.block_size / 2.0, 0]
point_idxs = np.where((points[:, 0] >= block_min[0]) & (points[:, 0] <= block_max[0]) & (points[:, 1] >= block_min[1]) & (points[:, 1] <= block_max[1]))[0]
if point_idxs.size > 1024:
break
if point_idxs.size >= self.num_point:
selected_point_idxs = np.random.choice(point_idxs, self.num_point, replace=False)
else:
selected_point_idxs = np.random.choice(point_idxs, self.num_point, replace=True)
# normalize
selected_points = points[selected_point_idxs, :] # num_point * 6
current_points = np.zeros((self.num_point, 9)) # num_point * 9
current_points[:, 6] = selected_points[:, 0] / self.room_coord_max[room_idx][0]
current_points[:, 7] = selected_points[:, 1] / self.room_coord_max[room_idx][1]
current_points[:, 8] = selected_points[:, 2] / self.room_coord_max[room_idx][2]
selected_points[:, 0] = selected_points[:, 0] - center[0]
selected_points[:, 1] = selected_points[:, 1] - center[1]
selected_points[:, 3:6] /= 255.0
current_points[:, 0:6] = selected_points
current_labels = labels[selected_point_idxs]
if self.transform is not None:
current_points, current_labels = self.transform(current_points, current_labels)
return current_points, current_labels
def __len__(self):
return len(self.room_idxs)
class ScannetDatasetWholeScene():
# prepare to give prediction on each points
def __init__(self, root, block_points=4096, split='test', test_area=5, stride=0.5, block_size=1.0, padding=0.001):
self.block_points = block_points
self.block_size = block_size
self.padding = padding
self.root = root
self.split = split
self.stride = stride
self.scene_points_num = []
assert split in ['train', 'test']
if self.split == 'train':
self.file_list = [d for d in os.listdir(root) if d.find('Area_%d' % test_area) == -1]
else:
self.file_list = [d for d in os.listdir(root) if d.find('Area_%d' % test_area) != -1]
self.scene_points_list = []
self.semantic_labels_list = []
self.room_coord_min, self.room_coord_max = [], []
for file in self.file_list:
data = np.load(root + file)
points = data[:, :3]
self.scene_points_list.append(data[:, :6])
self.semantic_labels_list.append(data[:, 6])
coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]
self.room_coord_min.append(coord_min), self.room_coord_max.append(coord_max)
assert len(self.scene_points_list) == len(self.semantic_labels_list)
labelweights = np.zeros(13)
for seg in self.semantic_labels_list:
tmp, _ = np.histogram(seg, range(14))
self.scene_points_num.append(seg.shape[0])
labelweights += tmp
labelweights = labelweights.astype(np.float32)
labelweights = labelweights / np.sum(labelweights)
self.labelweights = np.power(np.amax(labelweights) / labelweights, 1 / 3.0)
def __getitem__(self, index):
point_set_ini = self.scene_points_list[index]
points = point_set_ini[:,:6]
labels = self.semantic_labels_list[index]
coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]
grid_x = int(np.ceil(float(coord_max[0] - coord_min[0] - self.block_size) / self.stride) + 1)
grid_y = int(np.ceil(float(coord_max[1] - coord_min[1] - self.block_size) / self.stride) + 1)
data_room, label_room, sample_weight, index_room = np.array([]), np.array([]), np.array([]), np.array([])
for index_y in range(0, grid_y):
for index_x in range(0, grid_x):
s_x = coord_min[0] + index_x * self.stride
e_x = min(s_x + self.block_size, coord_max[0])
s_x = e_x - self.block_size
s_y = coord_min[1] + index_y * self.stride
e_y = min(s_y + self.block_size, coord_max[1])
s_y = e_y - self.block_size
point_idxs = np.where(
(points[:, 0] >= s_x - self.padding) & (points[:, 0] <= e_x + self.padding) & (points[:, 1] >= s_y - self.padding) & (
points[:, 1] <= e_y + self.padding))[0]
if point_idxs.size == 0:
continue
num_batch = int(np.ceil(point_idxs.size / self.block_points))
point_size = int(num_batch * self.block_points)
replace = False if (point_size - point_idxs.size <= point_idxs.size) else True
point_idxs_repeat = np.random.choice(point_idxs, point_size - point_idxs.size, replace=replace)
point_idxs = np.concatenate((point_idxs, point_idxs_repeat))
np.random.shuffle(point_idxs)
data_batch = points[point_idxs, :]
normlized_xyz = np.zeros((point_size, 3))
normlized_xyz[:, 0] = data_batch[:, 0] / coord_max[0]
normlized_xyz[:, 1] = data_batch[:, 1] / coord_max[1]
normlized_xyz[:, 2] = data_batch[:, 2] / coord_max[2]
data_batch[:, 0] = data_batch[:, 0] - (s_x + self.block_size / 2.0)
data_batch[:, 1] = data_batch[:, 1] - (s_y + self.block_size / 2.0)
data_batch[:, 3:6] /= 255.0
data_batch = np.concatenate((data_batch, normlized_xyz), axis=1)
label_batch = labels[point_idxs].astype(int)
batch_weight = self.labelweights[label_batch]
data_room = np.vstack([data_room, data_batch]) if data_room.size else data_batch
label_room = np.hstack([label_room, label_batch]) if label_room.size else label_batch
sample_weight = np.hstack([sample_weight, batch_weight]) if label_room.size else batch_weight
index_room = np.hstack([index_room, point_idxs]) if index_room.size else point_idxs
data_room = data_room.reshape((-1, self.block_points, data_room.shape[1]))
label_room = label_room.reshape((-1, self.block_points))
sample_weight = sample_weight.reshape((-1, self.block_points))
index_room = index_room.reshape((-1, self.block_points))
return data_room, label_room, sample_weight, index_room
def __len__(self):
return len(self.scene_points_list)
if __name__ == '__main__':
data_root = '/data/yxu/PointNonLocal/data/stanford_indoor3d/'
num_point, test_area, block_size, sample_rate = 4096, 5, 1.0, 0.01
point_data = S3DISDataset(split='train', data_root=data_root, num_point=num_point, test_area=test_area, block_size=block_size, sample_rate=sample_rate, transform=None)
print('point data size:', point_data.__len__())
print('point data 0 shape:', point_data.__getitem__(0)[0].shape)
print('point label 0 shape:', point_data.__getitem__(0)[1].shape)
import torch, time, random
manual_seed = 123
random.seed(manual_seed)
np.random.seed(manual_seed)
torch.manual_seed(manual_seed)
torch.cuda.manual_seed_all(manual_seed)
def worker_init_fn(worker_id):
random.seed(manual_seed + worker_id)
train_loader = torch.utils.data.DataLoader(point_data, batch_size=16, shuffle=True, num_workers=16, pin_memory=True, worker_init_fn=worker_init_fn)
for idx in range(4):
end = time.time()
for i, (input, target) in enumerate(train_loader):
print('time: {}/{}--{}'.format(i+1, len(train_loader), time.time() - end))
end = time.time()
Data Augmentation
import numpy as np
# Normalize batch_data, using block coordinates centered at the centroid
def normalize_data(batch_data):
""" Normalize the batch data, use coordinates of the block centered at origin,
Input:
BxNxC array
Output:
BxNxC array
"""
B, N, C = batch_data.shape
normal_data = np.zeros((B, N, C))
for b in range(B):
pc = batch_data[b]
centroid = np.mean(pc, axis=0)
pc = pc - centroid
m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
pc = pc / m
normal_data[b] = pc
return normal_data
# Shuffle the data together with the corresponding labels
def shuffle_data(data, labels):
""" Shuffle data and labels.
Input:
data: B,N,... numpy array
label: B,... numpy array
Return:
shuffled data, label and shuffle indices
"""
# arange builds the index sequence 0..len(labels)-1, i.e. one index per label
idx = np.arange(len(labels))
# shuffle idx in place
np.random.shuffle(idx)
return data[idx, ...], labels[idx], idx
# Shuffle the order of points inside each point cloud -- changes FPS behavior. The same shuffled index idx is used for the whole batch.
def shuffle_points(batch_data):
""" Shuffle orders of points in each point cloud -- changes FPS behavior.
Use the same shuffling idx for the entire batch.
Input:
BxNxC array
Output:
BxNxC array
"""
idx = np.arange(batch_data.shape[1])
np.random.shuffle(idx)
return batch_data[:,idx,:]
# Randomly rotate the point clouds for data augmentation; each shape is rotated about the up axis
def rotate_point_cloud(batch_data):
""" Randomly rotate the point clouds to augument the dataset
rotation is per shape based along up direction
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, rotated batch of point clouds
"""
# allocate an all-zero array with the same shape as batch_data
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
# draw a random number in [0, 1) and scale it by 2*pi to get the rotation angle
rotation_angle = np.random.uniform() * 2 * np.pi
# cosine and sine of that angle
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
# assemble the 3x3 rotation matrix (rotation about the y / up axis)
rotation_matrix = np.array([[cosval, 0, sinval],
[0, 1, 0],
[-sinval, 0, cosval]])
# shape_pc is the k-th point cloud in the batch, an N x 3 array
shape_pc = batch_data[k, ...]
# rotate the points by multiplying with the rotation matrix
rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
return rotated_data
# Rotate the point clouds about the z axis for data augmentation
def rotate_point_cloud_z(batch_data):
""" Randomly rotate the point clouds to augument the dataset
rotation is per shape based along up direction
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, rotated batch of point clouds
"""
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
rotation_angle = np.random.uniform() * 2 * np.pi
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
rotation_matrix = np.array([[cosval, sinval, 0],
[-sinval, cosval, 0],
[0, 0, 1]])
shape_pc = batch_data[k, ...]
rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
return rotated_data
# Rotate point clouds that carry normal vectors, for data augmentation
def rotate_point_cloud_with_normal(batch_xyz_normal):
''' Randomly rotate XYZ, normal point cloud.
Input:
batch_xyz_normal: B,N,6, first three channels are XYZ, last 3 all normal
Output:
B,N,6, rotated XYZ, normal point cloud
'''
for k in range(batch_xyz_normal.shape[0]):
rotation_angle = np.random.uniform() * 2 * np.pi
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
rotation_matrix = np.array([[cosval, 0, sinval],
[0, 1, 0],
[-sinval, 0, cosval]])
shape_pc = batch_xyz_normal[k,:,0:3]
shape_normal = batch_xyz_normal[k,:,3:6]
batch_xyz_normal[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
batch_xyz_normal[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix)
return batch_xyz_normal
# Randomly perturb the point clouds with small rotations
def rotate_perturbation_point_cloud_with_normal(batch_data, angle_sigma=0.06, angle_clip=0.18):
""" Randomly perturb the point clouds by small rotations
Input:
BxNx6 array, original batch of point clouds and point normals
Return:
BxNx6 array, rotated batch of point clouds and point normals
"""
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip)
Rx = np.array([[1,0,0],
[0,np.cos(angles[0]),-np.sin(angles[0])],
[0,np.sin(angles[0]),np.cos(angles[0])]])
Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])],
[0,1,0],
[-np.sin(angles[1]),0,np.cos(angles[1])]])
Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0],
[np.sin(angles[2]),np.cos(angles[2]),0],
[0,0,1]])
R = np.dot(Rz, np.dot(Ry,Rx))
shape_pc = batch_data[k,:,0:3]
shape_normal = batch_data[k,:,3:6]
rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), R)
rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), R)
return rotated_data
# Rotate the point clouds about the up axis by a given angle
def rotate_point_cloud_by_angle(batch_data, rotation_angle):
""" Rotate the point cloud along up direction with certain angle.
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, rotated batch of point clouds
"""
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
#rotation_angle = np.random.uniform() * 2 * np.pi
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
rotation_matrix = np.array([[cosval, 0, sinval],
[0, 1, 0],
[-sinval, 0, cosval]])
shape_pc = batch_data[k,:,0:3]
rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
return rotated_data
# Rotate point clouds with normals about the up axis by a given angle
def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle):
""" Rotate the point cloud along up direction with certain angle.
Input:
BxNx6 array, original batch of point clouds with normal
scalar, angle of rotation
Return:
BxNx6 array, rotated batch of point clouds with normal
"""
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
#rotation_angle = np.random.uniform() * 2 * np.pi
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
rotation_matrix = np.array([[cosval, 0, sinval],
[0, 1, 0],
[-sinval, 0, cosval]])
shape_pc = batch_data[k,:,0:3]
shape_normal = batch_data[k,:,3:6]
rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1,3)), rotation_matrix)
return rotated_data
# Randomly perturb the point clouds with small rotations
def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18):
""" Randomly perturb the point clouds by small rotations
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, rotated batch of point clouds
"""
rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
for k in range(batch_data.shape[0]):
angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip)
Rx = np.array([[1,0,0],
[0,np.cos(angles[0]),-np.sin(angles[0])],
[0,np.sin(angles[0]),np.cos(angles[0])]])
Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])],
[0,1,0],
[-np.sin(angles[1]),0,np.cos(angles[1])]])
Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0],
[np.sin(angles[2]),np.cos(angles[2]),0],
[0,0,1]])
R = np.dot(Rz, np.dot(Ry,Rx))
shape_pc = batch_data[k, ...]
rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), R)
return rotated_data
# Randomly jitter points; jittering is applied independently to each point
def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05):
""" Randomly jitter points. jittering is per point.
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, jittered batch of point clouds
"""
B, N, C = batch_data.shape
assert(clip > 0)
# add Gaussian noise, clipped to [-clip, clip], to batch_data
jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1*clip, clip)
jittered_data += batch_data
return jittered_data
# Randomly shift the point clouds; the shift is per point cloud
def shift_point_cloud(batch_data, shift_range=0.1):
""" Randomly shift point cloud. Shift is per point cloud.
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, shifted batch of point clouds
"""
B, N, C = batch_data.shape
shifts = np.random.uniform(-shift_range, shift_range, (B,3))
for batch_index in range(B):
batch_data[batch_index,:,:] += shifts[batch_index,:]
return batch_data
# Randomly scale the point clouds; the scale is per point cloud
def random_scale_point_cloud(batch_data, scale_low=0.8, scale_high=1.25):
""" Randomly scale the point cloud. Scale is per point cloud.
Input:
BxNx3 array, original batch of point clouds
Return:
BxNx3 array, scaled batch of point clouds
"""
B, N, C = batch_data.shape
scales = np.random.uniform(scale_low, scale_high, B)
for batch_index in range(B):
batch_data[batch_index,:,:] *= scales[batch_index]
return batch_data
# Randomly drop points from each point cloud
def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
''' batch_pc: BxNx3 '''
for b in range(batch_pc.shape[0]):
dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875
drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0]
if len(drop_idx)>0:
batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point
return batch_pc
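The training scripts below chain several of these functions on a numpy batch before converting back to a torch tensor; a condensed sketch of that pattern, using random data in place of a real batch:
import numpy as np
batch = np.random.rand(16, 1024, 6).astype(np.float32)    # (B, N, xyz + normals)
batch = random_point_dropout(batch)                        # randomly collapse some points onto the first point
batch[:, :, 0:3] = random_scale_point_cloud(batch[:, :, 0:3])
batch[:, :, 0:3] = shift_point_cloud(batch[:, :, 0:3])
batch[:, :, 0:3] = jitter_point_cloud(batch[:, :, 0:3])
print(batch.shape)                                         # still (16, 1024, 6)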
Object Classification
Training Code
"""
Author: Benny
Date: Nov 2019
"""
from data_utils.ModelNetDataLoader import ModelNetDataLoader
import argparse # Python's built-in command-line argument parsing module; no installation needed
import numpy as np
import os
import torch
import datetime
import logging # logging module
from pathlib import Path
from tqdm import tqdm
import sys
import provider
import importlib
import shutil
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # '/home/bai/Pointnet_Pointnet2_pytorch'
ROOT_DIR = BASE_DIR # '/home/bai/Pointnet_Pointnet2_pytorch'
sys.path.append(os.path.join(ROOT_DIR, 'models'))
def parse_args(): # parse command-line arguments
'''PARAMETERS'''
# create the argument parser object
parser = argparse.ArgumentParser('PointNet')
# register arguments on the parser, e.g. parser.add_argument("--aa")
parser.add_argument('--batch_size', type=int, default=24, help='batch size in training [default: 24]')
parser.add_argument('--model', default='pointnet_cls', help='model name [default: pointnet_cls]')
parser.add_argument('--epoch', default=200, type=int, help='number of epoch in training [default: 200]')
parser.add_argument('--learning_rate', default=0.001, type=float, help='learning rate in training [default: 0.001]')
parser.add_argument('--gpu', type=str, default='0', help='specify gpu device [default: 0]')
parser.add_argument('--num_point', type=int, default=1024, help='Point Number [default: 1024]')
parser.add_argument('--optimizer', type=str, default='Adam', help='optimizer for training [default: Adam]')
parser.add_argument('--log_dir', type=str, default=None, help='experiment root')
parser.add_argument('--decay_rate', type=float, default=1e-4, help='decay rate [default: 1e-4]')
parser.add_argument('--normal', action='store_true', default=False, help='Whether to use normal information [default: False]')
# parse the command line and return the parsed arguments
return parser.parse_args()
def test(model, loader, num_class=40):
mean_correct = []
class_acc = np.zeros((num_class,3))
for j, data in tqdm(enumerate(loader), total=len(loader)):
points, target = data
target = target[:, 0]
points = points.transpose(2, 1)
points, target = points.cuda(), target.cuda()
classifier = model.eval()
pred, _ = classifier(points)
pred_choice = pred.data.max(1)[1]
for cat in np.unique(target.cpu()):
classacc = pred_choice[target==cat].eq(target[target==cat].long().data).cpu().sum()
class_acc[cat,0]+= classacc.item()/float(points[target==cat].size()[0])
class_acc[cat,1]+=1
correct = pred_choice.eq(target.long().data).cpu().sum()
mean_correct.append(correct.item()/float(points.size()[0]))
class_acc[:,2] = class_acc[:,0]/ class_acc[:,1]
class_acc = np.mean(class_acc[:,2])
instance_acc = np.mean(mean_correct)
return instance_acc, class_acc
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
'''CREATE DIR'''
timestr = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
experiment_dir = Path('./log/')
experiment_dir.mkdir(exist_ok=True)
experiment_dir = experiment_dir.joinpath('classification')
experiment_dir.mkdir(exist_ok=True)
if args.log_dir is None:
experiment_dir = experiment_dir.joinpath(timestr)
else:
experiment_dir = experiment_dir.joinpath(args.log_dir)
# 'log/classification/pointnet2_cls_msg'
experiment_dir.mkdir(exist_ok=True)
checkpoints_dir = experiment_dir.joinpath('checkpoints/')
# 'log/classification/pointnet2_cls_msg/checkpoints'
checkpoints_dir.mkdir(exist_ok=True)
log_dir = experiment_dir.joinpath('logs/')
# 'log/classification/pointnet2_cls_msg/logs'
log_dir.mkdir(exist_ok=True)
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
'''DATA LOADING'''
log_string('Load dataset ...')
DATA_PATH = 'data/modelnet40_normal_resampled/'
TRAIN_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=args.num_point, split='train',
normal_channel=args.normal)
# training set: 9,843 samples
TEST_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=args.num_point, split='test',
normal_channel=args.normal)
# test set: 2,468 samples
trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=args.batch_size, shuffle=True, num_workers=4)
testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size, shuffle=False, num_workers=4)
'''MODEL LOADING'''
# number of classification categories
num_class = 40
# import network module
MODEL = importlib.import_module(args.model)
shutil.copy('./models/%s.py' % args.model, str(experiment_dir))
shutil.copy('./models/pointnet_util.py', str(experiment_dir))
classifier = MODEL.get_model(num_class,normal_channel=args.normal).cuda()
criterion = MODEL.get_loss().cuda()
try:
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
start_epoch = checkpoint['epoch']
classifier.load_state_dict(checkpoint['model_state_dict'])
log_string('Use pretrain model')
except:
log_string('No existing model, starting training from scratch...')
start_epoch = 0
if args.optimizer == 'Adam':
optimizer = torch.optim.Adam(
classifier.parameters(),
lr=args.learning_rate,
betas=(0.9, 0.999),
eps=1e-08,
weight_decay=args.decay_rate
)
else:
optimizer = torch.optim.SGD(classifier.parameters(), lr=0.01, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.7)
global_epoch = 0
global_step = 0
best_instance_acc = 0.0
best_class_acc = 0.0
mean_correct = []
'''TRAINING'''
logger.info('Start training...')
for epoch in range(start_epoch,args.epoch):
log_string('Epoch %d (%d/%s):' % (global_epoch + 1, epoch + 1, args.epoch))
# optimizer.step() is usually called once per mini-batch, while scheduler.step() is usually called once per epoch,
# but this is not a hard rule and can be adapted to the task at hand.
# Only optimizer.step() actually updates the model weights; scheduler.step() just adjusts the learning rate.
scheduler.step()
for batch_id, data in tqdm(enumerate(trainDataLoader, 0), total=len(trainDataLoader), smoothing=0.9):
points, target = data # data: (B,1024,6)
points = points.data.numpy()
# point cloud preprocessing: data augmentation
points = provider.random_point_dropout(points)
points[:,:, 0:3] = provider.random_scale_point_cloud(points[:,:, 0:3]) # (B,1024,6)
points[:,:, 0:3] = provider.shift_point_cloud(points[:,:, 0:3])
points = torch.Tensor(points)
target = target[:, 0] # B
points = points.transpose(2, 1) # (B,6,1024)
points, target = points.cuda(), target.cuda() # target shape: B
optimizer.zero_grad()
# train the classifier
classifier = classifier.train()
pred, trans_feat = classifier(points) # pred:(B,40); trans_feat: (B, 1024,1)
loss = criterion(pred, target.long(), trans_feat)
pred_choice = pred.data.max(1)[1] # pred_choice shape: B
correct = pred_choice.eq(target.long().data).cpu().sum()
mean_correct.append(correct.item() / float(points.size()[0])) # denominator is the batch size B
loss.backward() # backpropagation (gradient computation)
optimizer.step() # update the weights
global_step += 1
train_instance_acc = np.mean(mean_correct)
log_string('Train Instance Accuracy: %f' % train_instance_acc)
# evaluation
with torch.no_grad():
instance_acc, class_acc = test(classifier.eval(), testDataLoader)
if (instance_acc >= best_instance_acc):
best_instance_acc = instance_acc
best_epoch = epoch + 1
if (class_acc >= best_class_acc):
best_class_acc = class_acc
log_string('Test Instance Accuracy: %f, Class Accuracy: %f'% (instance_acc, class_acc))
log_string('Best Instance Accuracy: %f, Class Accuracy: %f'% (best_instance_acc, best_class_acc))
if (instance_acc >= best_instance_acc):
logger.info('Save model...')
savepath = str(checkpoints_dir) + '/best_model.pth'
log_string('Saving at %s'% savepath)
state = {
'epoch': best_epoch,
'instance_acc': instance_acc,
'class_acc': class_acc,
'model_state_dict': classifier.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}
# save the model checkpoint
torch.save(state, savepath)
global_epoch += 1
logger.info('End of training...')
if __name__ == '__main__':
args = parse_args()
main(args)
Test Code
"""
Author: Benny
Date: Nov 2019
"""
from data_utils.ModelNetDataLoader import ModelNetDataLoader
import argparse # Python's built-in command-line argument parsing module; no installation needed
import numpy as np
import os
import torch
import logging # logging module
from tqdm import tqdm # progress-bar module
import sys
import importlib
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # '/home/bai/Pointnet_Pointnet2_pytorch'
ROOT_DIR = BASE_DIR # '/home/bai/Pointnet_Pointnet2_pytorch'
sys.path.append(os.path.join(ROOT_DIR, 'models'))
def parse_args(): # parse command-line arguments
'''PARAMETERS'''
# create the argument parser object
parser = argparse.ArgumentParser('PointNet')
# register arguments on the parser, e.g. parser.add_argument("--aa")
parser.add_argument('--batch_size', type=int, default=24, help='batch size in training')
parser.add_argument('--gpu', type=str, default='0', help='specify gpu device')
parser.add_argument('--num_point', type=int, default=1024, help='Point Number [default: 1024]')
parser.add_argument('--log_dir', type=str, default='pointnet2_ssg_normal', help='Experiment root')
parser.add_argument('--normal', action='store_true', default=True, help='Whether to use normal information [default: False]')
parser.add_argument('--num_votes', type=int, default=3, help='Aggregate classification scores with voting [default: 3]')
# parse the command line and return the parsed arguments
return parser.parse_args()
def test(model, loader, num_class=40, vote_num=1):
mean_correct = []
class_acc = np.zeros((num_class,3)) # (40,3)
for j, data in tqdm(enumerate(loader), total=len(loader)):
points, target = data
target = target[:, 0]
points = points.transpose(2, 1)
points, target = points.cuda(), target.cuda() # both tensors have the default batch size (24) in their first dimension
classifier = model.eval() # disable BatchNorm updates and Dropout at test time
vote_pool = torch.zeros(target.size()[0],num_class).cuda()
for _ in range(vote_num): # default: 3
pred, _ = classifier(points)
vote_pool += pred
pred = vote_pool/vote_num # average over the vote_num forward passes
pred_choice = pred.data.max(1)[1] # pred_choice shape: (24)
for cat in np.unique(target.cpu()):
# classacc tensor(B)
# per-class accuracy
classacc = pred_choice[target==cat].eq(target[target==cat].long().data).cpu().sum()
class_acc[cat,0]+= classacc.item()/float(points[target==cat].size()[0])
class_acc[cat,1]+=1
correct = pred_choice.eq(target.long().data).cpu().sum()
mean_correct.append(correct.item()/float(points.size()[0]))
# class-averaged accuracy
class_acc[:,2] = class_acc[:,0]/ class_acc[:,1]
class_acc = np.mean(class_acc[:,2])
# instance accuracy
instance_acc = np.mean(mean_correct) # mean_correct list(103); 2468/24=102.83333
return instance_acc, class_acc # both are floats
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
'''CREATE DIR'''
experiment_dir = 'log/classification/' + args.log_dir
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/eval.txt' % experiment_dir)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
'''DATA LOADING'''
log_string('Load dataset ...')
DATA_PATH = 'data/modelnet40_normal_resampled/'
TEST_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=args.num_point, split='test', normal_channel=args.normal)
testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size, shuffle=False, num_workers=4)
'''MODEL LOADING'''
num_class = 40
model_name = os.listdir(experiment_dir+'/logs')[0].split('.')[0]
MODEL = importlib.import_module(model_name)
classifier = MODEL.get_model(num_class,normal_channel=args.normal).cuda()
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
classifier.load_state_dict(checkpoint['model_state_dict'])
with torch.no_grad():
instance_acc, class_acc = test(classifier.eval(), testDataLoader, vote_num=args.num_votes)
log_string('Test Instance Accuracy: %f, Class Accuracy: %f' % (instance_acc, class_acc))
if __name__ == '__main__':
args = parse_args()
main(args)
Part Segmentation
Training Code
"""
Author: Benny
Date: Nov 2019
"""
import argparse
import os
from data_utils.ShapeNetDataLoader import PartNormalDataset
import torch
import datetime
import logging
from pathlib import Path
import sys
import importlib
import shutil
from tqdm import tqdm
import provider
import numpy as np
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = BASE_DIR
sys.path.append(os.path.join(ROOT_DIR, 'models'))
seg_classes = {'Earphone': [16, 17, 18], 'Motorbike': [30, 31, 32, 33, 34, 35], 'Rocket': [41, 42, 43], 'Car': [8, 9, 10, 11], 'Laptop': [28, 29], 'Cap': [6, 7], 'Skateboard': [44, 45, 46], 'Mug': [36, 37], 'Guitar': [19, 20, 21], 'Bag': [4, 5], 'Lamp': [24, 25, 26, 27], 'Table': [47, 48, 49], 'Airplane': [0, 1, 2, 3], 'Pistol': [38, 39, 40], 'Chair': [12, 13, 14, 15], 'Knife': [22, 23]}
seg_label_to_cat = {} # {0:Airplane, 1:Airplane, ...49:Table}
for cat in seg_classes.keys():
for label in seg_classes[cat]:
seg_label_to_cat[label] = cat
def to_categorical(y, num_classes):
""" 1-hot encodes a tensor """
new_y = torch.eye(num_classes)[y.cpu().data.numpy(),]
if (y.is_cuda):
return new_y.cuda()
return new_y
def parse_args():
parser = argparse.ArgumentParser('Model')
parser.add_argument('--model', type=str, default='pointnet2_part_seg_msg', help='model name [default: pointnet2_part_seg_msg]')
parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]')
parser.add_argument('--epoch', default=251, type=int, help='Epoch to run [default: 251]')
parser.add_argument('--learning_rate', default=0.001, type=float, help='Initial learning rate [default: 0.001]')
parser.add_argument('--gpu', type=str, default='0', help='GPU to use [default: GPU 0]')
parser.add_argument('--optimizer', type=str, default='Adam', help='Adam or SGD [default: Adam]')
parser.add_argument('--log_dir', type=str, default=None, help='Log path [default: None]')
parser.add_argument('--decay_rate', type=float, default=1e-4, help='weight decay [default: 1e-4]')
parser.add_argument('--npoint', type=int, default=2048, help='Point Number [default: 2048]')
parser.add_argument('--normal', action='store_true', default=False, help='Whether to use normal information [default: False]')
parser.add_argument('--step_size', type=int, default=20, help='Decay step for lr decay [default: every 20 epochs]')
parser.add_argument('--lr_decay', type=float, default=0.5, help='Decay rate for lr decay [default: 0.5]')
return parser.parse_args()
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
'''CREATE DIR'''
timestr = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
experiment_dir = Path('./log/')
experiment_dir.mkdir(exist_ok=True)
experiment_dir = experiment_dir.joinpath('part_seg')
experiment_dir.mkdir(exist_ok=True)
if args.log_dir is None:
experiment_dir = experiment_dir.joinpath(timestr)
else:
experiment_dir = experiment_dir.joinpath(args.log_dir)
experiment_dir.mkdir(exist_ok=True)
checkpoints_dir = experiment_dir.joinpath('checkpoints/')
checkpoints_dir.mkdir(exist_ok=True)
log_dir = experiment_dir.joinpath('logs/')
log_dir.mkdir(exist_ok=True)
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
root = 'data/shapenetcore_partanno_segmentation_benchmark_v0_normal/'
TRAIN_DATASET = PartNormalDataset(root = root, npoints=args.npoint, split='trainval', normal_channel=args.normal)
trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=args.batch_size,shuffle=True, num_workers=4)
TEST_DATASET = PartNormalDataset(root = root, npoints=args.npoint, split='test', normal_channel=args.normal)
testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size,shuffle=False, num_workers=4)
log_string("The number of training data is: %d" % len(TRAIN_DATASET))
log_string("The number of test data is: %d" % len(TEST_DATASET))
num_classes = 16
num_part = 50
'''MODEL LOADING'''
MODEL = importlib.import_module(args.model)
shutil.copy('models/%s.py' % args.model, str(experiment_dir))
shutil.copy('models/pointnet_util.py', str(experiment_dir))
classifier = MODEL.get_model(num_part, normal_channel=args.normal).cuda()
criterion = MODEL.get_loss().cuda()
def weights_init(m): # weight initialization
classname = m.__class__.__name__
if classname.find('Conv2d') != -1:
torch.nn.init.xavier_normal_(m.weight.data)
torch.nn.init.constant_(m.bias.data, 0.0)
elif classname.find('Linear') != -1:
torch.nn.init.xavier_normal_(m.weight.data)
torch.nn.init.constant_(m.bias.data, 0.0)
try:
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
start_epoch = checkpoint['epoch']
classifier.load_state_dict(checkpoint['model_state_dict'])
log_string('Use pretrain model')
except:
log_string('No existing model, starting training from scratch...')
start_epoch = 0
classifier = classifier.apply(weights_init)
if args.optimizer == 'Adam': # Adam optimizer
optimizer = torch.optim.Adam(
classifier.parameters(),
lr=args.learning_rate,
betas=(0.9, 0.999),
eps=1e-08,
weight_decay=args.decay_rate
)
else:
optimizer = torch.optim.SGD(classifier.parameters(), lr=args.learning_rate, momentum=0.9)
def bn_momentum_adjust(m, momentum):
if isinstance(m, torch.nn.BatchNorm2d) or isinstance(m, torch.nn.BatchNorm1d):
m.momentum = momentum
LEARNING_RATE_CLIP = 1e-5
MOMENTUM_ORIGINAL = 0.1
MOMENTUM_DECCAY = 0.5
MOMENTUM_DECCAY_STEP = args.step_size
best_acc = 0
global_epoch = 0
best_class_avg_iou = 0
best_inctance_avg_iou = 0
for epoch in range(start_epoch,args.epoch):
log_string('Epoch %d (%d/%s):' % (global_epoch + 1, epoch + 1, args.epoch))
'''Adjust learning rate and BN momentum'''
lr = max(args.learning_rate * (args.lr_decay ** (epoch // args.step_size)), LEARNING_RATE_CLIP)
log_string('Learning rate:%f' % lr)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
mean_correct = []
momentum = MOMENTUM_ORIGINAL * (MOMENTUM_DECCAY ** (epoch // MOMENTUM_DECCAY_STEP))
if momentum < 0.01:
momentum = 0.01
print('BN momentum updated to: %f' % momentum)
classifier = classifier.apply(lambda x: bn_momentum_adjust(x,momentum))
'''learning one epoch'''
for i, data in tqdm(enumerate(trainDataLoader), total=len(trainDataLoader), smoothing=0.9):
points, label, target = data
points = points.data.numpy()
# data augmentation
points[:,:, 0:3] = provider.random_scale_point_cloud(points[:,:, 0:3])
points[:,:, 0:3] = provider.shift_point_cloud(points[:,:, 0:3])
points = torch.Tensor(points)
points, label, target = points.float().cuda(),label.long().cuda(), target.long().cuda()
points = points.transpose(2, 1)
optimizer.zero_grad()
classifier = classifier.train() # switch to training mode
seg_pred, trans_feat = classifier(points, to_categorical(label, num_classes))
seg_pred = seg_pred.contiguous().view(-1, num_part)
target = target.view(-1, 1)[:, 0]
pred_choice = seg_pred.data.max(1)[1]
correct = pred_choice.eq(target.data).cpu().sum()
mean_correct.append(correct.item() / (args.batch_size * args.npoint))
loss = criterion(seg_pred, target, trans_feat) # compute the loss
loss.backward() # backpropagation
optimizer.step() # update the parameters
train_instance_acc = np.mean(mean_correct)
log_string('Train accuracy is: %.5f' % train_instance_acc)
with torch.no_grad():
test_metrics = {}
total_correct = 0
total_seen = 0
total_seen_class = [0 for _ in range(num_part)] # list of 50 zeros
total_correct_class = [0 for _ in range(num_part)] # list of 50 zeros
shape_ious = {cat: [] for cat in seg_classes.keys()} # dict over the 16 categories
seg_label_to_cat = {} # {0:Airplane, 1:Airplane, ...49:Table}
for cat in seg_classes.keys(): # 16 object categories
for label in seg_classes[cat]: # 50 part labels
seg_label_to_cat[label] = cat
for batch_id, (points, label, target) in tqdm(enumerate(testDataLoader), total=len(testDataLoader), smoothing=0.9):
cur_batch_size, NUM_POINT, _ = points.size() # cur_batch_size:24 NUM_POINT:2048
# points, label, target: batched tensors with batch size 24
points, label, target = points.float().cuda(), label.long().cuda(), target.long().cuda()
points = points.transpose(2, 1)
classifier = classifier.eval()
seg_pred, _ = classifier(points, to_categorical(label, num_classes))
cur_pred_val = seg_pred.cpu().data.numpy() # (24,2048,50)
cur_pred_val_logits = cur_pred_val
cur_pred_val = np.zeros((cur_batch_size, NUM_POINT)).astype(np.int32)
target = target.cpu().data.numpy() # (24,2048)
for i in range(cur_batch_size): # cur_batch_size = 24
cat = seg_label_to_cat[target[i, 0]] # category string, e.g. 'Airplane'
logits = cur_pred_val_logits[i, :, :] # (2048,50)
cur_pred_val[i, :] = np.argmax(logits[:, seg_classes[cat]], 1) + seg_classes[cat][0]
correct = np.sum(cur_pred_val == target) # number of correctly classified points
total_correct += correct # running total of correctly classified points
total_seen += (cur_batch_size * NUM_POINT) # running total of evaluated points
for l in range(num_part):
total_seen_class[l] += np.sum(target == l) # list:50
total_correct_class[l] += (np.sum((cur_pred_val == l) & (target == l))) # list:50
for i in range(cur_batch_size):
segp = cur_pred_val[i, :] # (2048)
segl = target[i, :] # (2048)
cat = seg_label_to_cat[segl[0]] # category string, e.g. 'Airplane'
part_ious = [0.0 for _ in range(len(seg_classes[cat]))] # e.g. a list of 4 for Airplane
for l in seg_classes[cat]:
if (np.sum(segl == l) == 0) and (
np.sum(segp == l) == 0): # part is not present, no prediction as well
part_ious[l - seg_classes[cat][0]] = 1.0
else:
part_ious[l - seg_classes[cat][0]] = np.sum((segl == l) & (segp == l)) / float(
np.sum((segl == l) | (segp == l)))
shape_ious[cat].append(np.mean(part_ious)) # per-category list of shape IoUs
all_shape_ious = []
for cat in shape_ious.keys():
for iou in shape_ious[cat]:
all_shape_ious.append(iou)
shape_ious[cat] = np.mean(shape_ious[cat])
mean_shape_ious = np.mean(list(shape_ious.values())) # a float
test_metrics['accuracy'] = total_correct / float(total_seen)
test_metrics['class_avg_accuracy'] = np.mean(
np.array(total_correct_class) / np.array(total_seen_class, dtype=np.float64))
for cat in sorted(shape_ious.keys()):
log_string('eval mIoU of %s %f' % (cat + ' ' * (14 - len(cat)), shape_ious[cat]))
test_metrics['class_avg_iou'] = mean_shape_ious
test_metrics['inctance_avg_iou'] = np.mean(all_shape_ious)
log_string('Epoch %d test Accuracy: %f Class avg mIOU: %f Instance avg mIOU: %f' % (
epoch+1, test_metrics['accuracy'],test_metrics['class_avg_iou'],test_metrics['inctance_avg_iou']))
if (test_metrics['inctance_avg_iou'] >= best_inctance_avg_iou):
logger.info('Save model...')
savepath = str(checkpoints_dir) + '/best_model.pth'
log_string('Saving at %s'% savepath)
state = {
'epoch': epoch,
'train_acc': train_instance_acc,
'test_acc': test_metrics['accuracy'],
'class_avg_iou': test_metrics['class_avg_iou'],
'inctance_avg_iou': test_metrics['inctance_avg_iou'],
'model_state_dict': classifier.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(state, savepath) # save the model
log_string('Saving model....')
if test_metrics['accuracy'] > best_acc:
best_acc = test_metrics['accuracy']
if test_metrics['class_avg_iou'] > best_class_avg_iou:
best_class_avg_iou = test_metrics['class_avg_iou']
if test_metrics['inctance_avg_iou'] > best_inctance_avg_iou:
best_inctance_avg_iou = test_metrics['inctance_avg_iou']
log_string('Best accuracy is: %.5f'%best_acc)
log_string('Best class avg mIOU is: %.5f'%best_class_avg_iou)
log_string('Best instance avg mIOU is: %.5f'%best_inctance_avg_iou)
global_epoch+=1
if __name__ == '__main__':
args = parse_args()
main(args)
Test Code
"""
Author: Benny
Date: Nov 2019
"""
import argparse
import os
from data_utils.ShapeNetDataLoader import PartNormalDataset
import torch
import logging
import sys
import importlib
from tqdm import tqdm
import numpy as np
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = BASE_DIR
sys.path.append(os.path.join(ROOT_DIR, 'models'))
seg_classes = {'Earphone': [16, 17, 18], 'Motorbike': [30, 31, 32, 33, 34, 35], 'Rocket': [41, 42, 43], 'Car': [8, 9, 10, 11], 'Laptop': [28, 29], 'Cap': [6, 7], 'Skateboard': [44, 45, 46], 'Mug': [36, 37], 'Guitar': [19, 20, 21], 'Bag': [4, 5], 'Lamp': [24, 25, 26, 27], 'Table': [47, 48, 49], 'Airplane': [0, 1, 2, 3], 'Pistol': [38, 39, 40], 'Chair': [12, 13, 14, 15], 'Knife': [22, 23]}
seg_label_to_cat = {} # dict {0:Airplane, 1:Airplane, ... 49:Table}
for cat in seg_classes.keys():
for label in seg_classes[cat]:
seg_label_to_cat[label] = cat
def to_categorical(y, num_classes): # num_classes = 16
""" 1-hot encodes a tensor """
new_y = torch.eye(num_classes)[y.cpu().data.numpy(),]
if (y.is_cuda):
return new_y.cuda()
return new_y
def parse_args():
'''PARAMETERS'''
parser = argparse.ArgumentParser('PointNet')
parser.add_argument('--batch_size', type=int, default=24, help='batch size in testing [default: 24]')
parser.add_argument('--gpu', type=str, default='0', help='specify gpu device [default: 0]')
parser.add_argument('--num_point', type=int, default=2048, help='Point Number [default: 2048]')
parser.add_argument('--log_dir', type=str, default='pointnet2_part_seg_ssg', help='Experiment root')
parser.add_argument('--normal', action='store_true', default=False, help='Whether to use normal information [default: False]')
parser.add_argument('--num_votes', type=int, default=3, help='Aggregate segmentation scores with voting [default: 3]')
return parser.parse_args()
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
experiment_dir = 'log/part_seg/' + args.log_dir
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/eval.txt' % experiment_dir)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
root = 'data/shapenetcore_partanno_segmentation_benchmark_v0_normal/'
TEST_DATASET = PartNormalDataset(root = root, npoints=args.num_point, split='test', normal_channel=args.normal)
testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size,shuffle=False, num_workers=4)
log_string("The number of test data is: %d" % len(TEST_DATASET))
num_classes = 16
num_part = 50
'''MODEL LOADING'''
model_name = os.listdir(experiment_dir+'/logs')[0].split('.')[0]
MODEL = importlib.import_module(model_name)
classifier = MODEL.get_model(num_part, normal_channel=args.normal).cuda()
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
classifier.load_state_dict(checkpoint['model_state_dict'])
with torch.no_grad():
test_metrics = {}
total_correct = 0
total_seen = 0
total_seen_class = [0 for _ in range(num_part)] # list of 50 zeros, one counter per part label
total_correct_class = [0 for _ in range(num_part)] # list of 50 zeros, one counter per part label
shape_ious = {cat: [] for cat in seg_classes.keys()} # one IoU list per object category (16 categories)
seg_label_to_cat = {} # {0:Airplane, 1:Airplane, ...49:Table}
for cat in seg_classes.keys(): # 16 object categories
for label in seg_classes[cat]: # 50 part labels in total
seg_label_to_cat[label] = cat
for batch_id, (points, label, target) in tqdm(enumerate(testDataLoader), total=len(testDataLoader), smoothing=0.9):
cur_batch_size, NUM_POINT, _ = points.size() # cur_batch_size:24, NUM_POINT:2048
# points: (B, N, C) point clouds; label: (B, 1) object category; target: (B, N) per-point part labels
points, label, target = points.float().cuda(), label.long().cuda(), target.long().cuda()
points = points.transpose(2, 1)
classifier = classifier.eval()
vote_pool = torch.zeros(target.size()[0], target.size()[1], num_part).cuda()
for _ in range(args.num_votes):
seg_pred, _ = classifier(points, to_categorical(label, num_classes)) # forward pass (inference)
vote_pool += seg_pred
seg_pred = vote_pool / args.num_votes
cur_pred_val = seg_pred.cpu().data.numpy() # (24,2048,50)
cur_pred_val_logits = cur_pred_val
cur_pred_val = np.zeros((cur_batch_size, NUM_POINT)).astype(np.int32)
target = target.cpu().data.numpy() # (24,2048)
for i in range(cur_batch_size): # cur_batch_size = 24
cat = seg_label_to_cat[target[i, 0]] # category string, e.g. 'Airplane'
logits = cur_pred_val_logits[i, :, :] # (2048,50)
cur_pred_val[i, :] = np.argmax(logits[:, seg_classes[cat]], 1) + seg_classes[cat][0]
correct = np.sum(cur_pred_val == target) # number of correctly predicted points
total_correct += correct # accumulate correct points
total_seen += (cur_batch_size * NUM_POINT) # accumulate evaluated points
for l in range(num_part):
total_seen_class[l] += np.sum(target == l) # list:50
total_correct_class[l] += (np.sum((cur_pred_val == l) & (target == l))) # list:50
for i in range(cur_batch_size):
segp = cur_pred_val[i, :] # (2048)
segl = target[i, :] # (2048)
cat = seg_label_to_cat[segl[0]] # category string, e.g. 'Airplane'
# compute per-part IoU within this category
part_ious = [0.0 for _ in range(len(seg_classes[cat]))] # e.g. a list of 4 parts for Airplane
for l in seg_classes[cat]:
if (np.sum(segl == l) == 0) and (
np.sum(segp == l) == 0): # part is not present, no prediction as well
part_ious[l - seg_classes[cat][0]] = 1.0
else:
part_ious[l - seg_classes[cat][0]] = np.sum((segl == l) & (segp == l)) / float(
np.sum((segl == l) | (segp == l))) # intersection over union
# shape IoU of this sample = mean over the parts of its category
shape_ious[cat].append(np.mean(part_ious)) # appended to the list of its category (16 categories)
all_shape_ious = []
for cat in shape_ious.keys():
for iou in shape_ious[cat]:
all_shape_ious.append(iou)
shape_ious[cat] = np.mean(shape_ious[cat])
mean_shape_ious = np.mean(list(shape_ious.values())) # mean shape IoU
test_metrics['accuracy'] = total_correct / float(total_seen) # overall point accuracy
test_metrics['class_avg_accuracy'] = np.mean(
np.array(total_correct_class) / np.array(total_seen_class, dtype=float)) # mean accuracy over the 50 part labels
for cat in sorted(shape_ious.keys()):
log_string('eval mIoU of %s %f' % (cat + ' ' * (14 - len(cat)), shape_ious[cat]))
test_metrics['class_avg_iou'] = mean_shape_ious # category-averaged mIoU
test_metrics['instance_avg_iou'] = np.mean(all_shape_ious) # instance-averaged mIoU (mean over all test shapes)
log_string('Accuracy is: %.5f'%test_metrics['accuracy'])
log_string('Class avg accuracy is: %.5f'%test_metrics['class_avg_accuracy'])
log_string('Class avg mIOU is: %.5f'%test_metrics['class_avg_iou'])
log_string('Instance avg mIOU is: %.5f'%test_metrics['instance_avg_iou'])
if __name__ == '__main__':
args = parse_args()
main(args)
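Two details of the IoU loop above are easy to miss: the prediction for each point is restricted to the part labels of the shape's own category (argmax over logits[:, seg_classes[cat]], shifted back to global label ids by adding seg_classes[cat][0]), and a part that appears in neither prediction nor ground truth counts as IoU 1.0. A minimal numpy sketch of the per-shape IoU, assuming the seg_classes dict from the top of the test script (the function name is ours):
import numpy as np
def shape_iou(segp, segl, cat, seg_classes):
    # segp, segl: (N,) predicted / ground-truth part labels of one shape of category `cat`
    part_ious = []
    for l in seg_classes[cat]:
        union = np.sum((segl == l) | (segp == l))
        if union == 0:
            part_ious.append(1.0)  # part absent in both prediction and ground truth
        else:
            part_ious.append(np.sum((segl == l) & (segp == l)) / float(union))
    return np.mean(part_ious)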
Semantic Segmentation Code
Training code
"""
Author: Benny
Date: Nov 2019
"""
import argparse
import os
from data_utils.S3DISDataLoader import S3DISDataset
import torch
import datetime
import logging
from pathlib import Path
import sys
import importlib
import shutil
from tqdm import tqdm
import provider
import numpy as np
import time
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = BASE_DIR
sys.path.append(os.path.join(ROOT_DIR, 'models'))
classes = ['ceiling','floor','wall','beam','column','window','door','table','chair','sofa','bookcase','board','clutter'] # 13 semantic classes
class2label = {cls: i for i,cls in enumerate(classes)}
seg_classes = class2label
seg_label_to_cat = {}
for i,cat in enumerate(seg_classes.keys()):
seg_label_to_cat[i] = cat
def parse_args():
parser = argparse.ArgumentParser('Model')
parser.add_argument('--model', type=str, default='pointnet_sem_seg', help='model name [default: pointnet_sem_seg]')
parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]')
parser.add_argument('--epoch', default=128, type=int, help='Epoch to run [default: 128]')
parser.add_argument('--learning_rate', default=0.001, type=float, help='Initial learning rate [default: 0.001]')
parser.add_argument('--gpu', type=str, default='0', help='GPU to use [default: GPU 0]')
parser.add_argument('--optimizer', type=str, default='Adam', help='Adam or SGD [default: Adam]')
parser.add_argument('--log_dir', type=str, default=None, help='Log path [default: None]')
parser.add_argument('--decay_rate', type=float, default=1e-4, help='weight decay [default: 1e-4]')
parser.add_argument('--npoint', type=int, default=4096, help='Point Number [default: 4096]')
parser.add_argument('--step_size', type=int, default=10, help='Decay step for lr decay [default: every 10 epochs]')
parser.add_argument('--lr_decay', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]')
parser.add_argument('--test_area', type=int, default=5, help='Which area to use for test, option: 1-6 [default: 5]')
return parser.parse_args()
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
'''CREATE DIR'''
timestr = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
experiment_dir = Path('./log/')
experiment_dir.mkdir(exist_ok=True)
experiment_dir = experiment_dir.joinpath('sem_seg')
experiment_dir.mkdir(exist_ok=True)
if args.log_dir is None:
experiment_dir = experiment_dir.joinpath(timestr)
else:
experiment_dir = experiment_dir.joinpath(args.log_dir)
experiment_dir.mkdir(exist_ok=True)
checkpoints_dir = experiment_dir.joinpath('checkpoints/')
checkpoints_dir.mkdir(exist_ok=True)
log_dir = experiment_dir.joinpath('logs/')
log_dir.mkdir(exist_ok=True)
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
root = 'data/stanford_indoor3d/'
NUM_CLASSES = 13
NUM_POINT = args.npoint
BATCH_SIZE = args.batch_size
print("start loading training data ...")
TRAIN_DATASET = S3DISDataset(split='train', data_root=root, num_point=NUM_POINT, test_area=args.test_area, block_size=1.0, sample_rate=1.0, transform=None)
print("start loading test data ...")
TEST_DATASET = S3DISDataset(split='test', data_root=root, num_point=NUM_POINT, test_area=args.test_area, block_size=1.0, sample_rate=1.0, transform=None)
trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True, drop_last=True, worker_init_fn = lambda x: np.random.seed(x+int(time.time())))
testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True, drop_last=True)
weights = torch.Tensor(TRAIN_DATASET.labelweights).cuda()
log_string("The number of training data is: %d" % len(TRAIN_DATASET))
log_string("The number of test data is: %d" % len(TEST_DATASET))
'''MODEL LOADING'''
MODEL = importlib.import_module(args.model)
shutil.copy('models/%s.py' % args.model, str(experiment_dir))
shutil.copy('models/pointnet_util.py', str(experiment_dir))
classifier = MODEL.get_model(NUM_CLASSES).cuda()
criterion = MODEL.get_loss().cuda()
def weights_init(m): # Xavier weight initialization
classname = m.__class__.__name__
if classname.find('Conv2d') != -1:
torch.nn.init.xavier_normal_(m.weight.data)
torch.nn.init.constant_(m.bias.data, 0.0)
elif classname.find('Linear') != -1:
torch.nn.init.xavier_normal_(m.weight.data)
torch.nn.init.constant_(m.bias.data, 0.0)
try:
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
start_epoch = checkpoint['epoch']
classifier.load_state_dict(checkpoint['model_state_dict'])
log_string('Use pretrain model')
except:
log_string('No existing model, starting training from scratch...')
start_epoch = 0
classifier = classifier.apply(weights_init)
if args.optimizer == 'Adam':
optimizer = torch.optim.Adam(
classifier.parameters(),
lr=args.learning_rate,
betas=(0.9, 0.999),
eps=1e-08,
weight_decay=args.decay_rate
)
else:
optimizer = torch.optim.SGD(classifier.parameters(), lr=args.learning_rate, momentum=0.9)
def bn_momentum_adjust(m, momentum): # adjust BatchNorm momentum
if isinstance(m, torch.nn.BatchNorm2d) or isinstance(m, torch.nn.BatchNorm1d):
m.momentum = momentum
LEARNING_RATE_CLIP = 1e-5
MOMENTUM_ORIGINAL = 0.1
MOMENTUM_DECCAY = 0.5
MOMENTUM_DECCAY_STEP = args.step_size
global_epoch = 0
best_iou = 0
for epoch in range(start_epoch,args.epoch):
'''Train on chopped scenes'''
log_string('**** Epoch %d (%d/%s) ****' % (global_epoch + 1, epoch + 1, args.epoch))
lr = max(args.learning_rate * (args.lr_decay ** (epoch // args.step_size)), LEARNING_RATE_CLIP)
log_string('Learning rate:%f' % lr)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
momentum = MOMENTUM_ORIGINAL * (MOMENTUM_DECCAY ** (epoch // MOMENTUM_DECCAY_STEP))
if momentum < 0.01:
momentum = 0.01
print('BN momentum updated to: %f' % momentum)
classifier = classifier.apply(lambda x: bn_momentum_adjust(x,momentum))
num_batches = len(trainDataLoader)
total_correct = 0
total_seen = 0
loss_sum = 0
for i, data in tqdm(enumerate(trainDataLoader), total=len(trainDataLoader), smoothing=0.9):
points, target = data
points = points.data.numpy()
# data augmentation: random rotation around the z-axis
points[:,:, :3] = provider.rotate_point_cloud_z(points[:,:, :3])
points = torch.Tensor(points)
points, target = points.float().cuda(),target.long().cuda()
points = points.transpose(2, 1)
optimizer.zero_grad()
classifier = classifier.train() # set training mode
seg_pred, trans_feat = classifier(points) # forward pass
seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)
batch_label = target.view(-1, 1)[:, 0].cpu().data.numpy()
target = target.view(-1, 1)[:, 0]
loss = criterion(seg_pred, target, trans_feat, weights) # compute weighted loss
loss.backward() # backpropagation
optimizer.step() # update parameters
pred_choice = seg_pred.cpu().data.max(1)[1].numpy()
correct = np.sum(pred_choice == batch_label)
total_correct += correct
total_seen += (BATCH_SIZE * NUM_POINT)
loss_sum += loss
log_string('Training mean loss: %f' % (loss_sum / num_batches))
log_string('Training accuracy: %f' % (total_correct / float(total_seen)))
if epoch % 5 == 0:
logger.info('Save model...')
savepath = str(checkpoints_dir) + '/model.pth'
log_string('Saving at %s' % savepath)
state = {
'epoch': epoch,
'model_state_dict': classifier.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(state, savepath) # save checkpoint
log_string('Saving model....')
'''Evaluate on chopped scenes'''
with torch.no_grad():
num_batches = len(testDataLoader)
total_correct = 0
total_seen = 0
loss_sum = 0
labelweights = np.zeros(NUM_CLASSES)
total_seen_class = [0 for _ in range(NUM_CLASSES)]
total_correct_class = [0 for _ in range(NUM_CLASSES)]
total_iou_deno_class = [0 for _ in range(NUM_CLASSES)]
log_string('---- EPOCH %03d EVALUATION ----' % (global_epoch + 1))
for i, data in tqdm(enumerate(testDataLoader), total=len(testDataLoader), smoothing=0.9):
points, target = data
points = points.data.numpy()
points = torch.Tensor(points)
points, target = points.float().cuda(), target.long().cuda()
points = points.transpose(2, 1)
classifier = classifier.eval() # set eval mode
seg_pred, trans_feat = classifier(points)
pred_val = seg_pred.contiguous().cpu().data.numpy()
seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)
batch_label = target.cpu().data.numpy()
target = target.view(-1, 1)[:, 0]
loss = criterion(seg_pred, target, trans_feat, weights)
loss_sum += loss
pred_val = np.argmax(pred_val, 2)
correct = np.sum((pred_val == batch_label))
total_correct += correct
total_seen += (BATCH_SIZE * NUM_POINT)
tmp, _ = np.histogram(batch_label, range(NUM_CLASSES + 1))
labelweights += tmp
for l in range(NUM_CLASSES):
total_seen_class[l] += np.sum((batch_label == l) )
total_correct_class[l] += np.sum((pred_val == l) & (batch_label == l) )
total_iou_deno_class[l] += np.sum(((pred_val == l) | (batch_label == l)) )
labelweights = labelweights.astype(np.float32) / np.sum(labelweights.astype(np.float32))
mIoU = np.mean(np.array(total_correct_class) / (np.array(total_iou_deno_class, dtype=float) + 1e-6))
log_string('eval mean loss: %f' % (loss_sum / float(num_batches)))
log_string('eval point avg class IoU: %f' % (mIoU))
log_string('eval point accuracy: %f' % (total_correct / float(total_seen)))
log_string('eval point avg class acc: %f' % (
np.mean(np.array(total_correct_class) / (np.array(total_seen_class, dtype=float) + 1e-6))))
iou_per_class_str = '------- IoU --------\n'
for l in range(NUM_CLASSES):
iou_per_class_str += 'class %s weight: %.3f, IoU: %.3f \n' % (
seg_label_to_cat[l] + ' ' * (14 - len(seg_label_to_cat[l])), labelweights[l],  # weight index aligned with class l
total_correct_class[l] / float(total_iou_deno_class[l]))
log_string(iou_per_class_str)
log_string('Eval mean loss: %f' % (loss_sum / num_batches))
log_string('Eval accuracy: %f' % (total_correct / float(total_seen)))
if mIoU >= best_iou:
best_iou = mIoU
logger.info('Save model...')
savepath = str(checkpoints_dir) + '/best_model.pth'
log_string('Saving at %s' % savepath)
state = {
'epoch': epoch,
'class_avg_iou': mIoU,
'model_state_dict': classifier.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(state, savepath)
log_string('Saving model....')
log_string('Best mIoU: %f' % best_iou)
global_epoch += 1
if __name__ == '__main__':
args = parse_args()
main(args)
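The loop above decays the learning rate and the BatchNorm momentum on the same step schedule (every step_size epochs) and clips both at a floor. A minimal sketch of the two schedules as pure functions, with the defaults taken from the argument parser and constants of the training script (the function names are ours):
def lr_at(epoch, base_lr=0.001, lr_decay=0.7, step_size=10, clip=1e-5):
    # stepwise exponential decay, clipped at LEARNING_RATE_CLIP
    return max(base_lr * (lr_decay ** (epoch // step_size)), clip)
def bn_momentum_at(epoch, original=0.1, decay=0.5, step_size=10, floor=0.01):
    # BatchNorm momentum shrinks on the same schedule, floored at 0.01
    return max(original * (decay ** (epoch // step_size)), floor)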
Test code
"""
Author: Benny
Date: Nov 2019
"""
import argparse
import os
from data_utils.S3DISDataLoader import ScannetDatasetWholeScene
from data_utils.indoor3d_util import g_label2color
import torch
import logging
from pathlib import Path
import sys
import importlib
from tqdm import tqdm
import provider
import numpy as np
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = BASE_DIR
sys.path.append(os.path.join(ROOT_DIR, 'models'))
classes = ['ceiling','floor','wall','beam','column','window','door','table','chair','sofa','bookcase','board','clutter'] # 13 semantic classes
class2label = {cls: i for i,cls in enumerate(classes)} # class name -> index dict
seg_classes = class2label
seg_label_to_cat = {}
for i,cat in enumerate(seg_classes.keys()):
seg_label_to_cat[i] = cat
# 6-fold cross-validation over the S3DIS areas: train on 5 areas, test on the remaining one; a common way to avoid overfitting to a single split
def parse_args():
'''PARAMETERS'''
parser = argparse.ArgumentParser('Model')
parser.add_argument('--batch_size', type=int, default=32, help='batch size in testing [default: 32]')
parser.add_argument('--gpu', type=str, default='0', help='specify gpu device')
parser.add_argument('--num_point', type=int, default=4096, help='Point Number [default: 4096]')
parser.add_argument('--log_dir', type=str, default='pointnet2_sem_seg', help='Experiment root')
parser.add_argument('--visual', action='store_true', default=False, help='Whether visualize result [default: False]')
parser.add_argument('--test_area', type=int, default=5, help='Which area to use for test, option: 1-6 [default: 5]')
parser.add_argument('--num_votes', type=int, default=5, help='Aggregate segmentation scores with voting [default: 5]')
return parser.parse_args()
def add_vote(vote_label_pool, point_idx, pred_label, weight):
B = pred_label.shape[0]
N = pred_label.shape[1]
for b in range(B):
for n in range(N):
if weight[b,n]:
vote_label_pool[int(point_idx[b, n]), int(pred_label[b, n])] += 1
return vote_label_pool
def main(args):
def log_string(str):
logger.info(str)
print(str)
'''HYPER PARAMETER'''
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
experiment_dir = 'log/sem_seg/' + args.log_dir
visual_dir = experiment_dir + '/visual/'
visual_dir = Path(visual_dir)
visual_dir.mkdir(exist_ok=True)
'''LOG'''
args = parse_args()
logger = logging.getLogger("Model")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler('%s/eval.txt' % experiment_dir)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_string('PARAMETER ...')
log_string(args)
NUM_CLASSES = 13
BATCH_SIZE = args.batch_size
NUM_POINT = args.num_point
root = 'data/stanford_indoor3d/'
TEST_DATASET_WHOLE_SCENE = ScannetDatasetWholeScene(root, split='test', test_area=args.test_area, block_points=NUM_POINT)
log_string("The number of test data is: %d" % len(TEST_DATASET_WHOLE_SCENE))
'''MODEL LOADING'''
model_name = os.listdir(experiment_dir+'/logs')[0].split('.')[0]
MODEL = importlib.import_module(model_name)
classifier = MODEL.get_model(NUM_CLASSES).cuda()
checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
classifier.load_state_dict(checkpoint['model_state_dict'])
with torch.no_grad():
scene_id = TEST_DATASET_WHOLE_SCENE.file_list
scene_id = [x[:-4] for x in scene_id]
num_batches = len(TEST_DATASET_WHOLE_SCENE)
total_seen_class = [0 for _ in range(NUM_CLASSES)]
total_correct_class = [0 for _ in range(NUM_CLASSES)]
total_iou_deno_class = [0 for _ in range(NUM_CLASSES)]
log_string('---- EVALUATION WHOLE SCENE----')
for batch_idx in range(num_batches):
print("visualize [%d/%d] %s ..." % (batch_idx+1, num_batches, scene_id[batch_idx]))
total_seen_class_tmp = [0 for _ in range(NUM_CLASSES)]
total_correct_class_tmp = [0 for _ in range(NUM_CLASSES)]
total_iou_deno_class_tmp = [0 for _ in range(NUM_CLASSES)]
if args.visual:
fout = open(os.path.join(visual_dir, scene_id[batch_idx] + '_pred.obj'), 'w')
fout_gt = open(os.path.join(visual_dir, scene_id[batch_idx] + '_gt.obj'), 'w')
whole_scene_data = TEST_DATASET_WHOLE_SCENE.scene_points_list[batch_idx]
whole_scene_label = TEST_DATASET_WHOLE_SCENE.semantic_labels_list[batch_idx]
vote_label_pool = np.zeros((whole_scene_label.shape[0], NUM_CLASSES))
for _ in tqdm(range(args.num_votes), total=args.num_votes):
scene_data, scene_label, scene_smpw, scene_point_index = TEST_DATASET_WHOLE_SCENE[batch_idx]
num_blocks = scene_data.shape[0]
s_batch_num = (num_blocks + BATCH_SIZE - 1) // BATCH_SIZE
batch_data = np.zeros((BATCH_SIZE, NUM_POINT, 9))
batch_label = np.zeros((BATCH_SIZE, NUM_POINT))
batch_point_index = np.zeros((BATCH_SIZE, NUM_POINT))
batch_smpw = np.zeros((BATCH_SIZE, NUM_POINT))
for sbatch in range(s_batch_num):
start_idx = sbatch * BATCH_SIZE
end_idx = min((sbatch + 1) * BATCH_SIZE, num_blocks)
real_batch_size = end_idx - start_idx
batch_data[0:real_batch_size, ...] = scene_data[start_idx:end_idx, ...]
batch_label[0:real_batch_size, ...] = scene_label[start_idx:end_idx, ...]
batch_point_index[0:real_batch_size, ...] = scene_point_index[start_idx:end_idx, ...]
batch_smpw[0:real_batch_size, ...] = scene_smpw[start_idx:end_idx, ...]
batch_data[:, :, 3:6] /= 1.0
torch_data = torch.Tensor(batch_data)
torch_data= torch_data.float().cuda()
torch_data = torch_data.transpose(2, 1)
seg_pred, _ = classifier(torch_data) # forward pass
batch_pred_label = seg_pred.contiguous().cpu().data.max(2)[1].numpy()
vote_label_pool = add_vote(vote_label_pool, batch_point_index[0:real_batch_size, ...],
batch_pred_label[0:real_batch_size, ...],
batch_smpw[0:real_batch_size, ...])
pred_label = np.argmax(vote_label_pool, 1) # majority vote over the accumulated predictions
for l in range(NUM_CLASSES):
total_seen_class_tmp[l] += np.sum((whole_scene_label == l))
total_correct_class_tmp[l] += np.sum((pred_label == l) & (whole_scene_label == l))
total_iou_deno_class_tmp[l] += np.sum(((pred_label == l) | (whole_scene_label == l)))
total_seen_class[l] += total_seen_class_tmp[l]
total_correct_class[l] += total_correct_class_tmp[l]
total_iou_deno_class[l] += total_iou_deno_class_tmp[l]
iou_map = np.array(total_correct_class_tmp) / (np.array(total_iou_deno_class_tmp, dtype=float) + 1e-6)
print(iou_map)
arr = np.array(total_seen_class_tmp)
tmp_iou = np.mean(iou_map[arr != 0])
log_string('Mean IoU of %s: %.4f' % (scene_id[batch_idx], tmp_iou))
print('----------------------------')
filename = os.path.join(visual_dir, scene_id[batch_idx] + '.txt')
with open(filename, 'w') as pl_save:
for i in pred_label:
pl_save.write(str(int(i)) + '\n')
pl_save.close()
for i in range(whole_scene_label.shape[0]):
color = g_label2color[pred_label[i]]
color_gt = g_label2color[whole_scene_label[i]]
if args.visual:
fout.write('v %f %f %f %d %d %d\n' % (
whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color[0], color[1],
color[2]))
fout_gt.write(
'v %f %f %f %d %d %d\n' % (
whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color_gt[0],
color_gt[1], color_gt[2]))
if args.visual:
fout.close()
fout_gt.close()
IoU = np.array(total_correct_class) / (np.array(total_iou_deno_class, dtype=float) + 1e-6)
iou_per_class_str = '------- IoU --------\n'
for l in range(NUM_CLASSES):
iou_per_class_str += 'class %s, IoU: %.3f \n' % (
seg_label_to_cat[l] + ' ' * (14 - len(seg_label_to_cat[l])),
total_correct_class[l] / float(total_iou_deno_class[l]))
log_string(iou_per_class_str)
log_string('eval point avg class IoU: %f' % np.mean(IoU))
log_string('eval whole scene point avg class acc: %f' % (
np.mean(np.array(total_correct_class) / (np.array(total_seen_class, dtype=float) + 1e-6))))
log_string('eval whole scene point accuracy: %f' % (
np.sum(total_correct_class) / float(np.sum(total_seen_class) + 1e-6)))
print("Done!")
if __name__ == '__main__':
args = parse_args()
main(args)
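add_vote above accumulates votes with two nested Python loops over every point of every block. An equivalent vectorized version (our sketch, not part of the original script) scatter-adds the same counts with numpy's unbuffered np.add.at and produces an identical vote_label_pool:
import numpy as np
def add_vote_vectorized(vote_label_pool, point_idx, pred_label, weight):
    # keep only points with a nonzero sample weight, then scatter-add one vote per (point, label) pair
    mask = weight.reshape(-1).astype(bool)
    idx = point_idx.reshape(-1).astype(np.int64)[mask]
    lab = pred_label.reshape(-1).astype(np.int64)[mask]
    np.add.at(vote_label_pool, (idx, lab), 1)  # unbuffered add, so repeated (idx, lab) pairs all count
    return vote_label_pool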