数据集准备
1.rolabelimg标注
https://blog.csdn.net/qq_42921511/article/details/127619447
2. roxml转为dota格式的txt
roxml文件格式:
dota的txt格式:
修改完路径后运行roxml_to_dota.py即可
# 文件名称 :roxml_to_dota.py
# 功能描述 :把rolabelimg标注的xml文件转换成dota能识别的xml文件,
# 再转换成dota格式的txt文件
# 把旋转框 cx,cy,w,h,angle,转换成四点坐标x1,y1,x2,y2,x3,y3,x4,y4
import os
import xml.etree.ElementTree as ET
import math
def edit_xml(xml_file, dotaxml_file):
    """
    Rewrite one roLabelImg annotation file so every object box is stored as
    four corner points.

    For each <object> element:
      * if it has no <robndbox>, its <bndbox> children xmin/ymin/xmax/ymax are
        removed and replaced by the corners
        (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin);
      * otherwise the <robndbox> is renamed to <bndbox>, its cx/cy/w/h/angle
        children are removed, and the four corners of the w x h box are
        computed with rotatePoint() using -angle.
    The corners are appended as children x0, y0, x1, y1, x2, y2, x3, y3.

    :param xml_file: path of the XML file to read
    :param dotaxml_file: path the converted XML file is written to
    :return: None
    """
    tree = ET.parse(xml_file)
    for obj in tree.findall('object'):
        obj_bnd = obj.find('robndbox')
        if obj_bnd is None:
            # Horizontal box: read the four extents, then drop the old nodes.
            obj_bnd = obj.find('bndbox')
            nodes = [obj_bnd.find(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            xmin, ymin, xmax, ymax = [float(node.text) for node in nodes]
            for node in nodes:
                obj_bnd.remove(node)
            corners = [
                (str(xmin), str(ymax)),
                (str(xmax), str(ymax)),
                (str(xmax), str(ymin)),
                (str(xmin), str(ymin)),
            ]
        else:
            # Rotated box: rename the node and convert centre/size/angle
            # into four rotated corners.
            obj_bnd.tag = 'bndbox'
            nodes = [obj_bnd.find(tag) for tag in ('cx', 'cy', 'w', 'h', 'angle')]
            cx, cy, w, h, angle = [float(node.text) for node in nodes]
            for node in nodes:
                obj_bnd.remove(node)
            corners = [
                rotatePoint(cx, cy, cx - w / 2, cy - h / 2, -angle),
                rotatePoint(cx, cy, cx + w / 2, cy - h / 2, -angle),
                rotatePoint(cx, cy, cx + w / 2, cy + h / 2, -angle),
                rotatePoint(cx, cy, cx - w / 2, cy + h / 2, -angle),
            ]
        # Append x0, y0, x1, y1, ... in that order.
        for index, (x_text, y_text) in enumerate(corners):
            for prefix, text in (('x', x_text), ('y', y_text)):
                node = ET.Element('%s%d' % (prefix, index))
                node.text = text
                obj_bnd.append(node)
    tree.write(dotaxml_file, method='xml', encoding='utf-8')
# Convert a rotated-box corner into image coordinates
def rotatePoint(xc, yc, xp, yp, theta):
    """
    Rotate the point (xp, yp) around the centre (xc, yc) by `theta` and return
    the result as a pair of integer strings.

    :param xc: x of the rotation centre
    :param yc: y of the rotation centre
    :param xp: x of the point to rotate
    :param yp: y of the point to rotate
    :param theta: rotation angle, passed directly to math.cos / math.sin (radians)
    :return: (x, y) as strings holding the nearest integer coordinates
    """
    xoff = xp - xc
    yoff = yp - yc
    cos_theta = math.cos(theta)
    sin_theta = math.sin(theta)
    rot_x = cos_theta * xoff + sin_theta * yoff
    rot_y = -sin_theta * xoff + cos_theta * yoff
    # Round to the nearest pixel. Plain int() truncates toward zero, which
    # turns values such as 7.9999999 (float noise) or -1.7 into 7 and -1.
    return str(int(round(xc + rot_x))), str(int(round(yc + rot_y)))
def totxt(xml_path, out_path):
    """
    Convert every four-corner XML file in `xml_path` into a DOTA-format txt file.

    Each <object> becomes one line:
        x0 y0 x1 y1 x2 y2 x3 y3 <class name> 0
    where the coordinates are the integer parts of the x0..y3 children of the
    object's <bndbox>, and the trailing 0 is written as a constant.

    :param xml_path: directory containing the converted XML files
    :param out_path: directory the txt files are written to (created if missing)
    :return: None
    """
    corner_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')
    if out_path:
        os.makedirs(out_path, exist_ok=True)
    for file_name in os.listdir(xml_path):
        # splitext removes only the real suffix; str.strip('.xml') would also
        # eat leading/trailing '.', 'x', 'm', 'l' characters of the name.
        name, ext = os.path.splitext(file_name)
        if ext.lower() != '.xml':
            continue
        tree = ET.parse(os.path.join(xml_path, file_name))
        output = os.path.join(out_path, name + '.txt')
        with open(output, 'w') as txt_file:
            for obj in tree.findall('object'):
                cls = obj.find('name').text
                box = obj.find('bndbox')
                coords = [int(float(box.find(tag).text)) for tag in corner_tags]
                txt_file.write("{} {} {} {} {} {} {} {} {} 0\n".format(*coords, cls))
        print(output)
if __name__ == '__main__':
    # ----- Step 1: rewrite every annotation XML as a four-corner XML -----
    roxml_path = "./crack/roxml"  # directory holding the XML files to convert
    dotaxml_path = './crack/dotaxml'
    out_path = './crack/txt/'
    # Neither ElementTree.write() nor open() creates missing directories.
    os.makedirs(dotaxml_path, exist_ok=True)
    os.makedirs(out_path, exist_ok=True)
    for file in os.listdir(roxml_path):
        if not file.lower().endswith('.xml'):
            continue  # ignore stray non-annotation files
        edit_xml(os.path.join(roxml_path, file), os.path.join(dotaxml_path, file))
    # ----- Step 2: convert the four-corner XML files into DOTA txt files -----
    totxt(dotaxml_path, out_path)
3. dota格式txt转为yolo格式的txt标签
环境配置:
python=3.7
pip install opencv-python==4.1.2.30 (ps:必须为这个版本,因为下面的转换代码假定 cv2.minAreaRect 返回的角度范围为 [-90, 0))
图片:长和宽一致的png格式
使用:
dota_utils.py和dotatoyolo.py放在同一个目录下,修改完文件路径后直接运行dotatoyolo.py
yolo格式的txt:
dota_utils.py:(文章来源:https://www.toymoban.com/news/detail-443056.html)
# -*- coding: utf-8 -*-
import sys
import codecs
import numpy as np
import shapely.geometry as shgeo
import os
import re
import math
# import polyiou
"""
some basic functions which are useful for process DOTA data
"""
# Class-name tables (originally labelled "For DOTA v1.5").
# Both currently hold the single custom class 'crack'.
# wordname_15 is iterated by groundtruth2Task1 to open one output file per class.
classnames_v1_5 = ['crack']
wordname_15 = ['crack']
# Earlier multi-class value of wordname_15, kept for reference:
# wordname_15 = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
# 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter']
def custombasename(fullname):
    """Return the file name of `fullname` with its directory and last extension removed."""
    stem, _extension = os.path.splitext(fullname)
    return os.path.basename(stem)
def GetFileFromThisRootDir(dir, ext=None):
    """
    Recursively list the files below `dir`, optionally filtered by extension.

    :param dir: root directory to walk
    :param ext: None to return every file; otherwise a single extension string
                or an iterable of extensions, each with or without the leading
                dot (e.g. '.txt', 'txt', ['png', 'jpg'])
    :return: list of file paths (root joined with the file name)
    """
    if ext is None:
        wanted = None
    elif isinstance(ext, str):
        # A bare string used to be tested with `in`, i.e. as a substring, so
        # '.txt' also matched files with no extension at all.
        wanted = {ext.lstrip('.')}
    else:
        wanted = {item.lstrip('.') for item in ext}

    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for filename in files:
            filepath = os.path.join(root, filename)
            extension = os.path.splitext(filepath)[1][1:]
            if wanted is None or extension in wanted:
                allfiles.append(filepath)
    return allfiles
def TuplePoly2Poly(poly):
    """Flatten four (x, y) points into the list [x1, y1, x2, y2, x3, y3, x4, y4]."""
    outpoly = []
    for k in range(4):
        point = poly[k]
        outpoly.append(point[0])
        outpoly.append(point[1])
    return outpoly
def parse_dota_poly(filename):
    """
    Parse a DOTA ground-truth txt file.

    Each line with at least 9 space-separated fields yields a dict with:
      'name'      : field 8 (class name)
      'difficult' : field 9 if present, otherwise '0'
      'poly'      : [(x1, y1), (x2, y2), (x3, y3), (x4, y4)] as floats
      'area'      : area of that polygon as computed by shapely
    Lines with fewer than 9 fields are skipped.

    :param filename: path of the txt file
    :return: list of the dicts described above, in file order
    """
    objects = []
    # `with` closes the file; the previous version leaked the handle and its
    # Python 2 fallback compared sys.version_info (a tuple) with the float 2.7.
    with open(filename, 'r') as f:
        for line in f:
            splitlines = line.strip().split(' ')
            if len(splitlines) < 9:
                continue
            object_struct = {'name': splitlines[8]}
            if len(splitlines) == 9:
                object_struct['difficult'] = '0'
            else:
                object_struct['difficult'] = splitlines[9]
            object_struct['poly'] = [
                (float(splitlines[k]), float(splitlines[k + 1]))
                for k in range(0, 8, 2)
            ]
            gtpoly = shgeo.Polygon(object_struct['poly'])
            object_struct['area'] = gtpoly.area
            objects.append(object_struct)
    return objects
def parse_longsideformat(filename):  # filename=??.txt
    """
    Parse a label file in long-side format.

    Every line with exactly 6 space-separated fields becomes
        [classid (int), x_c, y_c, longside, shortside, theta] (floats).
    Lines with any other field count are reported with a message and skipped.

    :param filename: path of the txt file
    :return: list of the 6-element lists described above, in file order
    """
    objects = []
    # `with` closes the file; the previous version leaked the handle and its
    # Python 2 fallback compared sys.version_info (a tuple) with the float 2.7.
    with open(filename, 'r') as f:
        for line in f:
            splitlines = line.strip().split(' ')
            if len(splitlines) != 6:
                print('labels长度不为6,出现错误,与预定形式不符')
                continue
            object_struct = [int(splitlines[0])]
            object_struct.extend(float(value) for value in splitlines[1:])
            objects.append(object_struct)
    return objects
def parse_dota_poly2(filename):
    """
    Parse a DOTA ground-truth file like parse_dota_poly, but store each
    polygon as a flat list of ints:
        [x1, y1, x2, y2, x3, y3, x4, y4]
    """
    parsed = parse_dota_poly(filename)
    for entry in parsed:
        flat = TuplePoly2Poly(entry['poly'])
        entry['poly'] = [int(value) for value in flat]
    return parsed
def parse_dota_rec(filename):
    """
    Parse a DOTA ground-truth file like parse_dota_poly and add to every
    object a 'bndbox' entry holding the value of dots4ToRec4(obj['poly']),
    i.e. (xmin, ymin, xmax, ymax).
    """
    parsed = parse_dota_poly(filename)
    for entry in parsed:
        entry['bndbox'] = dots4ToRec4(entry['poly'])
    return parsed
## bounding box conversions between the various formats
def dots4ToRec4(poly):
    """
    Compute the smallest axis-aligned rectangle enclosing the four points of poly.
    @param poly: poly[4], each element an [x, y] pair
    @return: xmin, ymin, xmax, ymax
    """
    xs = [poly[k][0] for k in range(4)]
    ys = [poly[k][1] for k in range(4)]
    return min(xs), min(ys), max(xs), max(ys)
def dots4ToRec8(poly):
    """
    Return the enclosing axis-aligned rectangle of poly as eight values:
    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax.
    """
    left, top, right, bottom = dots4ToRec4(poly)
    corners = ((left, top), (right, top), (right, bottom), (left, bottom))
    return tuple(value for corner in corners for value in corner)
def dots2ToRec8(rec):
    """
    Expand rec = (xmin, ymin, xmax, ymax) into the eight corner values
    xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax.
    """
    left, top, right, bottom = (rec[k] for k in range(4))
    return left, top, right, top, right, bottom, left, bottom
def groundtruth2Task1(srcpath, dstpath):
    """
    Regroup per-image DOTA ground-truth files into one "Task1_<class>.txt"
    file per class in wordname_15.

    The scale tag is extracted from each source file name with the pattern
    '__<rate>__<digits>___' and mapped to the score written on every line:
    '0.5' -> '1', '1' -> '0.8', '2' -> '0.6'. Objects whose 'difficult' field
    is '2' are skipped. Each output line is
        <source base name> <score> <x1 y1 x2 y2 x3 y3 x4 y4>

    :param srcpath: directory walked for ground-truth files
    :param dstpath: directory the Task1_*.txt files are written to
    :return: None
    """
    filelist = GetFileFromThisRootDir(srcpath)
    rate_to_score = {'0.5': '1', '1': '0.8', '2': '0.6'}
    pattern2 = re.compile(r'__([\d+\.]+)__\d+___')
    filedict = {}
    try:
        for cls in wordname_15:
            filedict[cls] = open(os.path.join(dstpath, 'Task1_') + cls + r'.txt', 'w')
        for filepath in filelist:
            objects = parse_dota_poly2(filepath)
            subname = custombasename(filepath)
            rates = pattern2.findall(subname)
            score = rate_to_score.get(rates[0]) if rates else None
            if score is None:
                # Previously a missing tag raised IndexError and an unknown
                # rate reused a stale `outline` (or raised NameError).
                print('skip %s: no known scale tag in file name' % filepath)
                continue
            for obj in objects:
                if obj['difficult'] == '2':
                    continue
                outline = subname + ' ' + score + ' ' + ' '.join(map(str, obj['poly']))
                filedict[obj['name']].write(outline + '\n')
    finally:
        # Close (and thereby flush) every per-class output file.
        for fd in filedict.values():
            fd.close()
def Task2groundtruth_poly(srcpath, dstpath):
    """
    Regroup per-class task result files into one file per image.

    Every '.txt' file below srcpath is read; the class id is the part of its
    base name after the last '_'. Each line is expected to be
        <image name> <confidence> <coordinates...>
    and lines whose confidence is greater than 0.1 are written to
    <dstpath>/<image name>.txt as '<coordinates...> <class id>'.

    :param srcpath: directory walked for task result files
    :param dstpath: directory the per-image files are written to
    :return: None
    """
    thresh = 0.1
    filedict = {}
    Tasklist = GetFileFromThisRootDir(srcpath, '.txt')
    try:
        for Taskfile in Tasklist:
            idname = custombasename(Taskfile).split('_')[-1]
            with open(Taskfile, 'r') as f:
                lines = f.readlines()
            for line in lines:
                splitline = line.strip().split(' ')
                # readlines() keeps the newline, so a blank line is '\n' and
                # never has length 0; test the parsed fields instead.
                if len(splitline) < 2:
                    continue
                filename = splitline[0]
                confidence = splitline[1]
                bbox = splitline[2:]
                if float(confidence) > thresh:
                    if filename not in filedict:
                        filedict[filename] = codecs.open(os.path.join(dstpath, filename + '.txt'), 'w')
                    poly = bbox
                    filedict[filename].write(' '.join(poly) + ' ' + idname + '\n')
    finally:
        # Close (and thereby flush) every per-image output file.
        for fd in filedict.values():
            fd.close()
def polygonToRotRectangle(bbox):
    """
    Convert a 4-point polygon into a rotated rectangle.

    The angle is taken from the edge between the first two points; the points
    are rotated by the inverse of that angle about their mean, and the extent
    of the rotated points (plus 1) gives w and h.

    :param bbox: The polygon stored in format [x1, y1, x2, y2, x3, y3, x4, y4]
    :return: Rotated Rectangle in format [cx, cy, w, h, theta]
             (cx, cy are Python floats, w and h numpy float32 scalars,
             theta the math.atan2 result in radians)
    """
    bbox = np.array(bbox, dtype=np.float32)
    # Row 0 holds the x values, row 1 the y values. The shape is passed
    # positionally because the `newshape` keyword is deprecated in recent NumPy.
    bbox = np.reshape(bbox, (2, 4), order='F')
    angle = math.atan2(-(bbox[0, 1] - bbox[0, 0]), bbox[1, 1] - bbox[1, 0])
    # Mean of the four points, kept as a (2, 1) column so it broadcasts below.
    # The previous `[[0], [0]]` list accumulated with `+=` raised TypeError,
    # because list += <numpy scalar> tries to extend the list.
    center = bbox.mean(axis=1, keepdims=True)
    R = np.array([[math.cos(angle), -math.sin(angle)], [math.sin(angle), math.cos(angle)]], dtype=np.float32)
    normalized = np.matmul(R.transpose(), bbox - center)
    xmin = np.min(normalized[0, :])
    xmax = np.max(normalized[0, :])
    ymin = np.min(normalized[1, :])
    ymax = np.max(normalized[1, :])
    w = xmax - xmin + 1
    h = ymax - ymin + 1
    return [float(center[0, 0]), float(center[1, 0]), w, h, angle]
def cal_line_length(point1, point2):
    """Return the Euclidean distance between the 2-D points point1 and point2."""
    dx_squared = math.pow(point1[0] - point2[0], 2)
    dy_squared = math.pow(point1[1] - point2[1], 2)
    return math.sqrt(dx_squared + dy_squared)
def get_best_begin_point(coordinate):
    """
    Choose the cyclic ordering of the four input points that lies closest to
    the corners of their axis-aligned bounding box.

    The four rotations of the point sequence are compared against the corners
    (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax); the rotation with
    the smallest summed point-to-corner distance is returned. A message is
    printed when the chosen rotation is not the input order.

    :param coordinate: four [x, y] points
    :return: list of four [x, y] lists in the chosen order
    """
    pts = [[coordinate[k][0], coordinate[k][1]] for k in range(4)]
    xs = [p[0] for p in pts]
    ys = [p[1] for p in pts]
    xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
    # All four cyclic shifts of the point order, each with fresh point lists.
    combinate = [[list(p) for p in pts[shift:] + pts[:shift]] for shift in range(4)]
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
    force = 100000000.0
    force_flag = 0
    for shift, candidate in enumerate(combinate):
        temp_force = cal_line_length(candidate[0], dst_coordinate[0])
        for k in range(1, 4):
            temp_force = temp_force + cal_line_length(candidate[k], dst_coordinate[k])
        if temp_force < force:
            force = temp_force
            force_flag = shift
    if force_flag != 0:
        print("choose one direction!")
    return combinate[force_flag]
def dots4ToRecC(poly, img_w, img_h):
    """
    Compute the smallest axis-aligned rectangle around the four points of poly
    and return it in YOLO form: centre x, centre y, width, height, each divided
    by the image size.
    @param poly: poly[4], each element an [x, y] pair
    @param img_w: width of the corresponding image
    @param img_h: height of the corresponding image
    @return: x_center, y_center, w, h (all normalised)
    """
    left, top, right, bottom = dots4ToRec4(poly)
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    box_w = right - left
    box_h = bottom - top
    return center_x / img_w, center_y / img_h, box_w / img_w, box_h / img_h
dotatoyolo.py:(文章来源地址:https://www.toymoban.com/news/detail-443056.html)
# -*- coding: utf-8 -*-
import dota_utils as util
import os
import numpy as np
from PIL import Image
import cv2
import random
import shutil
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, MultiPoint # 多边形
import time
import argparse
## trans dota format to format YOLO(darknet) required
def dota2Darknet(imgpath, txtpath, dstpath, extractclassname):
    """
    Convert DOTA-format labels into YOLO (darknet) horizontal-box labels.

    dstpath is deleted if it exists and then recreated. For every label file
    found below txtpath, the image <imgpath>/<name>.png is opened to read its
    size, and one line 'id x y w h' (normalised centre/size of the enclosing
    horizontal box) is written per kept object. Objects are dropped when any
    of the four values is <= 0 or >= 1, or when their class is not in
    extractclassname.

    :param imgpath: the path of images
    :param txtpath: the path of txt in dota format
    :param dstpath: the path of txt in YOLO format
    :param extractclassname: the category you selected
    :return: None
             txt format: id x y w h
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # delete output folder
    os.makedirs(dstpath)  # make new output folder
    filelist = util.GetFileFromThisRootDir(txtpath)
    for fullname in filelist:
        # objects: list of {'name', 'difficult', 'poly', 'area'} dicts
        objects = util.parse_dota_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')
        # Only the size is needed; the context manager releases the file handle.
        with Image.open(img_fullname) as img:
            img_w, img_h = img.size
        with open(os.path.join(dstpath, name + '.txt'), 'w') as f_out:
            for obj in objects:
                poly = obj['poly']  # [(x1,y1),(x2,y2),(x3,y3),(x4,y4)]
                bbox = np.array(util.dots4ToRecC(poly, img_w, img_h))  # [x y w h]
                # Drop boxes with any value <= 0 or >= 1.
                if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
                    continue
                if obj['name'] not in extractclassname:
                    continue
                # Class id is the index of the class name, e.g. first entry -> 0.
                class_id = extractclassname.index(obj['name'])
                outline = str(class_id) + ' ' + ' '.join(list(map(str, bbox)))
                f_out.write(outline + '\n')
## trans dota format to (cls, c_x, c_y, Longest side, short side, angle:[0,179))
def dota2LongSideFormat(imgpath, txtpath, dstpath, extractclassname):
    """
    Convert DOTA-format labels into long-side-format labels.

    dstpath is deleted if it exists and then recreated. For every label file
    found below txtpath, the polygon of each object is normalised by the size
    of <imgpath>/<name>.png, fitted with cv2.minAreaRect and converted with
    cvminAreaRect2longsideformat. One line
        id x_c y_c longside shortside theta_label
    is written per kept object, with theta_label an int in [0, 179].
    Objects are dropped (with a message) when the conversion fails, when a
    box value is out of range, or when the class is not in extractclassname.
    If no object of a file is kept, the generated txt, the image and the
    source label file are all deleted.

    :param imgpath: the path of images
    :param txtpath: the path of txt in dota format
    :param dstpath: the path of txt in YOLO format
    :param extractclassname: the category you selected
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # delete output folder
    os.makedirs(dstpath)  # make new output folder
    filelist = util.GetFileFromThisRootDir(txtpath)
    for fullname in filelist:
        # objects: list of {'name', 'difficult', 'poly', 'area'} dicts
        objects = util.parse_dota_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')
        # Close the image right away: it may be deleted further down.
        with Image.open(img_fullname) as img:
            img_w, img_h = img.size
        with open(os.path.join(dstpath, name + '.txt'), 'w') as f_out:
            num_gt = 0
            for i, obj in enumerate(objects):
                num_gt = num_gt + 1  # count the current box as a valid gt
                poly = obj['poly']  # [(x1,y1),(x2,y2),(x3,y3),(x4,y4)]
                poly = np.float32(np.array(poly))
                # Normalise the four points by the image size.
                poly[:, 0] = poly[:, 0] / img_w
                poly[:, 1] = poly[:, 1] / img_h
                # rect = ((centre x, centre y), (width, height), angle)
                rect = cv2.minAreaRect(poly)
                c_x = rect[0][0]
                c_y = rect[0][1]
                w = rect[1][0]
                h = rect[1][1]
                theta = rect[-1]  # Range for angle is [-90,0)
                trans_data = cvminAreaRect2longsideformat(c_x, c_y, w, h, theta)
                if not trans_data:
                    # theta == 90 is treated as an incomplete box (a zero
                    # side) and dropped without a message.
                    if theta != 90:
                        print('opencv表示法转长边表示法出现异常,已将第%d个box排除,问题出现在该图片中:%s' % (i, img_fullname))
                    num_gt = num_gt - 1
                    continue
                # theta_longside range: [-180,0)
                c_x, c_y, longside, shortside, theta_longside = trans_data
                bbox = np.array((c_x, c_y, longside, shortside))
                if (sum(bbox <= 0) + sum(bbox[:2] >= 1)) >= 1:  # 0<xy<1, 0<side<=1
                    print('bbox[:2]中有>= 1的元素,bbox中有<= 0的元素,已将第%d个box排除,问题出现在该图片中:%s' % (i, img_fullname))
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (c_x, c_y, longside, shortside, theta_longside))
                    num_gt = num_gt - 1
                    continue
                if obj['name'] not in extractclassname:
                    print('预定类别中没有类别:%s;已将该box排除,问题出现在该图片中:%s' % (obj['name'], fullname))
                    num_gt = num_gt - 1
                    continue
                # Class id is the index of the class name, e.g. first entry -> 0.
                class_id = extractclassname.index(obj['name'])
                theta_label = int(theta_longside + 180.5)  # rounded, int in [0,180]
                if theta_label == 180:  # clamp to int [0,179]
                    theta_label = 179
                # Final sanity checks: report only, the line is still written.
                # Both messages used to pass two values to a single %s and
                # raised TypeError; the theta check also said "id problems".
                if class_id > 15 or class_id < 0:
                    print('id problems,问题出现在该图片中:%s' % img_fullname)
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (
                        c_x, c_y, longside, shortside, theta_longside))
                if theta_label < 0 or theta_label > 179:
                    print('theta problems,问题出现在该图片中:%s' % img_fullname)
                    print('出问题的longside形式数据:[%.16f, %.16f, %.16f, %.16f, %.1f]' % (
                        c_x, c_y, longside, shortside, theta_longside))
                # outline = 'id x y longside shortside theta'
                outline = str(class_id) + ' ' + ' '.join(list(map(str, bbox))) + ' ' + str(theta_label)
                f_out.write(outline + '\n')
        if num_gt == 0:
            # No valid box left: remove the empty label, the image and the source txt.
            os.remove(os.path.join(dstpath, name + '.txt'))
            os.remove(img_fullname)
            os.remove(fullname)
            print('%s 图片对应的txt不存在有效目标,已删除对应图片与txt' % img_fullname)
    print('已完成文件夹内DOTA数据形式到长边表示法的转换')
def cvminAreaRect2longsideformat(x_c, y_c, width, height, theta):
    '''
    Convert minAreaRect output (x_c, y_c, width, height, theta) into long-side
    format (x_c, y_c, longside, shortside, theta_longside).

    When width is the longer side (squares included) both representations
    agree; otherwise the long-side angle is the opencv angle minus 90.

    Special case: theta == 0 is first mapped back to the regular opencv form
    by swapping width/height and setting theta to -90.

    @param x_c: center_x
    @param y_c: center_y
    @param width: first edge met when rotating the x axis counter-clockwise
    @param height: the other edge
    @param theta: angle between the x axis and width, expected in [-90, 0)
    @return: (x_c, y_c, longside, shortside, theta_longside) with
             theta_longside in [-180, 0), or False when theta or the result is
             out of range (a message is printed, except for theta == 90)
    '''
    if theta == 0:
        theta = -90
        width, height = height, width

    if theta > 0 or theta < -90:
        # theta == 90 is treated as an incomplete box and rejected silently.
        if theta != 90:
            print('θ计算出现异常,当前数据为:%.16f, %.16f, %.16f, %.16f, %.1f;超出opencv表示法的范围:[-90,0)' % (x_c, y_c, width, height, theta))
        return False

    if width != max(width, height):
        # width is not the longer side
        longside, shortside, theta_longside = height, width, theta - 90
    else:
        # width is the longer side (squares included)
        longside, shortside, theta_longside = width, height, theta

    result = (x_c, y_c, longside, shortside, theta_longside)
    if longside < shortside:
        print('旋转框转换表示形式后出现问题:最长边小于短边;[%.16f, %.16f, %.16f, %.16f, %.1f]' % result)
        return False
    if theta_longside < -180 or theta_longside >= 0:
        print('旋转框转换表示形式时出现问题:θ超出长边表示法的范围:[-180,0);[%.16f, %.16f, %.16f, %.16f, %.1f]' % result)
        return False
    return result
def drawLongsideFormatimg(imgpath, txtpath, dstpath, extractclassname, thickness=2):
    """
    Draw the boxes of long-side-format labels onto their images.

    Label format per line:
        classid, x_c_normalized, y_c_normalized, longside_normalized,
        shortside_normalized, theta
    dstpath is deleted if it exists and then recreated; every annotated image
    is saved there as '<name>_.png'. One random colour is picked per class.

    :param imgpath: the path of images
    :param txtpath: the path of txt in longside format
    :param dstpath: the path of image_drawed
    :param extractclassname: the category you selected
    :param thickness: line thickness passed to cv2.drawContours
    """
    if os.path.exists(dstpath):
        shutil.rmtree(dstpath)  # delete output folder
    os.makedirs(dstpath)  # make new output folder
    # One random colour (three ints in 0..255) per class.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(extractclassname))]
    filelist = util.GetFileFromThisRootDir(txtpath)
    for fullname in filelist:
        # objects[i] = [classid, x_c, y_c, longside, shortside, theta]
        objects = util.parse_longsideformat(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        img_fullname = os.path.join(imgpath, name + '.png')
        img_savename = os.path.join(dstpath, name + '_.png')
        # PIL is used only to read the size; release its file handle at once.
        with Image.open(img_fullname) as pil_img:
            img_w, img_h = pil_img.size
        img = cv2.imread(img_fullname)  # pixel data used for drawing
        for obj in objects:
            class_index = obj[0]
            # Shift the stored angle by -179.9 before converting back to the
            # ((x_c, y_c), (w, h), theta) form expected by cv2.boxPoints.
            rect = longsideformat2cvminAreaRect(obj[1], obj[2], obj[3], obj[4], (obj[5] - 179.9))
            poly = np.float32(cv2.boxPoints(rect))  # four normalised corner points
            # De-normalise and convert to integer pixel coordinates.
            poly[:, 0] = poly[:, 0] * img_w
            poly[:, 1] = poly[:, 1] * img_h
            # np.int0 was an alias of np.intp and was removed in NumPy 2.0.
            poly = poly.astype(np.intp)
            cv2.drawContours(image=img,
                             contours=[poly],
                             contourIdx=-1,
                             color=colors[int(class_index)],
                             thickness=thickness)
        cv2.imwrite(img_savename, img)
def longsideformat2cvminAreaRect(x_c, y_c, longside, shortside, theta_longside):
    '''
    Convert long-side format (x_c, y_c, longside, shortside, theta_longside)
    into the minAreaRect form ((x_c, y_c), (width, height), theta).

    For theta_longside in [-180, -90) the long side becomes the height and 90
    is added to the angle; otherwise the long side is the width and the angle
    is kept. A message is printed when the resulting theta is outside
    [-90, 0), but the value is still returned.

    @param x_c: center_x
    @param y_c: center_y
    @param longside: longer side
    @param shortside: shorter side
    @param theta_longside: angle of the long side, expected in [-180, 0)
    @return: ((x_c, y_c), (width, height), theta)
    '''
    if -180 <= theta_longside < -90:
        # width is not the longest side
        size = (shortside, longside)
        theta = theta_longside + 90
    else:
        size = (longside, shortside)
        theta = theta_longside
    if not (-90 <= theta < 0):
        print('当前θ=%.1f,超出opencv的θ定义范围[-90, 0)' % theta)
    return ((x_c, y_c), size, theta)
def delete(imgpath, txtpath):
    """
    Remove every label file found below txtpath whose image
    <imgpath>/<label base name>.png does not exist.

    :param imgpath: directory containing the .png images
    :param txtpath: directory walked for label files
    """
    for label_path in util.GetFileFromThisRootDir(txtpath):
        stem = os.path.splitext(os.path.basename(label_path))[0]
        image_path = os.path.join(imgpath, stem + '.png')
        if os.path.exists(image_path):
            continue
        os.remove(label_path)
if __name__ == '__main__':
    ## an example
    class_names = util.classnames_v1_5
    # Step 1: DOTA txt labels -> long-side-format labels
    dota2LongSideFormat(imgpath='./crack/images',
                        txtpath='./crack/txt',
                        dstpath='./crack/yolo_labels',
                        extractclassname=class_names)
    # Step 2: draw the converted labels onto the images for a visual check
    drawLongsideFormatimg(imgpath='crack/images',
                          txtpath='crack/yolo_labels',
                          dstpath='crack/draw_longside_img',
                          extractclassname=class_names)
到了这里,关于yolov5旋转目标框的数据集标签制作的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!