本次分割的动物数据集 4G

https://download.csdn.net/download/qq_26696715/87621195

一. coco2017数据集结构

总的结构如下：

├─cocotoyolo.py
├─getanimal.py
├─annotations
└─images
    ├─train2017
    └─val2017

其中，images存放的是训练集、验证集的图片原图；annotations中存放的是标注文件：

2017/09/01  19:04        91,865,115 captions_train2017.json
2017/09/01  19:04         3,872,473 captions_val2017.json
2017/09/01  19:02       469,785,474 instances_train2017.json
2017/09/01  19:02        19,987,840 instances_val2017.json
2017/09/01  19:04       238,884,731 person_keypoints_train2017.json
2017/09/01  19:04        10,020,657 person_keypoints_val2017.json

标注文件解析

instances_xx2017.json（xx 为 train 或 val）是COCO数据集的目标实例标注文件，包含对应数据集（训练集或验证集）中所有图片的标注信息，字段含义如下：

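具体解释如下：顶层包含 info（数据集描述信息）、licenses（图片许可证列表）、images（图片信息列表，含 id、file_name、width、height 等）、annotations（标注列表，含 image_id、category_id、bbox 等，其中 bbox 为 [左上角x, 左上角y, 宽, 高]）和 categories（类别列表，含 id、name 等）五个字段。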

二. 提取需要的类别重新封装成coco数据集（这里以动物类别为例）

提取完成后的新文件夹为

├─animal_detection
│  ├─annotations
│  └─images
│      ├─train2017
│      └─val2017

提取代码 getanimal.py

import os
import json
import shutil

# Names of the COCO categories to extract (here: the animal classes).
categories = ['bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe']

# Root of the original COCO 2017 dataset; the script reads
# <data_dir>/annotations and <data_dir>/images/{train2017,val2017}.
data_dir = './'

# Root of the extracted subset that will be written.
output_dir = './animal_detection'

# Create every output directory independently. Previously all three were
# created only when 'annotations' was missing, so a partially created tree
# (e.g. after an interrupted run) left the image folders absent and the
# later image copies wrote to the wrong place or failed.
for sub_dir in (('annotations',), ('images', 'train2017'), ('images', 'val2017')):
    os.makedirs(os.path.join(output_dir, *sub_dir), exist_ok=True)



'''
训练集
'''
# ---- Training split ----
# Read the complete COCO instance annotations for train2017.
train_json_in = os.path.join(data_dir, 'annotations', 'instances_train2017.json')
with open(train_json_in, 'r') as f:
    train_instances = json.load(f)

# Keep the category records whose name is in the wanted list and remember
# their COCO ids for filtering the annotations.
new_categories = [c for c in train_instances['categories'] if c['name'] in categories]
animal_ids = [c['id'] for c in new_categories]

# Keep only annotations of the wanted categories, and collect the ids of the
# training images those annotations belong to.
new_train_annotations = [
    ann for ann in train_instances['annotations'] if ann['category_id'] in animal_ids
]
train_image_ids = {ann['image_id'] for ann in new_train_annotations}

# Copy every training image that has at least one kept annotation into the
# output tree, and keep its metadata record for the new annotation file.
train_img_src = os.path.join(data_dir, 'images', 'train2017')
train_img_dst = os.path.join(output_dir, 'images', 'train2017')
new_images = []
for image in train_instances['images']:
    if image['id'] not in train_image_ids:
        continue
    new_images.append(image)
    shutil.copy(os.path.join(train_img_src, image['file_name']), train_img_dst)

# Assemble the reduced instances structure; 'info' and 'licenses' are carried
# over unchanged from the original file.
new_train_instances = {
    'info': train_instances['info'],
    'licenses': train_instances['licenses'],
    'images': new_images,
    'annotations': new_train_annotations,
    'categories': new_categories,
}

# Write the reduced training annotations next to the copied images.
train_json_out = os.path.join(output_dir, 'annotations', 'instances_train2017.json')
with open(train_json_out, 'w') as f:
    json.dump(new_train_instances, f)



'''
验证集
'''
# ---- Validation split ----
# Read the complete COCO instance annotations for val2017.
val_json_in = os.path.join(data_dir, 'annotations', 'instances_val2017.json')
with open(val_json_in, 'r') as f:
    val_instances = json.load(f)

# Keep the category records whose name is in the wanted list and remember
# their COCO ids for filtering the annotations.
new_categories = [c for c in val_instances['categories'] if c['name'] in categories]
animal_ids = [c['id'] for c in new_categories]

# Keep only annotations of the wanted categories, and collect the ids of the
# validation images those annotations belong to.
new_val_annotations = [
    ann for ann in val_instances['annotations'] if ann['category_id'] in animal_ids
]
val_image_ids = {ann['image_id'] for ann in new_val_annotations}

# Copy every validation image that has at least one kept annotation into the
# output tree, and keep its metadata record for the new annotation file.
val_img_src = os.path.join(data_dir, 'images', 'val2017')
val_img_dst = os.path.join(output_dir, 'images', 'val2017')
new_images = []
for image in val_instances['images']:
    if image['id'] not in val_image_ids:
        continue
    new_images.append(image)
    shutil.copy(os.path.join(val_img_src, image['file_name']), val_img_dst)

# Assemble the reduced instances structure; 'info' and 'licenses' are carried
# over unchanged from the original file.
new_val_instances = {
    'info': val_instances['info'],
    'licenses': val_instances['licenses'],
    'images': new_images,
    'annotations': new_val_annotations,
    'categories': new_categories,
}

# Write the reduced validation annotations next to the copied images.
val_json_out = os.path.join(output_dir, 'annotations', 'instances_val2017.json')
with open(val_json_out, 'w') as f:
    json.dump(new_val_instances, f)

三. 转换为yolo 数据集

转换后生成

├─animal_detection
│  ├─classes.txt
│  ├─train2017.txt
│  ├─val2017.txt
│  ├─annotations
│  ├─images
│  │  ├─train2017
│  │  └─val2017
│  └─labels

转换代码 cocotoyolo.py

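# Convert a COCO-format dataset into a YOLO-format dataset.
# There are no command-line options; the paths are set in the __main__ section:
#   root              - dataset folder holding annotations/instances_{train,val}2017.json
#   ana_txt_save_path - folder the label .txt files are written to (root + 'labels/')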

import os
import json
from tqdm import tqdm


def convert(size, box):
    """Turn a COCO pixel box into a normalized, center-based box.

    Args:
        size: Sequence whose first two items are the image width and height.
        box: Sequence whose first four items are the box's x, y, width and
            height; x and y are treated as the corner the box extends from.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension and rounded to 6 decimal places.
    """
    # Reciprocals of the image dimensions, used as scale factors.
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])

    box_w = box[2]
    box_h = box[3]
    # Shift from the corner to the box center.
    center_x = box[0] + box_w / 2.0
    center_y = box[1] + box_h / 2.0

    # round() fixes the number of decimals of the normalized values.
    return (
        round(center_x * inv_w, 6),
        round(center_y * inv_h, 6),
        round(box_w * inv_w, 6),
        round(box_h * inv_h, 6),
    )

def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, 'r') as f:
        return json.load(f)


def _write_yolo_split(data, split, root, labels_dir, id_map):
    """Write the YOLO label files and the image list for one dataset split.

    Args:
        data: Parsed COCO instances dict; its 'images' and 'annotations'
            lists are read.
        split: Split folder name, e.g. 'train2017'; used for the list file
            name (<split>.txt) and the image paths written into it.
        root: Dataset root folder in which the list file is created.
        labels_dir: Folder receiving one <image stem>.txt label file per image.
        id_map: Mapping from COCO category id to the class index written
            into the label files.

    Raises:
        KeyError: If an annotation's category id is missing from *id_map*.
    """
    # Index the annotations by image once instead of rescanning the whole
    # annotation list for every image; original order per image is kept.
    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    # NOTE(review): label files of every split land in one flat folder, while
    # the list file points at images/<split>/. Some YOLO trainers look for
    # labels under a folder mirroring the image path - confirm for your tool.
    with open(os.path.join(root, split + '.txt'), 'w') as list_file:
        for img in tqdm(data['images']):
            # The label file shares the image's base name.
            head, _ = os.path.splitext(img["file_name"])
            size = (img["width"], img["height"])
            # The file is created even when the image has no annotations.
            with open(os.path.join(labels_dir, head + ".txt"), 'w') as f_txt:
                for ann in anns_by_image.get(img["id"], []):
                    box = convert(size, ann["bbox"])
                    f_txt.write("%s %s %s %s %s\n" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
            # Record the image path, relative to the dataset root.
            list_file.write('./images/%s/%s.jpg\n' % (split, head))


if __name__ == '__main__':
    # Adjust these paths to where your COCO-format dataset lives.
    root = "animal_detection/"
    json_trainfile = root+'annotations/instances_train2017.json'  # COCO object-instance annotations
    json_valfile = root+'annotations/instances_val2017.json'  # COCO object-instance annotations
    ana_txt_save_path = root+'labels/'  # folder the label files are written to

    traindata = _load_json(json_trainfile)
    valdata = _load_json(json_valfile)

    os.makedirs(ana_txt_save_path, exist_ok=True)

    # COCO category ids are not contiguous, so remap them to 0..N-1 in the
    # order the categories are listed, and write classes.txt in that order.
    id_map = {}
    with open(os.path.join(root, 'classes.txt'), 'w') as f:
        for i, category in enumerate(traindata['categories']):
            f.write(f"{category['name']}\n")
            id_map[category['id']] = i

    '''
    保存train txt
    '''
    # Training split: label files plus train2017.txt.
    _write_yolo_split(traindata, 'train2017', root, ana_txt_save_path, id_map)
    '''
    保存val txt
    '''
    # Validation split: label files plus val2017.txt.
    _write_yolo_split(valdata, 'val2017', root, ana_txt_save_path, id_map)

图像处理大大大大大牛啊

coco 2017数据集类别提取并转换为yolo数据集

coco 2017数据集提取和转换

本次分割的动物数据集 4G

一. coco2017数据集结构

标注文件解析

二. 提取需要的类别重新封装成coco数据集（这里以动物类别为例）

三. 转换为yolo 数据集

图像处理大大大大大牛啊

coco 2017数据集 类别提取并转换为yolo数据集

coco 2017数据集提取和转换

本次分割的动物数据集 4G

一. coco2017数据集结构

标注文件解析

二. 提取需要的类别重新封装成coco数据集（这里以动物类别为例）

三. 转换为yolo 数据集

coco 2017数据集类别提取并转换为yolo数据集