CIFAR是一个用于普通物体识别的数据集。CIFAR数据集分为两种:CIFAR-10和CIFAR-100。The CIFAR-10 and CIFAR-100 are labeled subsets of the 80 million tiny images dataset. They were collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton.
CIFAR-10由60000张大小为32*32的三通道彩色图像组成,被分为10类,分别为airplane、automobile、bird、cat、deer、dog、frog、horse、ship、truck。每类有6000张图像。其中50000张图像用来训练,10000张图像用来测试。数据集分为5个训练块和1个测试块,每个块包含10000张图像。训练集每类包含5000张图像,测试集每类包含1000张图像。
CIFAR-100由60000张大小为32*32的三通道彩色图像组成,分为20个大类,每个大类又包含5个小类,总共100个小类。每个小类包含600张图像,其中500张用于训练,100张用于测试。
从https://www.cs.toronto.edu/~kriz/cifar.html 下载CIFAR C版本的二进制数据:
(1)、CIFAR-10:下载cifar-10-binary.tar.gz,解压缩,共8个文件,batches.meta.txt中存放10个种类名,data_batch_1.bin… data_batch_5.bin、test_batch.bin共6个文件,每个文件中存放10000张图像数据。
(2)、CIFAR-100:下载cifar-100-binary.tar.gz,解压缩,共5个文件,coarse_label_names.txt中存放20个大类名,fine_label_names.txt中存放100个小类名,train.bin中存放50000张训练图像,test.bin中存放10000张测试图像。
CIFAR数据集到图像转换实现的代码如下:
static void write_image_cifar(const cv::Mat& bgr, const std::string& image_save_path, const std::vector<int>& label_count, int label_class) { std::string str = std::to_string(label_count[label_class]); if (label_count[label_class] < 10) { str = "0000" + str; } else if (label_count[label_class] < 100) { str = "000" + str; } else if (label_count[label_class] < 1000) { str = "00" + str; } else if (label_count[label_class] < 10000) { str = "0" + str; } else { fprintf(stderr, "save image name fail\n"); return; } str = std::to_string(label_class) + "_" + str + ".png"; str = image_save_path + str; cv::imwrite(str, bgr); } static void read_cifar_10(const std::string& bin_name, const std::string& image_save_path, int image_count, std::vector<int>& label_count) { int image_width = 32; int image_height = 32; std::ifstream file(bin_name, std::ios::binary); if (file.is_open()) { for (int i = 0; i < image_count; ++i) { cv::Mat red = cv::Mat::zeros(image_height, image_width, CV_8UC1); cv::Mat green = cv::Mat::zeros(image_height, image_width, CV_8UC1); cv::Mat blue = cv::Mat::zeros(image_height, image_width, CV_8UC1); int label_class = 0; file.read((char*)&label_class, 1); label_count[label_class]++; file.read((char*)red.data, 1024); file.read((char*)green.data, 1024); file.read((char*)blue.data, 1024); std::vector<cv::Mat> tmp{ blue, green, red }; cv::Mat bgr; cv::merge(tmp, bgr); write_image_cifar(bgr, image_save_path, label_count, label_class); } file.close(); } } int CIFAR10toImage() { std::string images_path = "E:/GitCode/NN_Test/data/database/CIFAR/CIFAR-10/"; // train image std::vector<int> label_count(10, 0); for (int i = 1; i <= 5; i++) { std::string bin_name = images_path + "data_batch_" + std::to_string(i) + ".bin"; std::string image_save_path = "E:/GitCode/NN_Test/data/tmp/cifar-10_train/"; int image_count = 10000; read_cifar_10(bin_name, image_save_path, image_count, label_count); } // test image std::fill(&label_count[0], &label_count[0] + 10, 0); std::string 
bin_name = images_path + "test_batch.bin"; std::string image_save_path = "E:/GitCode/NN_Test/data/tmp/cifar-10_test/"; int image_count = 10000; read_cifar_10(bin_name, image_save_path, image_count, label_count); // save big imags images_path = "E:/GitCode/NN_Test/data/tmp/cifar-10_train/"; int width = 32 * 20; int height = 32 * 10; cv::Mat dst(height, width, CV_8UC3); for (int i = 0; i < 10; i++) { for (int j = 1; j <= 20; j++) { int x = (j - 1) * 32; int y = i * 32; cv::Mat part = dst(cv::Rect(x, y, 32, 32)); std::string str = std::to_string(j); if (j < 10) str = "0000" + str; else str = "000" + str; str = std::to_string(i) + "_" + str + ".png"; std::string input_image = images_path + str; cv::Mat src = cv::imread(input_image, 1); if (src.empty()) { fprintf(stderr, "read image error: %s\n", input_image.c_str()); return -1; } src.copyTo(part); } } std::string output_image = images_path + "result.png"; cv::imwrite(output_image, dst); return 0; } static void write_image_cifar(const cv::Mat& bgr, const std::string& image_save_path, const std::vector<std::vector<int>>& label_count, int label_class_coarse, int label_class_fine) { std::string str = std::to_string(label_count[label_class_coarse][label_class_fine]); if (label_count[label_class_coarse][label_class_fine] < 10) { str = "0000" + str; } else if (label_count[label_class_coarse][label_class_fine] < 100) { str = "000" + str; } else if (label_count[label_class_coarse][label_class_fine] < 1000) { str = "00" + str; } else if (label_count[label_class_coarse][label_class_fine] < 10000) { str = "0" + str; } else { fprintf(stderr, "save image name fail\n"); return; } str = std::to_string(label_class_coarse) + "_" + std::to_string(label_class_fine) + "_" + str + ".png"; str = image_save_path + str; cv::imwrite(str, bgr); } static void read_cifar_100(const std::string& bin_name, const std::string& image_save_path, int image_count, std::vector<std::vector<int>>& label_count) { int image_width = 32; int image_height = 32; 
std::ifstream file(bin_name, std::ios::binary); if (file.is_open()) { for (int i = 0; i < image_count; ++i) { cv::Mat red = cv::Mat::zeros(image_height, image_width, CV_8UC1); cv::Mat green = cv::Mat::zeros(image_height, image_width, CV_8UC1); cv::Mat blue = cv::Mat::zeros(image_height, image_width, CV_8UC1); int label_class_coarse = 0; file.read((char*)&label_class_coarse, 1); int label_class_fine = 0; file.read((char*)&label_class_fine, 1); label_count[label_class_coarse][label_class_fine]++; file.read((char*)red.data, 1024); file.read((char*)green.data, 1024); file.read((char*)blue.data, 1024); std::vector<cv::Mat> tmp{ blue, green, red }; cv::Mat bgr; cv::merge(tmp, bgr); write_image_cifar(bgr, image_save_path, label_count, label_class_coarse, label_class_fine); } file.close(); } } int CIFAR100toImage() { std::string images_path = "E:/GitCode/NN_Test/data/database/CIFAR/CIFAR-100/"; // train image std::vector<std::vector<int>> label_count; label_count.resize(20); for (int i = 0; i < 20; i++) { label_count[i].resize(100); std::fill(&label_count[i][0], &label_count[i][0] + 100, 0); } std::string bin_name = images_path + "train.bin"; std::string image_save_path = "E:/GitCode/NN_Test/data/tmp/cifar-100_train/"; int image_count = 50000; read_cifar_100(bin_name, image_save_path, image_count, label_count); // test image for (int i = 0; i < 20; i++) { label_count[i].resize(100); std::fill(&label_count[i][0], &label_count[i][0] + 100, 0); } bin_name = images_path + "test.bin"; image_save_path = "E:/GitCode/NN_Test/data/tmp/cifar-100_test/"; image_count = 10000; read_cifar_100(bin_name, image_save_path, image_count, label_count); // save big imags images_path = "E:/GitCode/NN_Test/data/tmp/cifar-100_train/"; int width = 32 * 20; int height = 32 * 100; cv::Mat dst(height, width, CV_8UC3); std::vector<std::string> image_names; for (int j = 0; j < 20; j++) { for (int i = 0; i < 100; i++) { std::string str1 = std::to_string(j); std::string str2 = std::to_string(i); 
std::string str = images_path + str1 + "_" + str2 + "_00001.png"; cv::Mat src = cv::imread(str, 1); if (src.data) { for (int t = 1; t < 21; t++) { if (t < 10) str = "0000" + std::to_string(t); else str = "000" + std::to_string(t); str = images_path + str1 + "_" + str2 + "_" + str + ".png"; image_names.push_back(str); } } } } for (int i = 0; i < 100; i++) { for (int j = 0; j < 20; j++) { int x = j * 32; int y = i * 32; cv::Mat part = dst(cv::Rect(x, y, 32, 32)); cv::Mat src = cv::imread(image_names[i * 20 + j], 1); if (src.empty()) { fprintf(stderr, "read image fail: %s\n", image_names[i * 20 + j].c_str()); return -1; } src.copyTo(part); } } std::string output_image = images_path + "result.png"; cv::imwrite(output_image, dst); cv::Mat src = cv::imread(output_image, 1); if (src.empty()) { fprintf(stderr, "read result image fail: %s\n", output_image.c_str()); return -1; } for (int i = 0; i < 4; i++) { cv::Mat dst = src(cv::Rect(0, i * 800, 640, 800)); std::string str = images_path + "result_" + std::to_string(i + 1) + ".png"; cv::imwrite(str, dst); } return 0; }
cifar-10转换的结果如下:
cifar-100转换的结果如下: