一个核心问题是要将这八类情感图片全部重命名。我最初尝试一步到位(边归类边重命名),
但是每读取一个类别,positive 的序号都会从 1 重新开始,导致不同类别的图片重名、相互覆盖而出错。所以分为了两步:(1)将八类数据图片放在两个文件夹中,先不改名称;(2)用上面的方法,给所有图片改名称。
(1)将八类数据图片放在两个文件夹中,先不改名称:
import os
import shutil

from tqdm import tqdm
# Collect the images of all eight emotion categories into two sentiment
# folders, numbering them consecutively as positive_N / negative_N.

# Paths of the source dataset and of the two sentiment output folders.
input_folder = 'your_dataset_folder'
output_folder_positive = 'positive_images'
output_folder_negative = 'negative_images'

# Create the output folders (no error if they already exist).
os.makedirs(output_folder_positive, exist_ok=True)
os.makedirs(output_folder_negative, exist_ok=True)

# Emotion categories grouped by sentiment polarity.
# 'excitement' is a positive emotion and 'anger' a negative one; the original
# lists had these two swapped (the alphabetical list was simply cut in half).
positive_categories = ['amusement', 'awe', 'contentment', 'excitement']
negative_categories = ['anger', 'disgust', 'fear', 'sadness']

# One running counter per sentiment.  Numbering must continue across
# categories: restarting at 1 for every category makes later categories
# overwrite the files written for earlier ones.
next_index = {'positive': 1, 'negative': 1}

for category in positive_categories + negative_categories:
    category_folder = os.path.join(input_folder, category)

    # Report a missing category instead of skipping it silently, so that a
    # misspelled folder name does not quietly drop a whole class.
    if not os.path.isdir(category_folder):
        print(f"Warning: category folder not found, skipped: {category_folder}")
        continue

    if category in positive_categories:
        sentiment, output_folder = 'positive', output_folder_positive
    else:
        sentiment, output_folder = 'negative', output_folder_negative

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # numbering reproducible between runs.
    for filename in tqdm(sorted(os.listdir(category_folder)), desc=f"Processing {category}"):
        file_path = os.path.join(category_folder, filename)

        # Only regular files are collected; sub-folders are ignored.
        if not os.path.isfile(file_path):
            continue

        # Keep the real extension (fall back to .jpg when there is none) so a
        # PNG is not mislabeled as a JPEG.
        extension = os.path.splitext(filename)[1] or '.jpg'
        output_path = os.path.join(
            output_folder, f"{sentiment}_{next_index[sentiment]}{extension}"
        )
        next_index[sentiment] += 1

        # Copy rather than move: the source dataset stays intact.
        shutil.copy(file_path, output_path)

print("处理完成。")
(2)用上面的方法,给所有图片改名称:
import os
from tqdm import tqdm
import shutil
# Copy the images of all eight emotion categories into two sentiment folders,
# keeping the original file names wherever that is possible.

# Paths of the source dataset and of the two sentiment output folders.
input_folder = 'data'
output_folder_positive = 'positive'
output_folder_negative = 'negative'

# Create the output folders (no error if they already exist).
os.makedirs(output_folder_positive, exist_ok=True)
os.makedirs(output_folder_negative, exist_ok=True)

# Emotion categories grouped by sentiment polarity.
# 'excitement' is a positive emotion and 'anger' a negative one; the original
# lists had these two swapped.
positive_categories = ['amusement', 'awe', 'contentment', 'excitement']
# NOTE(review): confirm the folder is really named 'sad' (the emotion is
# usually spelled 'sadness'); a missing folder is reported by the loop below.
negative_categories = ['anger', 'disgust', 'fear', 'sad']

# File names already written to each output folder during this run.  Several
# categories are merged into one folder, so two categories that both contain
# e.g. "1.jpg" would otherwise overwrite each other.
used_names = {output_folder_positive: set(), output_folder_negative: set()}

for category in positive_categories + negative_categories:
    category_folder = os.path.join(input_folder, category)

    # Report a missing category instead of skipping it silently.
    if not os.path.isdir(category_folder):
        print(f"Warning: category folder not found, skipped: {category_folder}")
        continue

    if category in positive_categories:
        output_folder = output_folder_positive
    else:
        output_folder = output_folder_negative
    taken = used_names[output_folder]

    # Sorted so that collision handling gives the same names on every run
    # (os.listdir returns entries in arbitrary order).
    for filename in tqdm(sorted(os.listdir(category_folder)), desc=f"Processing {category}"):
        file_path = os.path.join(category_folder, filename)

        # Only regular files are copied; sub-folders are ignored.
        if not os.path.isfile(file_path):
            continue

        # Keep the original name unless another category already used it; in
        # that case prefix the category name until the name is unique.
        target_name = filename
        while target_name in taken:
            target_name = f"{category}_{target_name}"
        taken.add(target_name)

        shutil.copy(file_path, os.path.join(output_folder, target_name))

print("处理完成。")
(3)把上述二分类数据集按照 80:15:5 的比例划分为训练集、测试集和验证集:
import os
import random
from shutil import copyfile
from tqdm import tqdm
def split_and_copy_images(input_folder, output_folder, split_ratios=(0.8, 0.15, 0.05), seed=42,
                          *, show_progress=True):
    """Split a positive/negative image dataset into train/test/val folders.

    The files of ``input_folder/positive`` and ``input_folder/negative`` are
    shuffled and copied to ``output_folder/<split>/<category>/``, where
    ``<split>`` is ``train``, ``test`` or ``val``.  Every copy is renamed to
    ``<category>_<n><ext>``; ``n`` is the 1-based position of the file in the
    shuffled list, so numbers are unique within a category across all splits.

    Args:
        input_folder: Folder containing the ``positive`` and ``negative``
            sub-folders.
        output_folder: Folder in which the split tree is created.
        split_ratios: Fractions for (train, test, val).  Only the first two
            are read: the train and test counts are rounded down and every
            remaining file goes to ``val``.
        seed: Seed passed to ``random.seed`` before shuffling.
        show_progress: When true (the default) wrap the copy loop in a
            ``tqdm`` progress bar; when false iterate silently.

    Raises:
        FileNotFoundError: If a category sub-folder of ``input_folder`` does
            not exist.
    """
    random.seed(seed)
    categories = ('positive', 'negative')

    # Create the full output tree up front.
    for split in ('train', 'test', 'val'):
        for category in categories:
            os.makedirs(os.path.join(output_folder, split, category), exist_ok=True)

    for category in categories:
        category_path = os.path.join(input_folder, category)

        # Keep regular files only (a sub-folder cannot be copied with
        # copyfile) and sort them: os.listdir returns entries in arbitrary
        # order, so without sorting the seed would not make the split
        # reproducible.
        image_files = sorted(
            name for name in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, name))
        )
        random.shuffle(image_files)

        # Split boundaries; whatever is left after train and test is val.
        total_files = len(image_files)
        train_count = int(total_files * split_ratios[0])
        test_count = int(total_files * split_ratios[1])

        if show_progress:
            entries = tqdm(image_files, desc=f"Processing {category}")
        else:
            entries = image_files

        for i, filename in enumerate(entries):
            if i < train_count:
                split = 'train'
            elif i < train_count + test_count:
                split = 'test'
            else:
                split = 'val'

            # Keep the real extension (fall back to .jpg when there is none)
            # so that a PNG is not mislabeled as a JPEG.
            extension = os.path.splitext(filename)[1] or '.jpg'
            src_path = os.path.join(category_path, filename)
            dst_path = os.path.join(output_folder, split, category, f'{category}_{i + 1}{extension}')
            copyfile(src_path, dst_path)
# Source dataset root (expected to hold the 'positive' and 'negative' folders)
# and the root under which the train/test/val tree is written.
input_folder = 'path/to/dataset'
output_folder = 'path/to/split_dataset'

# Run the split only when this file is executed as a script, so that importing
# it to reuse split_and_copy_images does not start copying files.
if __name__ == "__main__":
    split_and_copy_images(input_folder, output_folder)
    print("数据集划分完成。")
总结:只要思路清晰,有些脚本用 ChatGPT 来写还是非常方便的。