第一章 调整学习率的作用

深度学习中,学习率调整非常重要。

学习率大的优点:1、加快学习速率。2、帮助跳出局部最优值。

学习率大的缺点:1、导致模型训练不收敛。2、单单使用大学习率容易导致模型不精确

学习率小的优点:1、帮助模型收敛,有助于模型细化。2、提高模型精度。

学习率小的缺点:1、无法跳出局部最优值。2、收敛缓慢

第二章 学习率代码示例

以下是两种调整学习率的方法:

    StepLR方法:
 

import torch.optim as optim
from torch.optim import lr_scheduler

# Create the optimizer with an initial learning rate of 0.1
# (`model` is assumed to be an already-built network — it is not defined in this snippet)
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Create the scheduler: every 30 calls to scheduler.step() the learning rate is multiplied by 0.1
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# Use the scheduler inside the training loop
# (`num_epochs` is assumed to be set by the surrounding code)
for epoch in range(num_epochs):
    # Train the model for one epoch (placeholder for the forward/backward/optimizer.step() code)
    ...
    
    # Advance the scheduler once per epoch, after the training work above
    scheduler.step()

StepLR方法是最简单常用的学习率调整方法之一。它会在每过step_size轮时,将此前的学习率乘以gamma。通过调整step_size和gamma的值,可以灵活地控制学习率的变化。

    CosineAnnealingLR方法:

import torch.optim as optim
from torch.optim import lr_scheduler

# Create the optimizer with an initial learning rate of 0.1
# (`model` is assumed to be an already-built network — it is not defined in this snippet)
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Create the scheduler: the learning rate follows a cosine curve, with T_max=100 scheduler steps
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

# Use the scheduler inside the training loop
# (`num_epochs` is assumed to be set by the surrounding code)
for epoch in range(num_epochs):
    # Train the model for one epoch (placeholder for the forward/backward/optimizer.step() code)
    ...
    
    # Advance the scheduler once per epoch, after the training work above
    scheduler.step()

CosineAnnealingLR方法会按照余弦曲线调整学习率。T_max参数指定的是半个余弦周期的长度(以scheduler.step()的调用次数计):学习率会在T_max轮内从初始值平滑下降到最小值eta_min(默认为0);如果训练轮数超过T_max,学习率会在接下来的T_max轮内沿余弦曲线再回升到初始值,也就是说一个完整周期的长度是2×T_max。通过调整T_max的值,可以控制学习率变化的周期长度。

第三章 综合CIFAR-10训练集实现

第一步导包:

import torch

from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5

 第二步定义主函数并加载数据集

def main():
    """Load CIFAR-10 and build the LeNet-5 model, loss function and optimizer.

    This is step 2 of the walkthrough: it creates the training and test
    ``DataLoader`` objects, prints the shape of one sample batch, and sets up
    the model, criterion and optimizer. The training/evaluation loop of
    step 3 continues this function body.
    """
    batchsz = 128

    # Both splits use the same preprocessing, so build the pipeline once.
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load the CIFAR-10 training split (downloaded into ./cifar if missing)
    cifar_train = datasets.CIFAR10('cifar', True, transform=transform, download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Load the CIFAR-10 test split; shuffling is unnecessary for evaluation
    cifar_test = datasets.CIFAR10('cifar', False, transform=transform, download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=False)

    # Peek at one batch through the public iterator protocol instead of the
    # private ``_next_data()`` method of the loader iterator.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    device = torch.device('cpu')  # run the computation on the CPU
    model = Lenet5().to(device)  # the network used here is LeNet-5
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

第三步训练模型并评估


    # Run 10 epochs; each epoch is one training pass followed by one test pass.
    for epoch in range(10):
        # ---- training pass ----
        model.train()
        for images, targets in cifar_train:
            # images: [b, 3, 32, 32], targets: [b]
            images = images.to(device)
            targets = targets.to(device)

            # Forward pass: model output is [b, 10], the loss is a scalar tensor
            batch_loss = criteon(model(images), targets)

            # Backward pass and parameter update
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            print(epoch, 'loss:', batch_loss.item())

        # ---- evaluation pass ----
        model.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for images, targets in cifar_test:
                images = images.to(device)
                targets = targets.to(device)

                # Predicted class is the index of the largest logit
                predicted = model(images).argmax(dim=1)
                hits += predicted.eq(targets).float().sum().item()
                seen += images.size(0)

        # Fraction of test samples classified correctly in this epoch
        acc = hits / seen
        print(epoch, 'test acc:', acc)

第四步主函数调用实现

# Run the training script only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

完整代码如下:

import torch

from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5


def main():
    """Train LeNet-5 on CIFAR-10 for 10 epochs and print the test accuracy.

    The function downloads CIFAR-10 into ``cifar`` when it is not present,
    prints the shape of one sample batch and the model structure, and then,
    for each epoch, runs one training pass (Adam, lr=1e-3, cross-entropy
    loss, loss printed after every batch) followed by one evaluation pass
    over the test split whose accuracy is printed.
    """
    batchsz = 128

    # Both splits use the same preprocessing, so build the pipeline once.
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load the CIFAR-10 training split (downloaded into ./cifar if missing)
    cifar_train = datasets.CIFAR10('cifar', True, transform=transform, download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Load the CIFAR-10 test split; shuffling is unnecessary for evaluation
    cifar_test = datasets.CIFAR10('cifar', False, transform=transform, download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=False)

    # Peek at one batch through the public iterator protocol instead of the
    # private ``_next_data()`` method of the loader iterator.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    device = torch.device('cpu')
    model = Lenet5().to(device)

    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    for epoch in range(10):
        # ---- training pass ----
        model.train()
        for x, label in cifar_train:
            # x: [b, 3, 32, 32], label: [b]
            x, label = x.to(device), label.to(device)

            # logits: [b, 10]; loss is a scalar tensor
            logits = model(x)
            loss = criteon(logits, label)

            # Backpropagate and update the parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print(epoch, 'loss:', loss.item())

        # ---- evaluation pass ----
        model.eval()
        total_correct = 0
        total_num = 0
        with torch.no_grad():
            for x, label in cifar_test:
                x, label = x.to(device), label.to(device)

                # Predicted class is the index of the largest logit
                pred = model(x).argmax(dim=1)
                total_correct += torch.eq(pred, label).sum().item()
                total_num += x.size(0)

        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)


# Run the training script only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()
12-25 22:35