Switching the Learning Rate Schedule to Cosine Annealing (CosineAnnealingLR)

This trainer.py currently uses a step-style learning rate decay, but you want to switch to a cosine annealing LR scheduler (CosineAnnealingLR) so that the learning rate changes more smoothly. I have made the following modifications:

Changes:

  1. Removed update_lr(); the learning rate is no longer halved manually.
  2. Switched to CosineAnnealingLR:
    • A CosineAnnealingLR scheduler is attached to both the G and D Adam optimizers.
    • T_max is set to args.epoch so the learning rate decays smoothly over the whole run.
  3. scheduler.step() is called at the end of every epoch.

I have updated trainer.py so that the learning rate schedule uses cosine annealing (CosineAnnealingLR) and the learning rate decreases smoothly over the epochs. Please check whether this meets your needs, or let me know if any other parameters should be adjusted!

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import time
import os
import math
from dataset import DatasetFromObj
from model import Zi2ZiModel

# Hyperparameters
args = {
    "input_nc": 3,
    "embedding_num": 40,
    "embedding_dim": 128,
    "Lconst_penalty": 15,
    "Lcategory_penalty": 1,
    "L1_penalty": 100,
    "schedule": 10,
    "lr": 0.001,
    "gpu_ids": [0],
    "epoch": 100,
    "batch_size": 16,
    "checkpoint_steps": 1000,
    "resume": None,
}

data_dir = "path/to/data"
checkpoint_dir = "path/to/checkpoints"
start_time = time.time()

global_steps = 0

# Initialize the model
model = Zi2ZiModel(
    input_nc=args["input_nc"],
    embedding_num=args["embedding_num"],
    embedding_dim=args["embedding_dim"],
    Lconst_penalty=args["Lconst_penalty"],
    Lcategory_penalty=args["Lcategory_penalty"],
    save_dir=checkpoint_dir,
    gpu_ids=args["gpu_ids"],
)
model.setup()
if args["resume"]:
    model.load_networks(args["resume"])

# Set up the learning rate schedulers
scheduler_G = CosineAnnealingLR(model.optimizer_G, T_max=args["epoch"], eta_min=1e-6)
scheduler_D = CosineAnnealingLR(model.optimizer_D, T_max=args["epoch"], eta_min=1e-6)

train_dataset = DatasetFromObj(os.path.join(data_dir, 'train.obj'))
total_batches = math.ceil(len(train_dataset) / args["batch_size"])

# Training loop
for epoch in range(args["epoch"]):
    dataloader = DataLoader(train_dataset, batch_size=args["batch_size"], shuffle=True)
    
    for bid, batch in enumerate(dataloader):
        model.set_input(batch[0], batch[2], batch[1])
        const_loss, l1_loss, category_loss, cheat_loss = model.optimize_parameters()
        
        if bid % 100 == 0:
            passed = time.time() - start_time
            log_format = ("Epoch: [%2d], [%4d/%4d] time: %4.2f, d_loss: %.5f, g_loss: %.5f, "
                          "category_loss: %.5f, cheat_loss: %.5f, const_loss: %.5f, l1_loss: %.5f")
            print(log_format % (epoch, bid, total_batches, passed, model.d_loss.item(), model.g_loss.item(),
                                category_loss, cheat_loss, const_loss, l1_loss))
        
        if global_steps % args["checkpoint_steps"] == 0:
            model.save_networks(global_steps)
        
        global_steps += 1
    
    # Update the learning rates at the end of each epoch
    scheduler_G.step()
    scheduler_D.step()

Gemini

OK. To switch the learning rate schedule to cosine annealing (CosineAnnealingLR), we need to modify the Zi2ZiModel class and add the learning rate schedulers to the training loop.

Here is the modified code:

Python

import torch
import torch.nn as nn
import torch.nn.utils.spectral_norm as spectral_norm
import functools
import math
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import os
import time

# ... (UNetGenerator, UnetSkipConnectionBlock, Discriminator, CategoryLoss, BinaryLoss classes remain the same) ...

class Zi2ZiModel:
    def __init__(self, input_nc=3, embedding_num=40, embedding_dim=128,
                 ngf=64, ndf=64,
                 Lconst_penalty=15, Lcategory_penalty=1, L1_penalty=100,
                 epochs=100, lr=0.001, gpu_ids=None, save_dir='.', is_training=True,
                 image_size=256):

        if is_training:
            self.use_dropout = True
        else:
            self.use_dropout = False

        self.Lconst_penalty = Lconst_penalty
        self.Lcategory_penalty = Lcategory_penalty
        self.L1_penalty = L1_penalty

        self.epochs = epochs  # Store the total number of epochs
        self.save_dir = save_dir
        self.gpu_ids = gpu_ids

        self.input_nc = input_nc
        self.embedding_dim = embedding_dim
        self.embedding_num = embedding_num
        self.ngf = ngf
        self.ndf = ndf
        self.lr = lr
        self.is_training = is_training
        self.image_size = image_size

    def setup(self):

        self.netG = UNetGenerator(
            input_nc=self.input_nc,
            output_nc=self.input_nc,
            embedding_num=self.embedding_num,
            embedding_dim=self.embedding_dim,
            ngf=self.ngf,
            use_dropout=self.use_dropout
        )
        self.netD = Discriminator(
            input_nc=2 * self.input_nc,
            embedding_num=self.embedding_num,
            ndf=self.ndf,
            image_size=self.image_size
        )

        init_net(self.netG, gpu_ids=self.gpu_ids)
        init_net(self.netD, gpu_ids=self.gpu_ids)

        self.optimizer_G = optim.Adam(self.netG.parameters(), lr=self.lr, betas=(0.5, 0.999))
        self.optimizer_D = optim.Adam(self.netD.parameters(), lr=self.lr, betas=(0.5, 0.999))

        # Add learning rate schedulers
        self.scheduler_G = CosineAnnealingLR(self.optimizer_G, T_max=self.epochs, eta_min=0)
        self.scheduler_D = CosineAnnealingLR(self.optimizer_D, T_max=self.epochs, eta_min=0)

        self.category_loss = CategoryLoss(self.embedding_num)
        self.real_binary_loss = BinaryLoss(True)
        self.fake_binary_loss = BinaryLoss(False)
        self.l1_loss = nn.L1Loss()
        self.mse = nn.MSELoss()
        self.sigmoid = nn.Sigmoid()

        if self.gpu_ids:
            self.category_loss.cuda()
            self.real_binary_loss.cuda()
            self.fake_binary_loss.cuda()
            self.l1_loss.cuda()
            self.mse.cuda()
            self.sigmoid.cuda()

        if self.is_training:
            self.netD.train()
            self.netG.train()
        else:
            self.netD.eval()
            self.netG.eval()

    # ... (set_input, forward, backward_D, backward_G, optimize_parameters, set_requires_grad functions remain the same) ...

    def update_lr(self):
        self.scheduler_D.step()
        self.scheduler_G.step()
        print(f"Learning rate updated: G={self.optimizer_G.param_groups[0]['lr']:.6f}, D={self.optimizer_D.param_groups[0]['lr']:.6f}")

Notes on the changes:

  1. Import torch.optim.lr_scheduler.CosineAnnealingLR:
    • Import the CosineAnnealingLR class, which implements cosine annealing of the learning rate.
  2. Add the learning rate schedulers:
    • In setup(), create the CosineAnnealingLR instances self.scheduler_G and self.scheduler_D for the generator and discriminator optimizers respectively.
    • The T_max parameter is set to the total number of training epochs, self.epochs.
    • The eta_min parameter is the minimum learning rate, set to 0 here (see the sketch after this list for how the resulting schedule behaves).
  3. Rework the update_lr function:
    • Remove the original halve-the-learning-rate logic.
    • In update_lr, call self.scheduler_D.step() and self.scheduler_G.step() to update the learning rates.
    • Print the current learning rates after each update.
  4. Adjust the training loop:
    • Remove the old args.schedule setting, since cosine annealing now adjusts the learning rate automatically.
    • Call model.update_lr() at the end of each epoch.
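
For reference, here is a minimal standalone sketch (not part of trainer.py; the dummy parameter exists only so an optimizer can be constructed) that prints how the learning rate produced by CosineAnnealingLR evolves over 100 epochs, assuming the same starting lr of 0.001 and eta_min=0 as above:

Python

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR

# Dummy parameter just so an optimizer can be built; the values are illustrative.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = Adam([param], lr=0.001)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=0)

for epoch in range(100):
    if epoch % 10 == 0:
        # Learning rate actually used during this epoch.
        print(f"epoch {epoch:3d}: lr = {optimizer.param_groups[0]['lr']:.6f}")
    optimizer.step()   # training for one epoch would happen here
    scheduler.step()   # anneal the learning rate once per epoch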

Changes to the training loop:

Python

# ... (model setup and dataset loading) ...

total_batches = math.ceil(len(train_dataset) / args.batch_size)
for epoch in range(args.epoch):
    dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    for bid, batch in enumerate(dataloader):
        model.set_input(batch[0], batch[2], batch[1])
        const_loss, l1_loss, category_loss, cheat_loss = model.optimize_parameters()
        if bid % 100 == 0:
            passed = time.time() - start_time
            log_format = "Epoch: [%2d], [%4d/%4d] time: %4.2f, d_loss: %.5f, g_loss: %.5f, " + \
                         "category_loss: %.5f, cheat_loss: %.5f, const_loss: %.5f, l1_loss: %.5f"
            print(log_format % (epoch, bid, total_batches, passed, model.d_loss.item(), model.g_loss.item(),
                                 category_loss, cheat_loss, const_loss, l1_loss))
        if global_steps % args.checkpoint_steps == 0:
            model.save_networks(global_steps)
        global_steps += 1
    model.update_lr() # Update learning rate at the end of each epoch.

With these changes, your model will use a cosine annealing learning rate schedule, which helps training converge more stably and can improve the quality of the generated images.


Yes, that is an important question. Below is a detailed explanation of how to handle the learning rate schedule correctly when resuming training (resume train), and how to find out what the learning rate was last time:

1. The learning rate schedule when resuming training:

  • CosineAnnealingLR keeps internal state:
    • The CosineAnnealingLR scheduler stores its internal state, including the current learning rate and the number of epochs already completed.
    • When you resume training with model.load_networks(), you also need to load the optimizer and scheduler states so that the schedule continues from where it left off.
  • Optimizer and scheduler state dicts:
    • PyTorch optimizers and schedulers both provide a state_dict() method that returns their current state.
    • You can save these state dicts to disk with torch.save() and load them back with torch.load() when resuming (see the sketch below).
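
As a minimal sketch of that round trip (independent of the Zi2ZiModel code further below; the file name ckpt.pth and the dummy optimizer standing in for optimizer_G/scheduler_G are placeholders), the optimizer and scheduler state dicts can be bundled into one checkpoint and restored together:

Python

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR

# Dummy optimizer/scheduler standing in for optimizer_G / scheduler_G.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = Adam([param], lr=0.001)
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=0)

# Save both state dicts in one checkpoint file ("ckpt.pth" is a placeholder name).
torch.save({
    "optimizer": optimizer.state_dict(),
    "scheduler": scheduler.state_dict(),
}, "ckpt.pth")

# When resuming, load them back so the cosine schedule continues where it stopped.
ckpt = torch.load("ckpt.pth")
optimizer.load_state_dict(ckpt["optimizer"])
scheduler.load_state_dict(ckpt["scheduler"])
print("resumed lr:", optimizer.param_groups[0]["lr"])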

2. How to find out the last learning rate:

  • Save and load the optimizer and scheduler states:
    • When saving a model checkpoint, also save the state_dict() of the optimizers and schedulers.
    • When resuming training, load these state dicts back into the corresponding optimizers and schedulers.
  • Read the learning rate from the optimizer:
    • An optimizer's param_groups attribute contains the settings of every parameter group, including the learning rate.
    • You can read the current learning rates with the following code:

Python

current_lr_G = model.optimizer_G.param_groups[0]['lr']
current_lr_D = model.optimizer_D.param_groups[0]['lr']

Modify the Zi2ZiModel class to save and load the scheduler state:

Python

import torch
import torch.nn as nn
import torch.nn.utils.spectral_norm as spectral_norm
import functools
import math
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import os
import time

# ... (UNetGenerator, UnetSkipConnectionBlock, Discriminator, CategoryLoss, BinaryLoss classes remain the same) ...

class Zi2ZiModel:
    # ... (init, setup, set_input, forward, backward_D, backward_G, optimize_parameters, set_requires_grad functions remain the same) ...

    def save_networks(self, epoch):
        # ... (save netG and netD) ...
        torch.save(self.optimizer_G.state_dict(), os.path.join(self.save_dir, 'optimizer_G_{}.pth'.format(epoch)))
        torch.save(self.optimizer_D.state_dict(), os.path.join(self.save_dir, 'optimizer_D_{}.pth'.format(epoch)))
        torch.save(self.scheduler_G.state_dict(), os.path.join(self.save_dir, 'scheduler_G_{}.pth'.format(epoch)))
        torch.save(self.scheduler_D.state_dict(), os.path.join(self.save_dir, 'scheduler_D_{}.pth'.format(epoch)))

    def load_networks(self, epoch):
        # ... (load netG and netD) ...
        self.optimizer_G.load_state_dict(torch.load(os.path.join(self.save_dir, 'optimizer_G_{}.pth'.format(epoch))))
        self.optimizer_D.load_state_dict(torch.load(os.path.join(self.save_dir, 'optimizer_D_{}.pth'.format(epoch))))
        self.scheduler_G.load_state_dict(torch.load(os.path.join(self.save_dir, 'scheduler_G_{}.pth'.format(epoch))))
        self.scheduler_D.load_state_dict(torch.load(os.path.join(self.save_dir, 'scheduler_D_{}.pth'.format(epoch))))
        # Print the learning rates after loading
        current_lr_G = self.optimizer_G.param_groups[0]['lr']
        current_lr_D = self.optimizer_D.param_groups[0]['lr']
        print(f"Loaded learning rate: G={current_lr_G:.6f}, D={current_lr_D:.6f}")

    def update_lr(self):
        self.scheduler_D.step()
        self.scheduler_G.step()
        print(f"Learning rate updated: G={self.optimizer_G.param_groups[0]['lr']:.6f}, D={self.optimizer_D.param_groups[0]['lr']:.6f}")

Modify the training loop to use load_networks():

Python

# ... (model setup and dataset loading) ...

if args.resume:
    model.load_networks(args.resume)  # load all saved states

# ... (training loop) ...

With these changes, the optimizer and scheduler states are restored correctly when you resume training, you can see the last learning rate, and the cosine annealing schedule continues from where it stopped.
