Prepare a neural network that includes fully connected layers.
import torch
from torch import nn, optim
from torch.nn import functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional blocks: Conv -> ReLU/LeakyReLU -> BatchNorm -> Dropout2d
        self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=3)
        self.bn1_1 = nn.BatchNorm2d(32)
        self.conv_dropout1_1 = nn.Dropout2d(0.1)
        self.conv2_1 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2)
        self.bn2_1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv_dropout2_1 = nn.Dropout2d(0.1)
        self.conv3_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
        self.bn3_1 = nn.BatchNorm2d(128)
        self.conv_dropout3_1 = nn.Dropout2d(0.25)
        self.conv3_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1)
        self.bn3_2 = nn.BatchNorm2d(128)
        self.conv_dropout3_2 = nn.Dropout2d(0.25)
        self.conv4_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2, stride=1)
        self.bn4_1 = nn.BatchNorm2d(256)
        self.conv_dropout4_1 = nn.Dropout2d(0.3)
        self.conv4_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=2, stride=1)
        self.bn4_2 = nn.BatchNorm2d(256)
        self.conv_dropout4_2 = nn.Dropout2d(0.3)
        self.conv4_3 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=2, stride=1)
        self.bn4_3 = nn.BatchNorm2d(512)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv_dropout4_3 = nn.Dropout2d(0.3)
        # Fully connected head
        self.fc1 = nn.Linear(512 * 7 * 7, 1000)
        self.fc_dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc_dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(1000, 256)
        self.fc_dropout3 = nn.Dropout(0.1)
        self.out = nn.Linear(256, 1)

    def forward(self, x):
        x = self.bn1_1(F.relu(self.conv1_1(x)))
        x = self.conv_dropout1_1(x)
        x = self.bn2_1(F.relu(self.conv2_1(x)))
        x = self.conv_dropout2_1(self.pool1(x))
        x = self.bn3_1(F.leaky_relu(self.conv3_1(x)))
        x = self.conv_dropout3_1(x)
        x = self.bn3_2(F.leaky_relu(self.conv3_2(x)))
        x = self.conv_dropout3_2(x)
        x = self.bn4_1(F.leaky_relu(self.conv4_1(x)))
        x = self.conv_dropout4_1(x)
        x = self.bn4_2(F.leaky_relu(self.conv4_2(x)))
        x = self.conv_dropout4_2(x)
        x = self.bn4_3(F.leaky_relu(self.conv4_3(x)))
        x = self.pool2(x)
        x = self.conv_dropout4_3(x)
        x = x.view(-1, 512 * 7 * 7)  # flatten the feature map before the fully connected layers
        x = self.fc_dropout1(F.relu(self.fc1(x)))
        x = self.fc_dropout2(F.relu(self.fc2(x)))
        x = self.fc_dropout3(F.relu(self.fc3(x)))
        out = self.out(x)
        return out

net = Net()
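Because none of the convolutions use padding, the 512*7*7 size assumed by fc1 only matches a particular input resolution. A minimal sanity check, assuming (my assumption; the post does not state the input size) 481x481 RGB images, which this conv/pool stack reduces to a 7x7 feature map:

# Hedged sanity check: 481x481 is an assumed input size that this padding-free
# conv/pool stack reduces to a 7x7 map, matching the 512*7*7 flatten in fc1.
net.eval()                               # running BatchNorm stats, dropout off
with torch.no_grad():
    dummy = torch.randn(1, 3, 481, 481)  # (batch, channels, height, width)
    print(net(dummy).shape)              # expected: torch.Size([1, 1])
net.train()                              # back to training mode for the steps below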
Let Θ = {β, W} be the set of all parameters of the network net, where β denotes the weights (including the bias) of the final layer, i.e. the weights between the last fully connected layer (fc3) and the prediction out, and W denotes all weights before that layer. We now optimize a loss to which a regularization term on the final-layer weights β has been added.
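Written out, the regularized objective looks roughly as follows (a sketch: the snippet below computes both an L1 and an L2 penalty on β, but only the L1 term is actually added to the loss):

\[
\mathcal{L}(\Theta) = \mathrm{CE}(\hat{y}, y) + \lambda_1 \lVert \beta \rVert_1 + \lambda_2 \lVert \beta \rVert_2
\]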
lambda1 and lambda2 are regularization coefficients used to prevent overfitting.
lambda1 = 0.01
lambda2 = 0.005
# Flatten every parameter (weight and bias) of the final layer net.out into one vector (β).
fc_params = torch.cat([p.view(-1) for p in net.out.parameters()])
l1_regularization = lambda1 * torch.norm(fc_params, p=1)  # L1 penalty on β
l2_regularization = lambda2 * torch.norm(fc_params, p=2)  # L2 penalty on β (computed, but not added below)
print(fc_params.size())  # torch.Size([257]): 256 weights + 1 bias

loss_fn = nn.CrossEntropyLoss()

# Inside the training loop. ypred = net(x), the targets y, and the optimizer
# (e.g. optim.Adam(net.parameters())) are assumed to be defined already; the
# penalty terms above must be recomputed each iteration so they reflect the
# current parameter values.
loss = loss_fn(ypred, y) + l1_regularization  # add the L1 penalty on the final fc layer to the loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
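To regularize W (all weights before the final layer) as well, one common option is the optimizer's built-in weight_decay applied per parameter group. A minimal sketch under that assumption, identifying β by parameter names that start with "out.":

# Hedged sketch: L2 weight decay on W via optimizer parameter groups, while
# beta (the net.out parameters) is handled only by the explicit L1 term above.
beta_params  = [p for name, p in net.named_parameters() if name.startswith("out.")]
other_params = [p for name, p in net.named_parameters() if not name.startswith("out.")]
optimizer = optim.Adam([
    {"params": other_params, "weight_decay": lambda2},  # L2 regularization on W
    {"params": beta_params,  "weight_decay": 0.0},      # no extra decay on beta
], lr=1e-3)

Since weight_decay applies the L2 penalty inside the update step itself, no extra term for W needs to be appended to the loss.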