Prepare a neural network that contains fully connected layers.

import torch
from torch import nn, optim
from torch.nn import functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1_1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 7, stride = 3)
        self.bn1_1 = nn.BatchNorm2d(32)
        self.conv_dropout1_1 = nn.Dropout2d(0.1)
        
        self.conv2_1 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, stride = 2)
        self.bn2_1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv_dropout2_1 = nn.Dropout2d(0.1)
        
        self.conv3_1 = nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 2)
        self.bn3_1 = nn.BatchNorm2d(128)
        self.conv_dropout3_1 = nn.Dropout2d(0.25)
        self.conv3_2 = nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 1)
        self.bn3_2 = nn.BatchNorm2d(128)
        self.conv_dropout3_2 = nn.Dropout2d(0.25)
        
        self.conv4_1 = nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 2, stride = 1)
        self.bn4_1 = nn.BatchNorm2d(256)
        self.conv_dropout4_1 = nn.Dropout2d(0.3)
        self.conv4_2 = nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 2, stride = 1)
        self.bn4_2 = nn.BatchNorm2d(256)
        self.conv_dropout4_2 = nn.Dropout2d(0.3)
        self.conv4_3 = nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 2, stride = 1)
        self.bn4_3 = nn.BatchNorm2d(512)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv_dropout4_3 = nn.Dropout2d(0.3)
        
                     
        self.fc1 = nn.Linear(512*7*7, 1000)   # 512 channels x 7x7 feature map after pool2
        self.fc_dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc_dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(1000, 256)
        self.fc_dropout3 = nn.Dropout(0.1)
        self.out = nn.Linear(256, 1)

    def forward(self, x):
        x = self.bn1_1(F.relu(self.conv1_1(x)))
        x = self.conv_dropout1_1(x)
        
        x = self.bn2_1(F.relu(self.conv2_1(x)))
        x = self.conv_dropout2_1(self.pool1(x))
        
        x = self.bn3_1(F.leaky_relu(self.conv3_1(x)))
        x = self.conv_dropout3_1(x)
        x = self.bn3_2(F.leaky_relu(self.conv3_2(x)))
        x = self.conv_dropout3_2(x)
        
        x = self.bn4_1(F.leaky_relu(self.conv4_1(x)))
        x = self.conv_dropout4_1(x)        
        x = self.bn4_2(F.leaky_relu(self.conv4_2(x)))
        x = self.conv_dropout4_2(x)
        x = self.bn4_3(F.leaky_relu(self.conv4_3(x)))
        x = self.pool2(x)
        x = self.conv_dropout4_3(x)
        
        x = x.view(-1, 512*7*7)   # flatten the 512x7x7 feature map for the fully connected layers
        
        x = self.fc_dropout1(F.relu(self.fc1(x)))
        x = self.fc_dropout2(F.relu(self.fc2(x)))
        x = self.fc_dropout3(F.relu(self.fc3(x)))
        out = self.out(x)
        
        return out
    
net = Net()
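The 512*7*7 input size of fc1 implies a specific input resolution. For example, a 512×512 RGB image (an assumed size; the post does not state the input resolution) comes out of pool2 as a 512×7×7 feature map, which a quick forward pass can confirm:

x = torch.randn(1, 3, 512, 512)  # hypothetical 512x512 RGB input
print(net(x).shape)              # torch.Size([1, 1])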

 

Let Θ = {β, W} be all the parameters of the network net, where β is the weights (including the bias) between the last fully connected layer (fc3) and the prediction out, and W is the set of all weights before that layer. We then optimize, over all of these parameters, a loss to which a regularization term on β, the weights of this last layer, has been added.
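Written out, the objective is the cross-entropy loss plus a penalty on β alone, in one of the two forms below (λ1 and λ2 are the regularization coefficients defined in the next code block):

L(Θ) = CrossEntropy(ŷ, y) + λ1·‖β‖₁   (L1 regularization)
L(Θ) = CrossEntropy(ŷ, y) + λ2·‖β‖₂   (L2 regularization)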

 

A loss with an L1 penalty and an L2 penalty, respectively, added to CrossEntropyLoss.

lambda1 and lambda2 are regularization coefficients used to prevent overfitting.

lambda1 = 0.01   # L1 regularization coefficient
lambda2 = 0.005  # L2 regularization coefficient

# flatten the weights and bias of the last layer (between fc3 and out) into a single vector
fc_params = torch.cat([x.view(-1) for x in net.out.parameters()])
l1_regularization = lambda1 * torch.norm(fc_params, p=1)  # lambda1 * ||beta||_1
l2_regularization = lambda2 * torch.norm(fc_params, p=2)  # lambda2 * ||beta||_2

print(fc_params.size())  # torch.Size([257])

The weights between the last fully connected layer and the prediction layer consist of 256 coefficients and 1 bias, 257 parameters in total.
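This count can also be checked directly from the layer itself (a quick sanity check using only net.out, which is already defined above):

print(sum(p.numel() for p in net.out.parameters()))  # 256 weights + 1 bias = 257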

 

 

# ypred = net(x), the target y, and an optimizer (e.g. optim.Adam(net.parameters())) are assumed to be defined
loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(ypred, y) + l1_regularization  # add the L1 penalty on the last layer's parameters to the loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
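Inside an actual training loop, the penalty has to be recomputed from the current value of net.out.parameters() at every step; otherwise it stays a constant computed once. A minimal sketch of one epoch, assuming a DataLoader named train_loader that yields (x, y) batches (a hypothetical name, not defined in this post):

optimizer = optim.Adam(net.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

net.train()
for x, y in train_loader:
    ypred = net(x)
    # recompute the L1 penalty from the current last-layer weights
    fc_params = torch.cat([p.view(-1) for p in net.out.parameters()])
    l1_regularization = lambda1 * torch.norm(fc_params, p=1)
    loss = loss_fn(ypred, y) + l1_regularization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()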

 
