Hands-On Transfer Learning with VGG16

Overview

This is an exercise from an AI programming course: using the Covid Patients Chest X-Ray dataset from Kaggle, we train VGG16 with transfer learning and compare it against the same network trained from scratch.

 

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

 

COVID dataset

https://www.kaggle.com/datasets/ankitachoudhury01/covid-patients-chest-xray

Covid Patients Chest X-Ray: 162 chest X-ray images of COVID patients and normal patients.

from google.colab import drive
drive.mount('/content/drive')
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 expects 224x224 inputs
    transforms.ToTensor(),
    # ImageNet channel statistics, matching the pretrained VGG16 weights
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# COVID dataset
train_dataset = datasets.ImageFolder(root='/content/drive/MyDrive/data_files/COVID/train/',
                                     transform=transform)
test_dataset = datasets.ImageFolder(root='/content/drive/MyDrive/data_files/COVID/test/',
                                    transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
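
ImageFolder assigns label indices alphabetically by subfolder name, which is worth checking before interpreting the 0/1 labels printed below (the exact folder names depend on how the dataset directories are laid out):

# Label indices follow the alphabetical order of the class subfolders
print(train_dataset.class_to_idx)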

len(train_dataset), len(test_dataset)
(259, 66)
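
With only 259 training images, light augmentation of the training split is a common extension; a minimal sketch (the test transform should stay deterministic, and train_dataset would then be built with train_transform instead of transform):

# Optional: augmented transform for the training split only (a sketch)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),   # plausible for chest X-rays
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])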
# Functions to show an image

def imshow(img):
    # Undo the Normalize transform so the image displays with natural contrast
    mean = torch.tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    img = img * std + mean
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)

print(images.shape, labels.shape)

# show images
nrow = 8
imshow(torchvision.utils.make_grid(images, nrow=nrow))
# print labels
for i in range(len(labels)//nrow):
    print(' '.join(f'{labels[i*nrow+j]:d}' for j in range(nrow)))

 
1 0 0 0 1 0 1 0
1 1 0 1 0 0 0 1
0 0 1 1 0 0 1 1
0 1 1 0 1 1 1 1

 

Load VGG16

# Load the pre-trained VGG16 model
vgg16 = models.vgg16(pretrained=True)
# Inspect the VGG16 architecture
vgg16
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

count_parameters(vgg16)
138357544
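
A side note: pretrained=True is deprecated since torchvision 0.13. It still works, but the equivalent modern call selects the weights explicitly:

# torchvision >= 0.13 equivalent of models.vgg16(pretrained=True):
# vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)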

 

 

Replacing the output layer

VGG16's original output layer predicts the 1000 ImageNet classes, so we replace it with one that distinguishes our 2 classes.

# Modify the classifier
num_classes = 2  # Change this to match the number of classes in the dataset
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16 = vgg16.to(device)
!nvidia-smi

 

Configure the features layers to keep their pretrained parameters, i.e. freeze them.

# Freeze training for all "features" layers
for param in vgg16.features.parameters():
    param.requires_grad = False
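
Re-running count_parameters at this point is a quick sanity check: only the three classifier Linear layers remain trainable, which works out to 25088·4096+4096 + 4096·4096+4096 + 4096·2+2 = 119,554,050 parameters.

# Only the classifier parameters are still trainable after the freeze
count_parameters(vgg16)  # 119554050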

 

Only the classifier layers' parameters are handed to the optimizer.

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001, momentum=0.9)
# Snapshot some weights so we can verify afterwards which layers changed
w0 = vgg16.features[0].weight.data.clone()
w1 = vgg16.classifier[0].weight.data.clone()

 

Train the model

# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
# Train the model
train_model(vgg16, criterion, optimizer, num_epochs=20)

# Save the trained model
#torch.save(vgg16.state_dict(), 'vgg16_finetuned.pth')
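
If you do save the weights, reloading them later requires rebuilding the 2-class head first, since the saved state dict no longer matches the stock 1000-class architecture. A minimal sketch, assuming the file path above:

# Rebuild the modified head before loading the fine-tuned weights (sketch)
#model = models.vgg16()
#model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
#model.load_state_dict(torch.load('vgg16_finetuned.pth'))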

# Verify the freeze: the frozen conv weights are unchanged (distance exactly 0)
((vgg16.features[0].weight.data - w0)**2).sum()
tensor(0., device='cuda:0')
# Verify that the classifier weights were updated (nonzero distance)
((vgg16.classifier[0].weight.data - w1)**2).sum()
tensor(0.4128, device='cuda:0')

 

Evaluating on the test set

# Function to test the model
def test(model, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()*len(data)
            _, predicted = torch.max(output.data, 1)
            #print(output, predicted, target)
            correct += (predicted.data == target.data).sum().item()
            # pred = output.argmax(dim=1, keepdim=True)
            # correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, '
          f'Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)\n')
test(vgg16, criterion)

 Test set: Average loss: 0.1176, Accuracy: 62/66 (94%)  
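
With only 66 test images, overall accuracy can hide which class the four errors fall on. A minimal confusion-matrix sketch, reusing test_loader, device, and num_classes from above:

# 2x2 confusion matrix: rows are true labels, columns are predictions
def confusion_matrix(model):
    model.eval()
    cm = torch.zeros(num_classes, num_classes, dtype=torch.long)
    with torch.no_grad():
        for data, target in test_loader:
            pred = model(data.to(device)).argmax(dim=1).cpu()
            for t, p in zip(target, pred):
                cm[t, p] += 1
    return cm

print(confusion_matrix(vgg16))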

 

Filter visualization function

# Function to visualize the filters
def visualize_filters(layer):
    # We assume the layer is of type nn.Conv2d
    filters = layer.weight.data.clone()
    print(filters.shape)  # Shape of filters: (number of filters, channels, height, width)
    n_filters = filters.shape[0]
    n_inputchannels = filters.shape[1]

    # Normalize the filters for better visualization
    filters = (filters - filters.min()) / (filters.max() - filters.min())

    # Plotting the filters
    for ch in range(n_inputchannels):
        print(f'For input channel {ch}')
        fig, axs = plt.subplots(n_filters//8, 8, figsize=(20, 3*n_filters//8))
        for idx in range(n_filters):
            # Show this filter's slice for input channel `ch` as a grayscale map
            axs[idx//8, idx%8].imshow(filters[idx][ch].cpu().numpy(), cmap='gray')
            axs[idx//8, idx%8].set_xticks([])
            axs[idx//8, idx%8].set_yticks([])
        plt.show()

# Visualize the filters of the first convolutional layer
visualize_filters(vgg16.features[0])

torch.Size([64, 3, 3, 3])

For input channel 0
[8x8 grid of the 64 first-layer 3x3 filters; similar grids follow for input channels 1 and 2]

Unfreeze the feature learning layers

Retrain the whole network, starting from VGG16's pretrained parameters (full fine-tuning).

# Load the pre-trained VGG16 model
vgg16 = models.vgg16(pretrained=True)

# Modify the classifier
num_classes = 2  # Change this to match the number of classes in the dataset
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16 = vgg16.to(device)
# This time all "features" layers remain trainable
for param in vgg16.features.parameters():
    print(param.requires_grad)  # prints True for every parameter

# Snapshot weights again to verify later that the conv layers also change
w0 = vgg16.features[28].weight.data.clone()
w1 = vgg16.classifier[0].weight.data.clone()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
train_model(vgg16, criterion, optimizer, num_epochs=20)
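
The same snapshot check as before now shows movement in the convolutional weights as well, since nothing is frozen (w0 and w1 were cloned just before training):

# Both distances should be nonzero this time, unlike in the frozen run
print(((vgg16.features[28].weight.data - w0)**2).sum())
print(((vgg16.classifier[0].weight.data - w1)**2).sum())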

test(vgg16, criterion)

 Test set: Average loss: 0.1257, Accuracy: 63/66 (95%) 

 

Compare to learning from scratch

Train the same architecture from random initialization, without using VGG16's pretrained parameters.

# Do not load the pretrained weights
vgg16_scratch = models.vgg16(pretrained=False)
# Modify the classifier
num_classes = 2  # Change this to match the number of classes in the dataset
vgg16_scratch.classifier[6] = nn.Linear(vgg16_scratch.classifier[6].in_features, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16_scratch = vgg16_scratch.to(device)
vgg16_scratch
VGG(
  (features): Sequential(
    ... identical to the feature layers shown above ...
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=2, bias=True)
  )
)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_scratch = optim.SGD(vgg16_scratch.parameters(), lr=0.001, momentum=0.9)
# Train the model
train_model(vgg16_scratch, criterion, optimizer_scratch, num_epochs=20)

test(vgg16_scratch, criterion)

 Test set: Average loss: 0.3616, Accuracy: 58/66 (88%) 

 

The transfer-learned models reach about 94-95% test accuracy (62/66 with frozen features, 63/66 with full fine-tuning), whereas the model trained without weights pretrained on a large dataset only reaches about 88% (58/66). On a small dataset like this one, starting from pretrained weights clearly improves performance.