Overview
This is an exercise from my AI Programming class: using the Covid Patients Chest X-Ray dataset from Kaggle, a VGG16 model trained with transfer learning is compared against a model trained from scratch.
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt
COVID dataset
https://www.kaggle.com/datasets/ankitachoudhury01/covid-patients-chest-xray
Covid Patients Chest X-Ray: 162 chest X-ray images of COVID patients and normal patients.
from google.colab import drive
drive.mount('/content/drive')
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG16 uses 224x224 inputs
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet mean/std
])
# COVID dataset
train_dataset = datasets.ImageFolder(root='/content/drive/MyDrive/data_files/COVID/train/',
                                     transform=transform)
test_dataset = datasets.ImageFolder(root='/content/drive/MyDrive/data_files/COVID/test/',
                                    transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
len(train_dataset), len(test_dataset)
(259, 66)
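ImageFolder assigns integer labels to the class subfolders in alphabetical order. A quick way to check the mapping (the folder names in the comments are assumptions; they depend on how the dataset was unpacked):
# Check which label each class folder received (example names; actual folder names may differ)
print(train_dataset.classes)        # e.g. ['covid', 'normal']
print(train_dataset.class_to_idx)   # e.g. {'covid': 0, 'normal': 1}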
# Functions to show an image
def imshow(img):
    img = img*np.array([0.229, 0.224, 0.225]).reshape(-1,1,1) \
        + np.array([0.485, 0.456, 0.406]).reshape(-1,1,1)  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(images.shape, labels.shape)
# show images
nrow = 8
imshow(torchvision.utils.make_grid(images, nrow=nrow))
# print labels
for i in range(len(labels)//nrow):
    print(' '.join(f'{labels[i*nrow+j]:d}' for j in range(nrow)))
1 0 0 0 1 0 1 0
1 1 0 1 0 0 0 1
0 0 1 1 0 0 1 1
0 1 1 0 1 1 1 1
Load VGG16
# Load the pre-trained VGG16 model
vgg16 = models.vgg16(pretrained=True)
# VGG16 architecture
vgg16
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
count_parameters(vgg16)
138357544
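Most of these parameters sit in the fully connected classifier rather than in the convolutional features; the split can be checked with the same helper (roughly 14.7M in the features vs. 123.6M in the original 1000-class classifier):
# Parameter split between the conv features and the FC classifier
count_parameters(vgg16.features), count_parameters(vgg16.classifier)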
Replace the output layer
VGG16's original output layer is built for 1000 ImageNet classes, so it is replaced to distinguish only 2 classes (COVID vs. normal).
# Modify the classifier
num_classes = 2 # Change this to match the number of classes in the dataset
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)
# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16 = vgg16.to(device)
!nvidia-smi
Set the features layers to keep their pretrained parameters (freeze them):
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
    param.requires_grad = False
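As a quick sanity check (not shown in the original post), counting trainable parameters again after freezing should now report only the classifier, roughly 119.5 million with the 2-class head:
count_parameters(vgg16)  # only the classifier parameters remain trainable (~119.5M)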
Only the classifier layers' parameters are passed to the optimizer:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001, momentum=0.9)
# Keep a copy of one features weight and one classifier weight to verify later which ones get updated
w0 = vgg16.features[0].weight.data.clone()
w1 = vgg16.classifier[0].weight.data.clone()
Train the model
# Function to train the model
def train_model(model, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
# Train the model
train_model(vgg16, criterion, optimizer, num_epochs=20)
# Save the trained model
#torch.save(vgg16.state_dict(), 'vgg16_finetuned.pth')
# Check that the frozen features weights did not change
((vgg16.features[0].weight.data - w0)**2).sum()
tensor(0., device='cuda:0')
# Check that the classifier weights were updated
((vgg16.classifier[0].weight.data - w1)**2).sum()
tensor(0.4128, device='cuda:0')
Check the results on the test set
# Function to test the model
def test(model, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()*len(data)
            _, predicted = torch.max(output.data, 1)
            #print(output, predicted, target)
            correct += (predicted.data == target.data).sum().item()
            # pred = output.argmax(dim=1, keepdim=True)
            # correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, '
          f'Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)\n')
test(vgg16, criterion)
Test set: Average loss: 0.1176, Accuracy: 62/66 (94%)
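Accuracy alone can hide class imbalance on a test set this small. A minimal sketch (not in the original post) that breaks the result down per class, reusing test_loader and device defined above:
# Per-class accuracy breakdown (sketch; not part of the original notebook)
def per_class_accuracy(model, loader, num_classes=2):
    model.eval()
    correct = [0] * num_classes
    total = [0] * num_classes
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1)
            for c in range(num_classes):
                mask = (target == c)
                total[c] += mask.sum().item()
                correct[c] += (pred[mask] == c).sum().item()
    for c in range(num_classes):
        print(f'class {c}: {correct[c]}/{total[c]} correct')

per_class_accuracy(vgg16, test_loader)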
Filter visualization function
# Function to visualize the filters
def visualize_filters(layer):
    # We assume the layer is of type nn.Conv2d
    filters = layer.weight.data.clone()
    print(filters.shape)  # Shape of filters: (number of filters, channels, height, width)
    n_filters = filters.shape[0]
    n_inputchannels = filters.shape[1]
    # Normalize the filters for better visualization
    filters = (filters - filters.min()) / (filters.max() - filters.min())
    # Plot an 8-wide grid of filters for each input channel
    for ch in range(n_inputchannels):
        print(f'For input channel {ch}')
        fig, axs = plt.subplots(n_filters//8, 8, figsize=(20, 3*n_filters//8))
        for idx in range(n_filters):
            # Show channel `ch` of each filter as a grayscale image
            axs[idx//8, idx%8].imshow(filters[idx][ch].cpu().numpy(), cmap='gray')
            axs[idx//8, idx%8].set_xticks([])
            axs[idx//8, idx%8].set_yticks([])
        plt.show()
# Visualize the filters of the first convolutional layer
visualize_filters(vgg16.features[0])
torch.Size([64, 3, 3, 3])
For input channel 0
Unfreeze the feature learning layers
Retrain all layers, starting from the pretrained VGG16 parameters.
# Load the pre-trained VGG16 model
vgg16 = models.vgg16(pretrained=True)
# Modify the classifier
num_classes = 2 # Change this to match the number of classes in the dataset
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)
# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16 = vgg16.to(device)
# Verify that all "features" layers are trainable again (requires_grad is True by default)
for param in vgg16.features.parameters():
    print(param.requires_grad)
# Keep copies of a features weight and a classifier weight (to verify that both change during full fine-tuning)
w0 = vgg16.features[28].weight.data.clone()
w1 = vgg16.classifier[0].weight.data.clone()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
train_model(vgg16, criterion, optimizer, num_epochs=20)
test(vgg16, criterion)
Test set: Average loss: 0.1257, Accuracy: 63/66 (95%)
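A common variant when fine-tuning all layers (not used in this post) is to give the pretrained features a smaller learning rate than the freshly initialized classifier head, using optimizer parameter groups; a minimal sketch:
# Sketch: discriminative learning rates via parameter groups (values are illustrative)
optimizer = optim.SGD([
    {'params': vgg16.features.parameters(), 'lr': 1e-4},    # pretrained conv layers: small LR
    {'params': vgg16.classifier.parameters(), 'lr': 1e-3},  # new classifier head: larger LR
], momentum=0.9)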
Compare to learning from scratch
Train from scratch, without using the pretrained VGG16 parameters.
# Do not use the pretrained weights
vgg16_scratch = models.vgg16(pretrained=False)
# Modify the classifier
num_classes = 2 # Change this to match the number of classes in the dataset
vgg16_scratch.classifier[6] = nn.Linear(vgg16_scratch.classifier[6].in_features, num_classes)
# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16_scratch = vgg16_scratch.to(device)
vgg16_scratch
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=2, bias=True)
)
)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_scratch = optim.SGD(vgg16_scratch.parameters(), lr=0.001, momentum=0.9)
# Train the model
train_model(vgg16_scratch, criterion, optimizer_scratch, num_epochs=20)
test(vgg16_scratch, criterion)
Test set: Average loss: 0.3616, Accuracy: 58/66 (88%)
The models trained with transfer learning reach about 94-95% test accuracy (62/66 with frozen features, 63/66 with full fine-tuning), while the model trained from scratch, without weights pretrained on a large dataset, only reaches about 88% (58/66). With just 259 training images, starting from pretrained weights makes a clear difference.