Lab 1 CIFAR10 initial done
This commit is contained in:
parent
671e8d40e6
commit
c75e27a36e
6 changed files with 414 additions and 0 deletions
2
lab1-pytorch-cifar10/.gitignore
vendored
Normal file
2
lab1-pytorch-cifar10/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/data
|
||||
*.pth
|
||||
10
lab1-pytorch-cifar10/README.md
Normal file
10
lab1-pytorch-cifar10/README.md
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
### "Тензорные ядра CUDA для глубокого обучения"
|
||||
|
||||
1. С помощью фреймворка PyTorch реализовать архитектуру AlexNet для распознавания изображений из набора данных CIFAR10.
|
||||
2. Обучить модель с помощью GPU на тренировочном наборе данных в двух режимах — без использования тензорных CUDA-ядер (FP32) и с их использованием (FP16).
|
||||
3. Эксперимент 1: сравнить время обучения модели в разных режимах.
|
||||
4. Эксперимент 2: сравнить точности распознавания изображений из тестового набора данных при разных режимах обучения модели.
|
||||
5. Эксперимент 3: сравнить реальные вычислительные сложности пакетного умножения матриц (Batch Matrix Multiplication) на GPU в режимах FP32 и FP16; построить графики сложностей.
|
||||
6. Объяснить результаты вычислительных экспериментов.
|
||||
|
||||
Отчет должен содержать: исходный код, результаты экспериментов, обсуждение полученных результатов.
|
||||
39
lab1-pytorch-cifar10/alexnet.py
Normal file
39
lab1-pytorch-cifar10/alexnet.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import torch.nn as nn
|
||||
|
||||
|
||||
CIFAR10_NUM_CLASSES = 10
|
||||
|
||||
|
||||
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier sized for 3-channel 32x32 images.

    ``features`` halves the spatial size four times (one stride-2 convolution
    followed by three 2x2 max-pools), so a 32x32 input leaves it as a
    256-channel 2x2 map. ``classifier`` maps the flattened 256 * 2 * 2 vector
    to ``num_classes`` raw scores (no softmax is applied).
    """

    def __init__(self, /, num_classes: int):
        super().__init__()

        # Convolutional trunk. The position of every layer inside the
        # Sequential determines its state_dict key, so the order is fixed.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fully connected head with dropout in front of the first two
        # linear layers.
        dense_stack = [
            nn.Dropout(),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        feature_maps = self.features(x)
        # Flatten each sample's 256x2x2 feature map into one vector.
        flattened = feature_maps.view(feature_maps.size(0), 256 * 2 * 2)
        return self.classifier(flattened)
|
||||
31
lab1-pytorch-cifar10/requirements.txt
Normal file
31
lab1-pytorch-cifar10/requirements.txt
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
black==23.1.0
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.0.1
|
||||
click==8.1.3
|
||||
colorama==0.4.6
|
||||
contourpy==1.0.7
|
||||
cycler==0.11.0
|
||||
fonttools==4.38.0
|
||||
idna==3.4
|
||||
importlib-resources==5.12.0
|
||||
kiwisolver==1.4.4
|
||||
matplotlib==3.7.0
|
||||
mypy-extensions==1.0.0
|
||||
numpy==1.24.2
|
||||
packaging==23.0
|
||||
pandas==1.5.3
|
||||
pathspec==0.11.0
|
||||
Pillow==9.4.0
|
||||
platformdirs==3.0.0
|
||||
pyparsing==3.0.9
|
||||
python-dateutil==2.8.2
|
||||
pytz==2022.7.1
|
||||
requests==2.28.2
|
||||
six==1.16.0
|
||||
tomli==2.0.1
|
||||
torch==1.13.1+cu117
|
||||
torchvision==0.14.1+cu117
|
||||
tqdm==4.64.1
|
||||
typing_extensions==4.5.0
|
||||
urllib3==1.26.14
|
||||
zipp==3.15.0
|
||||
135
lab1-pytorch-cifar10/test.py
Normal file
135
lab1-pytorch-cifar10/test.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
import torch
|
||||
import torch.utils.data
|
||||
import torch.nn as nn
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
import torch.optim as optim
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from alexnet import AlexNet, CIFAR10_NUM_CLASSES
|
||||
|
||||
# This script is GPU-only: refuse to run when CUDA is missing.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")


# Checkpoint file that main() loads the network weights from.
NET_SAVE_PATH = "./cifar10_alexnet.pth"

# Assigned inside main().
device: torch.device

# Convert images to tensors, then normalize every channel with mean 0.5 and
# std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Number of samples per batch for both data loaders.
batch_size = 4

# Declared here, populated by load_data().
trainset: torchvision.datasets.CIFAR10
testset: torchvision.datasets.CIFAR10
trainloader: torch.utils.data.DataLoader
testloader: torch.utils.data.DataLoader

# Human-readable class names, indexed by integer label.
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)
|
||||
|
||||
|
||||
def load_data():
    """Create the CIFAR-10 datasets and loaders and store them in globals.

    Both splits are read from (and downloaded to, if necessary) ``./data``
    and use the module-level ``transform`` and ``batch_size``. Only the
    training loader shuffles its samples.
    """
    global trainset, trainloader, testset, testloader

    def _build_split(is_train):
        # One dataset plus its loader; shuffling is enabled for the
        # training split only.
        dataset = torchvision.datasets.CIFAR10(
            root="./data", train=is_train, download=True, transform=transform
        )
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=is_train, num_workers=2
        )
        return dataset, loader

    trainset, trainloader = _build_split(True)
    testset, testloader = _build_split(False)
|
||||
|
||||
|
||||
def imshow(img):
    """Show a channel-first image tensor in a matplotlib window."""
    host_img = img.cpu() if img.is_cuda else img
    # Map values back from the normalized range: x / 2 + 0.5.
    pixels = (host_img / 2 + 0.5).numpy()
    # matplotlib wants channels last: (C, H, W) -> (H, W, C).
    plt.imshow(pixels.transpose((1, 2, 0)))
    plt.show()
|
||||
|
||||
|
||||
def _count_per_class(net, loader):
    """Return ``(correct, total)`` dicts of per-class prediction counts.

    Runs ``net`` over every batch of ``loader`` without tracking gradients.
    Both dicts are keyed by the names in ``classes``.
    """
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predictions = torch.max(outputs, 1)

            # Convert to plain ints once per batch instead of comparing
            # GPU scalars one by one.
            for label, prediction in zip(labels.tolist(), predictions.tolist()):
                classname = classes[label]
                total_pred[classname] += 1
                if label == prediction:
                    correct_pred[classname] += 1

    return correct_pred, total_pred


def main():
    """Evaluate the saved AlexNet checkpoint on the CIFAR-10 test set.

    Loads the weights stored at ``NET_SAVE_PATH`` onto ``cuda:0``, shows the
    first test batch with its ground-truth and predicted labels, then prints
    the overall and the per-class accuracy.
    """
    global device
    device = torch.device("cuda:0")
    print("Available device:", device)

    load_data()

    net = AlexNet(CIFAR10_NUM_CLASSES).to(device)
    net.load_state_dict(torch.load(NET_SAVE_PATH, map_location=device))
    # The classifier contains Dropout layers; without eval() they stay
    # active and randomly zero activations while accuracy is measured.
    net.eval()

    images, labels = next(iter(testloader))
    images, labels = images.to(device), labels.to(device)

    imshow(torchvision.utils.make_grid(images))
    # Iterate over the actual batch rather than a hard-coded size of 4.
    print("GroundTruth: ", " ".join(f"{classes[label]:5s}" for label in labels.tolist()))

    with torch.no_grad():
        outputs = net(images)
    _, predicted = torch.max(outputs, 1)

    print("Predicted: ", " ".join(f"{classes[label]:5s}" for label in predicted.tolist()))

    # In eval mode the network is deterministic, so a single pass over the
    # test set yields both the overall and the per-class statistics.
    correct_pred, total_pred = _count_per_class(net, testloader)
    correct = sum(correct_pred.values())
    total = sum(total_pred.values())

    print(
        f"Accuracy of the network on the 10000 test images: {100 * correct // total} %"
    )

    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")


if __name__ == "__main__":
    main()
|
||||
197
lab1-pytorch-cifar10/train.py
Normal file
197
lab1-pytorch-cifar10/train.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import torch
|
||||
import torch.cuda
|
||||
import torch.cuda.amp
|
||||
import torch.utils.data
|
||||
import torch.nn as nn
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
import torch.optim as optim
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from alexnet import AlexNet, CIFAR10_NUM_CLASSES
|
||||
import argparse
|
||||
|
||||
# This script is GPU-only: refuse to run when CUDA is missing.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")


# Checkpoint file that main() saves the trained weights to. The __main__
# guard at the bottom of the file replaces it for --half runs.
NET_SAVE_PATH = "./cifar10_alexnet.pth"

# Assigned inside main().
device: torch.device

# Convert images to tensors, then normalize every channel with mean 0.5 and
# std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Number of samples per batch for both data loaders.
batch_size = 4

# Declared here, populated by load_data().
trainset: torchvision.datasets.CIFAR10
testset: torchvision.datasets.CIFAR10
trainloader: torch.utils.data.DataLoader
testloader: torch.utils.data.DataLoader

# Human-readable class names, indexed by integer label.
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)
|
||||
|
||||
|
||||
def load_data():
    """Create the CIFAR-10 datasets and loaders and store them in globals.

    Both splits are read from (and downloaded to, if necessary) ``./data``
    and use the module-level ``transform`` and ``batch_size``. Only the
    training loader shuffles its samples.
    """
    global trainset, trainloader, testset, testloader

    def _build_split(is_train):
        # One dataset plus its loader; shuffling is enabled for the
        # training split only.
        dataset = torchvision.datasets.CIFAR10(
            root="./data", train=is_train, download=True, transform=transform
        )
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=is_train, num_workers=2
        )
        return dataset, loader

    trainset, trainloader = _build_split(True)
    testset, testloader = _build_split(False)
|
||||
|
||||
|
||||
def imshow(img):
    """Show a channel-first image tensor in a matplotlib window."""
    host_img = img.cpu() if img.is_cuda else img
    # Map values back from the normalized range: x / 2 + 0.5.
    pixels = (host_img / 2 + 0.5).numpy()
    # matplotlib wants channels last: (C, H, W) -> (H, W, C).
    plt.imshow(pixels.transpose((1, 2, 0)))
    plt.show()
|
||||
|
||||
|
||||
def _count_per_class(net, loader, half_precision):
    """Return ``(correct, total)`` dicts of per-class prediction counts.

    Runs ``net`` over every batch of ``loader`` without tracking gradients,
    using FP16 autocast when ``half_precision`` is true. Both dicts are keyed
    by the names in ``classes``.
    """
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    progress = tqdm(
        loader,
        desc="Measuring accuracy",
        unit="batch",
        total=len(loader),
    )
    with torch.no_grad():
        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)
            with torch.autocast(
                device_type="cuda", dtype=torch.float16, enabled=half_precision
            ):
                outputs = net(images)
            _, predictions = torch.max(outputs, 1)

            # Convert to plain ints once per batch instead of comparing
            # GPU scalars one by one.
            for label, prediction in zip(labels.tolist(), predictions.tolist()):
                classname = classes[label]
                total_pred[classname] += 1
                if label == prediction:
                    correct_pred[classname] += 1

    return correct_pred, total_pred


def main(half_precision: bool = False):
    """Train AlexNet on CIFAR-10, save the weights and report test accuracy.

    Args:
        half_precision: When true, the forward passes run under FP16
            autocast and the loss is scaled with ``GradScaler``. When false,
            training is plain FP32 and the scaler is disabled.

    The trained ``state_dict`` is written to ``NET_SAVE_PATH``. Afterwards
    the first test batch is printed with its predictions, followed by the
    overall and per-class accuracy on the test set.
    """
    global device
    device = torch.device("cuda:0")

    load_data()

    # Parameters stay in FP32 in both modes: GradScaler cannot unscale the
    # gradients of FP16 parameters, so half precision is applied per
    # operation through autocast instead of converting the model with .half().
    net = AlexNet(num_classes=CIFAR10_NUM_CLASSES).to(device)

    # Loss scaling is only needed (and only enabled) for the FP16 run; a
    # disabled scaler makes scale()/step()/update() plain pass-throughs, so
    # the FP32 baseline carries no scaling overhead.
    scaler = torch.cuda.amp.GradScaler(enabled=half_precision)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print("Training")

    net.train()
    for epoch in range(2):
        running_loss = 0.0
        progress = tqdm(
            enumerate(trainloader, 0),
            desc=f"Epoch {epoch+1}",
            total=len(trainloader),
            unit="batch",
        )
        for i, (inputs, labels) in progress:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.autocast(
                device_type="cuda", dtype=torch.float16, enabled=half_precision
            ):
                outputs = net(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            # Report the mean loss of every 2000-batch window.
            if i % 2000 == 1999:
                tqdm.write(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}")
                running_loss = 0.0

    print("Finished Training")

    torch.save(net.state_dict(), NET_SAVE_PATH)

    # The classifier contains Dropout layers; without eval() they stay
    # active and randomly zero activations while accuracy is measured.
    net.eval()

    images, labels = next(iter(testloader))
    images, labels = images.to(device), labels.to(device)

    # Iterate over the actual batch rather than a hard-coded size of 4.
    print("GroundTruth: ", " ".join(f"{classes[label]:5s}" for label in labels.tolist()))
    with torch.no_grad(), torch.autocast(
        device_type="cuda", dtype=torch.float16, enabled=half_precision
    ):
        outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    print("Predicted: ", " ".join(f"{classes[label]:5s}" for label in predicted.tolist()))

    # In eval mode the network is deterministic, so a single pass over the
    # test set yields both the overall and the per-class statistics.
    correct_pred, total_pred = _count_per_class(net, testloader, half_precision)
    correct = sum(correct_pred.values())
    total = sum(total_pred.values())

    print(
        f"Accuracy of the network on the 10000 test images: {100 * correct // total} %"
    )

    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: the optional --half flag selects half-precision training
    # and redirects the checkpoint to a separate file.
    cli = argparse.ArgumentParser()
    cli.add_argument("--half", action="store_true", help="use half precision")
    use_half = cli.parse_args().half

    if use_half:
        print("Using half precision")
        NET_SAVE_PATH = "./cifar10_alexnet_half.pth"

    main(half_precision=use_half)
|
||||
Loading…
Add table
Add a link
Reference in a new issue