diff --git a/lab1-pytorch-cifar10/.gitignore b/lab1-pytorch-cifar10/.gitignore new file mode 100644 index 0000000..97bc54e --- /dev/null +++ b/lab1-pytorch-cifar10/.gitignore @@ -0,0 +1,2 @@ +/data +*.pth \ No newline at end of file diff --git a/lab1-pytorch-cifar10/README.md b/lab1-pytorch-cifar10/README.md new file mode 100644 index 0000000..0e5ff18 --- /dev/null +++ b/lab1-pytorch-cifar10/README.md @@ -0,0 +1,10 @@ +### "Тензорные ядра CUDA для глубокого обучения" + +1. С помощью фреймворка PyTorch реализовать архитектуру AlexNet для распознавания изображений из набора данных CIFAR10. +2. Обучить модель с помощью GPU на тренировочном наборе данных в двух режимах — без использования тензорных CUDA-ядер (FP32) и с их использованием (FP16). +3. Эксперимент 1: сравнить время обучения модели в разных режимах. +4. Эксперимент 2: сравнить точности распознавания изображений из тестового набора данных при разных режимах обучения модели. +5. Эксперимент 3: сравнить реальные вычислительные сложности пакетного умножения матриц (Batch Matrix Multiplication) на GPU в режимах FP32 и FP16; построить графики сложностей. +6. Объяснить результаты вычислительных экспериментов. + +Отчет должен содержать: исходный код, результаты экспериментов, обсуждение полученных результатов. 
import torch.nn as nn


CIFAR10_NUM_CLASSES = 10


class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for small RGB images.

    The feature extractor halves the spatial resolution four times (one
    strided convolution and three max-pools), so a 32x32 input yields a
    256-channel 2x2 feature map. An adaptive average pool then pins the map
    to 2x2 before the fully connected classifier; it is the identity for
    32x32 inputs and lets other input sizes reach the classifier as well.
    The pool has no parameters, so ``state_dict`` keys are limited to
    ``features.*`` and ``classifier.*``.

    Args:
        num_classes: Number of output logits. Defaults to
            ``CIFAR10_NUM_CLASSES``.
    """

    # Spatial size of the feature map consumed by the first Linear layer.
    _POOLED_SIZE = (2, 2)
    _FEATURE_CHANNELS = 256

    def __init__(self, /, num_classes: int = CIFAR10_NUM_CLASSES):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, self._FEATURE_CHANNELS, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        # Parameter-free: normalises the feature map to the size the
        # classifier was built for instead of assuming it in ``forward``.
        self.avgpool = nn.AdaptiveAvgPool2d(self._POOLED_SIZE)
        flat_features = (
            self._FEATURE_CHANNELS * self._POOLED_SIZE[0] * self._POOLED_SIZE[1]
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(flat_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch *x*."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial positions.
        x = x.flatten(1)
        return self.classifier(x)
"""Evaluate a saved CIFAR10 AlexNet checkpoint on the CIFAR10 test set."""
import torch
import torch.utils.data
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from alexnet import AlexNet, CIFAR10_NUM_CLASSES

if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")


NET_SAVE_PATH = "./cifar10_alexnet.pth"

device: torch.device

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

batch_size = 4

trainset: torchvision.datasets.CIFAR10
testset: torchvision.datasets.CIFAR10

trainloader: torch.utils.data.DataLoader
testloader: torch.utils.data.DataLoader

classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)


def load_data():
    """Download CIFAR10 into ./data if needed and build the module-level loaders."""
    global trainset, trainloader, testset, testloader
    trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )

    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=2
    )


def imshow(img):
    """Display a normalised CHW image tensor with matplotlib (blocks until closed)."""
    if img.is_cuda:
        img = img.cpu()
    # Undo Normalize((0.5, ...), (0.5, ...)) so values are back in [0, 1].
    img = img / 2 + 0.5
    npimg = img.numpy()
    # matplotlib expects HWC, tensors are CHW.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def _count_correct(net, loader):
    """Return per-class ``(correct, total)`` count dicts for *net* over *loader*.

    The caller is responsible for putting *net* into eval mode.
    """
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    with torch.no_grad():
        for images, labels in loader:
            predictions = net(images.to(device)).argmax(dim=1)
            # One host transfer per batch instead of one per sample.
            for label, prediction in zip(labels.tolist(), predictions.tolist()):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    return correct_pred, total_pred


def main():
    """Load the checkpoint at NET_SAVE_PATH and print sample and aggregate accuracy."""
    global device
    device = torch.device("cuda:0")
    print("Available device:", device)

    load_data()

    net = AlexNet(CIFAR10_NUM_CLASSES).to(device)
    net.load_state_dict(torch.load(NET_SAVE_PATH, map_location=device))
    # Disable Dropout; otherwise every accuracy figure below is measured on a
    # randomly thinned network.
    net.eval()

    images, labels = next(iter(testloader))
    images, labels = images.to(device), labels.to(device)

    imshow(torchvision.utils.make_grid(images))
    print(
        "GroundTruth: ",
        " ".join(f"{classes[labels[j]]:5s}" for j in range(len(labels))),
    )

    with torch.no_grad():
        predicted = net(images).argmax(dim=1)

    print(
        "Predicted: ",
        " ".join(f"{classes[predicted[j]]:5s}" for j in range(len(predicted))),
    )

    # A single pass over the test set yields both overall and per-class figures.
    correct_pred, total_pred = _count_correct(net, testloader)
    correct = sum(correct_pred.values())
    total = sum(total_pred.values())

    print(
        f"Accuracy of the network on the 10000 test images: {100 * correct // total} %"
    )

    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")


if __name__ == "__main__":
    main()
"""Train AlexNet on CIFAR10 in FP32 or mixed FP16 precision and report test accuracy."""
import torch
import torch.cuda
import torch.cuda.amp
import torch.utils.data
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from alexnet import AlexNet, CIFAR10_NUM_CLASSES
import argparse

if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")


NET_SAVE_PATH = "./cifar10_alexnet.pth"

device: torch.device

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

batch_size = 4

trainset: torchvision.datasets.CIFAR10
testset: torchvision.datasets.CIFAR10

trainloader: torch.utils.data.DataLoader
testloader: torch.utils.data.DataLoader

classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)


def load_data():
    """Download CIFAR10 into ./data if needed and build the module-level loaders."""
    global trainset, trainloader, testset, testloader
    trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )

    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=2
    )


def imshow(img):
    """Display a normalised CHW image tensor with matplotlib (blocks until closed)."""
    if img.is_cuda:
        img = img.cpu()
    # Undo Normalize((0.5, ...), (0.5, ...)) so values are back in [0, 1].
    img = img / 2 + 0.5
    npimg = img.numpy()
    # matplotlib expects HWC, tensors are CHW.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def _autocast(enabled: bool):
    """Return a CUDA FP16 autocast context; a no-op context when *enabled* is false."""
    return torch.autocast(device_type="cuda", dtype=torch.float16, enabled=enabled)


def _train(net, half_precision: bool, epochs: int = 2, log_every: int = 2000):
    """Run SGD over ``trainloader`` for *epochs* epochs, logging the mean loss."""
    # With enabled=False the scaler is a pass-through, so FP32 runs do plain
    # backward()/step() with no loss scaling.
    scaler = torch.cuda.amp.GradScaler(enabled=half_precision)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    net.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in tqdm(
            enumerate(trainloader, 0),
            desc=f"Epoch {epoch+1}",
            total=len(trainloader),
            unit="batch",
        ):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Weights stay FP32; autocast picks FP16 for eligible ops. Loss
            # scaling protects small FP16 gradients from flushing to zero.
            with _autocast(half_precision):
                outputs = net(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            if i % log_every == log_every - 1:
                tqdm.write(
                    f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every:.3f}"
                )
                running_loss = 0.0


def _count_correct(net, loader, half_precision: bool):
    """Return per-class ``(correct, total)`` count dicts for *net* over *loader*.

    The caller is responsible for putting *net* into eval mode.
    """
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    with torch.no_grad():
        for images, labels in tqdm(
            loader,
            desc="Measuring accuracy",
            unit="batch",
            total=len(loader),
        ):
            with _autocast(half_precision):
                outputs = net(images.to(device))
            predictions = outputs.argmax(dim=1)
            # One host transfer per batch instead of one per sample.
            for label, prediction in zip(labels.tolist(), predictions.tolist()):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    return correct_pred, total_pred


def main(half_precision: bool = False):
    """Train AlexNet on CIFAR10, save its weights and print test accuracy.

    Args:
        half_precision: When true, train and evaluate under FP16 autocast
            with gradient scaling; otherwise everything runs in FP32.

    The state dict is written to ``NET_SAVE_PATH`` (read at call time, so the
    entry point may rebind it before calling).
    """
    global device
    device = torch.device("cuda:0")

    load_data()

    net = AlexNet(num_classes=CIFAR10_NUM_CLASSES).to(device)

    print("Training")
    _train(net, half_precision)
    print("Finished Training")

    torch.save(net.state_dict(), NET_SAVE_PATH)

    # Disable Dropout; otherwise every accuracy figure below is measured on a
    # randomly thinned network.
    net.eval()

    images, labels = next(iter(testloader))
    images, labels = images.to(device), labels.to(device)

    print(
        "GroundTruth: ",
        " ".join(f"{classes[labels[j]]:5s}" for j in range(len(labels))),
    )
    with torch.no_grad(), _autocast(half_precision):
        predicted = net(images).argmax(dim=1)
    print(
        "Predicted: ",
        " ".join(f"{classes[predicted[j]]:5s}" for j in range(len(predicted))),
    )

    # A single pass over the test set yields both overall and per-class figures.
    correct_pred, total_pred = _count_correct(net, testloader, half_precision)
    correct = sum(correct_pred.values())
    total = sum(total_pred.values())

    print(
        f"Accuracy of the network on the 10000 test images: {100 * correct // total} %"
    )

    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")


if __name__ == "__main__":
    # --half selects mixed-precision training and a separate checkpoint file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--half", action="store_true", help="use half precision")
    args = parser.parse_args()
    if args.half:
        print("Using half precision")
        NET_SAVE_PATH = "./cifar10_alexnet_half.pth"
    main(half_precision=args.half)