Lab 1 CIFAR10 initial done
parent 671e8d40e6
commit c75e27a36e
6 changed files with 414 additions and 0 deletions
lab1-pytorch-cifar10/train.py (new file, 197 additions)
@@ -0,0 +1,197 @@
import torch
import torch.cuda
import torch.cuda.amp
import torch.utils.data
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from alexnet import AlexNet, CIFAR10_NUM_CLASSES
import argparse

if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")


NET_SAVE_PATH = "./cifar10_alexnet.pth"

device: torch.device  # set in main()

# ToTensor maps pixels to [0, 1]; Normalize((0.5,), (0.5,)) then maps them to [-1, 1]
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

batch_size = 4

trainset: torchvision.datasets.CIFAR10
testset: torchvision.datasets.CIFAR10

trainloader: torch.utils.data.DataLoader
testloader: torch.utils.data.DataLoader

# CIFAR-10 class names, index-aligned with the dataset labels
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)


def load_data():
    # Download CIFAR-10 and wrap the train/test splits in DataLoaders
    global trainset, trainloader, testset, testloader
    trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform
    )
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )

    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=2
    )


def imshow(img):
    # Un-normalize and display an image tensor (CHW -> HWC)
    if img.is_cuda:
        img = img.cpu()
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def main(half_precision: bool = False):
    global device
    device = torch.device("cuda:0")

    load_data()

    net = AlexNet(num_classes=CIFAR10_NUM_CLASSES)
    if half_precision:
        net = net.half()  # cast all parameters to fp16
    net = net.to(device)

    scaler = torch.cuda.amp.grad_scaler.GradScaler()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print("Training")

    for epoch in range(2):
        running_loss = 0.0
        for i, data in tqdm(
            enumerate(trainloader, 0),
            desc=f"Epoch {epoch+1}",
            total=len(trainloader),
            unit="batch",
        ):
            inputs, labels = data
            if half_precision:
                inputs = inputs.half()
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs).to(device)
            loss = criterion(outputs, labels)
            if half_precision:
                # GradScaler cannot unscale fp16 gradients, so the fully
                # half-precision model steps the optimizer directly
                loss.backward()
                optimizer.step()
            else:
                scaler.scale(loss).backward()  # type: ignore
                scaler.step(optimizer)
                scaler.update()

            running_loss += loss.item()
            if i % 2000 == 1999:
                tqdm.write(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}")
                running_loss = 0.0

    print("Finished Training")

    torch.save(net.state_dict(), NET_SAVE_PATH)

    # Show predictions for one test batch
    dataiter = iter(testloader)
    images, labels = next(dataiter)
    if half_precision:
        images = images.half()
    images, labels = images.to(device), labels.to(device)

    print("GroundTruth: ", " ".join(f"{classes[labels[j]]:5s}" for j in range(4)))
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    print("Predicted: ", " ".join(f"{classes[predicted[j]]:5s}" for j in range(4)))

    # Accuracy over the full test set
    correct = 0
    total = 0
    with torch.no_grad():
        for data in tqdm(
            testloader,
            desc="Measuring random guess accuracy",
            unit="batch",
            total=len(testloader),
        ):
            images, labels = data
            if half_precision:
                images = images.half()
            images, labels = images.to(device), labels.to(device)

            outputs = net(images)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(
        f"Accuracy of the network on the 10000 test images: {100 * correct // total} %"
    )

    # Per-class accuracy over the test set
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    with torch.no_grad():
        for data in tqdm(
            testloader,
            desc="Measuring class accuracy",
            unit="batch",
            total=len(testloader),
        ):
            images, labels = data
            if half_precision:
                images = images.half()
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predictions = torch.max(outputs, 1)

            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")


if __name__ == "__main__":
    # use argparse to add 'half' argument for training on half precision
    parser = argparse.ArgumentParser()
    parser.add_argument("--half", action="store_true", help="use half precision")
    args = parser.parse_args()
    if args.half:
        print("Using half precision")
        NET_SAVE_PATH = "./cifar10_alexnet_half.pth"
    # now we can use args.half to check if we want to use half precision
    main(half_precision=args.half)
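For reference, a hedged sketch of how the script might be invoked from inside lab1-pytorch-cifar10/ (assuming torch, torchvision, numpy, matplotlib, tqdm and the alexnet module added in this commit are importable, and that a CUDA device is present, since the script raises otherwise):

python train.py          # fp32 weights; the backward pass goes through GradScaler
python train.py --half   # casts model and inputs to fp16, saves cifar10_alexnet_half.pth

The saved weights could later be restored with the usual state_dict pattern, for example:

net = AlexNet(num_classes=CIFAR10_NUM_CLASSES)
net.load_state_dict(torch.load("./cifar10_alexnet.pth"))
net.eval()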
Loading…
Add table
Add a link
Reference in a new issue
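Note that on the default path the script keeps fp32 weights and only wraps backward/step in GradScaler, without autocast. For comparison only, a minimal sketch of the usual torch.cuda.amp pairing, reusing the net/criterion/optimizer/scaler names from train.py (this is an assumed reference pattern, not what the committed script does):

optimizer.zero_grad()
with torch.cuda.amp.autocast():
    outputs = net(inputs)              # eligible ops run in fp16
    loss = criterion(outputs, labels)
scaler.scale(loss).backward()          # scaled gradients on fp32 weights
scaler.step(optimizer)
scaler.update()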