PyTorch Fundamentals¶
Imports¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
torch.__version__
'2.6.0+cu124'
Tensors Basics¶
Create¶
# Scalar
scalar = torch.tensor(401)
scalar
tensor(401)
scalar.ndim, scalar.dtype
(0, torch.int64)
# Vector
vector = torch.tensor([6, 8])
vector
tensor([6, 8])
vector.dtype, vector.shape
(torch.int64, torch.Size([2]))
# Matrix
MAT = torch.tensor([[6, 8], [9, 8]])
MAT
tensor([[6, 8], [9, 8]])
MAT.ndim, MAT.shape
(2, torch.Size([2, 2]))
# Tensor
TENSOR = torch.tensor([[[6, 8],
[9, 8]],
[[6, 8],
[9, 8]]])
TENSOR.ndim, TENSOR.shape
(3, torch.Size([2, 2, 2]))
# Zeros Tensor
zeros = torch.zeros(size=(2, 2))
zeros
tensor([[0., 0.], [0., 0.]])
# Ones Tensor
ones = torch.ones(size=(2, 2))
ones
tensor([[1., 1.], [1., 1.]])
Random Tensors¶
Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.
rand_tensor = torch.rand(size=(2, 4, 8), dtype=torch.float64)
rand_tensor
tensor([[[0.6275, 0.7244, 0.7825, 0.5644, 0.5718, 0.0054, 0.2976, 0.0068], [0.5648, 0.8897, 0.7519, 0.0515, 0.2864, 0.0137, 0.5526, 0.9334], [0.9502, 0.1364, 0.2715, 0.8319, 0.1277, 0.2627, 0.1642, 0.3742], [0.4545, 0.5249, 0.1865, 0.4319, 0.2269, 0.4022, 0.8512, 0.0965]], [[0.1592, 0.5930, 0.4154, 0.7496, 0.7190, 0.6702, 0.3055, 0.2441], [0.7006, 0.0468, 0.5782, 0.0172, 0.3908, 0.7947, 0.2125, 0.2507], [0.8254, 0.7010, 0.7293, 0.3864, 0.6410, 0.2798, 0.7485, 0.2312], [0.3683, 0.4007, 0.6758, 0.5511, 0.1513, 0.8037, 0.1573, 0.9407]]], dtype=torch.float64)
Range & Like¶
range = torch.arange(start=0, end=11, step=2) # note: this name shadows Python's built-in range()
range
tensor([ 0, 2, 4, 6, 8, 10])
zeros_like_range = torch.zeros_like(range)
zeros_like_range
tensor([0, 0, 0, 0, 0, 0])
Tensor Parameters¶
Tensor datatype is one of the 3 big sources of errors you'll run into with PyTorch and deep learning:
1. Tensor not the right datatype
2. Tensor not the right shape
3. Tensor not on the right device
Each of these shows up as a runtime error; the sketch below triggers all three on purpose.
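As a rough illustration (the tensor names here are made up for the example, and the device case only runs if a GPU is available):
# Hypothetical tensors used only to trigger the three error types
float_tensor = torch.rand(3, 4)                  # float32, on the CPU
long_tensor = torch.arange(12).reshape(3, 4)     # int64 (Long), on the CPU

# 1. Wrong datatype: mean() needs a floating point (or complex) dtype
try:
    long_tensor.mean()
except RuntimeError as e:
    print(f"datatype error: {e}")

# 2. Wrong shape: inner dimensions don't match for matrix multiplication
try:
    float_tensor @ float_tensor                  # (3, 4) @ (3, 4)
except RuntimeError as e:
    print(f"shape error: {e}")

# 3. Wrong device: mixing a CPU tensor with a GPU tensor
if torch.cuda.is_available():
    try:
        float_tensor @ float_tensor.T.to("cuda")
    except RuntimeError as e:
        print(f"device error: {e}")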
# Default datatype for tensors is float32
float_64_tensor = torch.tensor([3.0, 6.0, 9.0],
dtype=torch.float64, # defaults to None, which is torch.float32 or whatever datatype is passed
device=None, # defaults to None, which uses the default device (usually the CPU)
requires_grad=False) # if True, operations performed on the tensor are recorded
float_64_tensor.shape, float_64_tensor.dtype, float_64_tensor.device
(torch.Size([3]), torch.float64, device(type='cpu'))
Getting Information From Tensors¶
tensor = torch.rand(size=(2, 3, 1),
dtype=torch.float64,
device=None
)
tensor
tensor([[[0.5410], [0.9445], [0.6152]], [[0.1851], [0.1831], [0.3596]]], dtype=torch.float64)
# Tensor Attributes
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}") # will default to CPU
Shape of tensor: torch.Size([2, 3, 1]) Datatype of tensor: torch.float64 Device tensor is stored on: cpu
Manipulating tensors (tensor operations)¶
- Addition
- Subtraction
- Multiplication (element-wise)
- Division
- Matrix multiplication (dot product)
tensor_a = torch.rand(size=(1, 3))
tensor_b = torch.rand(size=(1, 3))
tensor_c = torch.rand(size=(3, 1))
# Addition & Subtraction
tensor_a + 100, torch.subtract(tensor_a, 10)
(tensor([[100.6755, 100.7467, 100.3929]]), tensor([[-9.3245, -9.2533, -9.6071]]))
# Multiplication (element-wise)
tensor_a * 10, torch.mul(tensor_a, 10)
(tensor([[6.7548, 7.4674, 3.9295]]), tensor([[6.7548, 7.4674, 3.9295]]))
# Division (element-wise)
tensor_a / 10, tensor_a / tensor_b
(tensor([[0.0675, 0.0747, 0.0393]]), tensor([[5.8973, 1.7381, 0.5845]]))
# Multiplication (Matrix)
torch.matmul(tensor_a, tensor_c), torch.matmul(tensor_c, tensor_a)
(tensor([[0.6870]]), tensor([[0.4412, 0.4877, 0.2567], [0.0347, 0.0383, 0.0202], [0.3566, 0.3943, 0.2075]]))
# Multiplication (Matrix)
tensor_a @ tensor_c, tensor_c @ tensor_a
(tensor([[0.6870]]), tensor([[0.4412, 0.4877, 0.2567], [0.0347, 0.0383, 0.0202], [0.3566, 0.3943, 0.2075]]))
# Division (element-wise with broadcasting: (1, 3) / (3, 1) -> (3, 3))
tensor_a * (1 / tensor_c), tensor_a / tensor_c
(tensor([[ 1.0342, 1.1433, 0.6016], [13.1626, 14.5512, 7.6571], [ 1.2794, 1.4144, 0.7443]]), tensor([[ 1.0342, 1.1433, 0.6016], [13.1626, 14.5512, 7.6571], [ 1.2794, 1.4144, 0.7443]]))
Matrix Multiplication¶
Note: A matrix multiplication like below is also referred to as the dot product of two matrices.
Because much of deep learning is multiplying and performing operations on matrices, and matrices have strict rules about which shapes can be combined, one of the most common errors you'll run into in deep learning is a shape mismatch.
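For instance, multiplying two matrices whose inner dimensions don't line up fails immediately (a minimal sketch; the shapes are arbitrary):
# (3, 2) @ (3, 2): the inner dimensions (2 and 3) don't match
try:
    torch.matmul(torch.rand(3, 2), torch.rand(3, 2))
except RuntimeError as e:
    print(f"Shape mismatch: {e}")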
tensor_a = torch.rand(size=(3, 4))
tensor_b = torch.rand(size=(4, 2))

torch.matmul vs @ time¶
%%time
torch.matmul(tensor_a, tensor_b)
CPU times: user 301 µs, sys: 0 ns, total: 301 µs Wall time: 234 µs
tensor([[0.6726, 1.1521], [0.7624, 1.3569], [0.3680, 0.7537]])
%%time
tensor_a @ tensor_b
CPU times: user 97 µs, sys: 4 µs, total: 101 µs Wall time: 106 µs
tensor([[0.6726, 1.1521], [0.7624, 1.3569], [0.3680, 0.7537]])
Rules¶
- We can make matrix multiplication work between tensor_a and tensor_b by making their inner dimensions match: (3, 4) @ (4, 2).
One of the ways to do this is with a transpose (switch the dimensions of a given tensor). You can perform transposes in PyTorch using either (both are shown in the sketch after this list):
torch.transpose(input, dim0, dim1) - where input is the tensor to transpose and dim0 and dim1 are the dimensions to be swapped.
tensor.T - where tensor is the tensor to transpose.
- The resulting matrix has the shape of the outer dimensions -> (3, 4) @ (4, 2) = (3, 2).
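A small sketch (with arbitrary shapes) showing both transpose spellings fixing a mismatch:
A = torch.rand(3, 4)
B = torch.rand(3, 4)
# A @ B fails: (3, 4) @ (3, 4) has mismatched inner dimensions (4 vs 3)
# Transposing B makes the inner dimensions match: (3, 4) @ (4, 3) -> (3, 3)
print((A @ B.T).shape)                                   # torch.Size([3, 3])
print(torch.matmul(A, torch.transpose(B, 0, 1)).shape)   # torch.Size([3, 3])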
torch.mm(tensor_a, tensor_b) # torch.mm is a shorthand for torch.matmul; (3, 4) @ (4, 2) already has matching inner dimensions
tensor([[0.6726, 1.1521], [0.7624, 1.3569], [0.3680, 0.7537]])
Finding the min, max, mean, sum, etc (aggregation)¶
rng = torch.arange(0, 100)
torch.min(rng), torch.max(rng), rng.max()
(tensor(0), tensor(99), tensor(99))
# torch.mean(rng) would raise: "mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long"
torch.mean(rng.type(torch.float32)), rng.type(torch.float32).mean()
(tensor(49.5000), tensor(49.5000))
torch.mean(rng, dtype=torch.float32)
tensor(49.5000)
torch.median(rng)
tensor(49)
torch.sum(rng), rng.sum()
(tensor(4950), tensor(4950))
Positional min/max¶
rng.argmin(), rng.argmax() # index
(tensor(0), tensor(99))
Reshaping, Stacking, Squeezing, Unsqueezing and Permute Tensors¶
Method | One-line description
---|---
torch.reshape(input, shape) | Reshapes input to shape (if compatible); can also use torch.Tensor.reshape().
Tensor.view(shape) | Returns a view of the original tensor in a different shape but shares the same data as the original tensor.
torch.stack(tensors, dim=0) | Concatenates a sequence of tensors along a new dimension (dim); all tensors must be the same size.
torch.squeeze(input) | Squeezes input to remove all the dimensions with value 1.
torch.unsqueeze(input, dim) | Returns input with a dimension of value 1 added at dim.
torch.permute(input, dims) | Returns a view of the original input with its dimensions permuted (rearranged) to dims.
reshape & view¶
The product of the new shape's dimensions must equal the product of the original shape's dimensions (the total number of elements has to stay the same).
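If the element counts don't match, PyTorch refuses the reshape; a quick sketch (shapes chosen just for illustration):
x = torch.arange(6)              # 6 elements
print(x.reshape(2, 3).shape)     # OK: 2 * 3 == 6
try:
    x.reshape(4, 2)              # 4 * 2 == 8 != 6
except RuntimeError as e:
    print(f"Reshape error: {e}")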
t = torch.tensor([[1, 2, 3],
[4, 5, 6]], dtype=torch.float32)
t.shape
torch.Size([2, 3])
t.reshape(3, 2)
tensor([[1., 2.], [3., 4.], [5., 6.]])
t.view(6, 1)
tensor([[1.], [2.], [3.], [4.], [5.], [6.]])
stack, vstack and hstack¶
torch.stack
- Requires all input tensors to have the same size and shape.
- Purpose: adds a new dimension when combining tensors.
- Use case: when you want to group tensors along a new axis.
- Shape change: two input tensors of shape (N,) → result shape (2, N) if dim=0, or (N, 2) if dim=1.
torch.hstack (horizontal stack)
- Purpose: concatenates tensors horizontally (along the last dimension).
- Use case: like stacking columns side by side.
- Shape change: input tensors must be at least 1D. For 1D → result shape (N + M,). For 2D → concatenate along dim=1.
torch.vstack (vertical stack)
- Purpose: concatenates tensors vertically (along the first dimension).
- Use case: like stacking rows on top of each other.
- Shape change: for 1D → tensors are reshaped to (1, N) before stacking. For 2D → concatenate along dim=0.
tensor = torch.arange(0.0, 10.0, 2)
tensor
tensor([0., 2., 4., 6., 8.])
tensor_h = torch.arange(0.0, 5.0, 2)
tensor_h
tensor([0., 2., 4.])
tensor_v = torch.vstack((tensor, tensor))
tensor_v
tensor([[0., 2., 4., 6., 8.], [0., 2., 4., 6., 8.]])
stacked_tensor = torch.stack((tensor, tensor), dim=0)
print("\nStacked tensor (dim=0):")
print(stacked_tensor, stacked_tensor.shape)
print("-" * 50)
torch.vstack((tensor, tensor))
Stacked tensor (dim=0): tensor([[0., 2., 4., 6., 8.], [0., 2., 4., 6., 8.]]) torch.Size([2, 5]) --------------------------------------------------
tensor([[0., 2., 4., 6., 8.], [0., 2., 4., 6., 8.]])
stacked_tensor_dim1 = torch.stack((tensor, tensor), dim=1) # stacks as columns; note the result differs from hstack below
print("\nStacked tensor (dim=1):")
print(stacked_tensor_dim1, stacked_tensor_dim1.shape)
print("-" * 50)
torch.hstack((tensor, tensor))
Stacked tensor (dim=1): tensor([[0., 0.], [2., 2.], [4., 4.], [6., 6.], [8., 8.]]) torch.Size([5, 2]) --------------------------------------------------
tensor([0., 2., 4., 6., 8., 0., 2., 4., 6., 8.])
torch.hstack((tensor, tensor_h))
tensor([0., 2., 4., 6., 8., 0., 2., 4.])
torch.vstack((tensor, tensor_v))
tensor([[0., 2., 4., 6., 8.], [0., 2., 4., 6., 8.], [0., 2., 4., 6., 8.]])
squeezing and unsqueezing¶
zeros = torch.zeros(1, 4, 1)
zeros
tensor([[[0.], [0.], [0.], [0.]]])
# Remove all single dimensions
zeros.squeeze(), zeros.squeeze().shape
(tensor([0., 0., 0., 0.]), torch.Size([4]))
zeros.squeeze(2), zeros.squeeze(2).shape
(tensor([[0., 0., 0., 0.]]), torch.Size([1, 4]))
# Add extra dimensions
print(f"Original Shape: {zeros.shape}")
zeros.unsqueeze(2), zeros.unsqueeze(2).shape
Original Shape: torch.Size([1, 4, 1])
(tensor([[[[0.]], [[0.]], [[0.]], [[0.]]]]), torch.Size([1, 4, 1, 1]))
print(f"Original Shape: {zeros.shape}")
zeros.unsqueeze(1), zeros.unsqueeze(1).shape
Original Shape: torch.Size([1, 4, 1])
(tensor([[[[0.], [0.], [0.], [0.]]]]), torch.Size([1, 1, 4, 1]))
permute¶
torch.permute() is a function used to rearrange the dimensions of a tensor. It returns a view of the original tensor with its dimensions reordered according to a specified permutation. This means it does not create a new copy of the data in memory but rather changes how the existing data is interpreted.
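Because the permuted tensor is a view, changing a value through it also changes the original tensor; a minimal sketch to verify that (tensor names are just for the example):
x = torch.zeros(2, 3)
x_permuted = x.permute(1, 0)   # shape (3, 2), same underlying data
x_permuted[0, 0] = 99.0
print(x[0, 0])                 # tensor(99.) -- the original tensor sees the change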
tensor = torch.rand(size=(2, 2, 3), dtype=torch.float) # h, w, color channels
tensor, tensor.shape
(tensor([[[0.6080, 0.9288, 0.8971], [0.1286, 0.2127, 0.8230]], [[0.0525, 0.1619, 0.5537], [0.8337, 0.0971, 0.3341]]]), torch.Size([2, 2, 3]))
torch.permute(tensor, (2, 0, 1)), torch.permute(tensor, (2, 0, 1)).shape
(tensor([[[0.6080, 0.1286], [0.0525, 0.8337]], [[0.9288, 0.2127], [0.1619, 0.0971]], [[0.8971, 0.8230], [0.5537, 0.3341]]]), torch.Size([3, 2, 2]))
Indexing¶
t = torch.arange(end=12, dtype=torch.float).reshape(1, 4, 3)
t, t.shape
(tensor([[[ 0., 1., 2.], [ 3., 4., 5.], [ 6., 7., 8.], [ 9., 10., 11.]]]), torch.Size([1, 4, 3]))
t[:, 1:3, 1]
tensor([[4., 7.]])
t[0, -1, -1]
tensor(11.)
t[:, -1, -1]
tensor([11.])
t[:, :, -1]
tensor([[ 2., 5., 8., 11.]])
PyTorch tensors & NumPy¶
Since NumPy is a popular Python numerical computing library, PyTorch has functionality to interact with it nicely.
The two main methods you'll want to use for NumPy to PyTorch (and back again) are:
- torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.
- torch.Tensor.numpy() - PyTorch tensor -> NumPy array.
Note: By default, NumPy arrays are created with the datatype float64, and if you convert one to a PyTorch tensor, it'll keep that datatype (as the example below shows).
However, many PyTorch calculations default to using float32.
So if you want to convert your NumPy array (float64) -> PyTorch tensor (float64) -> PyTorch tensor (float32), you can use tensor = torch.from_numpy(array).type(torch.float32).
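A minimal sketch of that float64 -> float32 conversion (the array contents are arbitrary):
array_f64 = np.arange(1.0, 5.0)                                # NumPy defaults to float64
tensor_f32 = torch.from_numpy(array_f64).type(torch.float32)   # cast to PyTorch's usual float32
print(array_f64.dtype, tensor_f32.dtype)                       # float64 torch.float32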
# NumPy array to tensor
array = np.arange(1.0, 5.0)
tensor = torch.from_numpy(array)
array, tensor
(array([1., 2., 3., 4.]), tensor([1., 2., 3., 4.], dtype=torch.float64))
# Tensor to NumPy array
tensor = torch.arange(1.0, 5.0)
array = tensor.numpy()
array, tensor
(array([1., 2., 3., 4.], dtype=float32), tensor([1., 2., 3., 4.]))
Reproducibility (trying to take the random out of random)¶
The PyTorch reproducibility documentation (a good exercise would be to read through this for 10 minutes; even if you don't understand it all now, being aware of it is important).
The Wikipedia random seed page (this'll give a good overview of random seeds and pseudorandomness in general).
# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)
print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B
Tensor A: tensor([[0.4554, 0.1824, 0.1643, 0.7226], [0.4712, 0.3353, 0.8879, 0.8378], [0.6242, 0.3597, 0.6016, 0.1824]]) Tensor B: tensor([[0.3954, 0.7728, 0.2363, 0.9636], [0.9459, 0.0383, 0.6894, 0.9444], [0.6322, 0.4338, 0.9433, 0.7510]]) Does Tensor A equal Tensor B? (anywhere)
tensor([[False, False, False, False], [False, False, False, False], [False, False, False, False]])
# Set the random seed
RANDOM_SEED=42
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(3, 5)
# Have to reset the seed every time a new rand() is called
# Without this, tensor_D would be different to tensor_C
torch.random.manual_seed(seed=RANDOM_SEED)
random_tensor_D = torch.rand(3, 5)
print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D
Tensor C: tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904], [0.6009, 0.2566, 0.7936, 0.9408, 0.1332], [0.9346, 0.5936, 0.8694, 0.5677, 0.7411]]) Tensor D: tensor([[0.8823, 0.9150, 0.3829, 0.9593, 0.3904], [0.6009, 0.2566, 0.7936, 0.9408, 0.1332], [0.9346, 0.5936, 0.8694, 0.5677, 0.7411]]) Does Tensor C equal Tensor D? (anywhere)
tensor([[True, True, True, True, True], [True, True, True, True, True], [True, True, True, True, True]])
Running tensors on GPUs (and making faster computations)¶
Deep learning algorithms require a lot of numerical operations.
And by default these operations are often done on a CPU (central processing unit).
However, there's another common piece of hardware called a GPU (graphics processing unit), which is often much faster at performing the specific types of operations neural networks need (matrix multiplications) than CPUs.
CUDA is a computing platform and API that allows GPUs to be used for general-purpose computing, not just graphics.
To check if you've got access to an NVIDIA GPU, you can run !nvidia-smi
!nvidia-smi
Mon Aug 4 11:02:33 2025 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |=========================================+========================+======================| | 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 | | N/A 42C P8 9W / 70W | 2MiB / 15360MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ +-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | No running processes found | +-----------------------------------------------------------------------------------------+
Note: In PyTorch, it's best practice to write device-agnostic code. This means code that'll run on the CPU (always available), an NVIDIA GPU (if available), or an Apple Silicon GPU (if available).
# Set device type
if torch.cuda.is_available():
device = "cuda" # Use NVIDIA GPU (if available)
elif torch.backends.mps.is_available():
device = "mps" # Use Apple Silicon GPU (if available)
else:
device = "cpu" # Default to CPU if no GPU is available
device
'cuda'
If you want to do faster computing you can use a GPU but if you want to do much faster computing, you can use multiple GPUs.
# Count number of devices
torch.cuda.device_count()
1
Knowing the number of GPUs PyTorch has access to is helpful in case you wanted to run a specific process on one GPU and another process on another (PyTorch also has features to let you run a process across all GPUs).
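For example, with more than one GPU you can target each device by index (a sketch only; it assumes at least two GPUs are present, hence the guard):
if torch.cuda.device_count() >= 2:
    t0 = torch.rand(2, 2, device="cuda:0")   # first GPU
    t1 = torch.rand(2, 2, device="cuda:1")   # second GPU
    print(t0.device, t1.device)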
Putting tensors (and models) on the GPU¶
You can put tensors (and models, we'll see this later) on a specific device by calling to(device) on them. Where device is the target device you'd like the tensor (or model) to go to.
# Create tensor (default on CPU)
tensor = torch.tensor([1, 2, 3])
# Tensor not on GPU
print(tensor, tensor.device)
# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu
tensor([1, 2, 3]) cpu
tensor([1, 2, 3], device='cuda:0')
# Convert a tensor that's on the GPU to NumPy (NumPy only works with CPU memory, so move it back first)
tensor_on_gpu.cpu().numpy()
array([1, 2, 3])
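Calling .numpy() directly on a GPU tensor raises an error, which is why the .cpu() call above is needed; a quick sketch:
if torch.cuda.is_available():
    try:
        tensor_on_gpu.numpy()          # fails: NumPy can't read GPU memory
    except (TypeError, RuntimeError) as e:
        print(f"Error: {e}")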
Exercises¶
import torch
import numpy as np
1. Documentation reading: see the documentation on torch.Tensor and torch.cuda.
# 2. Create a random tensor with shape (7, 7)
rand1 = torch.rand(size=(7, 7))
rand1
tensor([[0.0371, 0.9263, 0.9660, 0.5299, 0.8111, 0.7744, 0.9784], [0.2305, 0.0793, 0.0270, 0.3309, 0.6999, 0.7467, 0.8744], [0.9648, 0.8845, 0.5339, 0.8718, 0.1371, 0.8212, 0.6988], [0.7388, 0.3217, 0.5072, 0.4767, 0.0500, 0.0213, 0.9643], [0.6127, 0.0850, 0.3618, 0.3483, 0.4541, 0.9983, 0.1756], [0.5945, 0.6631, 0.4309, 0.9887, 0.8342, 0.9622, 0.1245], [0.1351, 0.2120, 0.5343, 0.9736, 0.0149, 0.2368, 0.9623]])
# 3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7)
rand2 = torch.rand(size=(1, 7))
torch.matmul(rand1, rand2.T)
tensor([[1.1939], [1.1061], [0.9868], [1.8999], [0.7716], [1.3174], [1.6353]])
# 4. Set the random seed to 0 and do exercises 2 & 3 over again
torch.manual_seed(0)
rand1 = torch.rand(size=(7, 7))
rand2 = torch.rand(size=(1, 7))
torch.matmul(rand1, rand2.T)
tensor([[1.8542], [1.9611], [2.2884], [3.0481], [1.7067], [2.5290], [1.7989]])
# 5. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent?
torch.cuda.manual_seed(0)
# 6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this).
# Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed)
torch.manual_seed(1234)
rand1 = torch.rand(2, 3)
rand2 = torch.rand_like(rand1)
rand1.to("cuda")
rand2.to("cuda")
tensor([[0.0518, 0.4681, 0.6738], [0.3315, 0.7837, 0.5631]], device='cuda:0')
# 7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).
result = torch.matmul(rand1, rand2.T)
result
tensor([[0.3647, 0.4709], [0.5184, 0.5617]], device='cuda:0')
# 8. Find the maximum and minimum values of the output of 7
result.max(), result.min()
(tensor(0.5617, device='cuda:0'), tensor(0.3647, device='cuda:0'))
# 9 Find the maximum and minimum index values of the output of 7
result.argmax(), result.argmin()
(tensor(3, device='cuda:0'), tensor(0, device='cuda:0'))
# 10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor
# with all the 1 dimensions removed to be left with a tensor of shape (10).
# Set the seed to 7 when you create it and print out the first tensor and its shape as well as the second tensor and its shape.
torch.manual_seed(7)
rand3 = torch.rand(1, 1, 1, 10)
rand3_squeezed = rand3.squeeze()
rand3, rand3_squeezed.shape
(tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653, 0.8513]]]]), torch.Size([10]))