# Module 3: Loss functions for classification

These are the quizzes corresponding to Module 3.

In all the questions below, we assume that the following imports have been done:

```python
import torch
import torch.nn as nn
```


Try to answer the questions without running the code ;-)

We run the following code (where `input` is assumed to have been defined beforehand, e.g. as `input = torch.randn(3, requires_grad=True)`):

```python
> m = nn.Sigmoid()
> loss = nn.BCELoss()
> target = torch.empty(3).random_(2)
> print(target)
tensor([0., 1., 1.])
> optimizer = torch.optim.SGD([input], lr=0.1)
> print(m(input))
> for _ in range(10000):
      output = loss(m(input), target)
      output.backward()
      optimizer.step()
> print(xxxxxx)
```


and obtain the following result:

```
tensor([0.0030, 0.9970, 0.9970], grad_fn=<SigmoidBackward>)
```
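
As a reminder (background, not part of the quiz): `nn.BCELoss` computes the binary cross-entropy between probabilities and binary targets, averaged over all entries. A minimal sketch, with made-up numbers, checking it against the formula by hand:

```python
import torch
import torch.nn as nn

p = torch.tensor([0.2, 0.7, 0.9])  # predicted probabilities, must lie in [0, 1]
t = torch.tensor([0.0, 1.0, 1.0])  # binary targets

# BCELoss (with the default reduction='mean') averages
# -[t*log(p) + (1-t)*log(1-p)] over all entries.
manual = -(t * torch.log(p) + (1 - t) * torch.log(1 - p)).mean()
print(torch.allclose(nn.BCELoss()(p, t), manual))  # True
```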


We run the following code (with `input` now assumed to be of shape `(1, 2, 3)`, still with `requires_grad=True`):

```python
> target = torch.empty(1, 2, 3).random_(2)
> print(target)
tensor([[[1., 1., 1.],
         [0., 0., 1.]]])
> optimizer = torch.optim.SGD([input], lr=0.1)
> print(m(input).size())
torch.Size([1, 2, 3])
```


We then run:

```python
for _ in range(10000):
    output = loss(m(input), target)
    output.backward()
    optimizer.step()
```
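
Note that `nn.BCELoss` does not care about the number of dimensions: it only requires the input and the target to have the same shape, and it reduces over all entries. A quick sketch with the shapes used above (random values, just to illustrate the shape contract):

```python
import torch
import torch.nn as nn

p = torch.rand(1, 2, 3)              # probabilities with an arbitrary shape
t = torch.empty(1, 2, 3).random_(2)  # binary targets of the same shape
# The loss is computed elementwise, then averaged over all 6 entries.
print(nn.BCELoss()(p, t))            # a scalar
```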


We run the following code (with a fresh `input` of shape `(3,)` assumed):

```python
> target = torch.randn(3)
> print(target)
tensor([-0.1272, -0.4165,  0.1002])
> optimizer = torch.optim.SGD([input], lr=0.1)
> print(m(input))
```


and then:

```python
for _ in range(10000):
    output = loss(m(input), target)
    output.backward()
    optimizer.step()
print(m(input))
```


We run the following code:

```python
> target = 10*torch.randn(3)
> print(target)
tensor([ 12.1225, -11.8731,  19.2255])
> optimizer = torch.optim.SGD([input], lr=0.1)
> print(m(input))
```


and then:

```python
for _ in range(10000):
    output = loss(m(input), target)
    output.backward()
    optimizer.step()
print(m(input))
```
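
These last two runs probe what happens when the target is outside [0, 1]. For a single entry with fixed target t, the BCE as a function of p = m(input) is l(p) = -(t·log p + (1-t)·log(1-p)), with derivative (p - t)/(p(1-p)). A small sketch of the sign of this derivative on (0, 1) (my own illustration, not part of the quiz code):

```python
import torch

# dl/dp = (p - t) / (p * (1 - p)) for p in (0, 1):
# for t in (0, 1) it vanishes at p = t, so SGD drives m(input) towards t;
# for t < 0 it is positive everywhere, pushing p towards 0;
# for t > 1 it is negative everywhere, pushing p towards 1.
p = torch.linspace(0.01, 0.99, 5)
for t in (-0.4, 0.5, 12.0):
    print(t, ((p - t) / (p * (1 - p))).sign())
```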


We run:

```python
> loss2 = nn.BCEWithLogitsLoss()
> target = torch.empty(3).random_(2)
> print(target)
tensor([0., 1., 0.])
```


and then code1:

```python
input = torch.randn(3, requires_grad=True)
optimizer = torch.optim.SGD([input], lr=0.1)
for _ in range(500):
    output = loss2(m(input), target)
    output.backward()
    optimizer.step()
print(m(input))
```


and then code2:

```python
input = torch.randn(3, requires_grad=True)
optimizer = torch.optim.SGD([input], lr=0.1)
for _ in range(500):
    output = loss2(input, target)
    output.backward()
    optimizer.step()
print(m(input))
```
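
The point being tested: `nn.BCEWithLogitsLoss` combines the sigmoid and the binary cross-entropy into a single, numerically more stable layer, so it expects raw scores, not probabilities; feeding it `m(input)` applies the sigmoid twice. A minimal check of the equivalence (random values, for illustration only):

```python
import torch
import torch.nn as nn

x = torch.randn(3)             # raw logits
t = torch.empty(3).random_(2)  # binary targets
# BCEWithLogitsLoss(x, t) == BCELoss(sigmoid(x), t)
print(torch.allclose(nn.BCEWithLogitsLoss()(x, t),
                     nn.BCELoss()(torch.sigmoid(x), t)))  # True
```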


We run:

```python
> loss3 = nn.NLLLoss()
> m3 = nn.LogSoftmax(dim=1)
> target = torch.empty(4).random_(6)
> print(target)
tensor([1., 5., 0., 2.])
```


then code1:

```python
input = torch.randn(4, requires_grad=True)
optimizer = torch.optim.SGD([input], lr=0.1)
for _ in range(1000):
    output = loss3(m3(input), target)
    output.backward()
    optimizer.step()
print(torch.exp(m3(input)))
```


code2:

```python
input = torch.randn((4, 7), requires_grad=True)
optimizer = torch.optim.SGD([input], lr=0.1)
for _ in range(1000):
    output = loss3(m3(input), target.long())
    output.backward()
    optimizer.step()
print(torch.exp(m3(input)))
```


code3:

```python
input = torch.randn((4, 6), requires_grad=True)
optimizer = torch.optim.SGD([input], lr=0.1)
for _ in range(1000):
    output = loss3(m3(input), target)
    output.backward()
    optimizer.step()
print(torch.exp(m3(input)))
```
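
As a reminder of the contract being tested here: `nn.NLLLoss` expects log-probabilities of shape `(N, C)` and a `LongTensor` of class indices of shape `(N,)`; it simply picks out, for each row, minus the log-probability of the target class. A sketch checking this by hand (the target values printed above are hard-coded, the scores are random):

```python
import torch
import torch.nn as nn

logp = nn.LogSoftmax(dim=1)(torch.randn(4, 6))  # (N, C) log-probabilities
t = torch.tensor([1, 5, 0, 2])                  # class indices as a LongTensor
# NLLLoss (reduction='mean') averages -logp[i, t[i]] over the rows.
manual = -logp[torch.arange(4), t].mean()
print(torch.allclose(nn.NLLLoss()(logp, t), manual))  # True
```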


We now take:

```python
> target = torch.empty(4, 2, 3).random_(6)
> print(target)
tensor([[[3., 1., 0.],
         [4., 2., 3.]],

        [[1., 3., 3.],
         [0., 1., 0.]],

        [[2., 1., 1.],
         [0., 4., 5.]],

        [[1., 3., 5.],
         [3., 2., 5.]]])
```
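
For such a target of shape `(4, 2, 3)`, `nn.NLLLoss` follows its K-dimensional convention: the scores must carry the class dimension in position 1, i.e. have shape `(N, C, d1, d2) = (4, C, 2, 3)` with `C >= 6` here, and the target must again be a `LongTensor`. A shape-only sketch (random scores, purely illustrative):

```python
import torch
import torch.nn as nn

t = torch.empty(4, 2, 3).random_(6).long()  # class indices in {0, ..., 5}
scores = torch.randn(4, 6, 2, 3)            # (N, C, d1, d2): classes on dim 1
# LogSoftmax must also normalize over the class dimension (dim=1).
print(nn.NLLLoss()(nn.LogSoftmax(dim=1)(scores), t))  # a scalar
```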


We now run (with `input` presumably still the `(4, 6)` tensor from code3 above):

```python
loss4 = nn.CrossEntropyLoss()
m4 = nn.Softmax(dim=1)
target = torch.empty(4).random_(6)
optimizer = torch.optim.SGD([input], lr=0.1)
```


then code1:

```python
for _ in range(500):
    output = loss4(input, target.long())
    output.backward()
    optimizer.step()
print(m4(input))
```


code2:

```python
for _ in range(500):
    output = loss3(input, target.long())
    output.backward()
    optimizer.step()
print(m4(input))
```


code3:

```python
for _ in range(500):
    output = loss3(m3(input), target.long())
    output.backward()
    optimizer.step()
print(m4(input))
```
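
The identity behind these three snippets: `nn.CrossEntropyLoss` is exactly `nn.LogSoftmax` followed by `nn.NLLLoss`, so it expects raw scores, while `nn.NLLLoss` alone expects log-probabilities. A minimal check (random values, for illustration only):

```python
import torch
import torch.nn as nn

x = torch.randn(4, 6)                 # raw scores
t = torch.empty(4).random_(6).long()  # class indices
ce = nn.CrossEntropyLoss()(x, t)
composed = nn.NLLLoss()(nn.LogSoftmax(dim=1)(x), t)
print(torch.allclose(ce, composed))   # True
```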