Underwhelming performance in neural network with PyTorch

sjhalayka · July 12, 2023, 11:25pm

I am trying to train a network to classify cat vs dog images. I’m only getting about 75% correct at this point. Is there anything obvious (to an expert) that I can do to make it perform better?

The lastest img_train.py code file is at: https://github.com/sjhalayka/pytorch_cats_vs_dogs/blob/main/img_train.py

The data that I’m using are at: Cat and Dog | Kaggle

For the record, the current code is:

import numpy as np
import math
import cv2
import random
import torch
from torch import flatten
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

import os.path
from os import path



img_width = 64
num_channels = 3

#num_input_components = img_width*img_width*num_channels
num_output_components = 2

num_epochs = 1000
learning_rate = 0.001






class Net(torch.nn.Module):
	def __init__(self, num_channels, num_output_components, all_train_files_len):
		super().__init__()
		self.model = torch.nn.Sequential(
		    #Input = 3 x 32 x 32, Output = 32 x 32 x 32
		    torch.nn.Conv2d(in_channels = num_channels, out_channels = 32, kernel_size = 3, padding = 1), 
		    torch.nn.ReLU(),
		    #Input = 32 x 32 x 32, Output = 32 x 16 x 16
		    torch.nn.MaxPool2d(kernel_size=2),
  
		    #Input = 32 x 16 x 16, Output = 64 x 16 x 16
		    torch.nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1),
		    torch.nn.ReLU(),
		    #Input = 64 x 16 x 16, Output = 64 x 8 x 8
		    torch.nn.MaxPool2d(kernel_size=2),
		      
		    #Input = 64 x 8 x 8, Output = 64 x 8 x 8
		    torch.nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = 1),
		    torch.nn.ReLU(),
		    #Input = 64 x 8 x 8, Output = 64 x 4 x 4
		    torch.nn.MaxPool2d(kernel_size=2),
  
		    torch.nn.Flatten(),
		    torch.nn.Linear(4096, all_train_files_len),
		    torch.nn.ReLU(),
		    torch.nn.Linear(all_train_files_len, num_output_components)
		)
  
	def forward(self, x):
		return self.model(x)

"""
	def __init__(self):
		super(Net, self).__init__()
		self.hidden1 = torch.nn.Linear(num_input_components, 8192)
		self.hidden2 = torch.nn.Linear(8192, 1024) 
		self.hidden3 = torch.nn.Linear(1024, 128)
		self.predict = torch.nn.Linear(128, num_output_components)

	def forward(self, x):
		x = torch.tanh(self.hidden1(x))		
		x = torch.tanh(self.hidden2(x))
		x = torch.tanh(self.hidden3(x))
		x = self.predict(x)    # linear output
		return x
"""



class float_image:

	def __init__(self, img):
		self.img = img

class image_type:

	def __init__(self, img_type, float_img):
		self.img_type = img_type
		self.float_img = float_img




if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
	net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
	print("loaded file successfully")
else:
	print("training...")





	all_train_files = []

	file_count = 0

	path = 'training_set/cats/'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		#if file_count >= 100:
		#	break;

		print(path + f)
		img = cv2.imread(path + f)
		
		if (img is None) == False:

			img = img.astype(np.float32)
			res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
			flat_file = res / 255.0
			flat_file = np.transpose(flat_file, (2, 0, 1))
			all_train_files.append(image_type(0, flat_file))

		else:
			print("image read failure")

	file_count = 0




	path = 'training_set/dogs/'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		#if file_count >= 100:
		#	break;

		print(path + f)
		img = cv2.imread(path + f)
		
		if (img is None) == False:

			img = img.astype(np.float32)
			res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
			flat_file = res / 255.0
			flat_file = np.transpose(flat_file, (2, 0, 1))
			all_train_files.append(image_type(1, flat_file))

		else:
			print("image read failure")



	


	net = Net(num_channels, num_output_components, len(all_train_files))
	optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
	loss_func = torch.nn.MSELoss()

	batch = np.zeros((len(all_train_files), num_channels, img_width, img_width), dtype=np.float32)
	ground_truth = np.zeros((len(all_train_files), num_output_components), dtype=np.float32)
	
	for epoch in range(num_epochs):
		
		random.shuffle(all_train_files)

		count = 0

		for i in all_train_files:

			batch[count] = i.float_img
		
			if i.img_type == 0: # cat

				ground_truth[count][0] = 1
				ground_truth[count][1] = 0
			
			elif i.img_type == 1: # dog
				
				ground_truth[count][0] = 0
				ground_truth[count][1] = 1

			count = count + 1
	
		x = Variable(torch.from_numpy(batch))
		y = Variable(torch.from_numpy(ground_truth))

		prediction = net(x)	 
		loss = loss_func(prediction, y)

		print(epoch, loss)

		optimizer.zero_grad()	 # clear gradients for next train
		loss.backward()		 # backpropagation, compute gradients
		optimizer.step()		# apply gradients



	#torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')



path = 'test_set/cats/'
filenames = next(os.walk(path))[2]

cat_count = 0
total_count = 0

for f in filenames:

	img = cv2.imread(path + f)
			
	if (img is None) == False:

		img = img.astype(np.float32)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0
		flat_file = np.transpose(flat_file, (2, 0, 1))

	else:

		print("image read failure")
		continue

	batch = torch.zeros((1, num_channels, img_width, img_width), dtype=torch.float32)
	batch[0] = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction[0][0] > prediction[0][1]:
		cat_count = cat_count + 1

	total_count = total_count + 1

print(cat_count / total_count)
print(total_count)





path = 'test_set/dogs/'
filenames = next(os.walk(path))[2]

dog_count = 0
total_count = 0

for f in filenames:

	img = cv2.imread(path + f)
			
	if (img is None) == False:

		img = img.astype(np.float32)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0
		flat_file = np.transpose(flat_file, (2, 0, 1))

	else:

		print("image read failure")
		continue

	batch = torch.zeros((1, num_channels, img_width, img_width), dtype=torch.float32)
	batch[0] = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction[0][0] < prediction[0][1]:
		dog_count = dog_count + 1

	total_count = total_count + 1

print(dog_count / total_count)
print(total_count)

cudawarped · July 13, 2023, 5:01am

I would use a framework like fastai with models from timm.

IIRC you should probably be able to get much better results with “3 lines of code”. For more info check the first lesson in the course for details

berak · July 13, 2023, 7:34am

looks like a leftover from the previous regression attempt.
for classification, you want some kind of softmax as last layer

sjhalayka · July 13, 2023, 4:13pm

Hi berak:

Yes, the code is a product of my experimentation with PyTorch. I am a raw beginner, so I might be doing something counterproductive. I see why you’d use the Softmax, it gives a set of probabilities that sum to 1. Reminds me of information theory. Thanks for the tip! Much appreciated.

Shawn

sjhalayka · July 13, 2023, 4:14pm

Thank you for the link.

sjhalayka · July 22, 2023, 1:24am

How do I interpret the result of Softmax? How does it affect the one-hot prediction?

Thanks for any help that you can provide.

crackwitz · July 22, 2023, 9:09am

may I recommend taking this to a forum for PyTorch or general deep learning?

berak · July 22, 2023, 10:22am

i recommend cs231n in general

sjhalayka · July 22, 2023, 11:42pm

Much appreciated! I’ll move this to another forum. Thanks for all of the help though.

sjhalayka · August 1, 2023, 3:27pm

Sorry, one last set of questions:

Would you say that a neural network performs non-linear regression? I mean, when it’s not a classification problem, it’s a regression problem, right?

Topic		Replies	Views
Correct classification results but strange probabilities output C++ dnn	1	245	August 2, 2021
Personal Opinions on Deep Learning Libraries dnn , tensorflow , pytorch , programming	8	1061	April 12, 2024
Action recognize with NN (2plus1d_34) C++ dnn	2	340	April 26, 2023
ML Algorithm for picking the best picture Python	2	131	July 19, 2024
Convert PyTorch YOLOv5s model to ONNX or tensorflow format from google colab dnn	1	2330	August 30, 2021

Underwhelming performance in neural network with PyTorch

Related topics