Detecting Image Orientation Using Reflections and CNNs | by Ved Prakash | May, 2024


In this blog post, we will walk through the process of creating a dataset from random images, augmenting them by reflecting against horizontal and vertical axes, and training a classifier to detect image orientation (horizontal or vertical). We will also cover saving these augmented images into organized folders for better dataset management.

Make sure you have the following Python libraries installed:
– OpenCV
– NumPy
– TensorFlow/Keras
– scikit-learn

You can install them using pip:

pip install opencv-python-headless numpy tensorflow scikit-learn

We start by loading a set of random images from a directory. These images will serve as the base for our dataset.

import cv2
import numpy as np
import os
def load_images(image_folder):
    """Load every readable image found directly inside ``image_folder``.

    Entries are visited in sorted filename order. ``os.listdir`` alone returns
    names in arbitrary order, which would make the position of each image in
    the returned list (and any index-based file names derived from it) vary
    between runs.

    Args:
        image_folder: Path of the directory to scan (not recursive).

    Returns:
        A list of the images returned by ``cv2.imread``. Entries for which
        ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    for filename in sorted(os.listdir(image_folder)):
        img = cv2.imread(os.path.join(image_folder, filename))
        if img is not None:
            images.append(img)
    return images
# Base dataset: every image that load_images can read from the 'images' folder.
original_images = load_images('images')

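For each image, we keep the original and generate three new images by reflecting it horizontally, vertically, and both, giving four images per input. We also label these images based on their orientation. Because a reflection never changes an image's width or height, every reflected copy receives the same label as its original.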

def reflect_images(images):
    """Return each image together with its three reflections, plus labels.

    For every input image four entries are produced, in this order: the
    original, the horizontal reflection, the vertical reflection, and the
    reflection about both axes.

    Args:
        images: Iterable of image arrays whose ``shape`` starts with
            ``(height, width)``.

    Returns:
        A tuple ``(augmented_images, labels)`` of NumPy arrays. ``labels``
        holds 0 (horizontal: width > height) or 1 (vertical: otherwise, which
        includes square images) for each entry of ``augmented_images``.
        When all images share one shape, ``augmented_images`` is a regular
        stacked array; otherwise it is a 1-D object array of image arrays.
    """
    augmented_images = []
    labels = []
    for img in images:
        # A flip never changes width or height, so the original and all of
        # its reflections share a single label.
        label = 0 if img.shape[1] > img.shape[0] else 1
        augmented_images.append(img)
        # Flip codes: 1 = horizontal, 0 = vertical, -1 = both axes.
        augmented_images.extend(cv2.flip(img, code) for code in (1, 0, -1))
        labels.extend([label] * 4)

    # np.array() on images of differing sizes is a ragged construction, which
    # recent NumPy versions reject. Stack only when every shape matches and
    # otherwise fall back to an explicit object array.
    if len({im.shape for im in augmented_images}) <= 1:
        image_array = np.array(augmented_images)
    else:
        image_array = np.empty(len(augmented_images), dtype=object)
        for position, im in enumerate(augmented_images):
            image_array[position] = im
    return image_array, np.array(labels)
# Expand the base images into the augmented set and its matching 0/1 labels.
augmented_images, labels = reflect_images(original_images)

We save each augmented image in separate folders based on its orientation (horizontal or vertical). This helps in organizing our dataset for future use.

def ensure_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    ``exist_ok=True`` makes the call safe to repeat and avoids the race in a
    separate "check, then create" sequence, where another process could
    create the directory between the two steps.

    Args:
        directory: Path of the directory that must exist afterwards.
    """
    os.makedirs(directory, exist_ok=True)
def save_image(image, folder, image_name):
    """Write ``image`` to ``folder/image_name``, creating ``folder`` if needed.

    Args:
        image: Image array to pass to ``cv2.imwrite``.
        folder: Destination directory.
        image_name: File name, including the extension that selects the
            output format.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    ensure_dir(folder)
    image_path = os.path.join(folder, image_name)
    # cv2.imwrite signals failure through its return value rather than an
    # exception; surface it so a missing dataset file is not discovered later.
    if not cv2.imwrite(image_path, image):
        raise OSError(f'Could not write image to {image_path}')
def reflect_and_save_images(images, base_save_path='augmented_images'):
    """Augment images by reflection, label them, and save them by label.

    For every input image four variants are produced (original, horizontal
    reflection, vertical reflection, both). Each variant is written as a JPEG
    named ``image_<idx>_<variant>.jpg`` into ``<base_save_path>/horizontal``
    or ``<base_save_path>/vertical`` according to its label.

    Args:
        images: Iterable of image arrays whose ``shape`` starts with
            ``(height, width)``.
        base_save_path: Root directory under which the ``horizontal`` and
            ``vertical`` sub-folders are created.

    Returns:
        A tuple ``(augmented_images, labels)`` of NumPy arrays. ``labels``
        holds 0 (horizontal: width > height) or 1 (vertical: otherwise).
        When all images share one shape, ``augmented_images`` is a regular
        stacked array; otherwise it is a 1-D object array of image arrays.
    """
    horizontal_folder = os.path.join(base_save_path, 'horizontal')
    vertical_folder = os.path.join(base_save_path, 'vertical')

    augmented_images = []
    labels = []
    for idx, img in enumerate(images):
        # A flip never changes width or height, so one label and one target
        # folder apply to the original and all of its reflections.
        label = 0 if img.shape[1] > img.shape[0] else 1
        folder = horizontal_folder if label == 0 else vertical_folder

        # Flip codes: 1 = horizontal, 0 = vertical, -1 = both axes.
        variants = (
            ('original', img),
            ('horiz_reflect', cv2.flip(img, 1)),
            ('vert_reflect', cv2.flip(img, 0)),
            ('both_reflect', cv2.flip(img, -1)),
        )
        for suffix, variant in variants:
            augmented_images.append(variant)
            labels.append(label)
            save_image(variant, folder, f'image_{idx}_{suffix}.jpg')

    # np.array() on images of differing sizes is a ragged construction, which
    # recent NumPy versions reject. Stack only when every shape matches and
    # otherwise fall back to an explicit object array.
    if len({im.shape for im in augmented_images}) <= 1:
        image_array = np.array(augmented_images)
    else:
        image_array = np.empty(len(augmented_images), dtype=object)
        for position, im in enumerate(augmented_images):
            image_array[position] = im
    return image_array, np.array(labels)
# Example usage
# Reload the source images, then rebuild the augmented set; this variant also
# writes every image under augmented_images/horizontal or
# augmented_images/vertical (the default base_save_path).
original_images = load_images('images')
augmented_images, labels = reflect_and_save_images(original_images)

Resize and normalize the images to prepare them for training the classifier.

def preprocess_images(images, target_size=(128, 128)):
    """Resize every image to ``target_size`` and scale pixel values by 1/255.

    Args:
        images: Iterable of image arrays accepted by ``cv2.resize``.
        target_size: Size passed to ``cv2.resize`` for every image.

    Returns:
        A NumPy array holding all resized images divided by 255.0
        (for 8-bit input this puts values in the range 0 to 1).
    """
    # Bring all images to one common size so they can be stacked.
    resized_batch = [cv2.resize(picture, target_size) for picture in images]
    stacked = np.array(resized_batch)
    return stacked / 255.0
processed_images = preprocess_images(augmented_images)

Step 5: Split Data into Training and Testing Sets

from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing (test_size=0.2); random_state=42
# fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(processed_images, labels, test_size=0.2, random_state=42)

We build a simple convolutional neural network (CNN) to classify the images as horizontal or vertical.

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
# Small CNN for binary classification: two convolution + max-pooling stages
# followed by a dense head. The single sigmoid output is trained against the
# 0 (horizontal) / 1 (vertical) labels.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# The held-out test split doubles as validation data during training.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
# Persist the trained network: the single-image inference example loads
# 'orientation_model.h5', which would not exist unless it is saved here.
model.save('orientation_model.h5')

Evaluate the trained model on the test set to check its accuracy.

# evaluate() yields the loss followed by the metrics given to compile(),
# which here is just accuracy; report it as a percentage.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Create a function to detect the orientation of a single image using the trained model.

def load_trained_model(model_path):
    """Load and return the Keras model stored at ``model_path``.

    Args:
        model_path: Path of the saved model file.

    Returns:
        The object returned by ``tf.keras.models.load_model``.
    """
    return tf.keras.models.load_model(model_path)
def preprocess_image(image, target_size=(128, 128)):
    """Prepare one image for prediction: resize, scale by 1/255, add batch axis.

    Args:
        image: Image array accepted by ``cv2.resize``.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        The resized image divided by 255.0, with a new leading axis of
        length 1 so it forms a batch containing a single image.
    """
    scaled = cv2.resize(image, target_size) / 255.0
    # Prepend the batch dimension.
    return scaled[np.newaxis, ...]
def detect_orientation(image, model):
    """Classify one image as ``'horizontal'`` or ``'vertical'``.

    Args:
        image: Image array to classify, e.g. the result of ``cv2.imread``.
        model: Trained model exposing ``predict``; its first output value is
            read as the score for the "vertical" class.

    Returns:
        ``'horizontal'`` when the score is below 0.5, else ``'vertical'``.

    Raises:
        ValueError: If ``image`` is ``None`` (for example because the image
            file could not be read).
    """
    if image is None:
        raise ValueError('image is None; check that the image file exists and is readable')
    preprocessed_img = preprocess_image(image)
    prediction = model.predict(preprocessed_img)
    # predict() returns an array for the whole batch; take the single score
    # explicitly instead of relying on the truthiness of a one-element array.
    score = float(np.asarray(prediction).reshape(-1)[0])
    return 'horizontal' if score < 0.5 else 'vertical'
# Example usage:
# Load a previously saved model from disk, read one image, and print the
# predicted orientation.
model_path = 'orientation_model.h5'
model = load_trained_model(model_path)
# Placeholder path: replace with a real image file before running.
image_path = 'path_to_your_image.jpg'
# NOTE(review): load_images guards against cv2.imread returning None, so an
# unreadable path here presumably yields None as well; confirm and handle.
image = cv2.imread(image_path)
orientation = detect_orientation(image, model)
print(f"The orientation of the image is: {orientation}")

In this blog post, we’ve walked through the process of creating a dataset by reflecting images, labeling them by orientation, and organizing them into folders. We then trained a CNN to classify image orientation and evaluated its performance. Finally, we created a function to detect the orientation of a single image using the trained model. This comprehensive approach helps in managing and utilizing image datasets effectively for machine learning tasks.



Source link

Be the first to comment

Leave a Reply

Your email address will not be published.


*