Category Archives: Image Processing

Image Blending using Image Pyramids

In the previous blog, we discussed image pyramids, and how to construct a Laplacian pyramid from the Gaussian. In this blog, we will discuss how image pyramids can be used for image blending. This produces more visually appealing results as compared to different blending methods we discussed until now. Below are the steps for image blending using image pyramids.

Steps:

Load the two images and the mask.
Find the Gaussian pyramid for the two images and the mask.
From the Gaussian pyramid, calculate the Laplacian pyramid for the two images as explained in the previous blog.
Now, blend each level of the Laplacian pyramid according to the mask image of the corresponding Gaussian level.
From this blended Laplacian pyramid, reconstruct the original image. This is done by expanding the level and adding it to the below level as shown in the figure below. Here LS0, LS1, LS2, and LS3 are the levels of the blended Laplacian pyramid obtained in step 4.

Now, let’s implement the above steps using OpenCV-Python. Suppose we want to blend the two images corresponding to the mask as shown below.

So, we will clip the jet image from the second image and blend it to the first image. Below is the code for the steps explained above.

import cv2
import numpy as np


# Step-2
# Find the Gaussian pyramid of the two images and the mask
def gaussian_pyramid(img, num_levels):
    lower = img.copy()
    gaussian_pyr = [lower]
    for i in range(num_levels):
        lower = cv2.pyrDown(lower)
        gaussian_pyr.append(np.float32(lower))
    return gaussian_pyr

# Step-3
# Then calculate the Laplacian pyramid
def laplacian_pyramid(gaussian_pyr):
    laplacian_top = gaussian_pyr[-1]
    num_levels = len(gaussian_pyr) - 1
    
    laplacian_pyr = [laplacian_top]
    for i in range(num_levels,0,-1):
        size = (gaussian_pyr[i - 1].shape[1], gaussian_pyr[i - 1].shape[0])
        gaussian_expanded = cv2.pyrUp(gaussian_pyr[i], dstsize=size)
        laplacian = np.subtract(gaussian_pyr[i-1], gaussian_expanded)
        laplacian_pyr.append(laplacian)
    return laplacian_pyr

# Step-4
# Now blend the two images wrt. the mask
def blend(laplacian_A,laplacian_B,mask_pyr):
    LS = []
    for la,lb,mask in zip(laplacian_A,laplacian_B,mask_pyr):
        ls = lb * mask + la * (1.0 - mask)
        LS.append(ls)
    return LS

# Step-5
# Reconstruct the original image
def reconstruct(laplacian_pyr):
    laplacian_top = laplacian_pyr[0]
    laplacian_lst = [laplacian_top]
    num_levels = len(laplacian_pyr) - 1
    for i in range(num_levels):
        size = (laplacian_pyr[i + 1].shape[1], laplacian_pyr[i + 1].shape[0])
        laplacian_expanded = cv2.pyrUp(laplacian_top, dstsize=size)
        laplacian_top = cv2.add(laplacian_pyr[i+1], laplacian_expanded)
        laplacian_lst.append(laplacian_top)
    return laplacian_lst

# Now let's call all these functions
if __name__ == '__main__':
    # Step-1
    # Load the two images
    img1 = cv2.imread('D:/downloads/cloud1.jpg')
    img1 = cv2.resize(img1, (1800, 1000))
    img2 = cv2.imread('D:/downloads/jet.jpg')
    img2 = cv2.resize(img2, (1800, 1000))

    # Create the mask
    mask = np.zeros((1000,1800,3), dtype='float32')
    mask[250:500,640:1440,:] = (1,1,1)
    
    num_levels = 7
    
    # For image-1, calculate Gaussian and Laplacian
    gaussian_pyr_1 = gaussian_pyramid(img1, num_levels)
    laplacian_pyr_1 = laplacian_pyramid(gaussian_pyr_1)
    # For image-2, calculate Gaussian and Laplacian
    gaussian_pyr_2 = gaussian_pyramid(img2, num_levels)
    laplacian_pyr_2 = laplacian_pyramid(gaussian_pyr_2)
    # Calculate the Gaussian pyramid for the mask image and reverse it.
    mask_pyr_final = gaussian_pyramid(mask, num_levels)
    mask_pyr_final.reverse()
    # Blend the images
    add_laplace = blend(laplacian_pyr_1,laplacian_pyr_2,mask_pyr_final)
    # Reconstruct the images
    final  = reconstruct(add_laplace)
    # Save the final image to the disk
    cv2.imwrite('D:/downloads/pp2.jpg',final[num_levels])

import cv2

import numpy as np

# Step-2

# Find the Gaussian pyramid of the two images and the mask

def gaussian_pyramid(img, num_levels):

lower = img.copy()

gaussian_pyr = [lower]

for i in range(num_levels):

lower = cv2.pyrDown(lower)

gaussian_pyr.append(np.float32(lower))

return gaussian_pyr

# Step-3

# Then calculate the Laplacian pyramid

def laplacian_pyramid(gaussian_pyr):

laplacian_top = gaussian_pyr[-1]

num_levels = len(gaussian_pyr) - 1

laplacian_pyr = [laplacian_top]

for i in range(num_levels,0,-1):

size = (gaussian_pyr[i - 1].shape[1], gaussian_pyr[i - 1].shape[0])

gaussian_expanded = cv2.pyrUp(gaussian_pyr[i], dstsize=size)

laplacian = np.subtract(gaussian_pyr[i-1], gaussian_expanded)

laplacian_pyr.append(laplacian)

return laplacian_pyr

# Step-4

# Now blend the two images wrt. the mask

def blend(laplacian_A,laplacian_B,mask_pyr):

LS = []

for la,lb,mask in zip(laplacian_A,laplacian_B,mask_pyr):

ls = lb * mask + la * (1.0 - mask)

LS.append(ls)

return LS

# Step-5

# Reconstruct the original image

def reconstruct(laplacian_pyr):

laplacian_top = laplacian_pyr[0]

laplacian_lst = [laplacian_top]

num_levels = len(laplacian_pyr) - 1

for i in range(num_levels):

size = (laplacian_pyr[i + 1].shape[1], laplacian_pyr[i + 1].shape[0])

laplacian_expanded = cv2.pyrUp(laplacian_top, dstsize=size)

laplacian_top = cv2.add(laplacian_pyr[i+1], laplacian_expanded)

laplacian_lst.append(laplacian_top)

return laplacian_lst

# Now let's call all these functions

if __name__ == '__main__':

# Step-1

# Load the two images

img1 = cv2.imread('D:/downloads/cloud1.jpg')

img1 = cv2.resize(img1, (1800, 1000))

img2 = cv2.imread('D:/downloads/jet.jpg')

img2 = cv2.resize(img2, (1800, 1000))

# Create the mask

mask = np.zeros((1000,1800,3), dtype='float32')

mask[250:500,640:1440,:] = (1,1,1)

num_levels = 7

# For image-1, calculate Gaussian and Laplacian

gaussian_pyr_1 = gaussian_pyramid(img1, num_levels)

laplacian_pyr_1 = laplacian_pyramid(gaussian_pyr_1)

# For image-2, calculate Gaussian and Laplacian

gaussian_pyr_2 = gaussian_pyramid(img2, num_levels)

laplacian_pyr_2 = laplacian_pyramid(gaussian_pyr_2)

# Calculate the Gaussian pyramid for the mask image and reverse it.

mask_pyr_final = gaussian_pyramid(mask, num_levels)

mask_pyr_final.reverse()

# Blend the images

add_laplace = blend(laplacian_pyr_1,laplacian_pyr_2,mask_pyr_final)

# Reconstruct the images

final = reconstruct(add_laplace)

# Save the final image to the disk

cv2.imwrite('D:/downloads/pp2.jpg',final[num_levels])

The blended output is shown below

Still, there is some amount of white gaze around the jet. Later, we will discuss gradient-domain blending methods which improve the result even more. Now, compare this image with a simple copy and paste operation and see the difference.

You can do a side-by-side blending also. In the next blog, we will discuss how to perform image enhancement and image compression using the Laplacian pyramids. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Earth Mover’s Distance (EMD)

Leave a reply

In the previous blogs, we discussed various histogram comparison methods for image retrieval. Most of the methods we discussed were highly sensitive to blurring, local deformation or color shifts. etc. In this blog, we will discuss a more robust method for comparing the distributions known as Wasserstein metric or Earthmover’s distance. So, let’s get started.

In a formal sense, the notion of distance is more suitable to single elements as compared to distributions. For instance, if I say what is the distance from your house to the neighbor’s house? Most of you will come up with a number say x meters but where is this distance came from. Is it the distance between the centers of both the houses or the nearest distance between the two houses or any other form. Thus, the definition of distance becomes less apparent when we are dealing with distributions or sets of elements rather than single elements. So, in this blog, we will discuss the Earthmover’s distance also known as Wasserstein metric which is more suitable for finding distance or similarity between the distributions. This concept was first introduced by Gaspard Monge in 1781, in the context of transportation theory (Wikipedia). Let’s discuss the main concept behind this.

Let’s say we have two distributions A and B whose distance we want to calculate. This assumes one distribution to be a mass of earth or a pile of dirt and the other to be a collection of holes in that same space. The least amount of work done to fill the holes completely gives us the EMD. Filling the holes result in converting one distribution to another. The lesser the distance, the more similar will be the distributions and vice-versa.

Mathematically, we construct a matrix, say M, whose each element denotes the amount of weight transferred or matched between the distributions. For instance, M_ij denotes the weight transferred from the ith position in the first distribution to the jth position in the second distribution. The work done will be the weight transferred multiplied by the distance i.e. M_ij*d_ij. Thus the EMD is given by

There is an important terminology used in EMD known as a signature which is nothing but a way of representing any distribution. For instance, in this, we divide any distribution into clusters which are represented by the mean (or any other statistics) and the fraction of the distribution in that cluster. This representation by a set of clusters is called the signature.

Now, let’s see how to implement this. OpenCV provides a builtin function for calculating EMD as shown below.

cv2.EMD(signature1, signature2, distType[, cost[, lowerBound]])

1	cv2.EMD(signature1, signature2, distType[, cost[, lowerBound]])

Here, the signature is a matrix of size equal to the (total number of pixels x number of dimensions + 1). For greyscale, we have 2 dimensions, width, and height while for color image 3. Thus for greyscale, each row of signature represents (pixel value and the coordinates). To calculate the distance, you can use any metric such as L1 and L2 as cv2.DIST_L1 etc. You can pass the cost matrix using the cost argument. lower-bound is the distance between the center of mass of two signatures. This outputs the flow matrix (M discussed above), lower bound and the work.

Now, let’s take an example to understand this. First, you need to change the images to their corresponding signatures as shown below.

# image to signature for color image
def img_to_sig(img):
    sig = np.empty((img.size, 4), dtype=np.float32)
    idx = 0
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            for k in range(img.shape[2]):
                sig[idx] = np.array([img[i,j,k], i, j,k])
                idx += 1
    return sig

# image to signature for color image

def img_to_sig(img):

sig = np.empty((img.size, 4), dtype=np.float32)

idx = 0

for i in range(img.shape[0]):

for j in range(img.shape[1]):

for k in range(img.shape[2]):

sig[idx] = np.array([img[i,j,k], i, j,k])

idx += 1

return sig

and then calculate the EMD as shown below.

import cv2
import numpy as np
img1 = cv2.imread('D:/downloads/app1.jpg')
img2 = cv2.imread('D:/downloads/app2.jpg')
sig1 = img_to_sig(img1)
sig2 = img_to_sig(img2)
distance, lowerbound, flow_matrix = cv2.EMD(sig1, sig2, cv2.DIST_L1,lowerBound=0)

import cv2

import numpy as np

img1 = cv2.imread('D:/downloads/app1.jpg')

img2 = cv2.imread('D:/downloads/app2.jpg')

sig1 = img_to_sig(img1)

sig2 = img_to_sig(img2)

distance, lowerbound, flow_matrix = cv2.EMD(sig1, sig2, cv2.DIST_L1,lowerBound=0)

Below is the output we got

Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Comparing Histograms using OpenCV-Python

1 Reply

In the previous blogs, we discussed a lot about histograms. We learned histogram equalization, making a histogram to match a specified histogram, back project a histogram to find regions of interest and even used a histogram for performing image thresholding. In this blog, we will learn how to compare the histograms for the notion of similarity. This comparison is possible because we can classify a number of things around us based on color. We will learn various single number evaluation metrics that tell how well two histograms match with each other. So, let’s get started.

The histogram comparison methods can be classified into two categories

Bin-to-Bin comparison
Cross-bin comparison

Bin-to-Bin comparison methods include L1, L2 norm for calculating the bin distances or bin intersection, etc. These methods assume that the histogram domains are aligned but this condition is easily violated in most of the cases due to change in lighting conditions, quantization, etc. Cross bin comparison methods are more robust and discriminative but this can be computationally expensive. To circumvent this, one can reduce the cross bin comparison to bin-to-bin. Cross bin comparison methods include Earthmoving distance (EMD), quadratic form distances (taking into account the bin similarity matrix), etc.

OpenCV provides a builtin function for comparing the histograms as shown below.

cv2.compareHist(H1, H2, method)

1	cv2.compareHist(H1, H2, method)

Here, H1 and H2 are the histograms we want to compare and the “method” argument specifies the comparison method. OpenCV provides several built-in methods for histogram comparison as shown below

HISTCMP_CORREL: Correlation
HISTCMP _CHISQR: Chi-Square
HISTCMP _CHISQR_ALT: Alternative Chi-Square
HISTCMP _INTERSECT: Intersection
HISTCMP _BHATTACHARYYA: Bhattacharyya distance
HISTCMP _HELLINGER: Synonym for CV_COMP_BHATTACHARYYA
HISTCMP _KL_DIV: Kullback-Leibler divergence

For the Correlation and Intersection methods, the higher the metric, the more accurate the match. While for chi-square and Bhattacharyya, the lower metric value represents a more accurate match. Now, let’s take an example to understand how to use this function. Here, we will compare the two images as shown below.

Steps:

Load the images
Convert it into any suitable color model
Calculate the image histogram (2D or 3D histograms are better) and normalize it
Compare the histograms using the above function

import cv2

# Load the images
img1 = cv2.imread('D:/downloads/app1.jpg')
img2 = cv2.imread('D:/downloads/app2.jpg')

# Convert it to HSV
img1_hsv = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
img2_hsv = cv2.cvtColor(img2, cv2.COLOR_BGR2HSV)

# Calculate the histogram and normalize it
hist_img1 = cv2.calcHist([img1_hsv], [0,1], None, [180,256], [0,180,0,256])
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
hist_img2 = cv2.calcHist([img2_hsv], [0,1], None, [180,256], [0,180,0,256])
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);

# find the metric value
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_BHATTACHARYYA)

import cv2

# Load the images

img1 = cv2.imread('D:/downloads/app1.jpg')

img2 = cv2.imread('D:/downloads/app2.jpg')

# Convert it to HSV

img1_hsv = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)

img2_hsv = cv2.cvtColor(img2, cv2.COLOR_BGR2HSV)

# Calculate the histogram and normalize it

hist_img1 = cv2.calcHist([img1_hsv], [0,1], None, [180,256], [0,180,0,256])

cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);

hist_img2 = cv2.calcHist([img2_hsv], [0,1], None, [180,256], [0,180,0,256])

cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);

# find the metric value

metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_BHATTACHARYYA)

The metric value comes out to be around 0.99 which seems to be pretty good. Try changing the bin sizes and the comparison methods and observe the change. In the next blog, we will discuss Earthmoving distance (EMD), a cross bin comparison method that is more robust as compared to these methods. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Add borders to the image using OpenCV-Python

Leave a reply

In this blog, we will learn how to add different borders to the image using OpenCV-Python. Adding border doesn’t only make the image looks stylish but this is also useful in many image processing tasks such as image interpolation, morphological operations, edge detection, etc. OpenCV provides different border styles and in this blog, we will explore these. Below is the inbuilt function provided by OpenCV for this.

cv2.copyMakeBorder(src, top, bottom, left, right, borderType[, value])

1	cv2.copyMakeBorder(src, top, bottom, left, right, borderType[, value])

Here, src is the input image and top, left, right, and bottom specifies how many pixels to add in each direction. “borderType” specifies what type of border to add. Below are the types available in OpenCV.

cv2.BORDER_REFLECT: this reflects the border elements such as fedcba|abcdefgh|hgfedcb
cv2.BORDER_REFLECT_101: this reflects leaving the border pixel such as gfedcb|abcdefgh|gfedcba
cv2.BORDER_REPLICATE: Border pixel will be replicated such as aaaaaa|abcdefgh|hhhhhhh
cv2.BORDER_WRAP: this reflects the pixel from the opposite boundary as cdefgh|abcdefgh|abcdefg
cv2.BORDER_CONSTANT: this adds a constant border whose value is given by the “value” argument.

Now, let’s take an example to illustrate this. Here, I have created a trackbar that lets us understand this clearly.

import cv2
# Load the image
img = cv2.imread('D:/downloads/open1.PNG')
# create window
title_window = "Border type demo"
cv2.namedWindow(title_window)

border_dic = {0: 'Constant', 1: 'Replicate', 2: 'Reflect', 3: 'Wrap', 4: 'Reflect_101'}

# define callback function
def bordertype(val):
    # get current trackbar position
    border = cv2.getTrackbarPos('Bordertype', title_window)
    left = cv2.getTrackbarPos('left', title_window)
    right = cv2.getTrackbarPos('right', title_window)
    top = cv2.getTrackbarPos('top', title_window)
    bottom = cv2.getTrackbarPos('bottom', title_window)
    val = cv2.getTrackbarPos('value', title_window)
    # Add border
    out = cv2.copyMakeBorder(img,top,bottom,left,right,border,value=[val,val,val])
    # display text
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(out,'Border:{}'.format(border_dic[border]),(10,40), font, 0.5,(0,255,255),1,cv2.LINE_AA)
    cv2.imshow(title_window, out)

# create trackbars
cv2.createTrackbar('Bordertype', title_window , 0, 4, bordertype)
cv2.createTrackbar('left', title_window , 0, 25, bordertype)
cv2.createTrackbar('right', title_window , 0, 25, bordertype)
cv2.createTrackbar('top', title_window , 0, 25, bordertype)
cv2.createTrackbar('bottom', title_window , 0, 25, bordertype)
cv2.createTrackbar('value', title_window , 0, 255, bordertype)

# Instantiate the function
bordertype(0)
cv2.waitKey()

import cv2

# Load the image

img = cv2.imread('D:/downloads/open1.PNG')

# create window

title_window = "Border type demo"

cv2.namedWindow(title_window)

border_dic = {0: 'Constant', 1: 'Replicate', 2: 'Reflect', 3: 'Wrap', 4: 'Reflect_101'}

# define callback function

def bordertype(val):

# get current trackbar position

border = cv2.getTrackbarPos('Bordertype', title_window)

left = cv2.getTrackbarPos('left', title_window)

right = cv2.getTrackbarPos('right', title_window)

top = cv2.getTrackbarPos('top', title_window)

bottom = cv2.getTrackbarPos('bottom', title_window)

val = cv2.getTrackbarPos('value', title_window)

# Add border

out = cv2.copyMakeBorder(img,top,bottom,left,right,border,value=[val,val,val])

# display text

font = cv2.FONT_HERSHEY_SIMPLEX

cv2.putText(out,'Border:{}'.format(border_dic[border]),(10,40), font, 0.5,(0,255,255),1,cv2.LINE_AA)

cv2.imshow(title_window, out)

# create trackbars

cv2.createTrackbar('Bordertype', title_window , 0, 4, bordertype)

cv2.createTrackbar('left', title_window , 0, 25, bordertype)

cv2.createTrackbar('right', title_window , 0, 25, bordertype)

cv2.createTrackbar('top', title_window , 0, 25, bordertype)

cv2.createTrackbar('bottom', title_window , 0, 25, bordertype)

cv2.createTrackbar('value', title_window , 0, 255, bordertype)

# Instantiate the function

bordertype(0)

cv2.waitKey()

Play around with these trackbars to get a feeling of different border types. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Thresholding using cv2.inRange() function

Leave a reply

In the previous blogs, we discussed various thresholding methods such as Otsu, adaptive, BHT, etc. In this blog, we will learn how to segment out a particular region or color from an image. This is naively equivalent to multiple thresholding where we assign a particular value to the region falling in between the two thresholds. Remaining region is assigned a different value. OpenCV provides an inbuilt function for this as shown below

cv2.inRange(src, lowerb, upperb)

1	cv2.inRange(src, lowerb, upperb)

Here, src is the input image. ‘lowerb’ and ‘upperb’ denotes the lower and upper boundary of the threshold region. A pixel is set to 255 if it lies within the boundaries specified otherwise set to 0. This way it returns the thresholded image.

A nice way to understand any method is to play with the arguments and for that, trackbars come very handily. Let’s segment the image based on the color as any color (and its shades) mostly covers some range of intensity values. Thus for segmentation any color this function will be very useful. Below is the code where I have created trackbars to segment any color in a live webcam feed.

import cv2
import numpy as np
 
def nothing(x):
    pass

# Open the camera
cap = cv2.VideoCapture(0) 
 
# Create a window
cv2.namedWindow('image')
 
# create trackbars for color change
cv2.createTrackbar('lowH','image',0,179,nothing)
cv2.createTrackbar('highH','image',179,179,nothing)
 
cv2.createTrackbar('lowS','image',0,255,nothing)
cv2.createTrackbar('highS','image',255,255,nothing)
 
cv2.createTrackbar('lowV','image',0,255,nothing)
cv2.createTrackbar('highV','image',255,255,nothing)
 
while(True):
    ret, frame = cap.read()
 
    # get current positions of the trackbars
    ilowH = cv2.getTrackbarPos('lowH', 'image')
    ihighH = cv2.getTrackbarPos('highH', 'image')
    ilowS = cv2.getTrackbarPos('lowS', 'image')
    ihighS = cv2.getTrackbarPos('highS', 'image')
    ilowV = cv2.getTrackbarPos('lowV', 'image')
    ihighV = cv2.getTrackbarPos('highV', 'image')
    
    # convert color to hsv because it is easy to track colors in this color model
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_hsv = np.array([ilowH, ilowS, ilowV])
    higher_hsv = np.array([ihighH, ihighS, ihighV])
    # Apply the cv2.inrange method to create a mask
    mask = cv2.inRange(hsv, lower_hsv, higher_hsv)
    # Apply the mask on the image to extract the original color
    frame = cv2.bitwise_and(frame, frame, mask=mask)
    cv2.imshow('image', frame)
    # Press q to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

import cv2

import numpy as np

def nothing(x):

pass

# Open the camera

cap = cv2.VideoCapture(0)

# Create a window

cv2.namedWindow('image')

# create trackbars for color change

cv2.createTrackbar('lowH','image',0,179,nothing)

cv2.createTrackbar('highH','image',179,179,nothing)

cv2.createTrackbar('lowS','image',0,255,nothing)

cv2.createTrackbar('highS','image',255,255,nothing)

cv2.createTrackbar('lowV','image',0,255,nothing)

cv2.createTrackbar('highV','image',255,255,nothing)

while(True):

ret, frame = cap.read()

# get current positions of the trackbars

ilowH = cv2.getTrackbarPos('lowH', 'image')

ihighH = cv2.getTrackbarPos('highH', 'image')

ilowS = cv2.getTrackbarPos('lowS', 'image')

ihighS = cv2.getTrackbarPos('highS', 'image')

ilowV = cv2.getTrackbarPos('lowV', 'image')

ihighV = cv2.getTrackbarPos('highV', 'image')

# convert color to hsv because it is easy to track colors in this color model

hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

lower_hsv = np.array([ilowH, ilowS, ilowV])

higher_hsv = np.array([ihighH, ihighS, ihighV])

# Apply the cv2.inrange method to create a mask

mask = cv2.inRange(hsv, lower_hsv, higher_hsv)

# Apply the mask on the image to extract the original color

frame = cv2.bitwise_and(frame, frame, mask=mask)

cv2.imshow('image', frame)

# Press q to exit

if cv2.waitKey(1) & 0xFF == ord('q'):

break

cap.release()

cv2.destroyAllWindows()

Play around with the trackbars to get a feel of cv2.inRange function. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Understanding Structuring Element with Trackbars

1 Reply

As we already discussed that the most important thing in morphological image processing is the Structuring element. This is used to probe an image for finding the region of interest. Different shapes and sizes of SE will produce a different result. Thus it becomes vital to have a good grasp on this for better understanding of morphological image processing. In this blog, let’s create trackbars which makes it really easy to visualize the result for different values. So, let’s get started.

Steps:

Load the image and create a window to attach trackbars
Specify the morphological operations and Structuring elements
Create the trackbars and the callback function

import cv2
import numpy as np

# Load the image
src = cv2.imread('D:/downloads/g1.png')
# Create a window to attach trackbars
title_window = 'Morphology Operations'
cv2.namedWindow(title_window)

# Specify the morphological operations and Structuring elements
morph_op_dic = {0: 'ERODE', 1: 'DILATE', 2: 'OPEN', 3: 'CLOSE', \
                4: 'GRADIENT', 5: 'TOPHAT', 6: 'BLACKHAT', 7: 'HITMISS'}
element_dic = {0: 'RECT', 1: 'CROSS', 2: 'ELLIPSE'}

# Define callback function
def morphology_operations(val):
    # get current trackbar position
    morph_operator = cv2.getTrackbarPos('Operator', title_window)
    kernel_size = cv2.getTrackbarPos('kernel', title_window)
    val_type = cv2.getTrackbarPos('SE', title_window)
    iter_num = cv2.getTrackbarPos('iterations', title_window)
    # create structuring element
    element = cv2.getStructuringElement(val_type, (2*kernel_size + 1, 2*kernel_size+1))
    # apply morphological operation
    dst = cv2.morphologyEx(src, morph_operator, element,iterations=iter_num)
    # Display the operation and structuring element
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(dst,'Op:{}, SE: {}'.format(morph_op_dic[morph_operator],element_dic[val_type]),(10,40), font, 0.5,(0,255,255),1,cv2.LINE_AA)
    cv2.imshow(title_window, dst)
    
# Create Trackbars
cv2.createTrackbar('Operator', title_window , 0, 7, morphology_operations)
cv2.createTrackbar('SE', title_window , 0, 2, morphology_operations)
cv2.createTrackbar('kernel', title_window , 0, 10, morphology_operations)
cv2.createTrackbar('iterations', title_window , 0, 10, morphology_operations)

# Instantiate the function
morphology_operations(0)
cv2.waitKey(0)

import cv2

import numpy as np

# Load the image

src = cv2.imread('D:/downloads/g1.png')

# Create a window to attach trackbars

title_window = 'Morphology Operations'

cv2.namedWindow(title_window)

# Specify the morphological operations and Structuring elements

morph_op_dic = {0: 'ERODE', 1: 'DILATE', 2: 'OPEN', 3: 'CLOSE', \

4: 'GRADIENT', 5: 'TOPHAT', 6: 'BLACKHAT', 7: 'HITMISS'}

element_dic = {0: 'RECT', 1: 'CROSS', 2: 'ELLIPSE'}

# Define callback function

def morphology_operations(val):

# get current trackbar position

morph_operator = cv2.getTrackbarPos('Operator', title_window)

kernel_size = cv2.getTrackbarPos('kernel', title_window)

val_type = cv2.getTrackbarPos('SE', title_window)

iter_num = cv2.getTrackbarPos('iterations', title_window)

# create structuring element

element = cv2.getStructuringElement(val_type, (2*kernel_size + 1, 2*kernel_size+1))

# apply morphological operation

dst = cv2.morphologyEx(src, morph_operator, element,iterations=iter_num)

# Display the operation and structuring element

font = cv2.FONT_HERSHEY_SIMPLEX

cv2.putText(dst,'Op:{}, SE: {}'.format(morph_op_dic[morph_operator],element_dic[val_type]),(10,40), font, 0.5,(0,255,255),1,cv2.LINE_AA)

cv2.imshow(title_window, dst)

# Create Trackbars

cv2.createTrackbar('Operator', title_window , 0, 7, morphology_operations)

cv2.createTrackbar('SE', title_window , 0, 2, morphology_operations)

cv2.createTrackbar('kernel', title_window , 0, 10, morphology_operations)

cv2.createTrackbar('iterations', title_window , 0, 10, morphology_operations)

# Instantiate the function

morphology_operations(0)

cv2.waitKey(0)

This will produce the following output

Play with the trackbars to get a feel about the morphological operations. That’s all for this blog. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time

Thinning and Thickening

1 Reply

In the previous blog, we discussed Hit-or-Miss transformation, that is used for finding desired patterns in an image. In this blog, we will discuss various applications of Hit-or-miss transform such as thinning, thickening, etc. So, let’s get started.

Thinning

This is somewhat similar to erosion or opening operation that we discussed earlier. As clear from the name, this is used to thin the foreground region such that its extent and connectivity is preserved. Preserving extent means preserving the endpoints of a structure whereas connectivity can refer to either 4-connected or 8-connected. Thinning is mostly used for producing skeletons which serve as image descriptors, and for reducing the output of the edge detectors to a one-pixel thickness, etc.

There are various algorithms to implement the thinning operation such as

Zhang Suen fast parallel thinning algorithm
Non-max Suppression in Canny Edge Detector
Guo and Hall’s two sub-iteration parallel Thinning algorithm
Iterative algorithms using morphological operations such as hit-or-miss, opening and erosion, etc

In this blog, we will only discuss the last algorithm, rest we will discuss in the following blogs. So, let’s get started.

In this, we can implement thinning either using erosion and opening operations or by using hit-or-miss operation. Let’s first discuss thinning using erosion and opening. This can be expressed as the union of skeleton subsets where each subset is given by the following expression (A-Binary image and B-structuring element)

Here, n indicates the number of iterations of erosion. N is the last iterative step before A erodes to the empty set (stopping condition). Now, let’s discuss how to implement this using OpenCV-Python.

import cv2
import numpy as np

# Create an image with text on it
img = np.zeros((100,400),dtype='uint8')
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(img,'TheAILearner',(5,70), font, 2,(255),5,cv2.LINE_AA)
img1 = img.copy()

# Structuring Element
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
# Create an empty output image to hold values
thin = np.zeros(img.shape,dtype='uint8')

# Loop until erosion leads to an empty set
while (cv2.countNonZero(img1)!=0):
    # Erosion
    erode = cv2.erode(img1,kernel)
    # Opening on eroded image
    opening = cv2.morphologyEx(erode,cv2.MORPH_OPEN,kernel)
    # Subtract these two
    subset = erode - opening
    # Union of all previous sets
    thin = cv2.bitwise_or(subset,thin)
    # Set the eroded image for next iteration
    img1 = erode.copy()
    
cv2.imshow('original',img)
cv2.imshow('thinned',thin)
cv2.waitKey(0)

import cv2

import numpy as np

# Create an image with text on it

img = np.zeros((100,400),dtype='uint8')

font = cv2.FONT_HERSHEY_SIMPLEX

cv2.putText(img,'TheAILearner',(5,70), font, 2,(255),5,cv2.LINE_AA)

img1 = img.copy()

# Structuring Element

kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))

# Create an empty output image to hold values

thin = np.zeros(img.shape,dtype='uint8')

# Loop until erosion leads to an empty set

while (cv2.countNonZero(img1)!=0):

# Erosion

erode = cv2.erode(img1,kernel)

# Opening on eroded image

opening = cv2.morphologyEx(erode,cv2.MORPH_OPEN,kernel)

# Subtract these two

subset = erode - opening

# Union of all previous sets

thin = cv2.bitwise_or(subset,thin)

# Set the eroded image for next iteration

img1 = erode.copy()

cv2.imshow('original',img)

cv2.imshow('thinned',thin)

cv2.waitKey(0)

Now, let’s discuss thinning using hit-or-miss transform. Thinning of set A by SE B can be expressed in terms of hit-or-miss transform as

This means we remove all those pixels whose neighborhood exactly matches the pixels in the SE. Instead of applying this with a single structuring element, it is a common practice to implement it using a sequence of SE so as to produce symmetric results. This operation is mostly applied iteratively until no further changes occur.

Thickening

Thickening is the dual of thinning and thus is equivalent to applying the thinning operation on the background or on the complement of the set A.

In the next blog, we will discuss the remaining thinning algorithms in detail. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Hit-or-Miss Transform

Leave a reply

In this blog, we will discuss Hit-or-Miss transformation. This is basically used for shape detection or finding particular patterns in the given image. The shape or pattern to match has to be provided via the structuring element. This transformation can be easily implemented using the erosion operation. So, let’s get started.

Here, we use two structuring elements (say B1 and B2). In this, we ask a simple question of does B1 fits the object while, simultaneously, B2 misses the object, i.e. fits the background? In other words, we are interested only in those pixels whose neighborhood exactly matches B1 while not matching B2 at the same time. As we already discussed that erosion answers the question of whether the SE fits or not. Thus, Hit-or-Miss operation can be expressed in terms of erosion as

Here, we assume that both SE B1 and B2 don’t intersect otherwise this operation cannot be performed. Because both B1 and B2 are disjoint sets, we can express both in terms of a single structuring element. Doing so makes the operation more interpretable. Let’s see how.

Suppose we want to find a T-shaped pattern in the image defined by the SE B1 such that it does not contain the pattern defined by B2 in its right neighborhood. 0 below represents that we don’t care about these positions. Below figure shows how to combine two SE into one.

Now, just compare the underlying pixels values with the combined SE. If it matches exactly then the pixel underneath the origin of SE is set to 1 else 0. Let’s take an example. Suppose we want to find the above combined SE in the image shown below. Clearly, the one on the lower right matches the pattern defined by the combined SE. The result is shown on the right side.

Below is the code for this. The hit-or-miss can be implemented using the OpenCV cv2.morphologyEx() function by passing the flag cv2.MORPH_HITMISS as shown below.

import cv2
import numpy as np

# Create the input image
input_image = np.array((
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 255, 255, 255, 0, 0, 0, 255],
    [0, 0, 255, 255, 0, 0, 0, 0],
    [0, 0, 255, 255, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 255, 255, 0, 0, 255, 255, 255],
    [0,255, 0, 0, 0, 0, 255, 0],
    [0, 255, 0, 0, 0, 0, 255, 0]), dtype="uint8")

# Construct the structuring element
kernel = np.array((
        [1, 1, 1],
        [0, 1, -1],
        [0, 1, -1]), dtype="int")

# Apply hit-or-miss transformation
output_image = cv.morphologyEx(input_image, cv2.MORPH_HITMISS, kernel)

import cv2

import numpy as np

# Create the input image

input_image = np.array((

[0, 0, 0, 0, 0, 0, 0, 0],

[0, 255, 255, 255, 0, 0, 0, 255],

[0, 0, 255, 255, 0, 0, 0, 0],

[0, 0, 0, 0, 0, 0, 0, 0],

[0, 255, 255, 0, 0, 255, 255, 255],

[0,255, 0, 0, 0, 0, 255, 0],

[0, 255, 0, 0, 0, 0, 255, 0]), dtype="uint8")

# Construct the structuring element

kernel = np.array((

[1, 1, 1],

[0, 1, -1],

[0, 1, -1]), dtype="int")

# Apply hit-or-miss transformation

output_image = cv.morphologyEx(input_image, cv2.MORPH_HITMISS, kernel)

This way you can find any patterns in the input image. In the next blog, we will discuss other applications of hit-or-miss operation such as thinning, thickening, convex Hull, etc. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Opening and Closing

Leave a reply

In the previous blogs, we discuss two fundamental morphological operations – Erosion and Dilation. Both have their own advantages and disadvantages. For instance, erosion is useful in removing salt noise and structures of a certain shape but at the same time, if holes or gaps are present in the object, this tends to amplify them. So, what we want is something that removes the structures or fills holes/gaps without affecting the remaining foreground parts.

One plausible solution is to combine erosion and dilation operation. So, for instance, let’s take the case of noise removal. Like we discussed that the erosion will remove the salt noise but at the same time will shrink the foreground region also. To counter this, we apply dilation operation using the same structuring element. Because noise is removed, so dilation will only work on the shrunk foreground area and revert it back to the original. This process of applying erosion followed by dilation is known as Opening and in this blog, we will discuss this method in detail.

So, why is the name Opening? Because this opens up the gap between the objects connected by thin protrusions that are of size less than that of the structuring element. Below is the image where the bridge is 2 pixels wide while the SE is 3 pixels wide.

Now, let’s formulate the opening operation in terms of a set operation. The opening of binary image A by the structuring element B is defined as the erosion of A by B, followed by the dilation of the result by B. This can be stated using any of the two expressions as shown below

The second one states that the opening is the union of all the translations of SE B that fits into A. There is an interesting property associated with the opening known as idempotence. This simply states that if an image has been opened once, performing the subsequent opening operation with the same SE will have no effect on that image. This is because after opening the new boundaries created are such that the SE always fits inside them. Now, let’s discuss another operation – closing that is a dual of opening operation.

Closing

This is the just the reverse of Opening i.e Dilation followed by Erosion. Because this closes the holes/gaps present in the object while keeping the initial object size the same. That’s why the name Closing. Now, let’s formulate this in terms of a set operation.

Actual implementation involves rotating the SE by 180 degrees before performing dilation and erosion. But since our SE is mostly symmetric, we usually don’t care. Similar to the opening, this operator is also idempotent. Unlike opening, this fuses narrow breaks or bridges between the objects. Generally, good for removing pepper noise but not salt noise.

Now, let’s discuss how to implement these using OpenCV-Python. For Opening, one way is to first apply erosion and then dilation using the builtin functions we discussed earlier. Similarly, for closing also. Fortunately, OpenCV provides another function that directly implements these operations as shown below.

cv2.morphologyEx(src, op, kernel[, anchor[, iterations[, borderType[, borderValue]]]])

1	cv2.morphologyEx(src, op, kernel[, anchor[, iterations[, borderType[, borderValue]]]])

Here, src is the input image with any number of channels( all will be processed independently) and the kernel is the structuring element whose origin is defined by the anchor (default (-1,-1)). Don’t know why the iterations argument is given because the opening and closing are idempotent operators. You can create the SE using cv2.getStructuringElement() or simply using numpy. It is sometimes useful to pad the image to account for the boundary pixels or if the image is of non-regular shape and this can be done using the “borderType” and “borderValue” arguments. The “op” argument specifies which type of morphological operation to apply. Following are the types available.

MORPH_OPEN – an opening operation
MORPH_CLOSE – a closing operation
MORPH_GRADIENT – a morphological gradient
MORPH_TOPHAT – “top hat”
MORPH_BLACKHAT – “black hat”

Below is an example where we open and close the image with the rectangular SE.

# Define the structuring element
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
# Apply the opening operation
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
# Apply the closing operation
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

# Define the structuring element

kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))

# Apply the opening operation

opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

# Apply the closing operation

closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

In the next blog, we will discuss other morphological operators like morphological gradient, top hat, etc. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.

Morphological gradient and Top-hat operators

Leave a reply

In this blog, we will discuss various morphological operators such as morphological gradient, white and black top-hat transform, etc. All these operators can be easily obtained by combining erosion and dilation operations. So, let’s discuss each of these methods in detail.

Morphological Gradient

This is the difference between the dilation and erosion of an image. Because dilation and erosion mostly affect the pixels that are close to the boundary between the foreground and background, their difference generally yields the boundary and thus this is used for edge detection and segmentation tasks. Now, let’s discuss how to implement this using OpenCV-Python.

One approach is to use OpenCV cv2.dilate() and cv2.erode() functions and then subtract these two. Another approach is to use OpenCV cv2.morphologyEx() function with cv2.MORPH_GRADIENT flag as discussed in the previous blog. Both of these approaches are shown below.

# Structuring element
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))

# Approach-1: Perform erosion and dilation separately and then subtract
erosion = cv2.erode(img, kernel, iterations = 1)
dilation = cv2.dilate(img, kernel, iterations=1)
gradient1 = dilation - erosion

# Approach-2: Use cv2.morphologyEx()
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)

# Structuring element

kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))

# Approach-1: Perform erosion and dilation separately and then subtract

erosion = cv2.erode(img, kernel, iterations = 1)

dilation = cv2.dilate(img, kernel, iterations=1)

gradient1 = dilation - erosion

# Approach-2: Use cv2.morphologyEx()

gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)

Top Hat Transform

This is mainly used to extract small details from the images. There are two types of top hat operators namely

White top-hat transform: This is the difference between the image and its opening.
Black top-hat transform: Difference between the closing and the input image.

As we already know that opening eliminates thin protrusions and salt noise, thus the white top hat transform will return these elements. Similarly, the black hat transform will return the pepper noise and gaps/holes that are filled. That’s why the name “White” and “Black”. Thus depending upon the structuring element used, these can be useful for feature extraction.

Now, let’s discuss how to implement this using OpenCV-Python. This can be implemented either manually for instance first finding the image opening and then subtracting it from the original image. The second approach is to pass the corresponding flag in the cv2.morphologyEx() function as shown below.

# Load the image
img1 = cv2.imread("D:/downloads/adap1.jpg",0)
# Structuring element
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(25,25))
# Apply the top hat transform
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
# Apply the black hat transform
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)

# Load the image

img1 = cv2.imread("D:/downloads/adap1.jpg",0)

# Structuring element

kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(25,25))

# Apply the top hat transform

tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)

# Apply the black hat transform

blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)

This can also be used for tackling non-uniform illumination in images and thus can be a useful pre-processing tool for image segmentation tasks. The main idea is that you select the structuring element large enough that it erodes all the object region. Then the only thing remaining will be the background or the shading pattern. Subtracting this background from the image (or top-hat transform, in other words) will produce an image that reduces the non-uniform illumination effect. Thus the image can now be reasonably segmented. Below is an example of the image corrupted by non-uniform illumination and the result of applying white top-hat transform on that image. The image is thresholded using Otsu’s method.

That’s all for this blog. Hope you enjoy reading.

If you have any doubt/suggestion please feel free to ask and I will do my best to help or improve myself. Good-bye until next time.