Non-maximum suppressionDescriptionNon-maximum suppression (NMS) is a post-processing step in many computer vision applications. In the context of object detection, it is used to discard redundant detections and to achieve a single bounding-box for each detected object. After applying the object detection method, the output looks like: Method0. Sort all objects by score in descending order.1. Compute area for every object. 2. Discard objects with score <= 0.6 (0.6 value as example) 3. Compute the intersection between 2 objects. A dominant object must have a highest score. 4. Compute the IOU or Jaccard index. 5. Discard non dominant object with IOU >= 0.5 (0.5 value as example). EXAMPLE
/* ANSI C89, C99, C11 compliance */
/* The following example shows the usage of non-maximum suppression. */
#include <stdio.h>
#include <string.h>
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
#define DISCARD_BOXES_SCORE 0.6 /* boxes with score <= 0.6 to discard */
#define DISCARD_BOXES_IOU 0.5 /* boxes with IOU >= 0.5 to discard */
/*
a---
| |
---b
*/
struct OBJ_DET {
unsigned int n_class; /* class of object */
float r_score; /* detection score */
unsigned int n_x_a; /* bounding box upper left corner x coordinate */
unsigned int n_y_a; /* bounding box upper left corner y coordinate */
unsigned int n_x_b; /* bounding box bottom right corner x coordinate */
unsigned int n_y_b; /* bounding box bottom right corner y coordinate */
unsigned int n_width; /* bounding box width */
unsigned int n_height; /* bounding box height */
unsigned int n_area; /* bounding box area */
unsigned int n_state; /* state of detection: 0 - non active, 1 - active */
};
/* insertion sort in descending order */
void insertion_sort_do(struct OBJ_DET *p_st, size_t n_size) {
size_t i, j;
struct OBJ_DET st_val;
for (i = 1; i < n_size; i++) {
memcpy(&st_val, &p_st[i], sizeof(struct OBJ_DET));
for (j = i; j > 0 && st_val.r_score > p_st[j - 1].r_score; j--) {
memcpy(&p_st[j], &p_st[j - 1], sizeof(struct OBJ_DET));
}
memcpy(&p_st[j], &st_val, sizeof(struct OBJ_DET));
}
}
/* The Jaccard index, also known as Intersection over Union (IoU) and the Jaccard similarity coefficient. */
float intersection_over_union(unsigned int n_area_inter, unsigned int n_area_a, unsigned int n_area_b) {
return (float)n_area_inter / (float)(n_area_a + n_area_b - n_area_inter);
}
int main() {
struct OBJ_DET st_ds[3] = {{0, 0.7f, 1, 1, 0, 0, 4, 4, 0, 1}, {0, 0.2f, 2, 2, 0, 0, 4, 4, 0, 1}, {0, 0.8f, 1, 1, 0, 0, 4, 4, 0, 1}};
size_t i, j, n_size;
int z_xx_a, z_yy_a, z_xx_b, z_yy_b, z_w, z_h;
float r_iou;
n_size = 3; /* number of objects */
insertion_sort_do(&st_ds[0], n_size); /* sort detections in descending order */
for (i = 0; i < n_size; i++) { /* calculate area */
st_ds[i].n_area = st_ds[i].n_width * st_ds[i].n_height;
st_ds[i].n_x_b = st_ds[i].n_x_a + st_ds[i].n_width;
st_ds[i].n_y_b = st_ds[i].n_y_a + st_ds[i].n_height;
if (st_ds[i].r_score <= DISCARD_BOXES_SCORE) /* discard object */
st_ds[i].n_state = 0;
}
for (i = 0; i < n_size; i++) {
if (st_ds[i].n_state != 0) {
for (j = 0; j < n_size; j++) {
if ((i != j) && (st_ds[j].n_state != 0) && (st_ds[i].n_class == st_ds[j].n_class)) {
z_xx_a = MAX(st_ds[i].n_x_a, st_ds[j].n_x_a);
z_yy_a = MAX(st_ds[i].n_y_a, st_ds[j].n_y_a);
z_xx_b = MIN(st_ds[i].n_x_b, st_ds[j].n_x_b);
z_yy_b = MIN(st_ds[i].n_y_b, st_ds[j].n_y_b);
z_w = z_xx_b - z_xx_a;
z_h = z_yy_b - z_yy_a;
if ((z_w > 0) && (z_h > 0)) {
r_iou = intersection_over_union(z_w * z_h, st_ds[i].n_area, st_ds[j].n_area);
if (r_iou >= DISCARD_BOXES_IOU)
st_ds[j].n_state = 0; /* discard object */
}
}
}
}
}
for (i = 0; i < n_size; i++) {
if (st_ds[i].n_state != 0)
printf("Object %lu is active\n", i);
}
return 0;
}
REFERENCES: |