Interpolate using SSE instructions.

This commit is contained in:
Fabien Freling 2014-06-29 17:17:19 +02:00
parent 05777d59f8
commit 3af56f6619
2 changed files with 37 additions and 37 deletions

View file

@ -1,5 +1,5 @@
CXX = clang++ CXX = clang++
CXXFLAGS = -std=c++11 -W -Wall -O3 #-Werror CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math #-Werror
BUILD_DIR=/tmp BUILD_DIR=/tmp
all: rotation.cpp all: rotation.cpp

View file

@ -7,6 +7,9 @@
#include <cstring> #include <cstring>
#include <chrono> #include <chrono>
#include <xmmintrin.h>
using namespace std; using namespace std;
@ -279,7 +282,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
double min_h = 0; double min_h = 0;
double max_h = 0; double max_h = 0;
//cout << "Image dimensions: " << src.width << " x " << src.height << endl;
Point p(0, 0); Point p(0, 0);
double angle = convert_radian(src, p, ratio); double angle = convert_radian(src, p, ratio);
DPoint tl = convert_abs_coord(angle + rotation, ratio); DPoint tl = convert_abs_coord(angle + rotation, ratio);
@ -287,11 +289,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
max_w = max(max_w, tl.x); max_w = max(max_w, tl.x);
min_h = min(min_h, tl.y); min_h = min(min_h, tl.y);
max_h = max(max_h, tl.y); max_h = max(max_h, tl.y);
// debug print
// if (rotation == 0.0)
// {
// cout << "Rotated " << p << " = " << tl << endl << endl;
// }
p = Point(src.width - 1, 0); p = Point(src.width - 1, 0);
angle = convert_radian(src, p, ratio); angle = convert_radian(src, p, ratio);
@ -300,10 +297,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
max_w = max(max_w, tr.x); max_w = max(max_w, tr.x);
min_h = min(min_h, tr.y); min_h = min(min_h, tr.y);
max_h = max(max_h, tr.y); max_h = max(max_h, tr.y);
// if (rotation == 0.0)
// {
// cout << "Rotated " << p << " = " << tr << endl << endl;
// }
p = Point(0, src.height - 1); p = Point(0, src.height - 1);
angle = convert_radian(src, p, ratio); angle = convert_radian(src, p, ratio);
@ -312,10 +305,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
max_w = max(max_w, bl.x); max_w = max(max_w, bl.x);
min_h = min(min_h, bl.y); min_h = min(min_h, bl.y);
max_h = max(max_h, bl.y); max_h = max(max_h, bl.y);
// if (rotation == 0.0)
// {
// cout << "Rotated " << p << " = " << bl << endl << endl;
// }
p = Point(src.width - 1, src.height - 1); p = Point(src.width - 1, src.height - 1);
angle = convert_radian(src, p, ratio); angle = convert_radian(src, p, ratio);
@ -324,10 +313,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
max_w = max(max_w, br.x); max_w = max(max_w, br.x);
min_h = min(min_h, br.y); min_h = min(min_h, br.y);
max_h = max(max_h, br.y); max_h = max(max_h, br.y);
// if (rotation == 0.0)
// {
// cout << "Rotated " << p << " = " << br << endl << endl;
// }
width = (int) (max_w - min_w) + 1; width = (int) (max_w - min_w) + 1;
height = (int) (max_h - min_h) + 1; height = (int) (max_h - min_h) + 1;
@ -450,28 +435,26 @@ DPoint get_mapped_point(Image const& src, Point const& p, double const rotation)
return convert_polar_to_grid_coord(p_angle + rotation, dist); return convert_polar_to_grid_coord(p_angle + rotation, dist);
} }
inline
void rotate_pixel(Image const& src, Image& rotated, void rotate_pixel(Image const& src, Image& rotated,
DPoint const& src_rotated_point, Point const& rot_point, DPoint const& src_rotated_point, Point const& rot_point,
unsigned int const src_limit, unsigned int const rot_limit) unsigned int const src_limit, unsigned int const rot_limit)
{ {
// Out-of-bounds check
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3; unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3;
unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3; unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
// Out-of-bounds check
if (src_index >= src_limit if (src_index >= src_limit
|| rot_index >= rot_limit) || rot_index >= rot_limit)
return; return;
static double interpolation_bottom[3];
static double interpolation_top[3];
static uint8_t interpolated[3];
// Bilinear interpolation // Bilinear interpolation
unsigned int src_index_1 = (floor(src_rotated_point.y) * src.width + floor(src_rotated_point.x)) * 3; unsigned int src_index_1 = src_index;
unsigned int src_index_2 = src_index_1 + 3; unsigned int src_index_2 = src_index_1 + 3;
unsigned int src_index_3 = src_index_1 + 3 * src.width; unsigned int src_index_3 = src_index_1 + 3 * src.width;
unsigned int src_index_4 = src_index_3 + 3; unsigned int src_index_4 = src_index_3 + 3;
if (src_index_1 >= src_limit || src_index_1 >= src_limit) if (src_index_4 >= src_limit)
return; return;
double x_delta = src_rotated_point.x - floor(src_rotated_point.x); double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
@ -479,15 +462,31 @@ void rotate_pixel(Image const& src, Image& rotated,
double y_delta = src_rotated_point.y - floor(src_rotated_point.y); double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
round_if_very_small(y_delta); round_if_very_small(y_delta);
// Interpolation // SIMD
// SIMD? __m128 const x_d = _mm_set_ps1(x_delta);
for (unsigned int i = 0; i < 3; ++i) __m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
{ __m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0);
interpolation_bottom[i] = src.buffer[src_index_1 + i] * (1 - x_delta) + src.buffer[src_index_2 + i] * x_delta; __m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0);
interpolation_top[i] = src.buffer[src_index_3 + i] * (1 - x_delta) + src.buffer[src_index_4 + i] * x_delta; top_left = _mm_mul_ps(top_left, inv_x_d);
interpolated[i] = interpolation_bottom[i] * (1 - y_delta) + interpolation_top[i] * y_delta; top_right = _mm_mul_ps(top_right, x_d);
} top_left = _mm_add_ps(top_left, top_right);
memcpy(&rotated.buffer[rot_index], interpolated, 3 * sizeof (uint8_t));
__m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0);
__m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0);
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
bottom_right = _mm_mul_ps(bottom_right, x_d);
bottom_left = _mm_add_ps(bottom_left, bottom_right);
__m128 const y_d = _mm_set_ps1(y_delta);
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
top_left = _mm_mul_ps(top_left, inv_y_d);
bottom_left = _mm_mul_ps(bottom_left, y_d);
top_left = _mm_add_ps(top_left, bottom_left);
// convert float values to uint8_t
rotated.buffer[rot_index] = top_left[3];
rotated.buffer[rot_index + 1] = top_left[2];
rotated.buffer[rot_index + 2] = top_left[1];
} }
Image rotate(Image const& src, double angle) Image rotate(Image const& src, double angle)
@ -515,7 +514,6 @@ Image rotate(Image const& src, double angle)
DPoint src_delta_x = get_mapped_point(rotated, Point(1, 0), -rotation); DPoint src_delta_x = get_mapped_point(rotated, Point(1, 0), -rotation);
DPoint src_delta_y = get_mapped_point(rotated, Point(0, 1), -rotation); DPoint src_delta_y = get_mapped_point(rotated, Point(0, 1), -rotation);
src_delta_x.x = src_delta_x.x - src_origin.x; src_delta_x.x = src_delta_x.x - src_origin.x;
src_delta_x.y = src_delta_x.y - src_origin.y; src_delta_x.y = src_delta_x.y - src_origin.y;
round_if_very_small(src_delta_x.x); round_if_very_small(src_delta_x.x);
@ -524,6 +522,8 @@ Image rotate(Image const& src, double angle)
src_delta_y.y = src_delta_y.y - src_origin.y; src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x); round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y); round_if_very_small(src_delta_y.y);
// cout << "src delta x = " << src_delta_x << endl;
// cout << "src delta y = " << src_delta_y << endl;
// // steps for first column in source image (y) // // steps for first column in source image (y)
@ -744,7 +744,7 @@ int main(int argc, char* argv[])
Image img(argv[1]); Image img(argv[1]);
for (double rotation = 0; rotation < 360; rotation += 15) for (double rotation = 0; rotation < 360; rotation += 5)
{ {
// double rotation = 45; // double rotation = 45;
auto const before = chrono::high_resolution_clock::now(); auto const before = chrono::high_resolution_clock::now();