Interpolate using SSE instructions.
This commit is contained in:
parent
05777d59f8
commit
3af56f6619
2
Makefile
2
Makefile
|
@ -1,5 +1,5 @@
|
||||||
CXX = clang++
|
CXX = clang++
|
||||||
CXXFLAGS = -std=c++11 -W -Wall -O3 #-Werror
|
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math #-Werror
|
||||||
BUILD_DIR=/tmp
|
BUILD_DIR=/tmp
|
||||||
|
|
||||||
all: rotation.cpp
|
all: rotation.cpp
|
||||||
|
|
72
rotation.cpp
72
rotation.cpp
|
@ -7,6 +7,9 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
@ -279,7 +282,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
|
||||||
double min_h = 0;
|
double min_h = 0;
|
||||||
double max_h = 0;
|
double max_h = 0;
|
||||||
|
|
||||||
//cout << "Image dimensions: " << src.width << " x " << src.height << endl;
|
|
||||||
Point p(0, 0);
|
Point p(0, 0);
|
||||||
double angle = convert_radian(src, p, ratio);
|
double angle = convert_radian(src, p, ratio);
|
||||||
DPoint tl = convert_abs_coord(angle + rotation, ratio);
|
DPoint tl = convert_abs_coord(angle + rotation, ratio);
|
||||||
|
@ -287,11 +289,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
|
||||||
max_w = max(max_w, tl.x);
|
max_w = max(max_w, tl.x);
|
||||||
min_h = min(min_h, tl.y);
|
min_h = min(min_h, tl.y);
|
||||||
max_h = max(max_h, tl.y);
|
max_h = max(max_h, tl.y);
|
||||||
// debug print
|
|
||||||
// if (rotation == 0.0)
|
|
||||||
// {
|
|
||||||
// cout << "Rotated " << p << " = " << tl << endl << endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
p = Point(src.width - 1, 0);
|
p = Point(src.width - 1, 0);
|
||||||
angle = convert_radian(src, p, ratio);
|
angle = convert_radian(src, p, ratio);
|
||||||
|
@ -300,10 +297,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
|
||||||
max_w = max(max_w, tr.x);
|
max_w = max(max_w, tr.x);
|
||||||
min_h = min(min_h, tr.y);
|
min_h = min(min_h, tr.y);
|
||||||
max_h = max(max_h, tr.y);
|
max_h = max(max_h, tr.y);
|
||||||
// if (rotation == 0.0)
|
|
||||||
// {
|
|
||||||
// cout << "Rotated " << p << " = " << tr << endl << endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
p = Point(0, src.height - 1);
|
p = Point(0, src.height - 1);
|
||||||
angle = convert_radian(src, p, ratio);
|
angle = convert_radian(src, p, ratio);
|
||||||
|
@ -312,10 +305,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
|
||||||
max_w = max(max_w, bl.x);
|
max_w = max(max_w, bl.x);
|
||||||
min_h = min(min_h, bl.y);
|
min_h = min(min_h, bl.y);
|
||||||
max_h = max(max_h, bl.y);
|
max_h = max(max_h, bl.y);
|
||||||
// if (rotation == 0.0)
|
|
||||||
// {
|
|
||||||
// cout << "Rotated " << p << " = " << bl << endl << endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
p = Point(src.width - 1, src.height - 1);
|
p = Point(src.width - 1, src.height - 1);
|
||||||
angle = convert_radian(src, p, ratio);
|
angle = convert_radian(src, p, ratio);
|
||||||
|
@ -324,10 +313,6 @@ void compute_output_size(Image const& src, double const rotation, unsigned int&
|
||||||
max_w = max(max_w, br.x);
|
max_w = max(max_w, br.x);
|
||||||
min_h = min(min_h, br.y);
|
min_h = min(min_h, br.y);
|
||||||
max_h = max(max_h, br.y);
|
max_h = max(max_h, br.y);
|
||||||
// if (rotation == 0.0)
|
|
||||||
// {
|
|
||||||
// cout << "Rotated " << p << " = " << br << endl << endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
width = (int) (max_w - min_w) + 1;
|
width = (int) (max_w - min_w) + 1;
|
||||||
height = (int) (max_h - min_h) + 1;
|
height = (int) (max_h - min_h) + 1;
|
||||||
|
@ -450,28 +435,26 @@ DPoint get_mapped_point(Image const& src, Point const& p, double const rotation)
|
||||||
return convert_polar_to_grid_coord(p_angle + rotation, dist);
|
return convert_polar_to_grid_coord(p_angle + rotation, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
void rotate_pixel(Image const& src, Image& rotated,
|
void rotate_pixel(Image const& src, Image& rotated,
|
||||||
DPoint const& src_rotated_point, Point const& rot_point,
|
DPoint const& src_rotated_point, Point const& rot_point,
|
||||||
unsigned int const src_limit, unsigned int const rot_limit)
|
unsigned int const src_limit, unsigned int const rot_limit)
|
||||||
{
|
{
|
||||||
// Out-of-bounds check
|
|
||||||
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3;
|
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3;
|
||||||
unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
|
unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
|
||||||
|
|
||||||
|
// Out-of-bounds check
|
||||||
if (src_index >= src_limit
|
if (src_index >= src_limit
|
||||||
|| rot_index >= rot_limit)
|
|| rot_index >= rot_limit)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
static double interpolation_bottom[3];
|
|
||||||
static double interpolation_top[3];
|
|
||||||
static uint8_t interpolated[3];
|
|
||||||
|
|
||||||
// Bilinear interpolation
|
// Bilinear interpolation
|
||||||
unsigned int src_index_1 = (floor(src_rotated_point.y) * src.width + floor(src_rotated_point.x)) * 3;
|
unsigned int src_index_1 = src_index;
|
||||||
unsigned int src_index_2 = src_index_1 + 3;
|
unsigned int src_index_2 = src_index_1 + 3;
|
||||||
unsigned int src_index_3 = src_index_1 + 3 * src.width;
|
unsigned int src_index_3 = src_index_1 + 3 * src.width;
|
||||||
unsigned int src_index_4 = src_index_3 + 3;
|
unsigned int src_index_4 = src_index_3 + 3;
|
||||||
|
|
||||||
if (src_index_1 >= src_limit || src_index_1 >= src_limit)
|
if (src_index_4 >= src_limit)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
|
double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
|
||||||
|
@ -479,15 +462,31 @@ void rotate_pixel(Image const& src, Image& rotated,
|
||||||
double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
|
double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
|
||||||
round_if_very_small(y_delta);
|
round_if_very_small(y_delta);
|
||||||
|
|
||||||
// Interpolation
|
// SIMD
|
||||||
// SIMD?
|
__m128 const x_d = _mm_set_ps1(x_delta);
|
||||||
for (unsigned int i = 0; i < 3; ++i)
|
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
|
||||||
{
|
__m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0);
|
||||||
interpolation_bottom[i] = src.buffer[src_index_1 + i] * (1 - x_delta) + src.buffer[src_index_2 + i] * x_delta;
|
__m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0);
|
||||||
interpolation_top[i] = src.buffer[src_index_3 + i] * (1 - x_delta) + src.buffer[src_index_4 + i] * x_delta;
|
top_left = _mm_mul_ps(top_left, inv_x_d);
|
||||||
interpolated[i] = interpolation_bottom[i] * (1 - y_delta) + interpolation_top[i] * y_delta;
|
top_right = _mm_mul_ps(top_right, x_d);
|
||||||
}
|
top_left = _mm_add_ps(top_left, top_right);
|
||||||
memcpy(&rotated.buffer[rot_index], interpolated, 3 * sizeof (uint8_t));
|
|
||||||
|
__m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0);
|
||||||
|
__m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0);
|
||||||
|
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
|
||||||
|
bottom_right = _mm_mul_ps(bottom_right, x_d);
|
||||||
|
bottom_left = _mm_add_ps(bottom_left, bottom_right);
|
||||||
|
|
||||||
|
__m128 const y_d = _mm_set_ps1(y_delta);
|
||||||
|
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
|
||||||
|
top_left = _mm_mul_ps(top_left, inv_y_d);
|
||||||
|
bottom_left = _mm_mul_ps(bottom_left, y_d);
|
||||||
|
top_left = _mm_add_ps(top_left, bottom_left);
|
||||||
|
|
||||||
|
// convert float values to uint8_t
|
||||||
|
rotated.buffer[rot_index] = top_left[3];
|
||||||
|
rotated.buffer[rot_index + 1] = top_left[2];
|
||||||
|
rotated.buffer[rot_index + 2] = top_left[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
Image rotate(Image const& src, double angle)
|
Image rotate(Image const& src, double angle)
|
||||||
|
@ -515,7 +514,6 @@ Image rotate(Image const& src, double angle)
|
||||||
DPoint src_delta_x = get_mapped_point(rotated, Point(1, 0), -rotation);
|
DPoint src_delta_x = get_mapped_point(rotated, Point(1, 0), -rotation);
|
||||||
DPoint src_delta_y = get_mapped_point(rotated, Point(0, 1), -rotation);
|
DPoint src_delta_y = get_mapped_point(rotated, Point(0, 1), -rotation);
|
||||||
|
|
||||||
|
|
||||||
src_delta_x.x = src_delta_x.x - src_origin.x;
|
src_delta_x.x = src_delta_x.x - src_origin.x;
|
||||||
src_delta_x.y = src_delta_x.y - src_origin.y;
|
src_delta_x.y = src_delta_x.y - src_origin.y;
|
||||||
round_if_very_small(src_delta_x.x);
|
round_if_very_small(src_delta_x.x);
|
||||||
|
@ -524,6 +522,8 @@ Image rotate(Image const& src, double angle)
|
||||||
src_delta_y.y = src_delta_y.y - src_origin.y;
|
src_delta_y.y = src_delta_y.y - src_origin.y;
|
||||||
round_if_very_small(src_delta_y.x);
|
round_if_very_small(src_delta_y.x);
|
||||||
round_if_very_small(src_delta_y.y);
|
round_if_very_small(src_delta_y.y);
|
||||||
|
// cout << "src delta x = " << src_delta_x << endl;
|
||||||
|
// cout << "src delta y = " << src_delta_y << endl;
|
||||||
|
|
||||||
|
|
||||||
// // steps for first column in source image (y)
|
// // steps for first column in source image (y)
|
||||||
|
@ -744,7 +744,7 @@ int main(int argc, char* argv[])
|
||||||
|
|
||||||
Image img(argv[1]);
|
Image img(argv[1]);
|
||||||
|
|
||||||
for (double rotation = 0; rotation < 360; rotation += 15)
|
for (double rotation = 0; rotation < 360; rotation += 5)
|
||||||
{
|
{
|
||||||
// double rotation = 45;
|
// double rotation = 45;
|
||||||
auto const before = chrono::high_resolution_clock::now();
|
auto const before = chrono::high_resolution_clock::now();
|
||||||
|
|
Loading…
Reference in a new issue