rotate-me-fast/rotation.cpp
Fabien Freling da4bdf7702 Decompose complex lines.
Create a bunch of const variables to better profile.
2014-07-27 22:41:09 +02:00

665 lines
19 KiB
C++

#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <cmath>
#include <cassert>
#include <cstring>
#include <chrono>
#include <cstdlib>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include "image.h"
using namespace std;
//
//
// Trigonometry
//
DPoint convert_grid_coord(Image const& img, Point const& p)
{
return DPoint(p.x - img.width / 2.0f + 0.5, p.y - img.height / 2.0f + 0.5);
}
double convert_radian(Image const& img, Point const& p, double const ratio)
{
DPoint centered = convert_grid_coord(img, p);
double const cos_value = centered.x * ratio;
double const sin_value = - (centered.y * ratio);
double angle = acos(cos_value);
if (sin_value < 0)
{
angle = (2 * M_PI) - angle;
}
return angle;
}
DPoint convert_abs_coord(double const angle, double const ratio)
{
return DPoint(cos(angle) / ratio, - sin(angle) / ratio);
}
Point convert_img_coord(Image const& img, DPoint const& p)
{
int x = round(p.x + (img.width / 2.0f) - 0.5);
int y = round(p.y + (img.height / 2.0f) - 0.5);
return Point(x, y);
}
DPoint convert_img_coord_precision(Image const& img, DPoint const& p)
{
double x = p.x + (img.width / 2.0f) - 0.5;
double y = p.y + (img.height / 2.0f) - 0.5;
return DPoint(x, y);
}
void convert_abs_to_polar_coord(DPoint const& p, double& angle, double& dist)
{
angle = atan2(-p.y, p.x);
dist = sqrt(p.x * p.x + p.y * p.y);
}
DPoint convert_polar_to_grid_coord(double const angle, double const distance)
{
return DPoint(cos(angle) * distance, - (sin(angle) * distance));
}
double compute_ratio(Image const& img)
{
double const trigo_length = (sqrt(img.width * img.width + img.height * img.height) - 1) / 2;
return 1.0f / trigo_length;
}
void compute_output_size(Image const& src, double const rotation, unsigned int& width, unsigned int& height)
{
double const ratio = compute_ratio(src);
double min_w = 0;
double max_w = 0;
double min_h = 0;
double max_h = 0;
Point p(0, 0);
double angle = convert_radian(src, p, ratio);
DPoint const tl = convert_abs_coord(angle + rotation, ratio);
min_w = min(min_w, tl.x);
max_w = max(max_w, tl.x);
min_h = min(min_h, tl.y);
max_h = max(max_h, tl.y);
p = Point(src.width - 1, 0);
angle = convert_radian(src, p, ratio);
DPoint const tr = convert_abs_coord(angle + rotation, ratio);
min_w = min(min_w, tr.x);
max_w = max(max_w, tr.x);
min_h = min(min_h, tr.y);
max_h = max(max_h, tr.y);
p = Point(0, src.height - 1);
angle = convert_radian(src, p, ratio);
DPoint const bl = convert_abs_coord(angle + rotation, ratio);
min_w = min(min_w, bl.x);
max_w = max(max_w, bl.x);
min_h = min(min_h, bl.y);
max_h = max(max_h, bl.y);
p = Point(src.width - 1, src.height - 1);
angle = convert_radian(src, p, ratio);
DPoint const br = convert_abs_coord(angle + rotation, ratio);
min_w = min(min_w, br.x);
max_w = max(max_w, br.x);
min_h = min(min_h, br.y);
max_h = max(max_h, br.y);
width = (int) (max_w - min_w) + 1;
height = (int) (max_h - min_h) + 1;
}
//
//
// Math approximation
//
void round_if_very_small(double& d)
{
if (abs(d) < 1.0e-10)
d = 0.0;
if (abs(d - 1) < 1.0e-10)
d = 1.0;
}
inline
bool fequal(float a, float b, float sigma)
{
return abs(a - b) < sigma;
}
//
//
// Image rotation
//
DPoint get_mapped_point(Image const& src, Point const& p, double const rotation)
{
DPoint const d = convert_grid_coord(src, p);
double p_angle = 0;
double dist = 0;
convert_abs_to_polar_coord(d, p_angle, dist);
return convert_polar_to_grid_coord(p_angle + rotation, dist);
}
inline
void rotate_pixel(Image const& src,
Point const& src_rotated_point,
unsigned int const src_limit,
pvalue_t* rotate_buffer, unsigned int rot_index,
unsigned int q_pow)
{
// Quantize on a 8x8 grid
int const q_inter_pow = 3;
int const q_inter = 1 << q_inter_pow;
int const mask = 0x07;
int const src_x = src_rotated_point.x >> q_pow;
int const src_y = src_rotated_point.y >> q_pow;
// Bilinear interpolation
unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
unsigned int const src_index_2 = src_index_1 + src.pixel_size;
unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
unsigned int const src_index_4 = src_index_3 + src.pixel_size;
// Out-of-bounds check
if (src_index_4 >= src_limit)
{
return;
}
pvalue_t const src_tl = src.buffer[src_index_1];
pvalue_t const src_tr = src.buffer[src_index_2];
pvalue_t const src_bl = src.buffer[src_index_3];
pvalue_t const src_br = src.buffer[src_index_4];
unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
unsigned int const inv_x = q_inter - x_delta;
unsigned int const inv_y = q_inter - y_delta;
#ifndef SIMD
pvalue_t interpolated = ((src_tl * inv_x + src_tr * x_delta) * inv_y
+ (src_bl * inv_x + src_br * x_delta) * y_delta) >> (q_inter_pow << 1);
rotate_buffer[rot_index] = interpolated;
// rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
// + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
// rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
// + (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta) >> 6;
#else
// X-axis
__m128i top = _mm_loadu_si128((__m128i*) &src.buffer[src_index_1]);
__m128i bottom = _mm_loadu_si128((__m128i*) &src.buffer[src_index_3]);
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
top = _mm_mullo_epi16(top, coef);
bottom = _mm_mullo_epi16(bottom, coef);
// Y-axis
coef = _mm_set1_epi16(inv_y);
top = _mm_mullo_epi16(top, coef);
coef = _mm_set1_epi16(y_delta);
bottom = _mm_mullo_epi16(bottom, coef);
top = _mm_add_epi16(top, bottom);
top = _mm_srli_epi16(top, 2 * q_pow);
rotate_buffer[rot_index] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
// rotate_buffer[rot_index + 1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
// rotate_buffer[rot_index + 2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
#endif // ! SIMD
}
Image* rotate(Image const& src, double angle)
{
double const rotation = (angle / 180.0f) * M_PI;
unsigned int w = 0;
unsigned int h = 0;
compute_output_size(src, rotation, w, h);
Image* rotated = new Image(w, h, src.type);
DPoint const src_origin = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint src_delta_x = get_mapped_point(*rotated, Point(src.width - 1, 0), -rotation);
DPoint src_delta_y = get_mapped_point(*rotated, Point(0, src.height - 1), -rotation);
src_delta_x -= src_origin;
src_delta_y -= src_origin;
// Quantized position on a grid
int const q_pos_pow = 10;
int const q_pos = 1 << q_pos_pow;
// TODO: we could have only one delta and deduce the other one
Point const qdx((src_delta_x.x * q_pos) / src.width, (src_delta_x.y * q_pos) / src.width);
Point const qdy((src_delta_y.x * q_pos) / src.height, (src_delta_y.y * q_pos) / src.height);
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
unsigned int buffer_index = 0;
pvalue_t* buffer = rotated->buffer;
int64_t const src_qwidth = (int64_t) src.width * q_pos;
int64_t const src_qheight = (int64_t) src.height * q_pos;
unsigned int const src_limit = src.width * src.height * src.pixel_size;
size_t const buffer_step = rotated->pixel_size;
int const width = rotated->width;
int const height = rotated->height;
for (int y = 0; y < height; ++y)
{
Point src_rotated_point((rot_origin_in_src.x * q_pos) + y * qdy.x,
(rot_origin_in_src.y * q_pos) + y * qdy.y);
for (int x = 0; x < width; ++x)
{
if (src_rotated_point.x >= 0 && src_rotated_point.x < src_qwidth
&& src_rotated_point.y >= 0 && src_rotated_point.y < src_qheight)
{
rotate_pixel(src, src_rotated_point,
src_limit,
buffer, buffer_index,
q_pos_pow);
}
src_rotated_point += qdx;
buffer_index += buffer_step;
}
}
return rotated;
}
//
//
// Tile rotation
//
template<unsigned int W, unsigned int H>
void rotate_pixel(TiledImage<W, H> const& src,
Point const& src_rotated_point,
pvalue_t* rot_tile)
{
unsigned int const quantize = 8;
int const src_x = src_rotated_point.x >> 3;
int const src_y = src_rotated_point.y >> 3;
pvalue_t const* src_index_1 = src.access_pixel(src_x, src_y);
pvalue_t const* src_index_3 = src_index_1 + (W + 1) * src.pixel_size;
unsigned int x_delta = src_rotated_point.x & 0x07;;
unsigned int y_delta = src_rotated_point.y & 0x07;
unsigned int const inv_x = quantize - x_delta;
unsigned int const inv_y = quantize - y_delta;
#ifndef SIMD
pvalue_t const* src_index_2 = src_index_1 + src.pixel_size;
pvalue_t const* src_index_4 = src_index_3 + src.pixel_size;
rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6;
rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6;
rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6;
#else
// X-axis
__m128i top = _mm_loadu_si128((__m128i*) src_index_1);
__m128i bottom = _mm_loadu_si128((__m128i*) src_index_3);
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
top = _mm_mullo_epi16(top, coef);
bottom = _mm_mullo_epi16(bottom, coef);
// Y-axis
coef = _mm_set1_epi16(inv_y);
top = _mm_mullo_epi16(top, coef);
coef = _mm_set1_epi16(y_delta);
bottom = _mm_mullo_epi16(bottom, coef);
top = _mm_add_epi16(top, bottom);
top = _mm_srli_epi16(top, 6);
rot_tile[0] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
rot_tile[1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
rot_tile[2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
#endif // ! SIMD
}
template<unsigned int W, unsigned int H>
TiledImage<W, H>*
rotate(TiledImage<W, H> const& src, double angle)
{
double const rotation = (angle / 180.0f) * M_PI;
unsigned int w = 0;
unsigned int h = 0;
compute_output_size(src, rotation, w, h);
auto rotated = new TiledImage<W, H>(w, h);
DPoint src_origin = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint src_delta_x = get_mapped_point(*rotated, Point(1, 0), -rotation);
DPoint src_delta_y = get_mapped_point(*rotated, Point(0, 1), -rotation);
src_delta_x.x = src_delta_x.x - src_origin.x;
src_delta_x.y = src_delta_x.y - src_origin.y;
round_if_very_small(src_delta_x.x);
round_if_very_small(src_delta_x.y);
src_delta_y.x = src_delta_y.x - src_origin.x;
src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y);
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
unsigned int const quantize = 8;
int const& src_qwidth = src.width * quantize;
int const& src_qheight = src.height * quantize;
for (unsigned int y = 0; y < rotated->nb_row_tile; ++y)
{
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
{
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
pvalue_t* runner = rotated->get_tile(rot_tile_index);
for (unsigned int j = 0; j < H; ++j)
{
int const y_index = y * H + j;
int x_index = x * W;
DPoint const src_rotated_point((rot_origin_in_src.x + x_index * src_delta_x.x + y_index * src_delta_y.x) * quantize,
(rot_origin_in_src.y + x_index * src_delta_x.y + y_index * src_delta_y.y) * quantize);
for (unsigned int i = 0; i < W; ++i)
{
Point const src_runner(src_rotated_point.x + i * src_delta_x.x * quantize,
src_rotated_point.y + i * src_delta_x.y * quantize);
if (src_runner.x >= 0 && src_runner.x < src_qwidth
&& src_runner.y >= 0 && src_runner.y < src_qheight)
{
rotate_pixel(src, src_runner, runner);
}
runner += rotated->pixel_size;
}
// Jump overlapping pixel
runner += rotated->pixel_size;
}
}
}
// rotated->fill_overlap();
return rotated;
}
//
//
// Check
//
bool check_points()
{
Image five(5, 5, pnm::Format::PGM);
Point origin(0, 0);
DPoint d1 = convert_grid_coord(five, origin);
assert(d1.x == -2);
assert(d1.y == -2);
return true;
}
bool check_trigo()
{
Image square(500, 500, pnm::Format::PGM);
double const ratio = compute_ratio(square);
double const sigma = 1.0e-2;
if (!fequal(ratio, 1 / 707.106, sigma))
{
cerr << __LINE__ << " | Invalid ratio: " << ratio << " != " << 1 / 707.106 << endl;
return false;
}
// Check that the origin of a square image is at sqrt(2) / 2
double const angle = convert_radian(square, Point(0, 0), ratio);
if (!fequal(angle, 3 * M_PI / 4, sigma))
{
cerr << __LINE__ << " | Invalid angle value: " << angle << " != " << 3 * M_PI / 4 << endl;
return false;
}
// Check that we can reverse the origin point.
DPoint const abs_reverse_point = convert_abs_coord(angle, ratio);
Point const reverse_point = convert_img_coord(square, abs_reverse_point);
if (!fequal(0.0, reverse_point.x, sigma)
|| !fequal(0.0, reverse_point.y, sigma))
{
cerr << __LINE__ << "Reverse origin fail" << endl;
cerr << " " << reverse_point << " != (0, 0)" << endl;
cerr << " abs point " << abs_reverse_point << endl;
return false;
}
// Check that when rotating the origin by 45 degrees
double const rotation = M_PI / 4; // 45 degrees
unsigned int w = 0;
unsigned int h = 0;
compute_output_size(square, rotation, w, h);
if (!fequal(w, square.width * sqrt(2), sigma * square.width)
|| !fequal(h, square.height * sqrt(2), sigma * square.height))
{
cerr << "Invalid rotated image dimensions " << w << " x " << h << endl;
cerr << " expected " << (int) ceil(square.width * sqrt(2)) << " x " << (int) ceil(square.height * sqrt(2)) << endl;
return false;
}
Image rotated(w, h, pnm::Format::PGM);
DPoint const a_p45 = convert_abs_coord(angle + rotation, ratio);
Point const p45 = convert_img_coord(rotated, a_p45);
if (!fequal(0, p45.x, sigma))
{
cerr << __LINE__ << " > Rotation origin by 45 degrees:" << endl;
cerr << " invalid x value: " << p45.x << " != " << 0 << endl;
cerr << " absolute point: " << a_p45 << endl;
cerr << " relative point: " << p45 << endl;
return false;
}
if (!fequal(p45.y, (h - 1) / 2.0f, sigma))
{
cerr << __LINE__ << " > Rotation origin by 45 degrees:" << endl;
cerr << "Invalid y value: " << p45.y << " != " << (h - 1) / 2.0f << endl;
cerr << " absolute point: " << a_p45 << endl;
cerr << " relative point: " << p45 << endl;
return false;
}
// Polar coordinates
{
DPoint const d(-42.5, 37.5);
double angle = 0;
double dist = 0;
convert_abs_to_polar_coord(d, angle, dist);
DPoint const reversed = convert_polar_to_grid_coord(angle, dist);
if (!fequal(d.x, reversed.x, sigma)
|| !fequal(d.y, reversed.y, sigma))
{
cerr << __LINE__ << " > Reverse polar coordinates:" << endl;
cerr << reversed << " != " << d << endl;
cerr << "polar (" << angle << ", " << dist << ")" << endl;
return false;
}
}
return true;
}
bool check_90(string const& path)
{
Image const src(path);
Image const* rotated = rotate(src, 90);
for (unsigned int y = 0; y < rotated->height; ++y)
{
for (unsigned int x = 0; x < rotated->width; ++x)
{
unsigned rot_index = (y * rotated->width + x) * rotated->pixel_size;
unsigned src_index = (x * src.width + (src.width - 1 - y)) * src.pixel_size;
if (memcmp(&rotated->buffer[rot_index], &src.buffer[src_index], src.pixel_size * sizeof (pvalue_t)) != 0)
{
Point r(x, y);
Point s((src.width - 1 - y), x);
cerr << __LINE__ << " | R: " << r << " != S:" << s << endl;
cerr << "R dim: " << rotated->width << " x " << rotated->height << endl;
cerr << "S dim: " << src.width << " x " << src.height << endl;
return false;
}
}
}
delete rotated;
return true;
}
//
//
// Main
//
string get_save_path(string const& base, unsigned int i)
{
stringstream filename;
//filename << "/tmp/";
filename << base << "_";
if (i < 100)
filename << "0";
if (i < 10)
filename << "0";
filename << i << ".pnm";
return filename.str();
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
cout << "Usage: " << argv[0] << " image.ppm" << endl;
return 1;
}
bool perform_check = false;
if (perform_check)
{
if (!check_points())
return 1;
if (!check_trigo())
return 1;
if (!check_90(argv[1]))
{
cerr << __LINE__ << " | 90 degrees check failed" << endl << endl;
// return 1;
}
}
double const step = 15;
bool save_output_img = false;
bool print_each_run = false;
bool test_tile = false;
// No tile
Image img(argv[1]);
float average = 0.0;
int i = 0;
for (double rotation = 0; rotation < 360; rotation += step)
{
auto const before = chrono::high_resolution_clock::now();
Image* const rotated = rotate(img, rotation);
auto const after = chrono::high_resolution_clock::now();
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count();
if (print_each_run)
cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated", rotation));
delete rotated;
++i;
}
cout << "---------" << endl;
cout << " average: " << average / i << "ms" << endl << endl;
// Tile
if (test_tile)
{
TiledImage<32, 32> tiled_img(argv[1]);
average = 0.0;
i = 0;
cout << "Tiled image" << endl;
for (double rotation = 0; rotation < 360; rotation += step)
{
auto const before = chrono::high_resolution_clock::now();
auto const rotated = rotate(tiled_img, rotation);
auto const after = chrono::high_resolution_clock::now();
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count();
if (print_each_run)
cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated_tiled", rotation));
delete rotated;
++i;
}
cout << "---------" << endl;
cout << " average: " << average / i << "ms" << endl;
}
return 0;
}