// rotate-me-fast/rotation.cpp
//
// 761 lines
// 21 KiB
// C++

#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <cmath>
#include <cassert>
#include <cstring>
#include <chrono>
#include <cstdlib>
#include <memory>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include "image.h"
using namespace std;
#define LOG cout << __FUNCTION__ << ": " << __LINE__ << " | "
#define ERRLOG cerr << __FUNCTION__ << ": " << __LINE__ << " | "
//
//
// Trigonometry
//
// Express the pixel position `p` relative to the centre of `img`.
// The 0.5 offset makes the middle pixel of an odd-sized image map to (0, 0);
// for instance pixel (0, 0) of a 5 x 5 image becomes (-2, -2).
DPoint convert_grid_coord(Image const& img, Point const& p)
{
  auto const half_width = img.width / 2.0f;
  auto const half_height = img.height / 2.0f;
  return DPoint(p.x - half_width + 0.5, p.y - half_height + 0.5);
}
// Angle, in radians within [0, 2*pi], associated with pixel `p` of `img`.
// `ratio` scales centred coordinates down before they are used as a
// cosine / sine pair (see compute_ratio for the value callers pass).
double convert_radian(Image const& img, Point const& p, double const ratio)
{
  DPoint const centred = convert_grid_coord(img, p);

  // acos() only covers [0, pi]; the sign of the (negated) y coordinate
  // tells whether the angle has to be mirrored into the other half turn.
  double const upper_angle = std::acos(centred.x * ratio);
  bool const mirrored = -(centred.y * ratio) < 0;
  return mirrored ? (2 * M_PI) - upper_angle : upper_angle;
}
// Inverse of convert_radian: turn an angle back into centred coordinates,
// undoing the `ratio` scaling and the sign flip applied to y.
DPoint convert_abs_coord(double const angle, double const ratio)
{
  double const x = std::cos(angle) / ratio;
  double const y = -std::sin(angle) / ratio;
  return DPoint(x, y);
}
// Convert centred coordinates back to the nearest integer pixel of `img`
// (the reverse of convert_grid_coord, followed by rounding).
Point convert_img_coord(Image const& img, DPoint const& p)
{
  auto const half_width = img.width / 2.0f;
  auto const half_height = img.height / 2.0f;
  int const column = std::round(p.x + half_width - 0.5);
  int const row = std::round(p.y + half_height - 0.5);
  return Point(column, row);
}
// Same conversion as convert_img_coord, but the result keeps its
// fractional part instead of being rounded to a pixel.
DPoint convert_img_coord_precision(Image const& img, DPoint const& p)
{
  auto const half_width = img.width / 2.0f;
  auto const half_height = img.height / 2.0f;
  return DPoint(p.x + half_width - 0.5, p.y + half_height - 0.5);
}
// Split the centred point `p` into polar form.
//   angle : receives atan2 of the negated y and x, in radians.
//   dist  : receives the Euclidean distance of `p` from the origin.
void convert_abs_to_polar_coord(DPoint const& p, double& angle, double& dist)
{
  // y is negated, matching the sign convention used by
  // convert_polar_to_grid_coord.
  angle = std::atan2(-p.y, p.x);
  dist = std::sqrt((p.x * p.x) + (p.y * p.y));
}
// Rebuild centred coordinates from a polar (angle, distance) pair; the
// y component is negated, mirroring convert_abs_to_polar_coord.
DPoint convert_polar_to_grid_coord(double const angle, double const distance)
{
  double const x = std::cos(angle) * distance;
  double const y = -(std::sin(angle) * distance);
  return DPoint(x, y);
}
// Scaling factor used by convert_radian / convert_abs_coord: the inverse
// of (diagonal of `img` - 1) / 2.
//
// The result is not finite when the diagonal equals 1 (division by zero).
double compute_ratio(Image const& img)
{
  // Widen to double before squaring so that very large dimensions cannot
  // wrap around in integer arithmetic; the result is unchanged whenever
  // the integer computation did not overflow.
  double const w = img.width;
  double const h = img.height;
  double const trigo_length = (std::sqrt(w * w + h * h) - 1) / 2;
  return 1.0 / trigo_length;
}
// Compute the dimensions of the image obtained by rotating `src` by
// `rotation` radians.
//   width, height : receive the size of the bounding box of the four
//                   rotated corners (truncated extent + 1).
// The bounding box is seeded with 0 on every side, so it always contains
// the centre of the image.
void compute_output_size(Image const& src, double const rotation, unsigned int& width, unsigned int& height)
{
  double const ratio = compute_ratio(src);
  double left = 0;
  double right = 0;
  double top = 0;
  double bottom = 0;

  // Rotate one corner and grow the bounding box to contain it.
  auto const include_corner = [&](Point const& corner)
  {
    double const angle = convert_radian(src, corner, ratio);
    DPoint const turned = convert_abs_coord(angle + rotation, ratio);
    left = std::min(left, turned.x);
    right = std::max(right, turned.x);
    top = std::min(top, turned.y);
    bottom = std::max(bottom, turned.y);
  };

  include_corner(Point(0, 0));
  include_corner(Point(src.width - 1, 0));
  include_corner(Point(0, src.height - 1));
  include_corner(Point(src.width - 1, src.height - 1));

  width = static_cast<int>(right - left) + 1;
  height = static_cast<int>(bottom - top) + 1;
}
// Rotate pixel `p` of `src` by `rotation` radians around the image centre.
// The result is expressed in centred coordinates (not pixel indices).
DPoint get_mapped_point(Image const& src, Point const& p, double const rotation)
{
  double polar_angle = 0;
  double radius = 0;
  convert_abs_to_polar_coord(convert_grid_coord(src, p), polar_angle, radius);
  return convert_polar_to_grid_coord(polar_angle + rotation, radius);
}
//
//
// Math approximation
//
// Snap `d` to exactly 0.0 or 1.0 when it lies within 1e-10 of that value;
// any other value is left untouched.
void round_if_very_small(double& d)
{
  double const epsilon = 1.0e-10;
  if (std::fabs(d) < epsilon)
  {
    d = 0.0;
  }
  else if (std::fabs(d - 1) < epsilon)
  {
    d = 1.0;
  }
}
// Approximate float comparison: true when `a` and `b` differ by strictly
// less than `sigma`.
inline
bool fequal(float a, float b, float sigma)
{
  float const gap = std::fabs(a - b);
  return gap < sigma;
}
//
//
// Padding
//
// Number of steps of size `step` needed to bring `distance` back inside
// the half-open range [0, upper_bound).
//
//   distance    : current coordinate.
//   upper_bound : first coordinate considered out of range.
//   step        : signed increment applied at every iteration.
//
// Returns 0 when `distance` is already inside the range. The result is
// negative when `step` moves away from the range; generate_padding_table
// clamps such values to 0. When `step` is 0 no number of steps can help
// and 0 is returned, so the caller's own bounds check decides; this
// replaces a division by zero whose infinite result was converted to int
// (undefined behaviour).
//
// The computation is done in double so that large coordinates keep their
// full precision and negating `distance` cannot overflow.
int get_iteration(int distance, int upper_bound, int step)
{
  if (step == 0)
  {
    return 0;
  }
  double const stride = step;
  if (distance < 0)
  {
    double const missing = -static_cast<double>(distance);
    return static_cast<int>(std::ceil(missing / stride));
  }
  if (distance >= upper_bound)
  {
    double const excess = static_cast<double>(distance) - upper_bound + 1;
    return static_cast<int>(std::ceil(excess / -stride));
  }
  return 0;
}
// Build the per-row padding table of the rotated image: for each output
// row, how many pixels at the start and at the end of the row are not
// sampled from the source.
//
//   rotated            : output image (only its width / height are read).
//   src_rotated_origin : fixed-point source position of the first pixel of
//                        row 0; taken by value and advanced by `qdy` per row.
//   qdx, qdy           : fixed-point source displacement for one output
//                        pixel to the right / one output row down.
//   src_qwidth/qheight : source dimensions in the same fixed-point unit.
//   q_pos              : margin removed from the upper bounds -- presumably
//                        one source pixel, so that the right / lower
//                        neighbours read by the interpolation stay inside
//                        the image (TODO confirm).
//
// Returns an array of 2 * rotated.height entries allocated with new[]:
// entry [2 * row] is the left padding, entry [2 * row + 1] the right one.
// The caller owns the array.
uint16_t* generate_padding_table(Image const& rotated,
                                 Point src_rotated_origin,
                                 Point const& qdx, Point const& qdy,
                                 int src_qwidth, int src_qheight,
                                 int q_pos)
{
  uint16_t* const table = new uint16_t[2 * rotated.height];
  int const x_limit = src_qwidth - q_pos;
  int const y_limit = src_qheight - q_pos;

  // Source position of the last pixel of the current row.
  Point row_end = src_rotated_origin;
  row_end.x += (rotated.width - 1) * qdx.x;
  row_end.y += (rotated.width - 1) * qdx.y;

  for (unsigned int row = 0; row < rotated.height; ++row)
  {
    uint16_t& left = table[2 * row];
    uint16_t& right = table[2 * row + 1];

    // Left padding: steps needed, walking right, to enter the source.
    int const left_x = get_iteration(src_rotated_origin.x, x_limit, qdx.x);
    int const left_y = get_iteration(src_rotated_origin.y, y_limit, qdx.y);
    left = std::max(std::max(left_x, left_y), 0);

    // If the first candidate pixel is still outside the source, the whole
    // row is treated as padding.
    Point const first(src_rotated_origin.x + left * qdx.x,
                      src_rotated_origin.y + left * qdx.y);
    bool const inside = first.x >= 0 && first.y >= 0
      && first.x < src_qwidth && first.y < src_qheight;
    if (!inside)
    {
      left = rotated.width;
    }

    // Right padding: same computation walking left from the end of the
    // row, limited to what the left padding leaves available.
    int const right_x = get_iteration(row_end.x, x_limit, -qdx.x);
    int const right_y = get_iteration(row_end.y, y_limit, -qdx.y);
    right = std::max(std::max(right_x, right_y), 0);
    right = std::min((int) right, (int) rotated.width - left);

    src_rotated_origin += qdy;
    row_end += qdy;
  }

  // The first and the last rows are forced to be padding only: their right
  // padding takes whatever the left padding leaves.
  auto const last = 2 * (rotated.height - 1);
  table[1] = rotated.width - table[0];
  table[last + 1] = rotated.width - table[last];
  return table;
}
//
//
// Border
//
// Build the per-row "front border" widths from per-row padding values.
//
//   front_padding, back_padding : one entry per row of `image` (note: not
//                                 the interleaved layout produced by
//                                 generate_padding_table).
//   image                       : only its width / height are read.
//
// Returns an array of image.height entries allocated with new[]; the
// caller owns it.
//  - First and last rows: everything that is not padding.
//  - Other rows: 1 when the front padding equals the previous row's,
//    otherwise the row with the smaller padding (current or previous)
//    receives the difference + 1.
//  - Finally each inner border is limited so that
//    front padding + border + back padding never exceeds image.width.
uint16_t* generate_border_table(uint16_t const* front_padding,
                                uint16_t const* back_padding,
                                Image const& image)
{
  uint16_t* border_table = new uint16_t[image.height];
  // Nothing to fill for an empty image; also keeps `image.height - 1`
  // below from wrapping around.
  if (image.height == 0)
  {
    return border_table;
  }

  border_table[0] = image.width - front_padding[0] - back_padding[0];
  for (unsigned int i = 1; i < image.height - 1; ++i)
  {
    if (front_padding[i] == front_padding[i - 1])
    {
      border_table[i] = 1;
    }
    else if (front_padding[i - 1] > front_padding[i])
    {
      border_table[i] = front_padding[i - 1] - front_padding[i] + 1;
    }
    else
    {
      // The previous row has the smaller padding: it gets the wide border.
      border_table[i - 1] = front_padding[i] - front_padding[i - 1] + 1;
      border_table[i] = 1;
    }
  }

  // Check that we don't add too much border. The limit is computed
  // directly instead of decrementing one by one: the decrement loop never
  // terminated when the two paddings alone exceeded the width, because the
  // unsigned border wrapped around below zero.
  for (unsigned int i = 1; i < image.height - 1; ++i)
  {
    int const room = static_cast<int>(image.width) - front_padding[i] - back_padding[i];
    int const allowed = room > 0 ? room : 0;
    if (border_table[i] > allowed)
    {
      border_table[i] = static_cast<uint16_t>(allowed);
    }
  }

  border_table[image.height - 1] = image.width - front_padding[image.height - 1] - back_padding[image.height - 1];
  return border_table;
}
// Build the per-row "back border" widths, the counterpart of
// generate_border_table for the end of each row.
//
//   front_padding, front_border, back_padding : one entry per row of
//                                               `image`.
//   image                                     : only its width / height
//                                               are read.
//
// Returns an array of image.height entries allocated with new[]; the
// caller owns it.
//  - First and last rows: 0.
//  - Other rows: 1 when the back padding equals the previous row's,
//    otherwise the row with the smaller padding (current or previous)
//    receives the difference + 1.
//  - Finally each inner border is limited so that front padding +
//    front border + back border + back padding never exceeds image.width.
uint16_t* generate_border_table_back(uint16_t const* front_padding,
                                     uint16_t const* front_border,
                                     uint16_t const* back_padding,
                                     Image const& image)
{
  uint16_t* back_border = new uint16_t[image.height];
  // Nothing to fill for an empty image; also keeps `image.height - 1`
  // below from wrapping around.
  if (image.height == 0)
  {
    return back_border;
  }

  for (unsigned int i = 1; i < image.height - 1; ++i)
  {
    if (back_padding[i] == back_padding[i - 1])
    {
      back_border[i] = 1;
    }
    else if (back_padding[i - 1] > back_padding[i])
    {
      back_border[i] = back_padding[i - 1] - back_padding[i] + 1;
    }
    else
    {
      // The previous row has the smaller padding: it gets the wide border.
      back_border[i - 1] = back_padding[i] - back_padding[i - 1] + 1;
      back_border[i] = 1;
    }
  }
  // The loop may have written entry 0 (from row 1); the first row never
  // has a back border.
  back_border[0] = 0;

  // Check that we don't add too much border. The limit is computed
  // directly instead of decrementing one by one: the decrement loop never
  // terminated when the other three terms alone exceeded the width,
  // because the unsigned border wrapped around below zero.
  for (unsigned int i = 1; i < image.height - 1; ++i)
  {
    int const room = static_cast<int>(image.width)
      - front_padding[i] - front_border[i] - back_padding[i];
    int const allowed = room > 0 ? room : 0;
    if (back_border[i] > allowed)
    {
      back_border[i] = static_cast<uint16_t>(allowed);
    }
  }

  back_border[image.height - 1] = 0;
  return back_border;
}
//
//
// Image rotation
//
// Compute one output sample by bilinear interpolation of the source image.
//
//   src               : image being sampled.
//   src_rotated_point : sampling position in `src`, in fixed point with
//                       `q_pow` fractional bits.
//   rotate_buffer     : destination buffer; only rotate_buffer[rot_index]
//                       is written (the other components are commented out).
//   rot_index         : index of the sample to write.
//   q_pow             : number of fractional bits of src_rotated_point. It
//                       is shifted by (q_pow - 3), so it must be >= 3.
//
// No bounds checking is done here: the pixel at the integer position and
// its right / lower neighbours are read unconditionally, so the caller must
// only pass positions for which those four pixels exist.
inline
void rotate_pixel(Image const& src,
                  Point const& src_rotated_point,
                  pvalue_t* rotate_buffer, unsigned int rot_index,
                  int q_pow)
{
  // Quantize on a 8x8 grid
  int const q_inter_pow = 3;
  int const q_inter = 1 << q_inter_pow;
  int const mask = 0x07;
  int const src_x = src_rotated_point.x >> q_pow;
  int const src_y = src_rotated_point.y >> q_pow;
  // Bilinear interpolation: indices of the top-left, top-right,
  // bottom-left and bottom-right neighbours.
  unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
  unsigned int const src_index_2 = src_index_1 + src.pixel_size;
  unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
  unsigned int const src_index_4 = src_index_3 + src.pixel_size;
  pvalue_t const src_tl = src.buffer[src_index_1];
  pvalue_t const src_tr = src.buffer[src_index_2];
  pvalue_t const src_bl = src.buffer[src_index_3];
  pvalue_t const src_br = src.buffer[src_index_4];
  // Keep the 3 most significant fractional bits as interpolation weights
  // (0..7); the complementary weights are 8 - weight.
  unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
  unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
  unsigned int const inv_x = q_inter - x_delta;
  unsigned int const inv_y = q_inter - y_delta;
#ifndef SIMD
  // The two weight products sum to 8 * 8, hence the final shift by 6.
  pvalue_t interpolated = ((src_tl * inv_x + src_tr * x_delta) * inv_y
                           + (src_bl * inv_x + src_br * x_delta) * y_delta) >> (q_inter_pow << 1);
  rotate_buffer[rot_index] = interpolated;
  // rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
  // + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
  // rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
  // + (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta) >> 6;
#else
  // NOTE(review): this path treats the buffer as eight packed 16-bit lanes
  // and combines lanes 0 and 4 as the left / right neighbours -- confirm
  // that this matches pvalue_t and pixel_size before enabling SIMD.
  // X-axis: lanes 0-3 are weighted by inv_x, lanes 4-7 by x_delta.
  __m128i top = _mm_loadu_si128((__m128i*) &src.buffer[src_index_1]);
  __m128i bottom = _mm_loadu_si128((__m128i*) &src.buffer[src_index_3]);
  __m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
  top = _mm_mullo_epi16(top, coef);
  bottom = _mm_mullo_epi16(bottom, coef);
  // Y-axis
  coef = _mm_set1_epi16(inv_y);
  top = _mm_mullo_epi16(top, coef);
  coef = _mm_set1_epi16(y_delta);
  bottom = _mm_mullo_epi16(bottom, coef);
  top = _mm_add_epi16(top, bottom);
  // Normalise by the weight range (8 * 8), like the scalar path. The shift
  // used to be 2 * q_pow, i.e. 20 bits with the q_pow = 10 passed by
  // rotate(), which clears every 16-bit lane.
  top = _mm_srli_epi16(top, 2 * q_inter_pow);
  rotate_buffer[rot_index] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
  // rotate_buffer[rot_index + 1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
  // rotate_buffer[rot_index + 2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
#endif // ! SIMD
}
// Rotate `src` by `angle` degrees and return the result as a newly
// allocated image; the caller owns the returned pointer.
//
// The output is produced row by row. For every output pixel the matching
// source position is tracked incrementally in fixed point (10 fractional
// bits) and sampled with rotate_pixel(); pixels listed as padding by
// generate_padding_table() are zero-filled instead.
Image* rotate(Image const& src, double angle)
{
  double const rotation = (angle / 180.0f) * M_PI;

  unsigned int out_width = 0;
  unsigned int out_height = 0;
  compute_output_size(src, rotation, out_width, out_height);
  Image* rotated = new Image(out_width, out_height, src.type);

  // Map the output origin, and the points src.width / src.height away
  // from it along each axis, back into the source (centred coordinates).
  DPoint const origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
  DPoint span_x = get_mapped_point(*rotated, Point(src.width, 0), -rotation);
  span_x -= origin_in_src_grid;
  round_if_very_small(span_x.x);
  round_if_very_small(span_x.y);
  DPoint span_y = get_mapped_point(*rotated, Point(0, src.height), -rotation);
  span_y -= origin_in_src_grid;
  round_if_very_small(span_y.x);
  round_if_very_small(span_y.y);

  // Quantized position on a grid
  int const q_pos_pow = 10;
  int const q_pos = 1 << q_pos_pow;

  // Fixed-point source displacement for one output pixel to the right
  // (qdx) and one output row down (qdy).
  // TODO: we could have only one delta and deduce the other one
  Point const qdx(std::ceil(span_x.x * q_pos / src.width),
                  std::ceil(span_x.y * q_pos / src.width));
  Point const qdy(std::ceil(span_y.x * q_pos / src.height),
                  std::ceil(span_y.y * q_pos / src.height));
  if (false)
  {
    LOG << "src delta x: " << span_x << endl;
    LOG << "src delta y: " << span_y << endl;
    LOG << "qdx: " << qdx << endl;
    LOG << "qdy: " << qdy << endl;
    LOG << "q pos: " << q_pos << endl;
  }

  // Source position, in pixel coordinates then in fixed point, of the
  // first output pixel.
  DPoint const origin_in_src = convert_img_coord_precision(src, origin_in_src_grid);
  Point row_origin(origin_in_src.x * q_pos,
                   origin_in_src.y * q_pos);

  pvalue_t* const pixels = rotated->buffer;
  int const width = rotated->width;
  int const height = rotated->height;
  int const src_qwidth = src.width * q_pos;
  int const src_qheight = src.height * q_pos;

  // Padding
  unique_ptr<uint16_t[]> padding_table(generate_padding_table(*rotated, row_origin,
                                                              qdx, qdy,
                                                              src_qwidth, src_qheight,
                                                              q_pos));

  unsigned int out_index = 0;
  // Right padding of the previous row: it is contiguous in memory with the
  // left padding of the current row, so both are cleared together.
  int carried_padding = 0;
  for (int row = 0; row < height; ++row)
  {
    int const left_padding = padding_table[2 * row];
    int const right_padding = padding_table[2 * row + 1];
    int const core_pixels = width - left_padding - right_padding;
    if (core_pixels < 0)
    {
      ERRLOG << "Too much border at line " << row << endl;
      ERRLOG << " width = " << width << endl;
      ERRLOG << " left padding = " << left_padding << endl;
      ERRLOG << " right padding = " << right_padding << endl;
      ERRLOG << " core pixels = " << core_pixels << endl;
      abort();
    }
    if (false)
    {
      LOG << "left padding = " << left_padding << endl;
      LOG << "right padding = " << right_padding << endl;
      LOG << "core pixels = " << core_pixels << endl;
    }

    // Padding
    int const blank = left_padding + carried_padding;
    memset(pixels + out_index, 0, blank * sizeof (pvalue_t));
    out_index += blank;
    carried_padding = right_padding;

    // Body: walk the source along qdx, one sample per output pixel.
    Point sample(row_origin.x + left_padding * qdx.x,
                 row_origin.y + left_padding * qdx.y);
    for (int remaining = core_pixels; remaining > 0; --remaining)
    {
      rotate_pixel(src, sample, pixels, out_index, q_pos_pow);
      ++out_index;
      sample += qdx;
    }

    row_origin += qdy;
  }

  // Final right padding
  memset(pixels + out_index, 0, padding_table[2 * (height - 1) + 1] * sizeof (pvalue_t));
  return rotated;
}
//
//
// Check
//
bool check_points()
{
Image five(5, 5, pnm::Format::PGM);
Point origin(0, 0);
DPoint d1 = convert_grid_coord(five, origin);
assert(d1.x == -2);
assert(d1.y == -2);
return true;
}
bool check_trigo()
{
Image square(500, 500, pnm::Format::PGM);
double const ratio = compute_ratio(square);
double const sigma = 1.0e-2;
if (!fequal(ratio, 1 / 707.106, sigma))
{
cerr << __LINE__ << " | Invalid ratio: " << ratio << " != " << 1 / 707.106 << endl;
return false;
}
// Check that the origin of a square image is at sqrt(2) / 2
double const angle = convert_radian(square, Point(0, 0), ratio);
if (!fequal(angle, 3 * M_PI / 4, sigma))
{
cerr << __LINE__ << " | Invalid angle value: " << angle << " != " << 3 * M_PI / 4 << endl;
return false;
}
// Check that we can reverse the origin point.
DPoint const abs_reverse_point = convert_abs_coord(angle, ratio);
Point const reverse_point = convert_img_coord(square, abs_reverse_point);
if (!fequal(0.0, reverse_point.x, sigma)
|| !fequal(0.0, reverse_point.y, sigma))
{
cerr << __LINE__ << "Reverse origin fail" << endl;
cerr << " " << reverse_point << " != (0, 0)" << endl;
cerr << " abs point " << abs_reverse_point << endl;
return false;
}
// Check that when rotating the origin by 45 degrees
double const rotation = M_PI / 4; // 45 degrees
unsigned int w = 0;
unsigned int h = 0;
compute_output_size(square, rotation, w, h);
if (!fequal(w, square.width * sqrt(2), sigma * square.width)
|| !fequal(h, square.height * sqrt(2), sigma * square.height))
{
cerr << "Invalid rotated image dimensions " << w << " x " << h << endl;
cerr << " expected " << (int) ceil(square.width * sqrt(2)) << " x " << (int) ceil(square.height * sqrt(2)) << endl;
return false;
}
Image rotated(w, h, pnm::Format::PGM);
DPoint const a_p45 = convert_abs_coord(angle + rotation, ratio);
Point const p45 = convert_img_coord(rotated, a_p45);
if (!fequal(0, p45.x, sigma))
{
cerr << __LINE__ << " > Rotation origin by 45 degrees:" << endl;
cerr << " invalid x value: " << p45.x << " != " << 0 << endl;
cerr << " absolute point: " << a_p45 << endl;
cerr << " relative point: " << p45 << endl;
return false;
}
if (!fequal(p45.y, (h - 1) / 2.0f, sigma))
{
cerr << __LINE__ << " > Rotation origin by 45 degrees:" << endl;
cerr << "Invalid y value: " << p45.y << " != " << (h - 1) / 2.0f << endl;
cerr << " absolute point: " << a_p45 << endl;
cerr << " relative point: " << p45 << endl;
return false;
}
// Polar coordinates
{
DPoint const d(-42.5, 37.5);
double angle = 0;
double dist = 0;
convert_abs_to_polar_coord(d, angle, dist);
DPoint const reversed = convert_polar_to_grid_coord(angle, dist);
if (!fequal(d.x, reversed.x, sigma)
|| !fequal(d.y, reversed.y, sigma))
{
cerr << __LINE__ << " > Reverse polar coordinates:" << endl;
cerr << reversed << " != " << d << endl;
cerr << "polar (" << angle << ", " << dist << ")" << endl;
return false;
}
}
return true;
}
bool check_00(string const& path)
{
Image const src(path);
Image const* rotated = rotate(src, 0);
for (unsigned int y = 0; y < rotated->height; ++y)
{
for (unsigned int x = 0; x < rotated->width; ++x)
{
unsigned rot_index = (y * rotated->width + x) * rotated->pixel_size;
unsigned src_index = (y * src.width + x) * src.pixel_size;
if (memcmp(&rotated->buffer[rot_index], &src.buffer[src_index], src.pixel_size * sizeof (pvalue_t)) != 0)
{
Point r(x, y);
Point s(x, y);
LOG << "R" << r << " != S" << s << endl;
LOG << "R: " << rot_index << " != S: " << src_index << endl;
LOG << rotated->buffer[rot_index] << " != " << src.buffer[src_index] << endl;
LOG << "R dim: " << rotated->width << " x " << rotated->height << endl;
LOG << "S dim: " << src.width << " x " << src.height << endl;
return false;
}
}
}
delete rotated;
return true;
}
bool check_90(string const& path)
{
Image const src(path);
Image const* rotated = rotate(src, 90);
for (unsigned int y = 0; y < rotated->height; ++y)
{
for (unsigned int x = 0; x < rotated->width; ++x)
{
unsigned rot_index = (y * rotated->width + x) * rotated->pixel_size;
unsigned src_index = (x * src.width + (src.width - 1 - y)) * src.pixel_size;
if (memcmp(&rotated->buffer[rot_index], &src.buffer[src_index], src.pixel_size * sizeof (pvalue_t)) != 0)
{
Point r(x, y);
Point s((src.width - 1 - y), x);
cerr << __LINE__ << " | R: " << r << " != S:" << s << endl;
cerr << "R dim: " << rotated->width << " x " << rotated->height << endl;
cerr << "S dim: " << src.width << " x " << src.height << endl;
return false;
}
}
}
delete rotated;
return true;
}
//
//
// Main
//
// Build the output file name "<base>_<i>.pnm", where `i` is written with
// at least three digits (zero padded), e.g. ("rotated", 5) gives
// "rotated_005.pnm".
std::string get_save_path(std::string const& base, unsigned int i)
{
  std::stringstream filename;
  //filename << "/tmp/";
  filename << base << "_"
           << std::setfill('0') << std::setw(3) << i
           << ".pnm";
  return filename.str();
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
cout << "Usage: " << argv[0] << " image.ppm" << endl;
return 1;
}
bool perform_check = false;
if (perform_check)
{
if (!check_points())
return 1;
if (!check_trigo())
return 1;
if (!check_00(argv[1]))
{
ERRLOG << "0 degree check failed" << endl << endl;
return 1;
}
if (!check_90(argv[1]))
{
ERRLOG << "90 degrees check failed" << endl << endl;
return 1;
}
}
double const step = 5;
bool save_output_img = true;
bool print_each_run = false;
// No tile
Image img(argv[1]);
float average = 0.0;
int i = 0;
for (double rotation = 0; rotation <= 360; rotation += step)
{
auto const before = chrono::high_resolution_clock::now();
Image* const rotated = rotate(img, rotation);
auto const after = chrono::high_resolution_clock::now();
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count();
if (print_each_run)
cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated", rotation));
delete rotated;
++i;
}
cout << "---------" << endl;
cout << " average: " << average / i << "ms" << endl << endl;
return 0;
}