Decompose complex expressions into simpler statements.
Introduce several const variables for intermediate values to make profiling easier.
This commit is contained in:
parent
4dba86c9f0
commit
da4bdf7702
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,6 +3,7 @@
|
||||||
*.swp
|
*.swp
|
||||||
*.o
|
*.o
|
||||||
|
|
||||||
|
rotation
|
||||||
rotated*.ppm
|
rotated*.ppm
|
||||||
rotated*.pnm
|
rotated*.pnm
|
||||||
*.png
|
*.png
|
||||||
|
|
5
Makefile
5
Makefile
|
@ -1,9 +1,8 @@
|
||||||
include Makefile.rules
|
include Makefile.rules
|
||||||
|
|
||||||
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -g $(CXXFLAGS_PLAFORM)
|
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -g $(CXXFLAGS_PLAFORM)
|
||||||
LFLAGS = #-flto
|
|
||||||
DEFINES = #-DSIMD
|
DEFINES = #-DSIMD
|
||||||
BUILD_DIR = /tmp
|
BUILD_DIR = .
|
||||||
SRC = rotation.cpp \
|
SRC = rotation.cpp \
|
||||||
image.cpp \
|
image.cpp \
|
||||||
pnm.cpp
|
pnm.cpp
|
||||||
|
@ -19,7 +18,7 @@ all: $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(DEFINES) $< -c -o $@
|
$(CXX) $(CXXFLAGS) $(DEFINES) $< -c -o $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.*
|
@rm -f rotation *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.* callgrind.out.*
|
||||||
|
|
||||||
run: all
|
run: all
|
||||||
$(BUILD_DIR)/rotation $(IMG)
|
$(BUILD_DIR)/rotation $(IMG)
|
||||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -4,12 +4,14 @@ case `uname -s` in
|
||||||
|
|
||||||
Darwin)
|
Darwin)
|
||||||
echo 'CXX = clang++' > Makefile.rules
|
echo 'CXX = clang++' > Makefile.rules
|
||||||
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
|
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
|
||||||
|
echo 'LFLAGS = -flto' >> Makefile.rules
|
||||||
;;
|
;;
|
||||||
|
|
||||||
Linux)
|
Linux)
|
||||||
echo 'CXX = g++' > Makefile.rules
|
echo 'CXX = g++' > Makefile.rules
|
||||||
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
|
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
|
||||||
|
echo 'LFLAGS = ' >> Makefile.rules
|
||||||
;;
|
;;
|
||||||
|
|
||||||
esac
|
esac
|
||||||
|
|
24
rotation.cpp
24
rotation.cpp
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
#include <tmmintrin.h>
|
||||||
|
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
|
||||||
|
@ -177,6 +178,7 @@ void rotate_pixel(Image const& src,
|
||||||
|
|
||||||
// Bilinear interpolation
|
// Bilinear interpolation
|
||||||
unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
|
unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
|
||||||
|
unsigned int const src_index_2 = src_index_1 + src.pixel_size;
|
||||||
unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
|
unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
|
||||||
unsigned int const src_index_4 = src_index_3 + src.pixel_size;
|
unsigned int const src_index_4 = src_index_3 + src.pixel_size;
|
||||||
|
|
||||||
|
@ -186,6 +188,11 @@ void rotate_pixel(Image const& src,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pvalue_t const src_tl = src.buffer[src_index_1];
|
||||||
|
pvalue_t const src_tr = src.buffer[src_index_2];
|
||||||
|
pvalue_t const src_bl = src.buffer[src_index_3];
|
||||||
|
pvalue_t const src_br = src.buffer[src_index_4];
|
||||||
|
|
||||||
unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
|
unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
|
||||||
unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
|
unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
|
||||||
unsigned int const inv_x = q_inter - x_delta;
|
unsigned int const inv_x = q_inter - x_delta;
|
||||||
|
@ -193,10 +200,9 @@ void rotate_pixel(Image const& src,
|
||||||
|
|
||||||
#ifndef SIMD
|
#ifndef SIMD
|
||||||
|
|
||||||
unsigned int src_index_2 = src_index_1 + src.pixel_size;
|
pvalue_t interpolated = ((src_tl * inv_x + src_tr * x_delta) * inv_y
|
||||||
|
+ (src_bl * inv_x + src_br * x_delta) * y_delta) >> (q_inter_pow << 1);
|
||||||
rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
|
rotate_buffer[rot_index] = interpolated;
|
||||||
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> (q_inter_pow << 1);
|
|
||||||
// rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
// rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
||||||
// + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
|
// + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
|
||||||
// rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
// rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
||||||
|
@ -260,13 +266,16 @@ Image* rotate(Image const& src, double angle)
|
||||||
int64_t const src_qheight = (int64_t) src.height * q_pos;
|
int64_t const src_qheight = (int64_t) src.height * q_pos;
|
||||||
|
|
||||||
unsigned int const src_limit = src.width * src.height * src.pixel_size;
|
unsigned int const src_limit = src.width * src.height * src.pixel_size;
|
||||||
|
size_t const buffer_step = rotated->pixel_size;
|
||||||
|
int const width = rotated->width;
|
||||||
|
int const height = rotated->height;
|
||||||
|
|
||||||
for (int y = 0; y < (int) rotated->height; ++y)
|
for (int y = 0; y < height; ++y)
|
||||||
{
|
{
|
||||||
Point src_rotated_point((rot_origin_in_src.x * q_pos) + y * qdy.x,
|
Point src_rotated_point((rot_origin_in_src.x * q_pos) + y * qdy.x,
|
||||||
(rot_origin_in_src.y * q_pos) + y * qdy.y);
|
(rot_origin_in_src.y * q_pos) + y * qdy.y);
|
||||||
|
|
||||||
for (unsigned int x = 0; x < rotated->width; ++x)
|
for (int x = 0; x < width; ++x)
|
||||||
{
|
{
|
||||||
if (src_rotated_point.x >= 0 && src_rotated_point.x < src_qwidth
|
if (src_rotated_point.x >= 0 && src_rotated_point.x < src_qwidth
|
||||||
&& src_rotated_point.y >= 0 && src_rotated_point.y < src_qheight)
|
&& src_rotated_point.y >= 0 && src_rotated_point.y < src_qheight)
|
||||||
|
@ -278,8 +287,7 @@ Image* rotate(Image const& src, double angle)
|
||||||
}
|
}
|
||||||
|
|
||||||
src_rotated_point += qdx;
|
src_rotated_point += qdx;
|
||||||
|
buffer_index += buffer_step;
|
||||||
buffer_index += rotated->pixel_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue