On 09/02/2009 10:09 PM, Diederik van Lierop wrote:
Hi all,
As reported in https://bugs.launchpad.net/inkscape/+bug/168384, bitmap pixels appear slightly offset when nearing the maximum zoom.
I found out that this is caused by the fixed point calculations in libnr, for example in nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0(). This is easily fixed by increasing the fixed point precision from 12 to 18 bits on line 42 in libr/nr-compose-transform.cpp, but if we do that then our headroom will become quite small. Can we switch to long long instead of just long? Or should we go to floating point math, and accept the speed penalty (if any)? Or should we leave things as they are?
Some simple measurements showed that using "double" increased the rendering time with with 18%, whereas "long long" needed only 5% extra time (measured with oversampling turned off, making this a worst-case scenario for the relative measurement). So I've prepared a patch using "long long" and attached it to this mail.
Now I'll leave it up to the release wardens to have a quick look and to decide whether this should be included in v0.47.
Diederik
Index: /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp =================================================================== --- /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp (revision 22193) +++ /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp (working copy) @@ -25,10 +25,10 @@ int nr_have_mmx (void); void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int sw, int sh, int srs, - const long *FFd2s, unsigned int alpha); + const long long *FFd2s, unsigned int alpha); void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int sw, int sh, int srs, - const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits); + const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits); #define NR_PIXOPS_MMX (1 && nr_have_mmx ()) #ifdef __cplusplus } @@ -40,6 +40,7 @@
/* Fixed point precision */ #define FBITS 12 +#define FBITS_HP 18 // In some places we need a higher precision
void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int sw, int sh, int srs, @@ -168,10 +169,10 @@ static void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int sw, int sh, int srs, - const long *FFd2s, unsigned int alpha) + const long long *FFd2s, unsigned int alpha) { - unsigned char *d0; - int FFsx0, FFsy0; + unsigned char *d0; + long long FFsx0, FFsy0; int x, y;
d0 = px; @@ -180,15 +181,15 @@
for (y = 0; y < h; y++) { unsigned char *d; - long FFsx, FFsy; + long long FFsx, FFsy; d = d0; FFsx = FFsx0; FFsy = FFsy0; for (x = 0; x < w; x++) { long sx, sy; - sx = FFsx >> FBITS; + sx = long(FFsx >> FBITS_HP); if ((sx >= 0) && (sx < sw)) { - sy = FFsy >> FBITS; + sy = long(FFsy >> FBITS_HP); if ((sy >= 0) && (sy < sh)) { const unsigned char *s; unsigned int a; @@ -224,11 +225,11 @@ static void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int sw, int sh, int srs, - const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits) + const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits) { int size; unsigned char *d0; - int FFsx0, FFsy0; + long long FFsx0, FFsy0; int x, y;
size = (1 << dbits); @@ -242,7 +243,7 @@
for (y = 0; y < h; y++) { unsigned char *d; - long FFsx, FFsy; + long long FFsx, FFsy; d = d0; FFsx = FFsx0; FFsy = FFsy0; @@ -252,9 +253,9 @@ r = g = b = a = 0; for (i = 0; i < size; i++) { long sx, sy; - sx = (FFsx + FF_S[2 * i]) >> FBITS; + sx = (FFsx >> FBITS_HP) + (FF_S[2 * i] >> FBITS); if ((sx >= 0) && (sx < sw)) { - sy = (FFsy + FF_S[2 * i + 1]) >> FBITS; + sy = (FFsy >> FBITS_HP) + (FF_S[2 * i + 1] >> FBITS); if ((sy >= 0) && (sy < sh)) { const unsigned char *s; unsigned int ca; @@ -302,6 +303,7 @@ { int dbits; long FFd2s[6]; + long long FFd2s_HP[6]; // with higher precision int i;
if (alpha == 0) return; @@ -310,17 +312,18 @@
for (i = 0; i < 6; i++) { FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5); + FFd2s_HP[i] = (long) (d2s[i] * (1 << FBITS_HP) + 0.5);; }
if (dbits == 0) { #ifdef WITH_MMX if (NR_PIXOPS_MMX) { /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */ - nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha); + nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha); return; } #endif - nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha); + nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha); } else { int xsize, ysize; long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S; @@ -347,11 +350,11 @@ #ifdef WITH_MMX if (NR_PIXOPS_MMX) { /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */ - nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits); + nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits); return; } #endif - nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits); + nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits); } }