Re: [Inkscape-devel] Fixed point arithmetic in libnr causes pixel offset at large zoom

6 Sep 2009

On 09/02/2009 10:09 PM, Diederik van Lierop wrote:
...
Hi all,
As reported in https://bugs.launchpad.net/inkscape/+bug/168384, bitmap 
pixels appear slightly offset when nearing the maximum zoom.
I found out that this is caused by the fixed point calculations in 
libnr, for example in nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0(). 
This is easily fixed by increasing the fixed point precision from 12 to 
18 bits on line 42 in libnr/nr-compose-transform.cpp, but if we do that 
then our headroom will become quite small.  Can we switch to long long 
instead of just long? Or should we go to floating point math, and accept 
the speed penalty (if any)? Or should we leave things as they are?
Some simple measurements showed that using "double" increased the 
rendering time by 18%, whereas "long long" needed only 5% extra 
time (measured with oversampling turned off, making this a worst-case 
scenario for the relative measurement). So I've prepared a patch using 
"long long" and attached it to this mail.
Now I'll leave it up to the release wardens to have a quick look and to 
decide whether this should be included in v0.47.
Diederik
Index: /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp
===================================================================

--- /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp	(revision 22193)
+++ /home/diedenrezi/eclipse/InkscapeSVN/src/libnr/nr-compose-transform.cpp	(working copy)
@@ -25,10 +25,10 @@
 int nr_have_mmx (void);
 void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
    						  const unsigned char *spx, int sw, int sh, int srs,
-							  const long *FFd2s, unsigned int alpha);
+							  const long long *FFd2s, unsigned int alpha);
 void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
    						  const unsigned char *spx, int sw, int sh, int srs,
-							  const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits);
+							  const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits);
 #define NR_PIXOPS_MMX (1 && nr_have_mmx ())
 #ifdef __cplusplus
 }
@@ -40,6 +40,7 @@
/* Fixed point precision */
 #define FBITS 12
+#define FBITS_HP 18 // In some places we need a higher precision
void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
    				       const unsigned char *spx, int sw, int sh, int srs,
@@ -168,10 +169,10 @@
 static void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
    					 const unsigned char *spx, int sw, int sh, int srs,
-						 const long *FFd2s, unsigned int alpha)
+						 const long long *FFd2s, unsigned int alpha)
 {
-	unsigned char *d0;
-	int FFsx0, FFsy0;
+    unsigned char *d0;
+	long long FFsx0, FFsy0;
    int x, y;
d0 = px;
@@ -180,15 +181,15 @@
for (y = 0; y < h; y++) {
    	unsigned char *d;
-		long FFsx, FFsy;
+		long long FFsx, FFsy;
    	d = d0;
    	FFsx = FFsx0;
    	FFsy = FFsy0;
    	for (x = 0; x < w; x++) {
    		long sx, sy;
-			sx = FFsx >> FBITS;
+			sx = long(FFsx >> FBITS_HP);
    		if ((sx >= 0) && (sx < sw)) {
-				sy = FFsy >> FBITS;
+				sy = long(FFsy >> FBITS_HP);
    			if ((sy >= 0) && (sy < sh)) {
    				const unsigned char *s;
    				unsigned int a;
@@ -224,11 +225,11 @@
 static void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
    					 const unsigned char *spx, int sw, int sh, int srs,
-						 const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
+						 const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
 {
    int size;
    unsigned char *d0;
-	int FFsx0, FFsy0;
+	long long FFsx0, FFsy0;
    int x, y;
size = (1 << dbits);
@@ -242,7 +243,7 @@
for (y = 0; y < h; y++) {
    	unsigned char *d;
-		long FFsx, FFsy;
+		long long FFsx, FFsy;
    	d = d0;
    	FFsx = FFsx0;
    	FFsy = FFsy0;
@@ -252,9 +253,9 @@
    		r = g = b = a = 0;
    		for (i = 0; i < size; i++) {
    			long sx, sy;
-				sx = (FFsx + FF_S[2 * i]) >> FBITS;
+				sx = (FFsx >> FBITS_HP) + (FF_S[2 * i] >> FBITS);
    			if ((sx >= 0) && (sx < sw)) {
-					sy = (FFsy + FF_S[2 * i + 1]) >> FBITS;
+					sy = (FFsy >> FBITS_HP) + (FF_S[2 * i + 1] >> FBITS);
    				if ((sy >= 0) && (sy < sh)) {
    					const unsigned char *s;
    					unsigned int ca;
@@ -302,6 +303,7 @@
 {
    int dbits;
    long FFd2s[6];
+	long long FFd2s_HP[6]; // with higher precision
    int i;
if (alpha == 0) return;
@@ -310,17 +312,18 @@
for (i = 0; i < 6; i++) {
    	FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
+		FFd2s_HP[i] = (long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
    }
if (dbits == 0) {
 #ifdef WITH_MMX
    	if (NR_PIXOPS_MMX) {
    		/* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
-			nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
+			nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
    		return;
    	}
 #endif
-		nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
+		nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
    } else {
    	int xsize, ysize;
    	long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
@@ -347,11 +350,11 @@
 #ifdef WITH_MMX
    	if (NR_PIXOPS_MMX) {
    		/* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
-			nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
+			nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);
    		return;
    	}
 #endif
-		nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
+		nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);
    }
 }