parallella · mansourmoufid · Jul 2, 2015 · Jul 4, 2015 · Jul 4, 2015 · Jul 4, 2015
diff --git a/include/pal_math.h b/include/pal_math.h
@@ -145,6 +145,12 @@ void p_div_f32(const float *a, const float *b, float *c, int n);
 /*exponential: c = exp ( a ) */
 void p_exp_f32(const float *a, float *c, int n);
 
+/*floating-point remainder: c = a - i * x (+ x when a < 0) */
+void p_fmod_f32(const float *a, float *c, int n, const float x);
+
+/*floating-point remainder with divisor 2pi: c = a - i * 2pi (+ 2pi when a < 0) */
+void p_fmod_2pi_f32(const float *a, float *c, int n);
+
 /*inverse: c = 1 / ( a ) */
 void p_inv_f32(const float *a, float *c, int n);
 

diff --git a/src/math/Makefile.am b/src/math/Makefile.am
@@ -21,6 +21,7 @@ libpal_math_la_SOURCES = \
     p_div.c \
     p_dot.c \
     p_exp.c \
+    p_fmod.c p_fmod.h \
     p_ftoi.c \
     p_inv.c \
     p_invcbrt.c \

diff --git a/src/math/p_fmod.c b/src/math/p_fmod.c
@@ -0,0 +1,31 @@
+#include <pal.h>
+
+#include "p_fmod.h"
+
+/**
+ * Compute the floating-point remainder of a[i] / x for each element.
+ *
+ * With j = a[i] / x truncated toward zero, the result is
+ * r = a[i] - j * x for a[i] >= 0, and r = a[i] - j * x + x
+ * for a[i] < 0, such that 0 <= r <= x for a positive divisor x.
+ *
+ * @param a     Pointer to the input vector
+ * @param c     Pointer to the output vector (c == a is allowed)
+ * @param n     The size of the a and c vectors
+ * @param x     The divisor; it is not checked against zero here
+ * @return      None
+ */
+void p_fmod_f32(const float *a, float *c, int n, const float x)
+{
+    _p_fmod(a, c, n, x);
+}
+
+/**
+ * Compute the floating-point remainder with divisor 2pi.
+ *
+ * Same as p_fmod_f32 with x = 2.f * M_PI; see p_fmod_f32 for details.
+ */
+void p_fmod_2pi_f32(const float *a, float *c, int n)
+{
+    _p_fmod(a, c, n, 2.f * M_PI);
+}
diff --git a/src/math/p_fmod.h b/src/math/p_fmod.h
@@ -0,0 +1,21 @@
+#pragma once
+
+/* Return x - i * y, with i = x / y truncated toward zero, plus y when x < 0. */
+static inline float __p_fmod(const float x, const float y)
+{
+    long int i; /* NOTE(review): x / y must fit in a long; y == 0 is not guarded */
+    i = x / y; /* float-to-integer conversion discards the fractional part */
+    return x - i * y + (x < 0.f ? y : 0.f);
+}
+
+/* Map __p_fmod over an array: c[i] = __p_fmod(a[i], x) for 0 <= i < n. */
+static inline void _p_fmod(const float *a, float *c, int n, const float x)
+{
+    int i;
+    float tmp;
+    for (i = 0; i < n; i++) {
+        /* a[i] is consumed before c[i] is stored, so c may be the same as a. */
+        tmp = __p_fmod(a[i], x);
+        c[i] = tmp;
+    }
+}
diff --git a/tests/math/Makefile.am b/tests/math/Makefile.am
@@ -33,6 +33,7 @@ BUILT_SOURCES = \
     gold/p_div_f32.gold.h \
     gold/p_dot_f32.gold.h \
     gold/p_exp_f32.gold.h \
+    gold/p_fmod_2pi_f32.gold.h \
     gold/p_invcbrt_f32.gold.h \
     gold/p_inv_f32.gold.h \
     gold/p_invsqrt_f32.gold.h \
@@ -86,6 +87,7 @@ check_PROGRAMS = \
 	check_p_div_f32 \
 	check_p_dot_f32 \
 	check_p_exp_f32 \
+	check_p_fmod_2pi_f32 \
 	check_p_ftoi \
 	check_p_invcbrt_f32 \
 	check_p_inv_f32 \
@@ -132,6 +134,7 @@ check_p_div_f32_SOURCES         = $(SIMPLE)
 check_p_dot_f32_SOURCES         = $(SIMPLE)
 check_p_exp_f32_SOURCES         = $(SIMPLE) p_exp.c
 check_p_ftoi_SOURCES            = notest.c
+check_p_fmod_2pi_f32_SOURCES    = $(SIMPLE) p_fmod_2pi.c
 check_p_inv_f32_SOURCES         = $(SIMPLE)
 check_p_invcbrt_f32_SOURCES     = $(SIMPLE)
 check_p_invsqrt_f32_SOURCES     = $(SIMPLE) p_invsqrt.c
@@ -177,6 +180,7 @@ check_p_cosh_f32_CFLAGS         = -DFUNCTION=p_cosh_f32    -DIS_UNARY
 check_p_div_f32_CFLAGS          = -DFUNCTION=p_div_f32     -DIS_BINARY
 check_p_dot_f32_CFLAGS          = -DFUNCTION=p_dot_f32     -DIS_BINARY -DSCALAR_OUTPUT
 check_p_exp_f32_CFLAGS          = -DFUNCTION=p_exp_f32     -DIS_UNARY
+check_p_fmod_2pi_f32_CFLAGS     = -DFUNCTION=p_fmod_2pi_f32 -DIS_UNARY
 check_p_ftoi_CFLAGS             = -DFUNCTION=p_ftoi
 check_p_invcbrt_f32_CFLAGS      = -DFUNCTION=p_invcbrt_f32 -DIS_UNARY
 check_p_inv_f32_CFLAGS          = -DFUNCTION=p_inv_f32     -DIS_UNARY

diff --git a/tests/math/gold/p_fmod_2pi_f32.dat b/tests/math/gold/p_fmod_2pi_f32.dat
@@ -0,0 +1,100 @@
+-73.151069,0.000000,0.000000,2.247155
+-41.229381,0.000000,0.000000,2.752916
+-41.571556,0.000000,0.000000,2.410741
+87.935243,0.000000,0.000000,6.253834
+72.091828,0.000000,0.000000,2.976790
+38.719546,0.000000,0.000000,1.020434
+73.946113,0.000000,0.000000,4.831074
+-56.302218,0.000000,0.000000,0.246449
+-86.870868,0.000000,0.000000,1.093726
+-62.092752,0.000000,0.000000,0.739101
+92.185153,0.000000,0.000000,4.220559
+87.685583,0.000000,0.000000,6.004174
+-73.134079,0.000000,0.000000,2.264145
+58.260211,0.000000,0.000000,1.711543
+93.208450,0.000000,0.000000,5.243856
+-57.352012,0.000000,0.000000,5.479841
+95.949440,0.000000,0.000000,1.701660
+25.358186,0.000000,0.000000,0.225445
+-73.722375,0.000000,0.000000,1.675848
+-34.751585,0.000000,0.000000,2.947527
+22.526080,0.000000,0.000000,3.676524
+96.421245,0.000000,0.000000,2.173466
+-35.379634,0.000000,0.000000,2.319477
+32.640696,0.000000,0.000000,1.224770
+70.247906,0.000000,0.000000,1.132868
+69.145630,0.000000,0.000000,0.030591
+93.967350,0.000000,0.000000,6.002756
+-32.006928,0.000000,0.000000,5.692184
+5.391561,0.000000,0.000000,5.391561
+-99.097048,0.000000,0.000000,1.433917
+67.876876,0.000000,0.000000,5.045022
+65.030388,0.000000,0.000000,2.198534
+60.885730,0.000000,0.000000,4.337062
+89.882442,0.000000,0.000000,1.917848
+-78.777811,0.000000,0.000000,2.903598
+77.015560,0.000000,0.000000,1.617336
+47.657817,0.000000,0.000000,3.675519
+-0.539278,0.000000,0.000000,5.743908
+63.199742,0.000000,0.000000,0.367888
+58.966444,0.000000,0.000000,2.417776
+27.721960,0.000000,0.000000,2.589218
+26.160962,0.000000,0.000000,1.028221
+16.848931,0.000000,0.000000,4.282561
+11.398246,0.000000,0.000000,5.115060
+-5.287673,0.000000,0.000000,0.995512
+-75.662695,0.000000,0.000000,6.018714
+17.304752,0.000000,0.000000,4.738382
+-75.666061,0.000000,0.000000,6.015348
+-30.177494,0.000000,0.000000,1.238433
+-0.402864,0.000000,0.000000,5.880321
+-50.843464,0.000000,0.000000,5.705203
+-62.476272,0.000000,0.000000,0.355581
+17.254263,0.000000,0.000000,4.687892
+-76.803239,0.000000,0.000000,4.878170
+14.964506,0.000000,0.000000,2.398135
+97.402893,0.000000,0.000000,3.155113
+-5.402715,0.000000,0.000000,0.880470
+-60.431126,0.000000,0.000000,2.400727
+70.271604,0.000000,0.000000,1.156566
+15.487283,0.000000,0.000000,2.920912
+-68.864562,0.000000,0.000000,0.250477
+18.015849,0.000000,0.000000,5.449479
+-41.245544,0.000000,0.000000,2.736754
+-73.827542,0.000000,0.000000,1.570682
+64.470133,0.000000,0.000000,1.638280
+86.252020,0.000000,0.000000,4.570611
+32.344140,0.000000,0.000000,0.928214
+95.303687,0.000000,0.000000,1.055908
+75.493892,0.000000,0.000000,0.095668
+-94.643699,0.000000,0.000000,5.887266
+-50.186897,0.000000,0.000000,0.078585
+6.115036,0.000000,0.000000,6.115036
+-34.549710,0.000000,0.000000,3.149402
+61.429573,0.000000,0.000000,4.880905
+-57.952601,0.000000,0.000000,4.879252
+-99.119640,0.000000,0.000000,1.411325
+71.558474,0.000000,0.000000,2.443436
+-50.398997,0.000000,0.000000,6.149671
+93.969644,0.000000,0.000000,6.005049
+-47.958961,0.000000,0.000000,2.306522
+82.580425,0.000000,0.000000,0.899016
+-7.909680,0.000000,0.000000,4.656691
+77.170744,0.000000,0.000000,1.772520
+94.990176,0.000000,0.000000,0.742396
+-85.007026,0.000000,0.000000,2.957568
+0.992721,0.000000,0.000000,0.992721
+-79.334674,0.000000,0.000000,2.346735
+51.929450,0.000000,0.000000,1.663967
+68.659900,0.000000,0.000000,5.828047
+-81.847852,0.000000,0.000000,6.116743
+94.112480,0.000000,0.000000,6.147886
+-81.408641,0.000000,0.000000,0.272768
+0.593477,0.000000,0.000000,0.593477
+-6.568346,0.000000,0.000000,5.998025
+17.752890,0.000000,0.000000,5.186519
+43.540731,0.000000,0.000000,5.841619
+-84.813309,0.000000,0.000000,3.151286
+-44.040582,0.000000,0.000000,6.224900
+-39.592007,0.000000,0.000000,4.390290
+-63.546451,0.000000,0.000000,5.568587
diff --git a/tests/math/p_fmod_2pi.c b/tests/math/p_fmod_2pi.c
@@ -0,0 +1,11 @@
+#include <math.h>
+#include "simple.h"
+
+void generate_ref(float *out, size_t n) /* fills out[0..n-1] with reference remainders */
+{
+    size_t i;
+    float twopi = 2.0 * M_PI; /* double product narrowed to float */
+
+    for (i = 0; i < n; i++) /* ai: presumably the input array from simple.h -- TODO confirm */
+        out[i] = fmodf(ai[i], twopi) + (ai[i] >= 0.f ? 0.f : twopi);
+}