Skip to content

Commit 2bf2d2e

Browse files
committed
Make SIMD optional and support non-x86 targets
1 parent 7e4668c commit 2bf2d2e

File tree

1 file changed

+59
-3
lines changed

1 file changed

+59
-3
lines changed

si_normalmap.h

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@
4747
#define sinm__aligned_var(type, bytes) type __attribute__((aligned(bytes)))
4848
#endif
4949

50+
// Default for SINM_USE_SIMD: a value supplied by the user before this point
// is left untouched; otherwise it is set to 1 only when an x86 / x86-64
// target macro (GCC/Clang or MSVC spelling) is defined.
// On any other target the macro is left undefined, so later
// `#if SINM_USE_SIMD` tests evaluate it as 0.
// NOTE(review): relying on an undefined macro inside `#if` warns under
// -Wundef; consider adding `#else` / `#define SINM_USE_SIMD 0`.
#ifndef SINM_USE_SIMD
51+
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
52+
#define SINM_USE_SIMD 1
53+
#endif
54+
#endif
55+
5056
#ifndef SINM_TYPES
5157
#define SINM_TYPES
5258
typedef enum
@@ -96,12 +102,15 @@ sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
96102

97103
#else // SI_NORMALMAP_IMPLEMENTATION
98104

105+
#if SINM_USE_SIMD
99106
#ifdef _MSC_VER
100107
#include <intrin.h>
101108
#else
102109
#include <x86intrin.h>
103110
#endif
111+
#endif
104112

113+
#if SINM_USE_SIMD
105114
#ifdef __AVX__
106115
#define SINM_SIMD_ALIGNMENT 32
107116
#define simd_prefix_float(name) _mm256_##name
@@ -112,6 +121,8 @@ sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
112121
#define simd__or_ix(a, b) _mm256_or_si256(a, b)
113122
#define simd__loadu_ix(a) _mm256_loadu_si256(a)
114123
#define simd__storeu_ix(ptr, v) _mm256_storeu_si256(ptr, v)
124+
#define simd__setzero_ix() _mm256_setzero_si256()
125+
#define simd__setzero_ps() _mm256_setzero_ps()
115126
#else
116127
#define simd_prefix_float(name) _mm_##name
117128
#define SINM_SIMD_ALIGNMENT 16
@@ -122,16 +133,31 @@ sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
122133
#define simd__or_ix(a, b) _mm_or_si128(a, b)
123134
#define simd__loadu_ix(a) _mm_loadu_si128(a)
124135
#define simd__storeu_ix(ptr, v) _mm_storeu_si128(ptr, v)
136+
#define simd__setzero_ix() _mm_setzero_si128()
137+
#define simd__setzero_ps() _mm_setzero_ps()
125138
#endif // __AVX__
126139

127140
#define simd__set1_epi32(a) simd_prefix_float(set1_epi32(a))
128-
#define simd__setzero_ix() simd_prefix_float(setzero_si256())
129-
#define simd__setzero_ps() simd_prefix_float(setzero_ps())
130141
#define simd__andnot_ps(a, b) simd_prefix_float(andnot_ps(a, b))
131142
#define simd__add_epi32(a, b) simd_prefix_float(add_epi32(a, b))
132143
#define simd__sub_epi32(a, b) simd_prefix_float(sub_epi32(a, b))
144+
145+
// Signed 32-bit max/min selection. When AVX or SSE4.1 is available the
// macros forward to the native max_epi32/min_epi32 intrinsic through
// simd_prefix_float; otherwise they fall back to the SSE2-only helpers below.
// NOTE(review): with __AVX__ defined simd_prefix_float yields _mm256_*, and
// _mm256_max_epi32/_mm256_min_epi32 are AVX2 intrinsics -- confirm that
// AVX-without-AVX2 builds are not expected to take this branch.
#if defined(__AVX__) || defined(__SSE4_1__)
133146
#define simd__max_epi32(a, b) simd_prefix_float(max_epi32(a, b))
134147
#define simd__min_epi32(a, b) simd_prefix_float(min_epi32(a, b))
148+
#else
149+
// SSE2 fallback for max: build a mask of the 32-bit elements where a > b,
// then take a where the mask is set and b elsewhere.
static sinm__inline __m128i sinm__sse2_max_epi32(__m128i a, __m128i b) {
150+
__m128i mask = _mm_cmpgt_epi32(a, b);
151+
return _mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b));
152+
}
153+
// SSE2 fallback for min: same blend, but the mask marks elements where
// b > a, so a is taken exactly where it is the smaller value.
static sinm__inline __m128i sinm__sse2_min_epi32(__m128i a, __m128i b) {
154+
__m128i mask = _mm_cmpgt_epi32(b, a);
155+
return _mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b));
156+
}
157+
#define simd__max_epi32(a, b) sinm__sse2_max_epi32(a, b)
158+
#define simd__min_epi32(a, b) sinm__sse2_min_epi32(a, b)
159+
#endif
160+
135161
#define simd__loadu_ps(a) simd_prefix_float(loadu_ps(a))
136162
#define simd__srli_epi32(a, i) simd_prefix_float(srli_epi32(a, i))
137163
#define simd__slli_epi32(a, i) simd_prefix_float(slli_epi32(a, i))
@@ -145,6 +171,7 @@ sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
145171
#define simd__div_ps(a, b) simd_prefix_float(div_ps(a, b))
146172
#define simd__hadd_ps(a, b) simd_prefix_float(hadd_ps(a, b))
147173
#define simd__cvtss_f32(a) simd_prefix_float(cvtss_f32(a))
174+
#endif // SINM_USE_SIMD
148175

149176
#define sinm__min(a, b) ((a) < (b) ? (a) : (b))
150177
#define sinm__max(a, b) ((a) > (b) ? (a) : (b))
@@ -165,11 +192,13 @@ sinm__length(float x, float y, float z)
165192
return sqrtf(x * x + y * y + z * z);
166193
}
167194

195+
#if SINM_USE_SIMD
168196
// SIMD counterpart of sinm__length: returns simd__sqrt_ps(x*x + y*y + z*z),
// built from the simd__mul_ps / simd__add_ps wrappers.
// NOTE(review): the simd__*_ps wrapper definitions are outside this hunk;
// presumably they map to the packed-float _mm_/_mm256_ intrinsics -- confirm.
sinm__inline static simd__float
169197
sinm__length_simd(simd__float x, simd__float y, simd__float z)
170198
{
171199
return simd__sqrt_ps(simd__add_ps(simd__add_ps(simd__mul_ps(x, x), simd__mul_ps(y, y)), simd__mul_ps(z, z)));
172200
}
201+
#endif
173202

174203
sinm__inline static sinm__v3
175204
sinm__normalized(float x, float y, float z)
@@ -222,6 +251,7 @@ sinm__rgba_to_v3(uint32_t c)
222251
return result;
223252
}
224253

254+
#if SINM_USE_SIMD
225255
static sinm__inline void
226256
sinm__rgba_to_v3_simd(simd__int c, simd__float *x, simd__float *y, simd__float *z)
227257
{
@@ -231,6 +261,7 @@ sinm__rgba_to_v3_simd(simd__int c, simd__float *x, simd__float *y, simd__float *
231261
*y = simd__cvtepi32_ps(simd__sub_epi32(simd__and_ix(simd__srli_epi32(c, 8), ff), v127));
232262
*z = simd__cvtepi32_ps(simd__sub_epi32(simd__and_ix(simd__srli_epi32(c, 16), ff), v127));
233263
}
264+
#endif
234265

235266
static sinm__inline uint32_t
236267
sinm__unit_vector_to_rgba(sinm__v3 v)
@@ -241,6 +272,7 @@ sinm__unit_vector_to_rgba(sinm__v3 v)
241272
return r | g << 8u | b << 16u | 255u << 24u;
242273
}
243274

275+
#if SINM_USE_SIMD
244276
static sinm__inline simd__int
245277
sinm__v3_to_rgba_simd(simd__float x, simd__float y, simd__float z)
246278
{
@@ -253,6 +285,7 @@ sinm__v3_to_rgba_simd(simd__float x, simd__float y, simd__float z)
253285
simd__int c = simd__or_ix(simd__or_ix(simd__or_ix(r, simd__slli_epi32(g, 8)), simd__slli_epi32(b, 16)), a);
254286
return c;
255287
}
288+
#endif
256289

257290
SINM_DEF void
258291
sinm__generate_gaussian_box(float *outBoxes, int32_t n, float sigma)
@@ -396,6 +429,7 @@ sinm__sobel3x3_normals(const uint32_t *in, uint32_t *out, int32_t w, int32_t h,
396429
sinm__sobel3x3_normals_row_range(in, out, 0, w, w, h, scale, flipY);
397430
}
398431

432+
#if SINM_USE_SIMD
399433
static void
400434
sinm__sobel3x3_normals_simd(const uint32_t *in, uint32_t *out, int32_t w, int32_t h, float scale, int flipY)
401435
{
@@ -473,6 +507,7 @@ sinm__sobel3x3_normals_simd(const uint32_t *in, uint32_t *out, int32_t w, int32_
473507

474508
sinm__sobel3x3_normals_row_range(in, out, w - remainder - 8, w, w, h, scale, flipY);
475509
}
510+
#endif
476511

477512
SINM_DEF void
478513
sinm__normalize(uint32_t *in, int32_t w, int32_t h, float scale, int flipY)
@@ -485,6 +520,7 @@ sinm__normalize(uint32_t *in, int32_t w, int32_t h, float scale, int flipY)
485520
}
486521
}
487522

523+
#if SINM_USE_SIMD
488524
SINM_DEF void
489525
sinm__normalize_simd(uint32_t *in, int32_t w, int32_t h, float scale, int flipY)
490526
{
@@ -505,11 +541,16 @@ sinm__normalize_simd(uint32_t *in, int32_t w, int32_t h, float scale, int flipY)
505541

506542
sinm__normalize(offset_in, 1, remainder, scale, flipY);
507543
}
544+
#endif
508545

509546
// Public entry point for normalization: forwards all arguments unchanged to
// sinm__normalize_simd when SINM_USE_SIMD is non-zero, and to the scalar
// sinm__normalize otherwise.
SINM_DEF sinm__inline void
510547
sinm_normalize(uint32_t *in, int32_t w, int32_t h, float scale, int flipY)
511548
{
549+
#if SINM_USE_SIMD
512550
sinm__normalize_simd(in, w, h, scale, flipY);
551+
#else
552+
sinm__normalize(in, w, h, scale, flipY);
553+
#endif
513554
}
514555

515556
SINM_DEF void
@@ -531,6 +572,7 @@ sinm__composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
531572
}
532573
}
533574

575+
#if SINM_USE_SIMD
534576
SINM_DEF void
535577
sinm__composite_simd(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t w, int32_t h)
536578
{
@@ -563,11 +605,16 @@ sinm__composite_simd(const uint32_t *in1, const uint32_t *in2, uint32_t *out, in
563605

564606
sinm__composite(offset_in1, offset_in2, offset_out, 1, remainder);
565607
}
608+
#endif
566609

567610
// Public entry point for compositing in1 and in2 into out: forwards all
// arguments unchanged to sinm__composite_simd when SINM_USE_SIMD is non-zero,
// and to the scalar sinm__composite otherwise.
SINM_DEF sinm__inline void
568611
sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t w, int32_t h)
569612
{
613+
#if SINM_USE_SIMD
570614
sinm__composite_simd(in1, in2, out, w, h);
615+
#else
616+
sinm__composite(in1, in2, out, w, h);
617+
#endif
571618
}
572619

573620
SINM_DEF sinm__inline uint32_t *
@@ -615,6 +662,7 @@ sinm__greyscale(const uint32_t *in, uint32_t *out, int32_t w, int32_t h, sinm_gr
615662
}
616663
}
617664

665+
#if SINM_USE_SIMD
618666
static void
619667
sinm__simd_greyscale(const uint32_t *in, uint32_t *out, int32_t w, int32_t h, sinm_greyscale_type type)
620668
{
@@ -694,12 +742,16 @@ sinm__simd_greyscale(const uint32_t *in, uint32_t *out, int32_t w, int32_t h, si
694742
sinm__greyscale(offset_in, offset_out, 1, remainder, type);
695743
}
696744
}
745+
#endif
697746

698747
// Public entry point for greyscale conversion: forwards all arguments
// unchanged to sinm__simd_greyscale when SINM_USE_SIMD is non-zero, and to
// the scalar sinm__greyscale otherwise.
// The `count` local on the removed (`701-`) diff line is not referenced by
// any other statement in this function.
SINM_DEF void
699748
sinm_greyscale(const uint32_t *in, uint32_t *out, int32_t w, int32_t h, sinm_greyscale_type type)
700749
{
701-
int32_t count = w * h;
750+
#if SINM_USE_SIMD
702751
sinm__simd_greyscale(in, out, w, h, type);
752+
#else
753+
sinm__greyscale(in, out, w, h, type);
754+
#endif
703755
}
704756

705757
SINM_DEF int
@@ -729,7 +781,11 @@ sinm_normal_map_buffer(const uint32_t *in,
729781
memcpy(intermediate, out, w * h * sizeof(uint32_t));
730782
}
731783

784+
#if SINM_USE_SIMD
732785
sinm__sobel3x3_normals_simd(intermediate, out, w, h, scale, flipY);
786+
#else
787+
sinm__sobel3x3_normals(intermediate, out, w, h, scale, flipY);
788+
#endif
733789

734790
free(intermediate);
735791
return 1;

0 commit comments

Comments
 (0)