OpenShot Library | libopenshot-audio  0.2.0
juce_FloatVectorOperations.cpp
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2017 - ROLI Ltd.
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  The code included in this file is provided under the terms of the ISC license
11  http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12  To use, copy, modify, and/or distribute this software for any purpose with or
13  without fee is hereby granted provided that the above copyright notice and
14  this permission notice appear in all copies.
15 
16  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18  DISCLAIMED.
19 
20  ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
26 namespace FloatVectorHelpers
27 {
// Pointer-advance snippets that are pasted into the vector loops as their "increment" step.
// Every pointer is moved on by 16 / sizeof (*dest) elements, i.e. 16 bytes' worth of the
// destination element type. Note that src, src1 and src2 are also stepped using the element
// size of dest, so these assume the sources have the same element type as the destination.
28  #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
29  #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
30  #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
31 
32  #if JUCE_USE_SSE_INTRINSICS
33  inline static bool isAligned (const void* p) noexcept
34  {
35  return (((pointer_sized_int) p) & 15) == 0;
36  }
37 
38  struct BasicOps32
39  {
40  using Type = float;
41  using ParallelType = __m128;
42  using IntegerType = __m128;
43  enum { numParallel = 4 };
44 
45  // Integer and parallel types are the same for SSE. On neon they have different types
46  static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
47  static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
48 
49  static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
50  static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
51  static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
52  static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
53  static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
54 
55  static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
56  static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
57  static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
58  static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
59  static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
60 
61  static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
62  static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
63  static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
64  static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
65 
66  static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
67  static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
68  };
69 
70  struct BasicOps64
71  {
72  using Type = double;
73  using ParallelType = __m128d;
74  using IntegerType = __m128d;
75  enum { numParallel = 2 };
76 
77  // Integer and parallel types are the same for SSE. On neon they have different types
78  static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
79  static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
80 
81  static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
82  static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
83  static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
84  static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
85  static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
86 
87  static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
88  static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
89  static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
90  static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
91  static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
92 
93  static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
94  static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
95  static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
96  static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
97 
98  static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
99  static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
100  };
101 
102 
103 
// Opens a vectorised operation (SSE flavour). It selects Mode (BasicOps32 or BasicOps64)
// from the element size of dest, opens a scope, and computes numLongOps, the number of
// full-width vector iterations that fit into num. The scope is deliberately left open:
// it is closed by JUCE_FINISH_VEC_OP, so the two macros must always be used as a pair.
// Expects dest and num to be in scope where it is expanded.
104  #define JUCE_BEGIN_VEC_OP \
105  using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
106  { \
107  const int numLongOps = num / Mode::numParallel;
108 
// Closes the scope opened by JUCE_BEGIN_VEC_OP. num is reduced to the number of leftover
// elements using a bit-mask (which relies on Mode::numParallel being a power of two); if
// nothing is left the enclosing function returns, which means the macro can only be used
// in functions returning void. Otherwise normalOp runs once per leftover element, indexed
// by i relative to the already-advanced pointers.
109  #define JUCE_FINISH_VEC_OP(normalOp) \
110  num &= (Mode::numParallel - 1); \
111  if (num == 0) return; \
112  } \
113  for (int i = 0; i < num; ++i) normalOp;
114 
// Runs an operation that only touches dest (SSE flavour).
//   normalOp - scalar statement for the leftover elements, written in terms of dest[i]
//   vecOp    - expression producing the vector that is stored to dest on each iteration
//   locals   - one of the JUCE_LOAD_* macros, declaring the inputs that vecOp refers to
//   setupOp  - statements run once before the loop (e.g. building a constant vector)
// The aligned load/store pair is used when dest is 16-byte aligned, the unaligned pair
// otherwise. "dummy" fills the unused source-load slot of JUCE_VEC_LOOP.
115  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
116  JUCE_BEGIN_VEC_OP \
117  setupOp \
118  if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
119  else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
120  JUCE_FINISH_VEC_OP (normalOp)
121 
// Runs a one-source operation reading src and writing dest (SSE flavour).
//   normalOp  - scalar statement for the leftover elements, written in terms of dest[i] / src[i]
//   vecOp     - expression producing the vector that is stored to dest on each iteration
//   locals    - one of the JUCE_LOAD_* macros, declaring the inputs that vecOp refers to
//   increment - statements that advance the pointers after each vector iteration
//   setupOp   - statements run once before the loop
// One of four loops is chosen from the 16-byte alignment of dest and src, so that each
// pointer independently gets the aligned or the unaligned access functions.
122  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
123  JUCE_BEGIN_VEC_OP \
124  setupOp \
125  if (FloatVectorHelpers::isAligned (dest)) \
126  { \
127  if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
128  else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
129  }\
130  else \
131  { \
132  if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
133  else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
134  } \
135  JUCE_FINISH_VEC_OP (normalOp)
136 
// Runs a two-source operation reading src1 and src2 and writing dest, without reading
// dest (SSE flavour). The parameters have the same meaning as in
// JUCE_PERFORM_VEC_OP_SRC_DEST. One of eight loops is chosen from the 16-byte alignment
// of dest, src1 and src2, so that each pointer independently gets the aligned or the
// unaligned access function.
137  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
138  JUCE_BEGIN_VEC_OP \
139  setupOp \
140  if (FloatVectorHelpers::isAligned (dest)) \
141  { \
142  if (FloatVectorHelpers::isAligned (src1)) \
143  { \
144  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
145  else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
146  } \
147  else \
148  { \
149  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
150  else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
151  } \
152  } \
153  else \
154  { \
155  if (FloatVectorHelpers::isAligned (src1)) \
156  { \
157  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
158  else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
159  } \
160  else \
161  { \
162  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
163  else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
164  } \
165  } \
166  JUCE_FINISH_VEC_OP (normalOp)
167 
// Runs a two-source operation that also reads the current contents of dest, such as an
// accumulate (SSE flavour). The parameters have the same meaning as in
// JUCE_PERFORM_VEC_OP_SRC_DEST. One of eight loops is chosen from the 16-byte alignment
// of dest, src1 and src2; the load and the store for dest always use the same alignment
// variant.
168  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
169  JUCE_BEGIN_VEC_OP \
170  setupOp \
171  if (FloatVectorHelpers::isAligned (dest)) \
172  { \
173  if (FloatVectorHelpers::isAligned (src1)) \
174  { \
175  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
176  else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
177  } \
178  else \
179  { \
180  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
181  else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
182  } \
183  } \
184  else \
185  { \
186  if (FloatVectorHelpers::isAligned (src1)) \
187  { \
188  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
189  else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
190  } \
191  else \
192  { \
193  if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
194  else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
195  } \
196  } \
197  JUCE_FINISH_VEC_OP (normalOp)
198 
199 
200  //==============================================================================
201  #elif JUCE_USE_ARM_NEON
202 
203  struct BasicOps32
204  {
205  using Type = float;
206  using ParallelType = float32x4_t;
207  using IntegerType = uint32x4_t;
208  union signMaskUnion { ParallelType f; IntegerType i; };
209  enum { numParallel = 4 };
210 
211  static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
212  static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
213 
214  static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
215  static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
216  static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
217  static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
218  static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
219 
220  static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
221  static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
222  static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
223  static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
224  static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
225 
226  static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
227  static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
228  static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
229  static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
230 
231  static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
232  static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
233  };
234 
235  struct BasicOps64
236  {
237  using Type = double;
238  using ParallelType = double;
239  using IntegerType = uint64;
240  union signMaskUnion { ParallelType f; IntegerType i; };
241  enum { numParallel = 1 };
242 
243  static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
244  static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
245 
246  static forcedinline ParallelType load1 (Type v) noexcept { return v; }
247  static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
248  static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
249  static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
250  static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
251 
252  static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
253  static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
254  static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
255  static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
256  static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
257 
258  static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
259  static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
260  static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
261  static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
262 
263  static forcedinline Type max (ParallelType a) noexcept { return a; }
264  static forcedinline Type min (ParallelType a) noexcept { return a; }
265  };
266 
// Opens a vectorised operation (NEON flavour). It selects Mode from the element size of
// dest and enters the vector path only when Mode::numParallel is greater than 1; with
// the scalar BasicOps64 above the whole vector section is skipped and only the scalar
// loop of JUCE_FINISH_VEC_OP runs. The opened scope is closed by JUCE_FINISH_VEC_OP, so
// the two macros must always be used as a pair. Expects dest and num to be in scope.
267  #define JUCE_BEGIN_VEC_OP \
268  using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
269  if (Mode::numParallel > 1) \
270  { \
271  const int numLongOps = num / Mode::numParallel;
272 
// Closes the scope opened by JUCE_BEGIN_VEC_OP. Inside that scope num is reduced to the
// number of leftover elements and the enclosing function returns if none remain (so the
// macro can only be used in functions returning void). After the scope, normalOp runs
// once for each element that is still outstanding.
273  #define JUCE_FINISH_VEC_OP(normalOp) \
274  num &= (Mode::numParallel - 1); \
275  if (num == 0) return; \
276  } \
277  for (int i = 0; i < num; ++i) normalOp;
278 
// NEON flavours of the JUCE_PERFORM_VEC_OP_* family. The parameters mean the same as in
// the SSE versions (normalOp: scalar statement for leftovers, vecOp: vector expression to
// store, locals: a JUCE_LOAD_* macro, increment: pointer advance, setupOp: one-off setup),
// but no alignment checks are made: every loop uses the "U" load and store functions.

// Operation that only touches dest; "dummy" fills the unused source-load slot.
279  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
280  JUCE_BEGIN_VEC_OP \
281  setupOp \
282  JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
283  JUCE_FINISH_VEC_OP (normalOp)
284 
// Operation reading src and writing dest.
285  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
286  JUCE_BEGIN_VEC_OP \
287  setupOp \
288  JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
289  JUCE_FINISH_VEC_OP (normalOp)
290 
// Operation reading src1 and src2 and writing dest.
291  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
292  JUCE_BEGIN_VEC_OP \
293  setupOp \
294  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
295  JUCE_FINISH_VEC_OP (normalOp)
296 
// Operation reading src1, src2 and the current contents of dest, then writing dest.
297  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
298  JUCE_BEGIN_VEC_OP \
299  setupOp \
300  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
301  JUCE_FINISH_VEC_OP (normalOp)
302 
303 
304  //==============================================================================
305  #else
// Scalar fallbacks, used when neither the SSE nor the NEON branch above is active. Each
// macro keeps the same parameter list as its vectorised counterpart but ignores all the
// vector-related arguments and simply runs normalOp for i = 0 .. num - 1.
306  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
307  for (int i = 0; i < num; ++i) normalOp;
308 
309  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
310  for (int i = 0; i < num; ++i) normalOp;
311 
312  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
313  for (int i = 0; i < num; ++i) normalOp;
314 
315  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
316  for (int i = 0; i < num; ++i) normalOp;
317 
318  #endif
319 
320  //==============================================================================
// Vector loop bodies shared by the JUCE_PERFORM_VEC_OP_* macros. Each one iterates
// numLongOps times (a variable declared by JUCE_BEGIN_VEC_OP) and on every iteration:
//   1. expands "locals" with the supplied load functions to declare the input vectors,
//   2. stores the value of vecOp to dest using dstStore,
//   3. runs "increment" to advance the pointers.
// The three variants differ only in how many load functions they forward to "locals".

// One optional source plus dest.
321  #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
322  for (int i = 0; i < numLongOps; ++i) \
323  { \
324  locals (srcLoad, dstLoad); \
325  dstStore (dest, vecOp); \
326  increment; \
327  }
328 
// Two sources; dest is only written.
329  #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
330  for (int i = 0; i < numLongOps; ++i) \
331  { \
332  locals (src1Load, src2Load); \
333  dstStore (dest, vecOp); \
334  increment; \
335  }
336 
// Two sources; dest is read as well as written.
337  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
338  for (int i = 0; i < numLongOps; ++i) \
339  { \
340  locals (src1Load, src2Load, dstLoad); \
341  dstStore (dest, vecOp); \
342  increment; \
343  }
344 
// Values for the "locals" argument of the loop macros. Each declares, for one loop
// iteration, the constant vectors that a vecOp expression may refer to:
//   d      - the current contents of dest
//   s      - the current contents of src
//   s1, s2 - the current contents of src1 and src2
// JUCE_LOAD_NONE declares nothing, for operations whose vecOp needs no per-iteration input.
345  #define JUCE_LOAD_NONE(srcLoad, dstLoad)
346  #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
347  #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
348  #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
349  #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
350  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
351 
352  union signMask32 { float f; uint32 i; };
353  union signMask64 { double d; uint64 i; };
354 
355  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
356  template<int typeSize> struct ModeType { using Mode = BasicOps32; };
357  template<> struct ModeType<8> { using Mode = BasicOps64; };
358 
359  template <typename Mode>
360  struct MinMax
361  {
362  using Type = typename Mode::Type;
363  using ParallelType = typename Mode::ParallelType;
364 
365  static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
366  {
367  int numLongOps = num / Mode::numParallel;
368 
369  if (numLongOps > 1)
370  {
371  ParallelType val;
372 
373  #if ! JUCE_USE_ARM_NEON
374  if (isAligned (src))
375  {
376  val = Mode::loadA (src);
377 
378  if (isMinimum)
379  {
380  while (--numLongOps > 0)
381  {
382  src += Mode::numParallel;
383  val = Mode::min (val, Mode::loadA (src));
384  }
385  }
386  else
387  {
388  while (--numLongOps > 0)
389  {
390  src += Mode::numParallel;
391  val = Mode::max (val, Mode::loadA (src));
392  }
393  }
394  }
395  else
396  #endif
397  {
398  val = Mode::loadU (src);
399 
400  if (isMinimum)
401  {
402  while (--numLongOps > 0)
403  {
404  src += Mode::numParallel;
405  val = Mode::min (val, Mode::loadU (src));
406  }
407  }
408  else
409  {
410  while (--numLongOps > 0)
411  {
412  src += Mode::numParallel;
413  val = Mode::max (val, Mode::loadU (src));
414  }
415  }
416  }
417 
418  Type result = isMinimum ? Mode::min (val)
419  : Mode::max (val);
420 
421  num &= (Mode::numParallel - 1);
422  src += Mode::numParallel;
423 
424  for (int i = 0; i < num; ++i)
425  result = isMinimum ? jmin (result, src[i])
426  : jmax (result, src[i]);
427 
428  return result;
429  }
430 
431  return isMinimum ? juce::findMinimum (src, num)
432  : juce::findMaximum (src, num);
433  }
434 
435  static Range<Type> findMinAndMax (const Type* src, int num) noexcept
436  {
437  int numLongOps = num / Mode::numParallel;
438 
439  if (numLongOps > 1)
440  {
441  ParallelType mn, mx;
442 
443  #if ! JUCE_USE_ARM_NEON
444  if (isAligned (src))
445  {
446  mn = Mode::loadA (src);
447  mx = mn;
448 
449  while (--numLongOps > 0)
450  {
451  src += Mode::numParallel;
452  const ParallelType v = Mode::loadA (src);
453  mn = Mode::min (mn, v);
454  mx = Mode::max (mx, v);
455  }
456  }
457  else
458  #endif
459  {
460  mn = Mode::loadU (src);
461  mx = mn;
462 
463  while (--numLongOps > 0)
464  {
465  src += Mode::numParallel;
466  const ParallelType v = Mode::loadU (src);
467  mn = Mode::min (mn, v);
468  mx = Mode::max (mx, v);
469  }
470  }
471 
472  Range<Type> result (Mode::min (mn),
473  Mode::max (mx));
474 
475  num &= (Mode::numParallel - 1);
476  src += Mode::numParallel;
477 
478  for (int i = 0; i < num; ++i)
479  result = result.getUnionWith (src[i]);
480 
481  return result;
482  }
483 
484  return Range<Type>::findMinAndMax (src, num);
485  }
486  };
487  #endif
488 }
489 
490 //==============================================================================
491 namespace
492 {
493  #if JUCE_USE_VDSP_FRAMEWORK
// Strips the const qualifier from a pointer. Some vDSP functions in the OSX 10.8 SDK
// and earlier have slightly different signatures, which this works around; it can be
// removed safely once those SDKs are obsolete.
template <typename ValueType>
ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept
{
    return const_cast<ValueType*> (arg);
}
498  #endif
499 }
500 
501 //==============================================================================
502 void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
503 {
504  #if JUCE_USE_VDSP_FRAMEWORK
505  vDSP_vclr (dest, 1, (size_t) num);
506  #else
507  zeromem (dest, (size_t) num * sizeof (float));
508  #endif
509 }
510 
511 void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
512 {
513  #if JUCE_USE_VDSP_FRAMEWORK
514  vDSP_vclrD (dest, 1, (size_t) num);
515  #else
516  zeromem (dest, (size_t) num * sizeof (double));
517  #endif
518 }
519 
520 void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
521 {
522  #if JUCE_USE_VDSP_FRAMEWORK
523  vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
524  #else
525  JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
526  const Mode::ParallelType val = Mode::load1 (valueToFill);)
527  #endif
528 }
529 
530 void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
531 {
532  #if JUCE_USE_VDSP_FRAMEWORK
533  vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
534  #else
535  JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
536  const Mode::ParallelType val = Mode::load1 (valueToFill);)
537  #endif
538 }
539 
540 void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
541 {
542  memcpy (dest, src, (size_t) num * sizeof (float));
543 }
544 
545 void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
546 {
547  memcpy (dest, src, (size_t) num * sizeof (double));
548 }
549 
550 void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
551 {
552  #if JUCE_USE_VDSP_FRAMEWORK
553  vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
554  #else
555  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
556  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
557  const Mode::ParallelType mult = Mode::load1 (multiplier);)
558  #endif
559 }
560 
561 void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
562 {
563  #if JUCE_USE_VDSP_FRAMEWORK
564  vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
565  #else
566  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
567  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
568  const Mode::ParallelType mult = Mode::load1 (multiplier);)
569  #endif
570 }
571 
572 void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
573 {
574  #if JUCE_USE_VDSP_FRAMEWORK
575  vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
576  #else
577  JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
578  const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
579  #endif
580 }
581 
582 void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
583 {
584  JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
585  const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
586 }
587 
588 void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
589 {
590  #if JUCE_USE_VDSP_FRAMEWORK
591  vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
592  #else
593  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
594  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
595  const Mode::ParallelType am = Mode::load1 (amount);)
596  #endif
597 }
598 
599 void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
600 {
601  #if JUCE_USE_VDSP_FRAMEWORK
602  vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
603  #else
604  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
605  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
606  const Mode::ParallelType am = Mode::load1 (amount);)
607  #endif
608 }
609 
610 void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
611 {
612  #if JUCE_USE_VDSP_FRAMEWORK
613  vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
614  #else
615  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
616  #endif
617 }
618 
619 void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
620 {
621  #if JUCE_USE_VDSP_FRAMEWORK
622  vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
623  #else
624  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
625  #endif
626 }
627 
628 void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
629 {
630  #if JUCE_USE_VDSP_FRAMEWORK
631  vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
632  #else
633  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
634  #endif
635 }
636 
637 void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
638 {
639  #if JUCE_USE_VDSP_FRAMEWORK
640  vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
641  #else
642  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
643  #endif
644 }
645 
646 void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
647 {
648  #if JUCE_USE_VDSP_FRAMEWORK
649  vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
650  #else
651  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
652  #endif
653 }
654 
655 void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
656 {
657  #if JUCE_USE_VDSP_FRAMEWORK
658  vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
659  #else
660  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
661  #endif
662 }
663 
664 void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
665 {
666  #if JUCE_USE_VDSP_FRAMEWORK
667  vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
668  #else
669  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
670  #endif
671 }
672 
673 void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
674 {
675  #if JUCE_USE_VDSP_FRAMEWORK
676  vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
677  #else
678  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
679  #endif
680 }
681 
682 void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
683 {
684  #if JUCE_USE_VDSP_FRAMEWORK
685  vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
686  #else
687  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
688  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
689  const Mode::ParallelType mult = Mode::load1 (multiplier);)
690  #endif
691 }
692 
693 void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
694 {
695  #if JUCE_USE_VDSP_FRAMEWORK
696  vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
697  #else
698  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
699  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
700  const Mode::ParallelType mult = Mode::load1 (multiplier);)
701  #endif
702 }
703 
704 void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
705 {
706  #if JUCE_USE_VDSP_FRAMEWORK
707  vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
708  #else
709  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
710  JUCE_LOAD_SRC1_SRC2_DEST,
711  JUCE_INCREMENT_SRC1_SRC2_DEST, )
712  #endif
713 }
714 
715 void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
716 {
717  #if JUCE_USE_VDSP_FRAMEWORK
718  vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
719  #else
720  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
721  JUCE_LOAD_SRC1_SRC2_DEST,
722  JUCE_INCREMENT_SRC1_SRC2_DEST, )
723  #endif
724 }
725 
726 void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
727 {
728  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
729  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
730  const Mode::ParallelType mult = Mode::load1 (multiplier);)
731 }
732 
/** Subtracts src[i] * multiplier from each dest[i], for num doubles.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
733 void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
734 {
735  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
736  JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
737  const Mode::ParallelType mult = Mode::load1 (multiplier);)
738 }
739 
/** Subtracts the element-wise product src1[i] * src2[i] from each dest[i], for num floats.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
740 void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
741 {
742  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
743  JUCE_LOAD_SRC1_SRC2_DEST,
744  JUCE_INCREMENT_SRC1_SRC2_DEST, )
745 }
746 
/** Subtracts the element-wise product src1[i] * src2[i] from each dest[i], for num doubles.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
747 void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
748 {
749  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
750  JUCE_LOAD_SRC1_SRC2_DEST,
751  JUCE_INCREMENT_SRC1_SRC2_DEST, )
752 }
753 
/** Multiplies each dest[i] in place by src[i], for num floats.

    Uses vDSP_vmul (with dest as both an input and the output) when JUCE_USE_VDSP_FRAMEWORK
    is set, otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
754 void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
755 {
756  #if JUCE_USE_VDSP_FRAMEWORK
757  vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
758  #else
759  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
760  #endif
761 }
762 
/** Multiplies each dest[i] in place by src[i], for num doubles.

    Uses vDSP_vmulD (with dest as both an input and the output) when JUCE_USE_VDSP_FRAMEWORK
    is set, otherwise the JUCE_PERFORM_VEC_OP_SRC_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
763 void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
764 {
765  #if JUCE_USE_VDSP_FRAMEWORK
766  vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
767  #else
768  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
769  #endif
770 }
771 
/** Writes the element-wise product src1[i] * src2[i] into dest[i], for num floats.

    Uses vDSP_vmul when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
772 void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
773 {
774  #if JUCE_USE_VDSP_FRAMEWORK
775  vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
776  #else
777  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
778  #endif
779 }
780 
/** Writes the element-wise product src1[i] * src2[i] into dest[i], for num doubles.

    Uses vDSP_vmulD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
781 void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
782 {
783  #if JUCE_USE_VDSP_FRAMEWORK
784  vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
785  #else
786  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
787  #endif
788 }
789 
/** Scales each of the num floats in dest in place by multiplier.

    Uses vDSP_vsmul (reading from and writing to dest) when JUCE_USE_VDSP_FRAMEWORK is set,
    otherwise the JUCE_PERFORM_VEC_OP_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
790 void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
791 {
792  #if JUCE_USE_VDSP_FRAMEWORK
793  vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
794  #else
795  JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
796  const Mode::ParallelType mult = Mode::load1 (multiplier);)
797  #endif
798 }
799 
/** Scales each of the num doubles in dest in place by multiplier.

    Uses vDSP_vsmulD (reading from and writing to dest) when JUCE_USE_VDSP_FRAMEWORK is set,
    otherwise the JUCE_PERFORM_VEC_OP_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
800 void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
801 {
802  #if JUCE_USE_VDSP_FRAMEWORK
803  vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
804  #else
805  JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
806  const Mode::ParallelType mult = Mode::load1 (multiplier);)
807  #endif
808 }
809 
/** Writes src[i] * multiplier into dest[i], for num floats.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
810 void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
811 {
812  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
813  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
814  const Mode::ParallelType mult = Mode::load1 (multiplier);)
815 }
816 
/** Writes src[i] * multiplier into dest[i], for num doubles.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
817 void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
818 {
819  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
820  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
821  const Mode::ParallelType mult = Mode::load1 (multiplier);)
822 }
823 
824 void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
825 {
826  #if JUCE_USE_VDSP_FRAMEWORK
827  vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
828  #else
829  copyWithMultiply (dest, src, -1.0f, num);
830  #endif
831 }
832 
833 void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
834 {
835  #if JUCE_USE_VDSP_FRAMEWORK
836  vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
837  #else
838  copyWithMultiply (dest, src, -1.0f, num);
839  #endif
840 }
841 
842 void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
843 {
844  #if JUCE_USE_VDSP_FRAMEWORK
845  vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
846  #else
848  signMask.i = 0x7fffffffUL;
849  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
850  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
851  const Mode::ParallelType mask = Mode::load1 (signMask.f);)
852 
853  ignoreUnused (signMask);
854  #endif
855 }
856 
857 void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
858 {
859  #if JUCE_USE_VDSP_FRAMEWORK
860  vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
861  #else
863  signMask.i = 0x7fffffffffffffffULL;
864 
865  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]), Mode::bit_and (s, mask),
866  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
867  const Mode::ParallelType mask = Mode::load1 (signMask.d);)
868 
869  ignoreUnused (signMask);
870  #endif
871 }
872 
/** Converts num ints from src to float and multiplies each by multiplier, writing into dest.

    On JUCE_USE_ARM_NEON builds the SIMD form uses vld1q_s32 / vcvtq_f32_s32 / vmulq_n_f32.
    All other builds use _mm_loadu_si128 followed by _mm_cvtepi32_ps and a Mode::mul with the
    broadcast multiplier.
    NOTE(review): the JUCE_PERFORM_VEC_OP_SRC_DEST macro is defined outside this chunk; its
    first argument looks like the scalar per-element form and its second the SIMD form - confirm.
*/
873 void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
874 {
875  #if JUCE_USE_ARM_NEON
876  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
877  vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
878  JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
879  #else
880  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
881  Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
882  JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
883  const Mode::ParallelType mult = Mode::load1 (multiplier);)
884  #endif
885 }
886 
/** Writes jmin (src[i], comp) into dest[i], for num floats.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
887 void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
888 {
889  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
890  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
891  const Mode::ParallelType cmp = Mode::load1 (comp);)
892 }
893 
/** Writes jmin (src[i], comp) into dest[i], for num doubles.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
894 void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
895 {
896  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
897  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
898  const Mode::ParallelType cmp = Mode::load1 (comp);)
899 }
900 
/** Writes jmin (src1[i], src2[i]) into dest[i], for num floats.

    Uses vDSP_vmin when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
901 void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
902 {
903  #if JUCE_USE_VDSP_FRAMEWORK
904  vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
905  #else
906  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
907  #endif
908 }
909 
/** Writes jmin (src1[i], src2[i]) into dest[i], for num doubles.

    Uses vDSP_vminD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
910 void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
911 {
912  #if JUCE_USE_VDSP_FRAMEWORK
913  vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
914  #else
915  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
916  #endif
917 }
918 
/** Writes jmax (src[i], comp) into dest[i], for num floats.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
919 void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
920 {
921  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
922  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
923  const Mode::ParallelType cmp = Mode::load1 (comp);)
924 }
925 
/** Writes jmax (src[i], comp) into dest[i], for num doubles.

    There is no vDSP branch here; the JUCE_PERFORM_VEC_OP_SRC_DEST macro is always used.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
926 void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
927 {
928  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
929  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
930  const Mode::ParallelType cmp = Mode::load1 (comp);)
931 }
932 
/** Writes jmax (src1[i], src2[i]) into dest[i], for num floats.

    Uses vDSP_vmax when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
933 void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
934 {
935  #if JUCE_USE_VDSP_FRAMEWORK
936  vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
937  #else
938  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
939  #endif
940 }
941 
/** Writes jmax (src1[i], src2[i]) into dest[i], for num doubles.

    Uses vDSP_vmaxD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
942 void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
943 {
944  #if JUCE_USE_VDSP_FRAMEWORK
945  vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
946  #else
947  JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
948  #endif
949 }
950 
/** Writes each of the num floats in src into dest, limited to the range low..high.

    The scalar form is jmax (jmin (src[i], high), low); callers must pass high >= low, which is
    checked with jassert. Uses vDSP_vclip when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
951 void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
952 {
953  jassert(high >= low);
954 
955  #if JUCE_USE_VDSP_FRAMEWORK
956  vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
957  #else
958  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
959  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
960  const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
961  #endif
962 }
963 
/** Writes each of the num doubles in src into dest, limited to the range low..high.

    The scalar form is jmax (jmin (src[i], high), low); callers must pass high >= low, which is
    checked with jassert. Uses vDSP_vclipD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
    JUCE_PERFORM_VEC_OP_SRC_DEST macro.
    NOTE(review): that macro is defined outside this chunk; its first argument looks like the
    scalar per-element form and its second the SIMD (Mode::) form - confirm.
*/
964 void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
965 {
966  jassert(high >= low);
967 
968  #if JUCE_USE_VDSP_FRAMEWORK
969  vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
970  #else
971  JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
972  JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
973  const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
974  #endif
975 }
976 
977 Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
978 {
979  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
980  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
981  #else
982  return Range<float>::findMinAndMax (src, num);
983  #endif
984 }
985 
986 Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
987 {
988  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
989  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
990  #else
991  return Range<double>::findMinAndMax (src, num);
992  #endif
993 }
994 
995 float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
996 {
997  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
998  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
999  #else
1000  return juce::findMinimum (src, num);
1001  #endif
1002 }
1003 
1004 double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
1005 {
1006  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1007  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
1008  #else
1009  return juce::findMinimum (src, num);
1010  #endif
1011 }
1012 
1013 float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
1014 {
1015  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1016  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
1017  #else
1018  return juce::findMaximum (src, num);
1019  #endif
1020 }
1021 
1022 double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
1023 {
1024  #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1025  return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
1026  #else
1027  return juce::findMaximum (src, num);
1028  #endif
1029 }
1030 
/** Reads the platform's floating-point control/status register and returns it as an intptr_t.

    - Intel builds with SSE intrinsics: the value returned by _mm_getcsr().
    - 64-bit ARM (__arm64__ / __aarch64__): the fpcr register, read with "mrs".
    - Other JUCE_USE_ARM_NEON builds: the fpscr register, read with "vmrs".

    On any other build the function returns 0, and asserts if neither JUCE_INTEL nor
    JUCE_ARM is defined.
*/
1031 intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
1032 {
1033  intptr_t fpsr = 0;
1034  #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1035  fpsr = static_cast<intptr_t> (_mm_getcsr());
1036  #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
1037  #if defined (__arm64__) || defined (__aarch64__)
1038  asm volatile("mrs %0, fpcr" : "=r" (fpsr));
1039  #elif JUCE_USE_ARM_NEON
1040  asm volatile("vmrs %0, fpscr" : "=r" (fpsr));
1041  #endif
1042  #else
1043  #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1044  jassertfalse; // No support for getting the floating point status register for your platform
1045  #endif
1046  #endif
1047 
1048  return fpsr;
1049 }
1050 
/** Writes fpsr into the platform's floating-point control/status register.

    - Intel builds with SSE intrinsics: fpsr is narrowed to uint32_t and passed to _mm_setcsr().
    - 64-bit ARM (__arm64__ / __aarch64__): written to fpcr with "msr".
    - Other JUCE_USE_ARM_NEON builds: written to fpscr with "vmsr".

    On any other build the argument is ignored, and the function asserts if neither
    JUCE_INTEL nor JUCE_ARM is defined.
*/
1051 void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister (intptr_t fpsr) noexcept
1052 {
1053  #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1054  auto fpsr_w = static_cast<uint32_t> (fpsr);
1055  _mm_setcsr (fpsr_w);
1056  #elif defined (__arm64__) || defined (__aarch64__) || JUCE_USE_ARM_NEON
1057  #if defined (__arm64__) || defined (__aarch64__)
1058  asm volatile("msr fpcr, %0" : : "ri" (fpsr));
1059  #elif JUCE_USE_ARM_NEON
1060  asm volatile("vmsr fpscr, %0" : : "ri" (fpsr));
1061  #endif
1062  #else
1063  #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1064  jassertfalse; // No support for setting the floating point status register for your platform
1065  #endif
1066  ignoreUnused (fpsr);
1067  #endif
1068 }
1069 
1070 void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
1071 {
1072  #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1073  #if JUCE_USE_SSE_INTRINSICS
1074  intptr_t mask = _MM_FLUSH_ZERO_MASK;
1075  #else /*JUCE_USE_ARM_NEON*/
1076  intptr_t mask = (1 << 24 /* FZ */);
1077  #endif
1078  setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
1079  #else
1080  #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1081  jassertfalse; // No support for flush to zero mode on your platform
1082  #endif
1083  ignoreUnused (shouldEnable);
1084  #endif
1085 }
1086 
1087 void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport (bool shouldDisable) noexcept
1088 {
1089  #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1090  #if JUCE_USE_SSE_INTRINSICS
1091  intptr_t mask = 0x8040;
1092  #else /*JUCE_USE_ARM_NEON*/
1093  intptr_t mask = (1 << 24 /* FZ */);
1094  #endif
1095 
1096  setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldDisable ? mask : 0));
1097  #else
1098  ignoreUnused (shouldDisable);
1099 
1100  #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
1101  jassertfalse; // No support for disable denormals mode on your platform
1102  #endif
1103  #endif
1104 }
1105 
1106 bool JUCE_CALLTYPE FloatVectorOperations::areDenormalsDisabled() noexcept
1107 {
1108  #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1109  #if JUCE_USE_SSE_INTRINSICS
1110  intptr_t mask = 0x8040;
1111  #else /*JUCE_USE_ARM_NEON*/
1112  intptr_t mask = (1 << 24 /* FZ */);
1113  #endif
1114 
1115  return ((getFpStatusRegister() & mask) == mask);
1116  #else
1117  return false;
1118  #endif
1119 }
1120 
1121 ScopedNoDenormals::ScopedNoDenormals() noexcept
1122 {
1123  #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1124  #if JUCE_USE_SSE_INTRINSICS
1125  intptr_t mask = 0x8040;
1126  #else /*JUCE_USE_ARM_NEON*/
1127  intptr_t mask = (1 << 24 /* FZ */);
1128  #endif
1129 
1130  fpsr = FloatVectorOperations::getFpStatusRegister();
1131  FloatVectorOperations::setFpStatusRegister (fpsr | mask);
1132  #endif
1133 }
1134 
1135 ScopedNoDenormals::~ScopedNoDenormals() noexcept
1136 {
1137  #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || defined (__arm64__) || defined (__aarch64__))
1138  FloatVectorOperations::setFpStatusRegister (fpsr);
1139  #endif
1140 }
1141 
1142 //==============================================================================
1143 //==============================================================================
1144 #if JUCE_UNIT_TESTS
1145 
1146 class FloatVectorOperationsTests : public UnitTest
1147 {
1148 public:
1149  FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations", "Audio") {}
1150 
1151  template <typename ValueType>
1152  struct TestRunner
1153  {
1154  static void runTest (UnitTest& u, Random random)
1155  {
1156  const int range = random.nextBool() ? 500 : 10;
1157  const int num = random.nextInt (range) + 1;
1158 
1159  HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
1160  HeapBlock<int> buffer3 (num + 16);
1161 
1162  #if JUCE_ARM
1163  ValueType* const data1 = buffer1;
1164  ValueType* const data2 = buffer2;
1165  int* const int1 = buffer3;
1166  #else
1167  // These tests deliberately operate on misaligned memory and will be flagged up by
1168  // checks for undefined behavior!
1169  ValueType* const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
1170  ValueType* const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
1171  int* const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
1172  #endif
1173 
1174  fillRandomly (random, data1, num);
1175  fillRandomly (random, data2, num);
1176 
1178  Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
1179  u.expect (minMax1 == minMax2);
1180 
1181  u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
1182  u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
1183 
1184  u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
1185  u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
1186 
1187  FloatVectorOperations::clear (data1, num);
1188  u.expect (areAllValuesEqual (data1, num, 0));
1189 
1190  FloatVectorOperations::fill (data1, (ValueType) 2, num);
1191  u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
1192 
1193  FloatVectorOperations::add (data1, (ValueType) 2, num);
1194  u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
1195 
1196  FloatVectorOperations::copy (data2, data1, num);
1197  u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
1198 
1199  FloatVectorOperations::add (data2, data1, num);
1200  u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
1201 
1202  FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
1203  u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
1204 
1205  FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
1206  u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
1207 
1208  FloatVectorOperations::multiply (data1, (ValueType) 2, num);
1209  u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
1210 
1211  FloatVectorOperations::multiply (data1, data2, num);
1212  u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1213 
1214  FloatVectorOperations::negate (data2, data1, num);
1215  u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
1216 
1217  FloatVectorOperations::subtract (data1, data2, num);
1218  u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
1219 
1220  FloatVectorOperations::abs (data1, data2, num);
1221  u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1222 
1223  FloatVectorOperations::abs (data2, data1, num);
1224  u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
1225 
1226  fillRandomly (random, int1, num);
1227  doConversionTest (u, data1, data2, int1, num);
1228 
1229  FloatVectorOperations::fill (data1, (ValueType) 2, num);
1230  FloatVectorOperations::fill (data2, (ValueType) 3, num);
1231  FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
1232  u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
1233  }
1234 
1235  static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
1236  {
1237  FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
1238  convertFixed (data2, int1, 2.0f, num);
1239  u.expect (buffersMatch (data1, data2, num));
1240  }
1241 
1242  static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
1243 
1244  static void fillRandomly (Random& random, ValueType* d, int num)
1245  {
1246  while (--num >= 0)
1247  *d++ = (ValueType) (random.nextDouble() * 1000.0);
1248  }
1249 
1250  static void fillRandomly (Random& random, int* d, int num)
1251  {
1252  while (--num >= 0)
1253  *d++ = random.nextInt();
1254  }
1255 
1256  static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
1257  {
1258  while (--num >= 0)
1259  *d++ = (float) *s++ * multiplier;
1260  }
1261 
1262  static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
1263  {
1264  while (--num >= 0)
1265  if (*d++ != target)
1266  return false;
1267 
1268  return true;
1269  }
1270 
1271  static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
1272  {
1273  while (--num >= 0)
1274  if (! valuesMatch (*d1++, *d2++))
1275  return false;
1276 
1277  return true;
1278  }
1279 
1280  static bool valuesMatch (ValueType v1, ValueType v2)
1281  {
1282  return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
1283  }
1284  };
1285 
1286  void runTest() override
1287  {
1288  beginTest ("FloatVectorOperations");
1289 
1290  for (int i = 1000; --i >= 0;)
1291  {
1292  TestRunner<float>::runTest (*this, getRandom());
1293  TestRunner<double>::runTest (*this, getRandom());
1294  }
1295  }
1296 };
1297 
1298 static FloatVectorOperationsTests vectorOpTests;
1299 
1300 #endif
1301 
1302 } // namespace juce
static bool JUCE_CALLTYPE areDenormalsDisabled() noexcept
This method returns true if denormals are currently disabled.
static void JUCE_CALLTYPE clip(float *dest, const float *src, float low, float high, int num) noexcept
Each element of dest is calculated by hard clipping the corresponding src element so that it is in the range specified by the arguments low and high.
JUCE_CONSTEXPR Range getUnionWith(Range other) const noexcept
Returns the smallest range that contains both this one and the other one.
Definition: juce_Range.h:245
static void JUCE_CALLTYPE add(float *dest, float amountToAdd, int numValues) noexcept
Adds a fixed value to the destination values.
int nextInt() noexcept
Returns the next random 32 bit integer.
Definition: juce_Random.cpp:78
static void JUCE_CALLTYPE copy(float *dest, const float *src, int numValues) noexcept
Copies a vector of floats.
bool nextBool() noexcept
Returns the next random boolean value.
Very simple container class to hold a pointer to some data on the heap.
static void JUCE_CALLTYPE fill(float *dest, float valueToFill, int numValues) noexcept
Copies a repeated value into a vector of floats.
static void JUCE_CALLTYPE multiply(float *dest, const float *src, int numValues) noexcept
Multiplies the destination values by the source values.
static void JUCE_CALLTYPE convertFixedToFloat(float *dest, const int *src, float multiplier, int numValues) noexcept
Converts a stream of integers to floats, multiplying each one by the given multiplier.
ElementType * get() const noexcept
Returns a raw pointer to the allocated data.
static void JUCE_CALLTYPE abs(float *dest, const float *src, int numValues) noexcept
Copies a source vector to a destination, taking the absolute of each value.
This is a base class for classes that perform a unit test.
Definition: juce_UnitTest.h:73
static Range< float > JUCE_CALLTYPE findMinAndMax(const float *src, int numValues) noexcept
Finds the minimum and maximum values in the given array.
static void JUCE_CALLTYPE subtract(float *dest, const float *src, int numValues) noexcept
Subtracts the source values from the destination values.
double nextDouble() noexcept
Returns the next random floating-point number.
static void JUCE_CALLTYPE clear(float *dest, int numValues) noexcept
Clears a vector of floats.
static void JUCE_CALLTYPE subtractWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
Multiplies each source value by the given multiplier, then subtracts it from the destination value.
static void JUCE_CALLTYPE enableFlushToZeroMode(bool shouldEnable) noexcept
This method enables or disables the SSE/NEON flush-to-zero mode.
static void JUCE_CALLTYPE max(float *dest, const float *src, float comp, int num) noexcept
Each element of dest will be the maximum of the corresponding element of the source array and the given comp value.
static float JUCE_CALLTYPE findMinimum(const float *src, int numValues) noexcept
Finds the minimum value in the given array.
static void JUCE_CALLTYPE negate(float *dest, const float *src, int numValues) noexcept
Copies a source vector to a destination, negating each value.
static Range findMinAndMax(const ValueType *values, int numValues) noexcept
Scans an array of values for its min and max, and returns these as a Range.
Definition: juce_Range.h:277
void expect(bool testResult, const String &failureMessage=String())
Checks that the result of a test is true, and logs this result.
static void JUCE_CALLTYPE copyWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
Copies a vector of floats, multiplying each value by a given multiplier.
static void JUCE_CALLTYPE addWithMultiply(float *dest, const float *src, float multiplier, int numValues) noexcept
Multiplies each source value by the given multiplier, then adds it to the destination value.
static float JUCE_CALLTYPE findMaximum(const float *src, int numValues) noexcept
Finds the maximum value in the given array.
A random number generator.
Definition: juce_Random.h:38
static void JUCE_CALLTYPE min(float *dest, const float *src, float comp, int num) noexcept
Each element of dest will be the minimum of the corresponding element of the source array and the given comp value.
static void JUCE_CALLTYPE disableDenormalisedNumberSupport(bool shouldDisable=true) noexcept
On Intel CPUs, this method enables the SSE flush-to-zero and denormalised-are-zero modes.