intrin.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_HAL_INTRIN_HPP
  45. #define OPENCV_HAL_INTRIN_HPP
#include <cmath>
#include <float.h>
#include <stdlib.h>
#include <string.h>
#include "opencv2/core/cvdef.h"
  50. #define OPENCV_HAL_ADD(a, b) ((a) + (b))
  51. #define OPENCV_HAL_AND(a, b) ((a) & (b))
  52. #define OPENCV_HAL_NOP(a) (a)
  53. #define OPENCV_HAL_1ST(a, b) (a)
  54. namespace {
  55. inline unsigned int trailingZeros32(unsigned int value) {
  56. #if defined(_MSC_VER)
  57. #if (_MSC_VER < 1700) || defined(_M_ARM)
  58. unsigned long index = 0;
  59. _BitScanForward(&index, value);
  60. return (unsigned int)index;
  61. #elif defined(__clang__)
  62. // clang-cl doesn't export _tzcnt_u32 for non BMI systems
  63. return value ? __builtin_ctz(value) : 32;
  64. #else
  65. return _tzcnt_u32(value);
  66. #endif
  67. #elif defined(__GNUC__) || defined(__GNUG__)
  68. return __builtin_ctz(value);
  69. #elif defined(__ICC) || defined(__INTEL_COMPILER)
  70. return _bit_scan_forward(value);
  71. #elif defined(__clang__)
  72. return llvm.cttz.i32(value, true);
  73. #else
  74. static const int MultiplyDeBruijnBitPosition[32] = {
  75. 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  76. 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
  77. return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
  78. #endif
  79. }
  80. }
// Unlike the HAL API, which lives in cv::hal, the intrinsics are placed
// directly in the cv namespace so that they are easier to use
// from within OpenCV code.
  84. namespace cv {
  85. namespace hal {
  86. enum StoreMode
  87. {
  88. STORE_UNALIGNED = 0,
  89. STORE_ALIGNED = 1,
  90. STORE_ALIGNED_NOCACHE = 2
  91. };
  92. }
  93. template<typename _Tp> struct V_TypeTraits
  94. {
  95. };
  96. #define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
  97. template<> struct V_TypeTraits<type> \
  98. { \
  99. typedef type value_type; \
  100. typedef int_type_ int_type; \
  101. typedef abs_type_ abs_type; \
  102. typedef uint_type_ uint_type; \
  103. typedef w_type_ w_type; \
  104. typedef q_type_ q_type; \
  105. typedef sum_type_ sum_type; \
  106. enum { nlanes128 = nlanes128_ }; \
  107. \
  108. static inline int_type reinterpret_int(type x) \
  109. { \
  110. union { type l; int_type i; } v; \
  111. v.l = x; \
  112. return v.i; \
  113. } \
  114. \
  115. static inline type reinterpret_from_int(int_type x) \
  116. { \
  117. union { type l; int_type i; } v; \
  118. v.i = x; \
  119. return v.l; \
  120. } \
  121. }
  122. CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
  123. CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
  124. CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
  125. CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
  126. CV_INTRIN_DEF_TYPE_TRAITS(unsigned, int, unsigned, unsigned, uint64, void, unsigned, 4);
  127. CV_INTRIN_DEF_TYPE_TRAITS(int, int, unsigned, unsigned, int64, void, int, 4);
  128. CV_INTRIN_DEF_TYPE_TRAITS(float, int, unsigned, float, double, void, float, 4);
  129. CV_INTRIN_DEF_TYPE_TRAITS(uint64, int64, uint64, uint64, void, void, uint64, 2);
  130. CV_INTRIN_DEF_TYPE_TRAITS(int64, int64, uint64, uint64, void, void, int64, 2);
  131. CV_INTRIN_DEF_TYPE_TRAITS(double, int64, uint64, double, void, void, double, 2);
  132. #ifndef CV_DOXYGEN
  133. #ifdef CV_CPU_DISPATCH_MODE
  134. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  135. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
  136. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  137. #else
  138. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
  139. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
  140. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  141. #endif
  142. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  143. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  144. using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
  145. #endif
  146. }
  147. #ifdef CV_DOXYGEN
  148. # undef CV_AVX2
  149. # undef CV_SSE2
  150. # undef CV_NEON
  151. # undef CV_VSX
  152. # undef CV_FP16
  153. #endif
  154. #if CV_SSE2 || CV_NEON || CV_VSX
  155. #define CV__SIMD_FORWARD 128
  156. #include "opencv2/core/hal/intrin_forward.hpp"
  157. #endif
  158. #if CV_SSE2
  159. #include "opencv2/core/hal/intrin_sse_em.hpp"
  160. #include "opencv2/core/hal/intrin_sse.hpp"
  161. #elif CV_NEON
  162. #include "opencv2/core/hal/intrin_neon.hpp"
  163. #elif CV_VSX
  164. #include "opencv2/core/hal/intrin_vsx.hpp"
  165. #else
  166. #define CV_SIMD128_CPP 1
  167. #include "opencv2/core/hal/intrin_cpp.hpp"
  168. #endif
  169. // AVX2 can be used together with SSE2, so
  170. // we define those two sets of intrinsics at once.
  171. // Most of the intrinsics do not conflict (the proper overloaded variant is
  172. // resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
  173. // but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
  174. // Correspondingly, the wide intrinsics (which are mapped to the "widest"
  175. // available instruction set) will get vx_ prefix
  176. // (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
  177. #if CV_AVX2
  178. #define CV__SIMD_FORWARD 256
  179. #include "opencv2/core/hal/intrin_forward.hpp"
  180. #include "opencv2/core/hal/intrin_avx.hpp"
  181. #endif
  182. // AVX512 can be used together with SSE2 and AVX2, so
  183. // we define those sets of intrinsics at once.
// Some of the AVX512 intrinsics get the v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// Wide intrinsics will be mapped to their v512_ counterparts in this case (e.g. vx_load() => v512_load())
  186. #if CV_AVX512_SKX
  187. #define CV__SIMD_FORWARD 512
  188. #include "opencv2/core/hal/intrin_forward.hpp"
  189. #include "opencv2/core/hal/intrin_avx512.hpp"
  190. #endif
  191. //! @cond IGNORED
  192. namespace cv {
  193. #ifndef CV_DOXYGEN
  194. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  195. #endif
  196. #ifndef CV_SIMD128
  197. #define CV_SIMD128 0
  198. #endif
  199. #ifndef CV_SIMD128_64F
  200. #define CV_SIMD128_64F 0
  201. #endif
  202. #ifndef CV_SIMD256
  203. #define CV_SIMD256 0
  204. #endif
  205. #ifndef CV_SIMD256_64F
  206. #define CV_SIMD256_64F 0
  207. #endif
  208. #ifndef CV_SIMD512
  209. #define CV_SIMD512 0
  210. #endif
  211. #ifndef CV_SIMD512_64F
  212. #define CV_SIMD512_64F 0
  213. #endif
  214. #ifndef CV_SIMD128_FP16
  215. #define CV_SIMD128_FP16 0
  216. #endif
  217. #ifndef CV_SIMD256_FP16
  218. #define CV_SIMD256_FP16 0
  219. #endif
  220. #ifndef CV_SIMD512_FP16
  221. #define CV_SIMD512_FP16 0
  222. #endif
  223. //==================================================================================================
  224. #define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
  225. inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
  226. inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
  227. inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
  228. inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
  229. inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
  230. inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
  231. inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
  232. inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \
  233. inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \
  234. inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); }
  235. #define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
  236. inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); }
  237. #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
  238. inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
  239. #define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
  240. inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
  241. #define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
  242. CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
  243. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
  244. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
  245. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
  246. #define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
  247. CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
  248. CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
  249. CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
  250. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \
  251. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
  252. CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
  253. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \
  254. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
  255. CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
  256. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \
  257. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
  258. CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
  259. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \
  260. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
  261. CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
  262. CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \
  263. CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
  264. CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
  265. CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
  266. template<typename _Tp> struct V_RegTraits
  267. {
  268. };
  269. #define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
  270. template<> struct V_RegTraits<_reg> \
  271. { \
  272. typedef _reg reg; \
  273. typedef _u_reg u_reg; \
  274. typedef _w_reg w_reg; \
  275. typedef _q_reg q_reg; \
  276. typedef _int_reg int_reg; \
  277. typedef _round_reg round_reg; \
  278. }
  279. #if CV_SIMD128 || CV_SIMD128_CPP
  280. CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
  281. CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
  282. CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
  283. CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
  284. CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
  285. CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
  286. #if CV_SIMD128_64F
  287. CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
  288. #else
  289. CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
  290. #endif
  291. CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
  292. CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
  293. #if CV_SIMD128_64F
  294. CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
  295. #endif
  296. #endif
  297. #if CV_SIMD256
  298. CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
  299. CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
  300. CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
  301. CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
  302. CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
  303. CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
  304. CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
  305. CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
  306. CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
  307. CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
  308. #endif
  309. #if CV_SIMD512
  310. CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
  311. CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
  312. CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
  313. CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
  314. CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
  315. CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
  316. CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
  317. CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
  318. CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
  319. CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
  320. #endif
  321. #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
  322. #define CV__SIMD_NAMESPACE simd512
  323. namespace CV__SIMD_NAMESPACE {
  324. #define CV_SIMD 1
  325. #define CV_SIMD_64F CV_SIMD512_64F
  326. #define CV_SIMD_FP16 CV_SIMD512_FP16
  327. #define CV_SIMD_WIDTH 64
  328. typedef v_uint8x64 v_uint8;
  329. typedef v_int8x64 v_int8;
  330. typedef v_uint16x32 v_uint16;
  331. typedef v_int16x32 v_int16;
  332. typedef v_uint32x16 v_uint32;
  333. typedef v_int32x16 v_int32;
  334. typedef v_uint64x8 v_uint64;
  335. typedef v_int64x8 v_int64;
  336. typedef v_float32x16 v_float32;
  337. CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512)
  338. #if CV_SIMD512_64F
  339. typedef v_float64x8 v_float64;
  340. CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load)
  341. #endif
  342. inline void vx_cleanup() { v512_cleanup(); }
  343. } // namespace
  344. using namespace CV__SIMD_NAMESPACE;
  345. #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
  346. #define CV__SIMD_NAMESPACE simd256
  347. namespace CV__SIMD_NAMESPACE {
  348. #define CV_SIMD 1
  349. #define CV_SIMD_64F CV_SIMD256_64F
  350. #define CV_SIMD_FP16 CV_SIMD256_FP16
  351. #define CV_SIMD_WIDTH 32
  352. typedef v_uint8x32 v_uint8;
  353. typedef v_int8x32 v_int8;
  354. typedef v_uint16x16 v_uint16;
  355. typedef v_int16x16 v_int16;
  356. typedef v_uint32x8 v_uint32;
  357. typedef v_int32x8 v_int32;
  358. typedef v_uint64x4 v_uint64;
  359. typedef v_int64x4 v_int64;
  360. typedef v_float32x8 v_float32;
  361. CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
  362. #if CV_SIMD256_64F
  363. typedef v_float64x4 v_float64;
  364. CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
  365. #endif
  366. inline void vx_cleanup() { v256_cleanup(); }
  367. } // namespace
  368. using namespace CV__SIMD_NAMESPACE;
  369. #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
  370. #define CV__SIMD_NAMESPACE simd128
  371. namespace CV__SIMD_NAMESPACE {
  372. #define CV_SIMD CV_SIMD128
  373. #define CV_SIMD_64F CV_SIMD128_64F
  374. #define CV_SIMD_WIDTH 16
  375. typedef v_uint8x16 v_uint8;
  376. typedef v_int8x16 v_int8;
  377. typedef v_uint16x8 v_uint16;
  378. typedef v_int16x8 v_int16;
  379. typedef v_uint32x4 v_uint32;
  380. typedef v_int32x4 v_int32;
  381. typedef v_uint64x2 v_uint64;
  382. typedef v_int64x2 v_int64;
  383. typedef v_float32x4 v_float32;
  384. CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
  385. #if CV_SIMD128_64F
  386. typedef v_float64x2 v_float64;
  387. CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
  388. #endif
  389. inline void vx_cleanup() { v_cleanup(); }
  390. } // namespace
  391. using namespace CV__SIMD_NAMESPACE;
  392. #endif
  393. #ifndef CV_DOXYGEN
  394. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  395. #endif
  396. #ifndef CV_SIMD_64F
  397. #define CV_SIMD_64F 0
  398. #endif
  399. #ifndef CV_SIMD_FP16
  400. #define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
  401. #endif
  402. #ifndef CV_SIMD
  403. #define CV_SIMD 0
  404. #endif
  405. } // cv::
  406. //! @endcond
  407. #endif