type_vec4_simd.inl 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775
  1. #if GLM_ARCH & GLM_ARCH_SSE2_BIT
  2. namespace glm{
  3. namespace detail
  4. {
  5. # if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
  6. template<qualifier Q, int E0, int E1, int E2, int E3>
  7. struct _swizzle_base1<4, float, Q, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
  8. {
  9. GLM_FUNC_QUALIFIER vec<4, float, Q> operator ()() const
  10. {
  11. __m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
  12. vec<4, float, Q> Result;
  13. # if GLM_ARCH & GLM_ARCH_AVX_BIT
  14. Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
  15. # else
  16. Result.data = _mm_shuffle_ps(data, data, _MM_SHUFFLE(E3, E2, E1, E0));
  17. # endif
  18. return Result;
  19. }
  20. };
  21. template<qualifier Q, int E0, int E1, int E2, int E3>
  22. struct _swizzle_base1<4, int, Q, E0,E1,E2,E3, true> : public _swizzle_base0<int, 4>
  23. {
  24. GLM_FUNC_QUALIFIER vec<4, int, Q> operator ()() const
  25. {
  26. __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
  27. vec<4, int, Q> Result;
  28. Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
  29. return Result;
  30. }
  31. };
  32. template<qualifier Q, int E0, int E1, int E2, int E3>
  33. struct _swizzle_base1<4, uint, Q, E0,E1,E2,E3, true> : public _swizzle_base0<uint, 4>
  34. {
  35. GLM_FUNC_QUALIFIER vec<4, uint, Q> operator ()() const
  36. {
  37. __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
  38. vec<4, uint, Q> Result;
  39. Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
  40. return Result;
  41. }
  42. };
  43. # endif// GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
  44. template<qualifier Q>
  45. struct compute_vec4_add<float, Q, true>
  46. {
  47. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  48. {
  49. vec<4, float, Q> Result;
  50. Result.data = _mm_add_ps(a.data, b.data);
  51. return Result;
  52. }
  53. };
  54. # if GLM_ARCH & GLM_ARCH_AVX_BIT
  55. template<qualifier Q>
  56. struct compute_vec4_add<double, Q, true>
  57. {
  58. static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
  59. {
  60. vec<4, double, Q> Result;
  61. Result.data = _mm256_add_pd(a.data, b.data);
  62. return Result;
  63. }
  64. };
  65. # endif
  66. template<qualifier Q>
  67. struct compute_vec4_sub<float, Q, true>
  68. {
  69. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  70. {
  71. vec<4, float, Q> Result;
  72. Result.data = _mm_sub_ps(a.data, b.data);
  73. return Result;
  74. }
  75. };
  76. # if GLM_ARCH & GLM_ARCH_AVX_BIT
  77. template<qualifier Q>
  78. struct compute_vec4_sub<double, Q, true>
  79. {
  80. static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
  81. {
  82. vec<4, double, Q> Result;
  83. Result.data = _mm256_sub_pd(a.data, b.data);
  84. return Result;
  85. }
  86. };
  87. # endif
  88. template<qualifier Q>
  89. struct compute_vec4_mul<float, Q, true>
  90. {
  91. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  92. {
  93. vec<4, float, Q> Result;
  94. Result.data = _mm_mul_ps(a.data, b.data);
  95. return Result;
  96. }
  97. };
  98. # if GLM_ARCH & GLM_ARCH_AVX_BIT
  99. template<qualifier Q>
  100. struct compute_vec4_mul<double, Q, true>
  101. {
  102. static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
  103. {
  104. vec<4, double, Q> Result;
  105. Result.data = _mm256_mul_pd(a.data, b.data);
  106. return Result;
  107. }
  108. };
  109. # endif
  110. template<qualifier Q>
  111. struct compute_vec4_div<float, Q, true>
  112. {
  113. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  114. {
  115. vec<4, float, Q> Result;
  116. Result.data = _mm_div_ps(a.data, b.data);
  117. return Result;
  118. }
  119. };
  120. # if GLM_ARCH & GLM_ARCH_AVX_BIT
  121. template<qualifier Q>
  122. struct compute_vec4_div<double, Q, true>
  123. {
  124. static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
  125. {
  126. vec<4, double, Q> Result;
  127. Result.data = _mm256_div_pd(a.data, b.data);
  128. return Result;
  129. }
  130. };
  131. # endif
  132. template<>
  133. struct compute_vec4_div<float, aligned_lowp, true>
  134. {
  135. static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
  136. {
  137. vec<4, float, aligned_lowp> Result;
  138. Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
  139. return Result;
  140. }
  141. };
  142. template<typename T, qualifier Q>
  143. struct compute_vec4_and<T, Q, true, 32, true>
  144. {
  145. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  146. {
  147. vec<4, T, Q> Result;
  148. Result.data = _mm_and_si128(a.data, b.data);
  149. return Result;
  150. }
  151. };
  152. # if GLM_ARCH & GLM_ARCH_AVX2_BIT
  153. template<typename T, qualifier Q>
  154. struct compute_vec4_and<T, Q, true, 64, true>
  155. {
  156. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  157. {
  158. vec<4, T, Q> Result;
  159. Result.data = _mm256_and_si256(a.data, b.data);
  160. return Result;
  161. }
  162. };
  163. # endif
  164. template<typename T, qualifier Q>
  165. struct compute_vec4_or<T, Q, true, 32, true>
  166. {
  167. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  168. {
  169. vec<4, T, Q> Result;
  170. Result.data = _mm_or_si128(a.data, b.data);
  171. return Result;
  172. }
  173. };
  174. # if GLM_ARCH & GLM_ARCH_AVX2_BIT
  175. template<typename T, qualifier Q>
  176. struct compute_vec4_or<T, Q, true, 64, true>
  177. {
  178. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  179. {
  180. vec<4, T, Q> Result;
  181. Result.data = _mm256_or_si256(a.data, b.data);
  182. return Result;
  183. }
  184. };
  185. # endif
  186. template<typename T, qualifier Q>
  187. struct compute_vec4_xor<T, Q, true, 32, true>
  188. {
  189. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  190. {
  191. vec<4, T, Q> Result;
  192. Result.data = _mm_xor_si128(a.data, b.data);
  193. return Result;
  194. }
  195. };
  196. # if GLM_ARCH & GLM_ARCH_AVX2_BIT
  197. template<typename T, qualifier Q>
  198. struct compute_vec4_xor<T, Q, true, 64, true>
  199. {
  200. static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
  201. {
  202. vec<4, T, Q> Result;
  203. Result.data = _mm256_xor_si256(a.data, b.data);
  204. return Result;
  205. }
  206. };
  207. # endif
	// Left shift for 4x32-bit integer vectors.
	// NOTE(review): _mm_sll_epi32 shifts ALL four lanes of a by the single count
	// held in the low 64 bits of b.data — it is not a per-component shift by b's
	// lanes. Verify callers only reach this path with a uniform shift amount
	// (per-component counts would need AVX2 _mm_sllv_epi32).
	template<typename T, qualifier Q>
	struct compute_vec4_shift_left<T, Q, true, 32, true>
	{
		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
		{
			vec<4, T, Q> Result;
			Result.data = _mm_sll_epi32(a.data, b.data);
			return Result;
		}
	};
#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
	// 4x64-bit variant (AVX2); same uniform-count semantics: the shift count is
	// taken from the low 64 bits of b.data and applied to every lane.
	template<typename T, qualifier Q>
	struct compute_vec4_shift_left<T, Q, true, 64, true>
	{
		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
		{
			vec<4, T, Q> Result;
			Result.data = _mm256_sll_epi64(a.data, b.data);
			return Result;
		}
	};
#	endif
	// Logical (zero-filling) right shift for 4x32-bit integer vectors.
	// NOTE(review): _mm_srl_epi32 shifts ALL four lanes of a by the single count
	// held in the low 64 bits of b.data, and is a logical shift regardless of T's
	// signedness — confirm that is intended for signed T (arithmetic shift would
	// be _mm_sra_epi32).
	template<typename T, qualifier Q>
	struct compute_vec4_shift_right<T, Q, true, 32, true>
	{
		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
		{
			vec<4, T, Q> Result;
			Result.data = _mm_srl_epi32(a.data, b.data);
			return Result;
		}
	};
#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
	// 4x64-bit variant (AVX2); same uniform-count, logical-shift semantics.
	template<typename T, qualifier Q>
	struct compute_vec4_shift_right<T, Q, true, 64, true>
	{
		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
		{
			vec<4, T, Q> Result;
			Result.data = _mm256_srl_epi64(a.data, b.data);
			return Result;
		}
	};
#	endif
  252. template<typename T, qualifier Q>
  253. struct compute_vec4_bitwise_not<T, Q, true, 32, true>
  254. {
  255. static vec<4, T, Q> call(vec<4, T, Q> const& v)
  256. {
  257. vec<4, T, Q> Result;
  258. Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
  259. return Result;
  260. }
  261. };
  262. # if GLM_ARCH & GLM_ARCH_AVX2_BIT
  263. template<typename T, qualifier Q>
  264. struct compute_vec4_bitwise_not<T, Q, true, 64, true>
  265. {
  266. static vec<4, T, Q> call(vec<4, T, Q> const& v)
  267. {
  268. vec<4, T, Q> Result;
  269. Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
  270. return Result;
  271. }
  272. };
  273. # endif
  274. template<qualifier Q>
  275. struct compute_vec4_equal<float, Q, false, 32, true>
  276. {
  277. static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
  278. {
  279. return _mm_movemask_ps(_mm_cmpeq_ps(v1.data, v2.data)) != 0;
  280. }
  281. };
  282. # if GLM_ARCH & GLM_ARCH_SSE41_BIT
  283. template<qualifier Q>
  284. struct compute_vec4_equal<int, Q, true, 32, true>
  285. {
  286. static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
  287. {
  288. //return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
  289. __m128i neq = _mm_xor_si128(v1.data, v2.data);
  290. return _mm_test_all_zeros(neq, neq) == 0;
  291. }
  292. };
  293. # endif
  294. template<qualifier Q>
  295. struct compute_vec4_nequal<float, Q, false, 32, true>
  296. {
  297. static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
  298. {
  299. return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
  300. }
  301. };
  302. # if GLM_ARCH & GLM_ARCH_SSE41_BIT
  303. template<qualifier Q>
  304. struct compute_vec4_nequal<int, Q, true, 32, true>
  305. {
  306. static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
  307. {
  308. //return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
  309. __m128i neq = _mm_xor_si128(v1.data, v2.data);
  310. return _mm_test_all_zeros(neq, neq) != 0;
  311. }
  312. };
  313. # endif
  314. }//namespace detail
	// --- Aligned-qualifier constructor specializations (SSE2/AVX/AVX2) ---------
	// One explicit specialization per aligned qualifier so the compiler emits a
	// single splat/set instruction instead of four scalar stores.

	// Scalar-splat: broadcast _s into all four float lanes.
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _s) :
		data(_mm_set1_ps(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _s) :
		data(_mm_set1_ps(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _s) :
		data(_mm_set1_ps(_s))
	{}
#	if GLM_ARCH & GLM_ARCH_AVX_BIT
	// Scalar-splat for vec4<double>; the 4 doubles occupy one 256-bit register.
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) :
		data(_mm256_set1_pd(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_mediump>::vec(double _s) :
		data(_mm256_set1_pd(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_highp>::vec(double _s) :
		data(_mm256_set1_pd(_s))
	{}
#	endif
	// Scalar-splat for vec4<int>.
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _s) :
		data(_mm_set1_epi32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _s) :
		data(_mm_set1_epi32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _s) :
		data(_mm_set1_epi32(_s))
	{}
#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
	// Scalar-splat for vec4<int64> (AVX2 256-bit integer register).
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_lowp>::vec(detail::int64 _s) :
		data(_mm256_set1_epi64x(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_mediump>::vec(detail::int64 _s) :
		data(_mm256_set1_epi64x(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_highp>::vec(detail::int64 _s) :
		data(_mm256_set1_epi64x(_s))
	{}
#	endif
	// Four-component constructors. Note the reversed argument order: _mm_set_ps
	// lists lanes from the highest (w) to the lowest (x).
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _x, float _y, float _z, float _w) :
		data(_mm_set_ps(_w, _z, _y, _x))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _x, float _y, float _z, float _w) :
		data(_mm_set_ps(_w, _z, _y, _x))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _x, float _y, float _z, float _w) :
		data(_mm_set_ps(_w, _z, _y, _x))
	{}
	// Double template<> level: the 4-scalar constructor is itself a member
	// template, so the class AND the member specialization are both explicit.
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_set_epi32(_w, _z, _y, _x))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_set_epi32(_w, _z, _y, _x))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_set_epi32(_w, _z, _y, _x))
	{}
	// Converting constructors: build an integer vector, then convert the four
	// lanes to float in one cvtepi32->ps instruction.
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
		data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
	{}
  409. }//namespace glm
  410. #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
  411. #if GLM_ARCH & GLM_ARCH_NEON_BIT
  412. namespace glm {
  413. namespace detail {
  414. template<qualifier Q>
  415. struct compute_vec4_add<float, Q, true>
  416. {
  417. static
  418. vec<4, float, Q>
  419. call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  420. {
  421. vec<4, float, Q> Result;
  422. Result.data = vaddq_f32(a.data, b.data);
  423. return Result;
  424. }
  425. };
  426. template<qualifier Q>
  427. struct compute_vec4_add<uint, Q, true>
  428. {
  429. static
  430. vec<4, uint, Q>
  431. call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
  432. {
  433. vec<4, uint, Q> Result;
  434. Result.data = vaddq_u32(a.data, b.data);
  435. return Result;
  436. }
  437. };
  438. template<qualifier Q>
  439. struct compute_vec4_add<int, Q, true>
  440. {
  441. static
  442. vec<4, int, Q>
  443. call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
  444. {
  445. vec<4, uint, Q> Result;
  446. Result.data = vaddq_s32(a.data, b.data);
  447. return Result;
  448. }
  449. };
  450. template<qualifier Q>
  451. struct compute_vec4_sub<float, Q, true>
  452. {
  453. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  454. {
  455. vec<4, float, Q> Result;
  456. Result.data = vsubq_f32(a.data, b.data);
  457. return Result;
  458. }
  459. };
  460. template<qualifier Q>
  461. struct compute_vec4_sub<uint, Q, true>
  462. {
  463. static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
  464. {
  465. vec<4, uint, Q> Result;
  466. Result.data = vsubq_u32(a.data, b.data);
  467. return Result;
  468. }
  469. };
  470. template<qualifier Q>
  471. struct compute_vec4_sub<int, Q, true>
  472. {
  473. static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
  474. {
  475. vec<4, int, Q> Result;
  476. Result.data = vsubq_s32(a.data, b.data);
  477. return Result;
  478. }
  479. };
  480. template<qualifier Q>
  481. struct compute_vec4_mul<float, Q, true>
  482. {
  483. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  484. {
  485. vec<4, float, Q> Result;
  486. Result.data = vmulq_f32(a.data, b.data);
  487. return Result;
  488. }
  489. };
  490. template<qualifier Q>
  491. struct compute_vec4_mul<uint, Q, true>
  492. {
  493. static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
  494. {
  495. vec<4, uint, Q> Result;
  496. Result.data = vmulq_u32(a.data, b.data);
  497. return Result;
  498. }
  499. };
  500. template<qualifier Q>
  501. struct compute_vec4_mul<int, Q, true>
  502. {
  503. static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
  504. {
  505. vec<4, int, Q> Result;
  506. Result.data = vmulq_s32(a.data, b.data);
  507. return Result;
  508. }
  509. };
  510. template<qualifier Q>
  511. struct compute_vec4_div<float, Q, true>
  512. {
  513. static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
  514. {
  515. vec<4, float, Q> Result;
  516. Result.data = vdivq_f32(a.data, b.data);
  517. return Result;
  518. }
  519. };
  520. template<qualifier Q>
  521. struct compute_vec4_equal<float, Q, false, 32, true>
  522. {
  523. static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
  524. {
  525. uint32x4_t cmp = vceqq_f32(v1.data, v2.data);
  526. #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
  527. cmp = vpminq_u32(cmp, cmp);
  528. cmp = vpminq_u32(cmp, cmp);
  529. uint32_t r = cmp[0];
  530. #else
  531. uint32x2_t cmpx2 = vpmin_u32(vget_low_f32(cmp), vget_high_f32(cmp));
  532. cmpx2 = vpmin_u32(cmpx2, cmpx2);
  533. uint32_t r = cmpx2[0];
  534. #endif
  535. return r == ~0u;
  536. }
  537. };
  538. template<qualifier Q>
  539. struct compute_vec4_equal<uint, Q, false, 32, true>
  540. {
  541. static bool call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
  542. {
  543. uint32x4_t cmp = vceqq_u32(v1.data, v2.data);
  544. #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
  545. cmp = vpminq_u32(cmp, cmp);
  546. cmp = vpminq_u32(cmp, cmp);
  547. uint32_t r = cmp[0];
  548. #else
  549. uint32x2_t cmpx2 = vpmin_u32(vget_low_f32(cmp), vget_high_f32(cmp));
  550. cmpx2 = vpmin_u32(cmpx2, cmpx2);
  551. uint32_t r = cmpx2[0];
  552. #endif
  553. return r == ~0u;
  554. }
  555. };
  556. template<qualifier Q>
  557. struct compute_vec4_equal<int, Q, false, 32, true>
  558. {
  559. static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
  560. {
  561. uint32x4_t cmp = vceqq_s32(v1.data, v2.data);
  562. #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
  563. cmp = vpminq_u32(cmp, cmp);
  564. cmp = vpminq_u32(cmp, cmp);
  565. uint32_t r = cmp[0];
  566. #else
  567. uint32x2_t cmpx2 = vpmin_u32(vget_low_f32(cmp), vget_high_f32(cmp));
  568. cmpx2 = vpmin_u32(cmpx2, cmpx2);
  569. uint32_t r = cmpx2[0];
  570. #endif
  571. return r == ~0u;
  572. }
  573. };
	// Not-equal is defined as the logical negation of all-components equality,
	// delegating to the matching compute_vec4_equal specialization.
	template<qualifier Q>
	struct compute_vec4_nequal<float, Q, false, 32, true>
	{
		static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
		{
			return !compute_vec4_equal<float, Q, false, 32, true>::call(v1, v2);
		}
	};
	template<qualifier Q>
	struct compute_vec4_nequal<uint, Q, false, 32, true>
	{
		static bool call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
		{
			return !compute_vec4_equal<uint, Q, false, 32, true>::call(v1, v2);
		}
	};
	template<qualifier Q>
	struct compute_vec4_nequal<int, Q, false, 32, true>
	{
		static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
		{
			return !compute_vec4_equal<int, Q, false, 32, true>::call(v1, v2);
		}
	};
  598. }//namespace detail
#if !GLM_CONFIG_XYZW_ONLY
	// --- Aligned-qualifier constructor specializations (NEON) ------------------
	// Scalar-splat: vdupq_n_* broadcasts the scalar into all four lanes.
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _s) :
		data(vdupq_n_f32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _s) :
		data(vdupq_n_f32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _s) :
		data(vdupq_n_f32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _s) :
		data(vdupq_n_s32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _s) :
		data(vdupq_n_s32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _s) :
		data(vdupq_n_s32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_lowp>::vec(uint _s) :
		data(vdupq_n_u32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_mediump>::vec(uint _s) :
		data(vdupq_n_u32(_s))
	{}
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_highp>::vec(uint _s) :
		data(vdupq_n_u32(_s))
	{}
	// Converting constructors (aligned_highp only): copy the register directly,
	// or convert int/uint lanes to float with a single vcvtq instruction.
	// NOTE(review): only the highp qualifier gets these vector-to-vector
	// specializations here — lowp/mediump presumably fall back to the generic
	// path; confirm that asymmetry is intended.
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, float, aligned_highp>& rhs) :
		data(rhs.data)
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, int, aligned_highp>& rhs) :
		data(vcvtq_f32_s32(rhs.data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, uint, aligned_highp>& rhs) :
		data(vcvtq_f32_u32(rhs.data))
	{}
	// Four-scalar converting constructors: build the integer vector first, then
	// convert all four lanes to float at once.
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
		data(vcvtq_f32_s32(vec<4, int, aligned_lowp>(_x, _y, _z, _w).data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
		data(vcvtq_f32_s32(vec<4, int, aligned_mediump>(_x, _y, _z, _w).data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
		data(vcvtq_f32_s32(vec<4, int, aligned_highp>(_x, _y, _z, _w).data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(uint _x, uint _y, uint _z, uint _w) :
		data(vcvtq_f32_u32(vec<4, uint, aligned_lowp>(_x, _y, _z, _w).data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(uint _x, uint _y, uint _z, uint _w) :
		data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data))
	{}
	template<>
	template<>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) :
		data(vcvtq_f32_u32(vec<4, uint, aligned_highp>(_x, _y, _z, _w).data))
	{}
#endif
  682. }//namespace glm
  683. #endif