9
3

func_common_simd.inl 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. /// @ref core
  2. /// @file glm/detail/func_common_simd.inl
  3. #if GLM_ARCH & GLM_ARCH_SSE2_BIT
  4. #include "../simd/common.h"
  5. #include <immintrin.h>
  6. namespace glm{
  7. namespace detail
  8. {
  9. template<qualifier Q>
  10. struct compute_abs_vector<4, float, Q, true>
  11. {
  12. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  13. {
  14. vec<4, float, Q> result;
  15. result.data = glm_vec4_abs(v.data);
  16. return result;
  17. }
  18. };
  19. template<qualifier Q>
  20. struct compute_abs_vector<4, int, Q, true>
  21. {
  22. GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v)
  23. {
  24. vec<4, int, Q> result;
  25. result.data = glm_ivec4_abs(v.data);
  26. return result;
  27. }
  28. };
  29. template<qualifier Q>
  30. struct compute_floor<4, float, Q, true>
  31. {
  32. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  33. {
  34. vec<4, float, Q> result;
  35. result.data = glm_vec4_floor(v.data);
  36. return result;
  37. }
  38. };
  39. template<qualifier Q>
  40. struct compute_ceil<4, float, Q, true>
  41. {
  42. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  43. {
  44. vec<4, float, Q> result;
  45. result.data = glm_vec4_ceil(v.data);
  46. return result;
  47. }
  48. };
  49. template<qualifier Q>
  50. struct compute_fract<4, float, Q, true>
  51. {
  52. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  53. {
  54. vec<4, float, Q> result;
  55. result.data = glm_vec4_fract(v.data);
  56. return result;
  57. }
  58. };
  59. template<qualifier Q>
  60. struct compute_round<4, float, Q, true>
  61. {
  62. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  63. {
  64. vec<4, float, Q> result;
  65. result.data = glm_vec4_round(v.data);
  66. return result;
  67. }
  68. };
  69. template<qualifier Q>
  70. struct compute_mod<4, float, Q, true>
  71. {
  72. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
  73. {
  74. vec<4, float, Q> result;
  75. result.data = glm_vec4_mod(x.data, y.data);
  76. return result;
  77. }
  78. };
  79. template<qualifier Q>
  80. struct compute_min_vector<4, float, Q, true>
  81. {
  82. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
  83. {
  84. vec<4, float, Q> result;
  85. result.data = _mm_min_ps(v1.data, v2.data);
  86. return result;
  87. }
  88. };
  89. template<qualifier Q>
  90. struct compute_min_vector<4, int, Q, true>
  91. {
  92. GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
  93. {
  94. vec<4, int, Q> result;
  95. result.data = _mm_min_epi32(v1.data, v2.data);
  96. return result;
  97. }
  98. };
  99. template<qualifier Q>
  100. struct compute_min_vector<4, uint, Q, true>
  101. {
  102. GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
  103. {
  104. vec<4, uint, Q> result;
  105. result.data = _mm_min_epu32(v1.data, v2.data);
  106. return result;
  107. }
  108. };
  109. template<qualifier Q>
  110. struct compute_max_vector<4, float, Q, true>
  111. {
  112. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
  113. {
  114. vec<4, float, Q> result;
  115. result.data = _mm_max_ps(v1.data, v2.data);
  116. return result;
  117. }
  118. };
  119. template<qualifier Q>
  120. struct compute_max_vector<4, int, Q, true>
  121. {
  122. GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
  123. {
  124. vec<4, int, Q> result;
  125. result.data = _mm_max_epi32(v1.data, v2.data);
  126. return result;
  127. }
  128. };
  129. template<qualifier Q>
  130. struct compute_max_vector<4, uint, Q, true>
  131. {
  132. GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
  133. {
  134. vec<4, uint, Q> result;
  135. result.data = _mm_max_epu32(v1.data, v2.data);
  136. return result;
  137. }
  138. };
  139. template<qualifier Q>
  140. struct compute_clamp_vector<4, float, Q, true>
  141. {
  142. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& minVal, vec<4, float, Q> const& maxVal)
  143. {
  144. vec<4, float, Q> result;
  145. result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data);
  146. return result;
  147. }
  148. };
  149. template<qualifier Q>
  150. struct compute_clamp_vector<4, int, Q, true>
  151. {
  152. GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& x, vec<4, int, Q> const& minVal, vec<4, int, Q> const& maxVal)
  153. {
  154. vec<4, int, Q> result;
  155. result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
  156. return result;
  157. }
  158. };
  159. template<qualifier Q>
  160. struct compute_clamp_vector<4, uint, Q, true>
  161. {
  162. GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& x, vec<4, uint, Q> const& minVal, vec<4, uint, Q> const& maxVal)
  163. {
  164. vec<4, uint, Q> result;
  165. result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
  166. return result;
  167. }
  168. };
  169. template<qualifier Q>
  170. struct compute_mix_vector<4, float, bool, Q, true>
  171. {
  172. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y, vec<4, bool, Q> const& a)
  173. {
  174. __m128i const Load = _mm_set_epi32(-static_cast<int>(a.w), -static_cast<int>(a.z), -static_cast<int>(a.y), -static_cast<int>(a.x));
  175. __m128 const Mask = _mm_castsi128_ps(Load);
  176. vec<4, float, Q> Result;
  177. # if 0 && GLM_ARCH & GLM_ARCH_AVX
  178. Result.data = _mm_blendv_ps(x.data, y.data, Mask);
  179. # else
  180. Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data));
  181. # endif
  182. return Result;
  183. }
  184. };
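  /* FIXME: disabled. This specialization uses a different template argument
     list (<float, Q, tvec4>) from the <4, T, Q, true> form of every other
     specialization in this file, so it has to be ported before it can be
     re-enabled. The local variable is now spelled consistently (`result`).
  template<qualifier Q>
  struct compute_step_vector<float, Q, tvec4>
  {
      GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge, vec<4, float, Q> const& x)
      {
          vec<4, float, Q> result;
          result.data = glm_vec4_step(edge.data, x.data);
          return result;
      }
  };
  */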
  197. template<qualifier Q>
  198. struct compute_smoothstep_vector<4, float, Q, true>
  199. {
  200. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge0, vec<4, float, Q> const& edge1, vec<4, float, Q> const& x)
  201. {
  202. vec<4, float, Q> Result;
  203. Result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data);
  204. return Result;
  205. }
  206. };
  207. }//namespace detail
  208. }//namespace glm
  209. #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT