/// @ref simd_neon /// @file glm/simd/neon.h #pragma once #if GLM_ARCH & GLM_ARCH_NEON_BIT #include namespace glm { namespace neon { static float32x4_t dupq_lane(float32x4_t vsrc, int lane) { switch(lane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT case 0: return vdupq_laneq_f32(vsrc, 0); case 1: return vdupq_laneq_f32(vsrc, 1); case 2: return vdupq_laneq_f32(vsrc, 2); case 3: return vdupq_laneq_f32(vsrc, 3); #else case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0)); case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1)); case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2)); case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3)); #endif } assert(!"Unreachable code executed!"); return vdupq_n_f32(0.0f); } static float32x2_t dup_lane(float32x4_t vsrc, int lane) { switch(lane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT case 0: return vdup_laneq_f32(vsrc, 0); case 1: return vdup_laneq_f32(vsrc, 1); case 2: return vdup_laneq_f32(vsrc, 2); case 3: return vdup_laneq_f32(vsrc, 3); #else case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0)); case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1)); case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2)); case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3)); #endif } assert(!"Unreachable code executed!"); return vdup_n_f32(0.0f); } static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT switch(dlane) { case 0: switch(slane) { case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); } assert(!"Unreachable code executed!"); case 1: switch(slane) { case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); } assert(!"Unreachable code executed!"); case 2: switch(slane) { case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); } assert(!"Unreachable code executed!"); case 3: switch(slane) { case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); } assert(!"Unreachable code executed!"); } #else float l; switch(slane) { case 0: l = vgetq_lane_f32(vsrc, 0); break; case 1: l = vgetq_lane_f32(vsrc, 1); break; case 2: l = vgetq_lane_f32(vsrc, 2); break; case 3: l = vgetq_lane_f32(vsrc, 3); break; default: assert(!"Unreachable code executed!"); } switch(dlane) { case 0: return vsetq_lane_f32(l, vdst, 0); case 1: return vsetq_lane_f32(l, vdst, 1); case 2: return vsetq_lane_f32(l, vdst, 2); case 3: return vsetq_lane_f32(l, vdst, 3); } #endif assert(!"Unreachable code executed!"); return vdupq_n_f32(0.0f); } static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT switch(lane) { case 0: return vmulq_laneq_f32(v, vlane, 0); break; case 1: return vmulq_laneq_f32(v, vlane, 1); break; case 2: return vmulq_laneq_f32(v, vlane, 2); break; case 3: return vmulq_laneq_f32(v, vlane, 3); break; default: assert(!"Unreachable code executed!"); } assert(!"Unreachable code executed!"); return vdupq_n_f32(0.0f); #else return vmulq_f32(v, dupq_lane(vlane, lane)); #endif } static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT #ifdef GLM_CONFIG_FORCE_FMA # define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) #else # define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) #endif switch(lane) { case 0: FMADD_LANE(acc, v, vlane, 0); return acc; case 1: FMADD_LANE(acc, v, vlane, 1); return acc; case 2: FMADD_LANE(acc, v, vlane, 2); return acc; case 3: FMADD_LANE(acc, v, vlane, 3); return acc; default: assert(!"Unreachable code executed!"); } assert(!"Unreachable code executed!"); return vdupq_n_f32(0.0f); # undef FMADD_LANE #else return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane))); #endif } } //namespace neon } // namespace glm #endif // GLM_ARCH & GLM_ARCH_NEON_BIT