type_half.inl 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. namespace glm{
  2. namespace detail
  3. {
  4. GLM_FUNC_QUALIFIER float overflow()
  5. {
  6. volatile float f = 1e10;
  7. for(int i = 0; i < 10; ++i)
  8. f *= f; // this will overflow before the for loop terminates
  9. return f;
  10. }
  11. union uif32
  12. {
  13. GLM_FUNC_QUALIFIER uif32() :
  14. i(0)
  15. {}
  16. GLM_FUNC_QUALIFIER uif32(float f_) :
  17. f(f_)
  18. {}
  19. GLM_FUNC_QUALIFIER uif32(unsigned int i_) :
  20. i(i_)
  21. {}
  22. float f;
  23. unsigned int i;
  24. };
  25. GLM_FUNC_QUALIFIER float toFloat32(hdata value)
  26. {
  27. int s = (value >> 15) & 0x00000001;
  28. int e = (value >> 10) & 0x0000001f;
  29. int m = value & 0x000003ff;
  30. if(e == 0)
  31. {
  32. if(m == 0)
  33. {
  34. //
  35. // Plus or minus zero
  36. //
  37. detail::uif32 result;
  38. result.i = static_cast<unsigned int>(s << 31);
  39. return result.f;
  40. }
  41. else
  42. {
  43. //
  44. // Denormalized number -- renormalize it
  45. //
  46. while(!(m & 0x00000400))
  47. {
  48. m <<= 1;
  49. e -= 1;
  50. }
  51. e += 1;
  52. m &= ~0x00000400;
  53. }
  54. }
  55. else if(e == 31)
  56. {
  57. if(m == 0)
  58. {
  59. //
  60. // Positive or negative infinity
  61. //
  62. uif32 result;
  63. result.i = static_cast<unsigned int>((s << 31) | 0x7f800000);
  64. return result.f;
  65. }
  66. else
  67. {
  68. //
  69. // Nan -- preserve sign and significand bits
  70. //
  71. uif32 result;
  72. result.i = static_cast<unsigned int>((s << 31) | 0x7f800000 | (m << 13));
  73. return result.f;
  74. }
  75. }
  76. //
  77. // Normalized number
  78. //
  79. e = e + (127 - 15);
  80. m = m << 13;
  81. //
  82. // Assemble s, e and m.
  83. //
  84. uif32 Result;
  85. Result.i = static_cast<unsigned int>((s << 31) | (e << 23) | m);
  86. return Result.f;
  87. }
  88. GLM_FUNC_QUALIFIER hdata toFloat16(float const& f)
  89. {
  90. uif32 Entry;
  91. Entry.f = f;
  92. int i = static_cast<int>(Entry.i);
  93. //
  94. // Our floating point number, f, is represented by the bit
  95. // pattern in integer i. Disassemble that bit pattern into
  96. // the sign, s, the exponent, e, and the significand, m.
  97. // Shift s into the position where it will go in the
  98. // resulting half number.
  99. // Adjust e, accounting for the different exponent bias
  100. // of float and half (127 versus 15).
  101. //
  102. int s = (i >> 16) & 0x00008000;
  103. int e = ((i >> 23) & 0x000000ff) - (127 - 15);
  104. int m = i & 0x007fffff;
  105. //
  106. // Now reassemble s, e and m into a half:
  107. //
  108. if(e <= 0)
  109. {
  110. if(e < -10)
  111. {
  112. //
  113. // E is less than -10. The absolute value of f is
  114. // less than half_MIN (f may be a small normalized
  115. // float, a denormalized float or a zero).
  116. //
  117. // We convert f to a half zero.
  118. //
  119. return hdata(s);
  120. }
  121. //
  122. // E is between -10 and 0. F is a normalized float,
  123. // whose magnitude is less than __half_NRM_MIN.
  124. //
  125. // We convert f to a denormalized half.
  126. //
  127. m = (m | 0x00800000) >> (1 - e);
  128. //
  129. // Round to nearest, round "0.5" up.
  130. //
  131. // Rounding may cause the significand to overflow and make
  132. // our number normalized. Because of the way a half's bits
  133. // are laid out, we don't have to treat this case separately;
  134. // the code below will handle it correctly.
  135. //
  136. if(m & 0x00001000)
  137. m += 0x00002000;
  138. //
  139. // Assemble the half from s, e (zero) and m.
  140. //
  141. return hdata(s | (m >> 13));
  142. }
  143. else if(e == 0xff - (127 - 15))
  144. {
  145. if(m == 0)
  146. {
  147. //
  148. // F is an infinity; convert f to a half
  149. // infinity with the same sign as f.
  150. //
  151. return hdata(s | 0x7c00);
  152. }
  153. else
  154. {
  155. //
  156. // F is a NAN; we produce a half NAN that preserves
  157. // the sign bit and the 10 leftmost bits of the
  158. // significand of f, with one exception: If the 10
  159. // leftmost bits are all zero, the NAN would turn
  160. // into an infinity, so we have to set at least one
  161. // bit in the significand.
  162. //
  163. m >>= 13;
  164. return hdata(s | 0x7c00 | m | (m == 0));
  165. }
  166. }
  167. else
  168. {
  169. //
  170. // E is greater than zero. F is a normalized float.
  171. // We try to convert f to a normalized half.
  172. //
  173. //
  174. // Round to nearest, round "0.5" up
  175. //
  176. if(m & 0x00001000)
  177. {
  178. m += 0x00002000;
  179. if(m & 0x00800000)
  180. {
  181. m = 0; // overflow in significand,
  182. e += 1; // adjust exponent
  183. }
  184. }
  185. //
  186. // Handle exponent overflow
  187. //
  188. if (e > 30)
  189. {
  190. overflow(); // Cause a hardware floating point overflow;
  191. return hdata(s | 0x7c00);
  192. // if this returns, the half becomes an
  193. } // infinity with the same sign as f.
  194. //
  195. // Assemble the half from s, e and m.
  196. //
  197. return hdata(s | (e << 10) | (m >> 13));
  198. }
  199. }
  200. }//namespace detail
  201. }//namespace glm