students_t_single_sample.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. // Copyright John Maddock 2006
  2. // Copyright Paul A. Bristow 2007, 2010
  3. // Use, modification and distribution are subject to the
  4. // Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifdef _MSC_VER
  8. # pragma warning(disable: 4512) // assignment operator could not be generated.
  9. # pragma warning(disable: 4510) // default constructor could not be generated.
  10. # pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
  11. #endif
  12. #include <boost/math/distributions/students_t.hpp>
  13. // avoid "using namespace std;" and "using namespace boost::math;"
  14. // to avoid potential ambiguity with names in std random.
  15. #include <iostream>
  16. using std::cout; using std::endl;
  17. using std::left; using std::fixed; using std::right; using std::scientific;
  18. #include <iomanip>
  19. using std::setw;
  20. using std::setprecision;
  21. void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn)
  22. {
  23. //
  24. // Sm = Sample Mean.
  25. // Sd = Sample Standard Deviation.
  26. // Sn = Sample Size.
  27. //
  28. // Calculate confidence intervals for the mean.
  29. // For example if we set the confidence limit to
  30. // 0.95, we know that if we repeat the sampling
  31. // 100 times, then we expect that the true mean
  32. // will be between out limits on 95 occations.
  33. // Note: this is not the same as saying a 95%
  34. // confidence interval means that there is a 95%
  35. // probability that the interval contains the true mean.
  36. // The interval computed from a given sample either
  37. // contains the true mean or it does not.
  38. // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
  39. using boost::math::students_t;
  40. // Print out general info:
  41. cout <<
  42. "__________________________________\n"
  43. "2-Sided Confidence Limits For Mean\n"
  44. "__________________________________\n\n";
  45. cout << setprecision(7);
  46. cout << setw(40) << left << "Number of Observations" << "= " << Sn << "\n";
  47. cout << setw(40) << left << "Mean" << "= " << Sm << "\n";
  48. cout << setw(40) << left << "Standard Deviation" << "= " << Sd << "\n";
  49. //
  50. // Define a table of significance/risk levels:
  51. //
  52. double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
  53. //
  54. // Start by declaring the distribution we'll need:
  55. //
  56. students_t dist(Sn - 1);
  57. //
  58. // Print table header:
  59. //
  60. cout << "\n\n"
  61. "_______________________________________________________________\n"
  62. "Confidence T Interval Lower Upper\n"
  63. " Value (%) Value Width Limit Limit\n"
  64. "_______________________________________________________________\n";
  65. //
  66. // Now print out the data for the table rows.
  67. //
  68. for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
  69. {
  70. // Confidence value:
  71. cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
  72. // calculate T:
  73. double T = quantile(complement(dist, alpha[i] / 2));
  74. // Print T:
  75. cout << fixed << setprecision(3) << setw(10) << right << T;
  76. // Calculate width of interval (one sided):
  77. double w = T * Sd / sqrt(double(Sn));
  78. // Print width:
  79. if(w < 0.01)
  80. cout << scientific << setprecision(3) << setw(17) << right << w;
  81. else
  82. cout << fixed << setprecision(3) << setw(17) << right << w;
  83. // Print Limits:
  84. cout << fixed << setprecision(5) << setw(15) << right << Sm - w;
  85. cout << fixed << setprecision(5) << setw(15) << right << Sm + w << endl;
  86. }
  87. cout << endl;
  88. } // void confidence_limits_on_mean
  89. void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha)
  90. {
  91. //
  92. // M = true mean.
  93. // Sm = Sample Mean.
  94. // Sd = Sample Standard Deviation.
  95. // Sn = Sample Size.
  96. // alpha = Significance Level.
  97. //
  98. // A Students t test applied to a single set of data.
  99. // We are testing the null hypothesis that the true
  100. // mean of the sample is M, and that any variation is down
  101. // to chance. We can also test the alternative hypothesis
  102. // that any difference is not down to chance.
  103. // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
  104. using boost::math::students_t;
  105. // Print header:
  106. cout <<
  107. "__________________________________\n"
  108. "Student t test for a single sample\n"
  109. "__________________________________\n\n";
  110. cout << setprecision(5);
  111. cout << setw(55) << left << "Number of Observations" << "= " << Sn << "\n";
  112. cout << setw(55) << left << "Sample Mean" << "= " << Sm << "\n";
  113. cout << setw(55) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
  114. cout << setw(55) << left << "Expected True Mean" << "= " << M << "\n\n";
  115. //
  116. // Now we can calculate and output some stats:
  117. //
  118. // Difference in means:
  119. double diff = Sm - M;
  120. cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "= " << diff << "\n";
  121. // Degrees of freedom:
  122. unsigned v = Sn - 1;
  123. cout << setw(55) << left << "Degrees of Freedom" << "= " << v << "\n";
  124. // t-statistic:
  125. double t_stat = diff * sqrt(double(Sn)) / Sd;
  126. cout << setw(55) << left << "T Statistic" << "= " << t_stat << "\n";
  127. //
  128. // Finally define our distribution, and get the probability:
  129. //
  130. students_t dist(v);
  131. double q = cdf(complement(dist, fabs(t_stat)));
  132. cout << setw(55) << left << "Probability that difference is due to chance" << "= "
  133. << setprecision(3) << scientific << 2 * q << "\n\n";
  134. //
  135. // Finally print out results of alternative hypothesis:
  136. //
  137. cout << setw(55) << left <<
  138. "Results for Alternative Hypothesis and alpha" << "= "
  139. << setprecision(4) << fixed << alpha << "\n\n";
  140. cout << "Alternative Hypothesis Conclusion\n";
  141. cout << "Mean != " << setprecision(3) << fixed << M << " ";
  142. if(q < alpha / 2)
  143. cout << "NOT REJECTED\n";
  144. else
  145. cout << "REJECTED\n";
  146. cout << "Mean < " << setprecision(3) << fixed << M << " ";
  147. if(cdf(complement(dist, t_stat)) > alpha)
  148. cout << "NOT REJECTED\n";
  149. else
  150. cout << "REJECTED\n";
  151. cout << "Mean > " << setprecision(3) << fixed << M << " ";
  152. if(cdf(dist, t_stat) > alpha)
  153. cout << "NOT REJECTED\n";
  154. else
  155. cout << "REJECTED\n";
  156. cout << endl << endl;
  157. } // void single_sample_t_test(
  158. void single_sample_find_df(double M, double Sm, double Sd)
  159. {
  160. //
  161. // M = true mean.
  162. // Sm = Sample Mean.
  163. // Sd = Sample Standard Deviation.
  164. //
  165. using boost::math::students_t;
  166. // Print out general info:
  167. cout <<
  168. "_____________________________________________________________\n"
  169. "Estimated sample sizes required for various confidence levels\n"
  170. "_____________________________________________________________\n\n";
  171. cout << setprecision(5);
  172. cout << setw(40) << left << "True Mean" << "= " << M << "\n";
  173. cout << setw(40) << left << "Sample Mean" << "= " << Sm << "\n";
  174. cout << setw(40) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
  175. //
  176. // Define a table of significance intervals:
  177. //
  178. double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
  179. //
  180. // Print table header:
  181. //
  182. cout << "\n\n"
  183. "_______________________________________________________________\n"
  184. "Confidence Estimated Estimated\n"
  185. " Value (%) Sample Size Sample Size\n"
  186. " (one sided test) (two sided test)\n"
  187. "_______________________________________________________________\n";
  188. //
  189. // Now print out the data for the table rows.
  190. //
  191. for(unsigned i = 1; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
  192. {
  193. // Confidence value:
  194. cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
  195. // calculate df for single sided test:
  196. double df = students_t::find_degrees_of_freedom(
  197. fabs(M - Sm), alpha[i], alpha[i], Sd);
  198. // convert to sample size, always one more than the degrees of freedom:
  199. double size = ceil(df) + 1;
  200. // Print size:
  201. cout << fixed << setprecision(0) << setw(16) << right << size;
  202. // calculate df for two sided test:
  203. df = students_t::find_degrees_of_freedom(
  204. fabs(M - Sm), alpha[i]/2, alpha[i], Sd);
  205. // convert to sample size:
  206. size = ceil(df) + 1;
  207. // Print size:
  208. cout << fixed << setprecision(0) << setw(16) << right << size << endl;
  209. }
  210. cout << endl;
  211. } // void single_sample_find_df
  212. int main()
  213. {
  214. //
  215. // Run tests for Heat Flow Meter data
  216. // see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm
  217. // The data was collected while calibrating a heat flow meter
  218. // against a known value.
  219. //
  220. confidence_limits_on_mean(9.261460, 0.2278881e-01, 195);
  221. single_sample_t_test(5, 9.261460, 0.2278881e-01, 195, 0.05);
  222. single_sample_find_df(5, 9.261460, 0.2278881e-01);
  223. //
  224. // Data for this example from:
  225. // P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64.
  226. // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55
  227. // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
  228. //
  229. // Determination of mercury by cold-vapour atomic absorption,
  230. // the following values were obtained fusing a trusted
  231. // Standard Reference Material containing 38.9% mercury,
  232. // which we assume is correct or 'true'.
  233. //
  234. confidence_limits_on_mean(37.8, 0.964365, 3);
  235. // 95% test:
  236. single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.05);
  237. // 90% test:
  238. single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.1);
  239. // parameter estimate:
  240. single_sample_find_df(38.9, 37.8, 0.964365);
  241. return 0;
  242. } // int main()
  243. /*
  244. Output:
  245. ------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------
  246. students_t_single_sample.cpp
  247. Generating code
  248. Finished generating code
  249. students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe
  250. __________________________________
  251. 2-Sided Confidence Limits For Mean
  252. __________________________________
  253. Number of Observations = 195
  254. Mean = 9.26146
  255. Standard Deviation = 0.02278881
  256. _______________________________________________________________
  257. Confidence T Interval Lower Upper
  258. Value (%) Value Width Limit Limit
  259. _______________________________________________________________
  260. 50.000 0.676 1.103e-003 9.26036 9.26256
  261. 75.000 1.154 1.883e-003 9.25958 9.26334
  262. 90.000 1.653 2.697e-003 9.25876 9.26416
  263. 95.000 1.972 3.219e-003 9.25824 9.26468
  264. 99.000 2.601 4.245e-003 9.25721 9.26571
  265. 99.900 3.341 5.453e-003 9.25601 9.26691
  266. 99.990 3.973 6.484e-003 9.25498 9.26794
  267. 99.999 4.537 7.404e-003 9.25406 9.26886
  268. __________________________________
  269. Student t test for a single sample
  270. __________________________________
  271. Number of Observations = 195
  272. Sample Mean = 9.26146
  273. Sample Standard Deviation = 0.02279
  274. Expected True Mean = 5.00000
  275. Sample Mean - Expected Test Mean = 4.26146
  276. Degrees of Freedom = 194
  277. T Statistic = 2611.28380
  278. Probability that difference is due to chance = 0.000e+000
  279. Results for Alternative Hypothesis and alpha = 0.0500
  280. Alternative Hypothesis Conclusion
  281. Mean != 5.000 NOT REJECTED
  282. Mean < 5.000 REJECTED
  283. Mean > 5.000 NOT REJECTED
  284. _____________________________________________________________
  285. Estimated sample sizes required for various confidence levels
  286. _____________________________________________________________
  287. True Mean = 5.00000
  288. Sample Mean = 9.26146
  289. Sample Standard Deviation = 0.02279
  290. _______________________________________________________________
  291. Confidence Estimated Estimated
  292. Value (%) Sample Size Sample Size
  293. (one sided test) (two sided test)
  294. _______________________________________________________________
  295. 75.000 2 2
  296. 90.000 2 2
  297. 95.000 2 2
  298. 99.000 2 2
  299. 99.900 3 3
  300. 99.990 3 3
  301. 99.999 4 4
  302. __________________________________
  303. 2-Sided Confidence Limits For Mean
  304. __________________________________
  305. Number of Observations = 3
  306. Mean = 37.8000000
  307. Standard Deviation = 0.9643650
  308. _______________________________________________________________
  309. Confidence T Interval Lower Upper
  310. Value (%) Value Width Limit Limit
  311. _______________________________________________________________
  312. 50.000 0.816 0.455 37.34539 38.25461
  313. 75.000 1.604 0.893 36.90717 38.69283
  314. 90.000 2.920 1.626 36.17422 39.42578
  315. 95.000 4.303 2.396 35.40438 40.19562
  316. 99.000 9.925 5.526 32.27408 43.32592
  317. 99.900 31.599 17.594 20.20639 55.39361
  318. 99.990 99.992 55.673 -17.87346 93.47346
  319. 99.999 316.225 176.067 -138.26683 213.86683
  320. __________________________________
  321. Student t test for a single sample
  322. __________________________________
  323. Number of Observations = 3
  324. Sample Mean = 37.80000
  325. Sample Standard Deviation = 0.96437
  326. Expected True Mean = 38.90000
  327. Sample Mean - Expected Test Mean = -1.10000
  328. Degrees of Freedom = 2
  329. T Statistic = -1.97566
  330. Probability that difference is due to chance = 1.869e-001
  331. Results for Alternative Hypothesis and alpha = 0.0500
  332. Alternative Hypothesis Conclusion
  333. Mean != 38.900 REJECTED
  334. Mean < 38.900 NOT REJECTED
  335. Mean > 38.900 NOT REJECTED
  336. __________________________________
  337. Student t test for a single sample
  338. __________________________________
  339. Number of Observations = 3
  340. Sample Mean = 37.80000
  341. Sample Standard Deviation = 0.96437
  342. Expected True Mean = 38.90000
  343. Sample Mean - Expected Test Mean = -1.10000
  344. Degrees of Freedom = 2
  345. T Statistic = -1.97566
  346. Probability that difference is due to chance = 1.869e-001
  347. Results for Alternative Hypothesis and alpha = 0.1000
  348. Alternative Hypothesis Conclusion
  349. Mean != 38.900 REJECTED
  350. Mean < 38.900 NOT REJECTED
  351. Mean > 38.900 REJECTED
  352. _____________________________________________________________
  353. Estimated sample sizes required for various confidence levels
  354. _____________________________________________________________
  355. True Mean = 38.90000
  356. Sample Mean = 37.80000
  357. Sample Standard Deviation = 0.96437
  358. _______________________________________________________________
  359. Confidence Estimated Estimated
  360. Value (%) Sample Size Sample Size
  361. (one sided test) (two sided test)
  362. _______________________________________________________________
  363. 75.000 3 4
  364. 90.000 7 9
  365. 95.000 11 13
  366. 99.000 20 22
  367. 99.900 35 37
  368. 99.990 50 53
  369. 99.999 66 68
  370. */