students_t_example3.cpp 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // students_t_example3.cpp
  2. // Copyright Paul A. Bristow 2006, 2007.
  3. // Use, modification and distribution are subject to the
  4. // Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. // Example 3 of using Student's t.
  8. // A general guide to Student's t is at
  9. // http://en.wikipedia.org/wiki/Student's_t-test
  10. // (and many other elementary and advanced statistics texts).
  11. // It says:
  12. // The t statistic was invented by William Sealy Gosset
  13. // for cheaply monitoring the quality of beer brews.
  14. // "Student" was his pen name.
  15. // Gosset was statistician for Guinness brewery in Dublin, Ireland,
  16. // hired due to Claude Guinness's innovative policy of recruiting the
  17. // best graduates from Oxford and Cambridge for applying biochemistry
  18. // and statistics to Guinness's industrial processes.
  19. // Gosset published the t test in Biometrika in 1908,
  20. // but was forced to use a pen name by his employer who regarded the fact
  21. // that they were using statistics as a trade secret.
  22. // In fact, Gosset's identity was unknown not only to fellow statisticians
  23. // but to his employer - the company insisted on the pseudonym
  24. // so that it could turn a blind eye to the breach of its rules.
  25. // The Student's t distribution function is described at
  26. // http://en.wikipedia.org/wiki/Student%27s_t_distribution
  27. #include <boost/math/distributions/students_t.hpp>
  28. using boost::math::students_t; // Probability of students_t(df, t).
  29. #include <iostream>
  30. using std::cout; using std::endl;
  31. #include <iomanip>
  32. using std::setprecision; using std::setw;
  33. #include <cmath>
  34. using std::sqrt;
  35. // This example of a two-sided test is from:
  36. // B. M. Smith & M. B. Griffiths, Analyst, 1982, 107, 253,
  37. // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 58-59
  38. // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
  39. // Concentrations of lead (ug/l) determined by two different methods
  40. // for each of four test portions.
  41. // Because the concentration of each portion is significantly different,
  42. // the values may NOT be pooled.
  43. // (Called a 'paired test' by Miller and Miller
  44. // because each portion analysed has a different concentration.)
  45. // Portion Wet oxidation Direct Extraction
  46. // 1 71 76
  47. // 2 61 68
  48. // 3 50 48
  49. // 4 60 57
  // Number of test portions analysed (rows of `data`).
  const int portions = 4;
  // Number of analytical methods compared (columns of `data`).
  const int methods = 2;
  // Lead concentration (ug/l) found in each portion:
  // column 0 is wet oxidation, column 1 is direct extraction.
  float data [portions][methods] = {{71, 76}, {61, 68}, {50, 48}, {60, 57}};
  // Per-portion difference; main() fills it with data[p][0] - data[p][1].
  float diffs[portions];
  54. int main()
  55. {
  56. cout << "Example3 using Student's t function. " << endl;
  57. float mean_diff = 0.f;
  58. cout << "\n""Portion wet_oxidation Direct_extraction difference" << endl;
  59. for (int portion = 0; portion < portions; portion++)
  60. { // Echo data and differences.
  61. diffs[portion] = data[portion][0] - data[portion][1];
  62. mean_diff += diffs[portion];
  63. cout << setw(4) << portion << ' ' << setw(14) << data[portion][0] << ' ' << setw(18)<< data[portion][1] << ' ' << setw(9) << diffs[portion] << endl;
  64. }
  65. mean_diff /= portions;
  66. cout << "Mean difference = " << mean_diff << endl; // -1.75
  67. float sd_diffs = 0.f;
  68. for (int portion = 0; portion < portions; portion++)
  69. { // Calculate standard deviation of differences.
  70. sd_diffs +=(diffs[portion] - mean_diff) * (diffs[portion] - mean_diff);
  71. }
  72. int degrees_of_freedom = portions-1; // Use the n-1 formula.
  73. sd_diffs /= degrees_of_freedom;
  74. sd_diffs = sqrt(sd_diffs);
  75. cout << "Standard deviation of differences = " << sd_diffs << endl; // 4.99166
  76. // Standard deviation of differences = 4.99166
  77. double t = mean_diff * sqrt(static_cast<double>(portions))/ sd_diffs; // -0.70117
  78. cout << "Student's t = " << t << ", if " << degrees_of_freedom << " degrees of freedom." << endl;
  79. // Student's t = -0.70117, if 3 degrees of freedom.
  80. cout << "Probability of the means being different is "
  81. << 2.F * cdf(students_t(degrees_of_freedom), t) << "."<< endl; // 0.266846 * 2 = 0.533692
  82. // Double the probability because using a 'two-sided test' because
  83. // mean for 'Wet oxidation' could be either
  84. // greater OR LESS THAN for 'Direct extraction'.
  85. return 0;
  86. } // int main()
  87. /*
  88. Output is:
  89. Example3 using Student's t function.
  90. Portion wet_oxidation Direct_extraction difference
  91. 0 71 76 -5
  92. 1 61 68 -7
  93. 2 50 48 2
  94. 3 60 57 3
  95. Mean difference = -1.75
  96. Standard deviation of differences = 4.99166
  97. Student's t = -0.70117, if 3 degrees of freedom.
  98. Probability of the means being different is 0.533692.
  99. */