binomial_quiz_example.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. // Copyright Paul A. Bristow 2007, 2009, 2010
  2. // Copyright John Maddock 2006
  3. // Use, modification and distribution are subject to the
  4. // Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. // binomial_examples_quiz.cpp
  8. // Simple example of computing probabilities and quantiles for a binomial random variable
  9. // representing the correct guesses on a multiple-choice test.
  10. // source http://www.stat.wvu.edu/SRS/Modules/Binomial/test.html
  11. //[binomial_quiz_example1
  12. /*`
  13. A multiple choice test has four possible answers to each of 16 questions.
  14. A student guesses the answer to each question,
  15. so the probability of getting a correct answer on any given question is
  16. one in four, a quarter, 1/4, 25% or fraction 0.25.
  17. The conditions of the binomial experiment are assumed to be met:
  18. n = 16 questions constitute the trials;
  19. each question results in one of two possible outcomes (correct or incorrect);
  20. the probability of being correct is 0.25 and is constant if no knowledge about the subject is assumed;
  21. the questions are answered independently if the student's answer to a question
  22. in no way influences his/her answer to another question.
  23. First, we need to be able to use the binomial distribution constructor
  24. (and some std input/output, of course).
  25. */
  26. #include <boost/math/distributions/binomial.hpp>
  27. using boost::math::binomial;
  28. #include <iostream>
  29. using std::cout; using std::endl;
  30. using std::ios; using std::flush; using std::left; using std::right; using std::fixed;
  31. #include <iomanip>
  32. using std::setw; using std::setprecision;
  33. #include <exception>
  34. //][/binomial_quiz_example1]
  35. int main()
  36. {
  37. try
  38. {
  39. cout << "Binomial distribution example - guessing in a quiz." << endl;
  40. //[binomial_quiz_example2
  41. /*`
  42. The number of correct answers, X, is distributed as a binomial random variable
  43. with binomial distribution parameters: questions n and success fraction probability p.
  44. So we construct a binomial distribution:
  45. */
  46. int questions = 16; // All the questions in the quiz.
  47. int answers = 4; // Possible answers to each question.
  48. double success_fraction = 1. / answers; // If a random guess, p = 1/4 = 0.25.
  49. binomial quiz(questions, success_fraction);
  50. /*`
  51. and display the distribution parameters we used thus:
  52. */
  53. cout << "In a quiz with " << quiz.trials()
  54. << " questions and with a probability of guessing right of "
  55. << quiz.success_fraction() * 100 << " %"
  56. << " or 1 in " << static_cast<int>(1. / quiz.success_fraction()) << endl;
  57. /*`
  58. Show a few probabilities of just guessing:
  59. */
  60. cout << "Probability of getting none right is " << pdf(quiz, 0) << endl; // 0.010023
  61. cout << "Probability of getting exactly one right is " << pdf(quiz, 1) << endl;
  62. cout << "Probability of getting exactly two right is " << pdf(quiz, 2) << endl;
  63. int pass_score = 11;
  64. cout << "Probability of getting exactly " << pass_score << " answers right by chance is "
  65. << pdf(quiz, pass_score) << endl;
  66. cout << "Probability of getting all " << questions << " answers right by chance is "
  67. << pdf(quiz, questions) << endl;
  68. /*`
  69. [pre
  70. Probability of getting none right is 0.0100226
  71. Probability of getting exactly one right is 0.0534538
  72. Probability of getting exactly two right is 0.133635
  73. Probability of getting exactly 11 right is 0.000247132
  74. Probability of getting exactly all 16 answers right by chance is 2.32831e-010
  75. ]
  76. These don't give any encouragement to guessers!
  77. We can tabulate the 'getting exactly right' ( == ) probabilities thus:
  78. */
  79. cout << "\n" "Guessed Probability" << right << endl;
  80. for (int successes = 0; successes <= questions; successes++)
  81. {
  82. double probability = pdf(quiz, successes);
  83. cout << setw(2) << successes << " " << probability << endl;
  84. }
  85. cout << endl;
  86. /*`
  87. [pre
  88. Guessed Probability
  89. 0 0.0100226
  90. 1 0.0534538
  91. 2 0.133635
  92. 3 0.207876
  93. 4 0.225199
  94. 5 0.180159
  95. 6 0.110097
  96. 7 0.0524273
  97. 8 0.0196602
  98. 9 0.00582526
  99. 10 0.00135923
  100. 11 0.000247132
  101. 12 3.43239e-005
  102. 13 3.5204e-006
  103. 14 2.51457e-007
  104. 15 1.11759e-008
  105. 16 2.32831e-010
  106. ]
  107. Then we can add the probabilities of some 'exactly right' like this:
  108. */
  109. cout << "Probability of getting none or one right is " << pdf(quiz, 0) + pdf(quiz, 1) << endl;
  110. /*`
  111. [pre
  112. Probability of getting none or one right is 0.0634764
  113. ]
  114. But if more than a couple of scores are involved, it is more convenient (and may be more accurate)
  115. to use the Cumulative Distribution Function (cdf) instead:
  116. */
  117. cout << "Probability of getting none or one right is " << cdf(quiz, 1) << endl;
  118. /*`
  119. [pre
  120. Probability of getting none or one right is 0.0634764
  121. ]
  122. Since the cdf is inclusive, we can get the probability of getting up to 10 right ( <= )
  123. */
  124. cout << "Probability of getting <= 10 right (to fail) is " << cdf(quiz, 10) << endl;
  125. /*`
  126. [pre
  127. Probability of getting <= 10 right (to fail) is 0.999715
  128. ]
  129. To get the probability of getting 11 or more right (to pass),
  130. it is tempting to use ``1 - cdf(quiz, 10)`` to get the probability of > 10
  131. */
  132. cout << "Probability of getting > 10 right (to pass) is " << 1 - cdf(quiz, 10) << endl;
  133. /*`
  134. [pre
  135. Probability of getting > 10 right (to pass) is 0.000285239
  136. ]
  137. But this should be resisted in favor of using the __complements function (see __why_complements).
  138. */
  139. cout << "Probability of getting > 10 right (to pass) is " << cdf(complement(quiz, 10)) << endl;
  140. /*`
  141. [pre
  142. Probability of getting > 10 right (to pass) is 0.000285239
  143. ]
  144. And we can check that these two, <= 10 and > 10, add up to unity.
  145. */
  146. BOOST_ASSERT((cdf(quiz, 10) + cdf(complement(quiz, 10))) == 1.);
  147. /*`
  148. If we want a < rather than a <= test, because the CDF is inclusive, we must subtract one from the score.
  149. */
  150. cout << "Probability of getting less than " << pass_score
  151. << " (< " << pass_score << ") answers right by guessing is "
  152. << cdf(quiz, pass_score -1) << endl;
  153. /*`
  154. [pre
  155. Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
  156. ]
  157. and similarly to get a >= rather than a > test
  158. we also need to subtract one from the score (and can again check the sum is unity).
  159. This is because if the cdf is /inclusive/,
  160. then its complement must be /exclusive/ otherwise there would be one possible
  161. outcome counted twice!
  162. */
  163. cout << "Probability of getting at least " << pass_score
  164. << "(>= " << pass_score << ") answers right by guessing is "
  165. << cdf(complement(quiz, pass_score-1))
  166. << ", only 1 in " << 1/cdf(complement(quiz, pass_score-1)) << endl;
  167. BOOST_ASSERT((cdf(quiz, pass_score -1) + cdf(complement(quiz, pass_score-1))) == 1);
  168. /*`
  169. [pre
  170. Probability of getting at least 11 (>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
  171. ]
  172. Finally we can tabulate some probabilities:
  173. */
  174. cout << "\n" "At most (<=)""\n""Guessed OK Probability" << right << endl;
  175. for (int score = 0; score <= questions; score++)
  176. {
  177. cout << setw(2) << score << " " << setprecision(10)
  178. << cdf(quiz, score) << endl;
  179. }
  180. cout << endl;
  181. /*`
  182. [pre
  183. At most (<=)
  184. Guessed OK Probability
  185. 0 0.01002259576
  186. 1 0.0634764398
  187. 2 0.1971110499
  188. 3 0.4049871101
  189. 4 0.6301861752
  190. 5 0.8103454274
  191. 6 0.9204427481
  192. 7 0.9728700437
  193. 8 0.9925302796
  194. 9 0.9983555346
  195. 10 0.9997147608
  196. 11 0.9999618928
  197. 12 0.9999962167
  198. 13 0.9999997371
  199. 14 0.9999999886
  200. 15 0.9999999998
  201. 16 1
  202. ]
  203. */
  204. cout << "\n" "At least (>)""\n""Guessed OK Probability" << right << endl;
  205. for (int score = 0; score <= questions; score++)
  206. {
  207. cout << setw(2) << score << " " << setprecision(10)
  208. << cdf(complement(quiz, score)) << endl;
  209. }
  210. /*`
  211. [pre
  212. At least (>)
  213. Guessed OK Probability
  214. 0 0.9899774042
  215. 1 0.9365235602
  216. 2 0.8028889501
  217. 3 0.5950128899
  218. 4 0.3698138248
  219. 5 0.1896545726
  220. 6 0.07955725188
  221. 7 0.02712995629
  222. 8 0.00746972044
  223. 9 0.001644465374
  224. 10 0.0002852391917
  225. 11 3.810715862e-005
  226. 12 3.783265129e-006
  227. 13 2.628657967e-007
  228. 14 1.140870154e-008
  229. 15 2.328306437e-010
  230. 16 0
  231. ]
  232. We now consider the probabilities of *ranges* of correct guesses.
  233. First, calculate the probability of getting a range of guesses right,
  234. by adding the exact probabilities of each from low ... high.
  235. */
  236. int low = 3; // Getting at least 3 right.
  237. int high = 5; // Getting as most 5 right.
  238. double sum = 0.;
  239. for (int i = low; i <= high; i++)
  240. {
  241. sum += pdf(quiz, i);
  242. }
  243. cout.precision(4);
  244. cout << "Probability of getting between "
  245. << low << " and " << high << " answers right by guessing is "
  246. << sum << endl; // 0.61323
  247. /*`
  248. [pre
  249. Probability of getting between 3 and 5 answers right by guessing is 0.6132
  250. ]
  251. Or, usually better, we can use the difference of cdfs instead:
  252. */
  253. cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
  254. << cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 0.61323
  255. /*`
  256. [pre
  257. Probability of getting between 3 and 5 answers right by guessing is 0.6132
  258. ]
  259. And we can also try a few more combinations of high and low choices:
  260. */
  261. low = 1; high = 6;
  262. cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
  263. << cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 and 6 P= 0.91042
  264. low = 1; high = 8;
  265. cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
  266. << cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 1 <= x 8 P = 0.9825
  267. low = 4; high = 4;
  268. cout << "Probability of getting between " << low << " and " << high << " answers right by guessing is "
  269. << cdf(quiz, high) - cdf(quiz, low - 1) << endl; // 4 <= x 4 P = 0.22520
  270. /*`
  271. [pre
  272. Probability of getting between 1 and 6 answers right by guessing is 0.9104
  273. Probability of getting between 1 and 8 answers right by guessing is 0.9825
  274. Probability of getting between 4 and 4 answers right by guessing is 0.2252
  275. ]
  276. [h4 Using Binomial distribution moments]
  277. Using moments of the distribution, we can say more about the spread of results from guessing.
  278. */
  279. cout << "By guessing, on average, one can expect to get " << mean(quiz) << " correct answers." << endl;
  280. cout << "Standard deviation is " << standard_deviation(quiz) << endl;
  281. cout << "So about 2/3 will lie within 1 standard deviation and get between "
  282. << ceil(mean(quiz) - standard_deviation(quiz)) << " and "
  283. << floor(mean(quiz) + standard_deviation(quiz)) << " correct." << endl;
  284. cout << "Mode (the most frequent) is " << mode(quiz) << endl;
  285. cout << "Skewness is " << skewness(quiz) << endl;
  286. /*`
  287. [pre
  288. By guessing, on average, one can expect to get 4 correct answers.
  289. Standard deviation is 1.732
  290. So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
  291. Mode (the most frequent) is 4
  292. Skewness is 0.2887
  293. ]
  294. [h4 Quantiles]
  295. The quantiles (percentiles or percentage points) for a few probability levels:
  296. */
  297. cout << "Quartiles " << quantile(quiz, 0.25) << " to "
  298. << quantile(complement(quiz, 0.25)) << endl; // Quartiles
  299. cout << "1 standard deviation " << quantile(quiz, 0.33) << " to "
  300. << quantile(quiz, 0.67) << endl; // 1 sd
  301. cout << "Deciles " << quantile(quiz, 0.1) << " to "
  302. << quantile(complement(quiz, 0.1))<< endl; // Deciles
  303. cout << "5 to 95% " << quantile(quiz, 0.05) << " to "
  304. << quantile(complement(quiz, 0.05))<< endl; // 5 to 95%
  305. cout << "2.5 to 97.5% " << quantile(quiz, 0.025) << " to "
  306. << quantile(complement(quiz, 0.025)) << endl; // 2.5 to 97.5%
  307. cout << "2 to 98% " << quantile(quiz, 0.02) << " to "
  308. << quantile(complement(quiz, 0.02)) << endl; // 2 to 98%
  309. cout << "If guessing then percentiles 1 to 99% will get " << quantile(quiz, 0.01)
  310. << " to " << quantile(complement(quiz, 0.01)) << " right." << endl;
  311. /*`
  312. Notice that these output integral values because the default policy is `integer_round_outwards`.
  313. [pre
  314. Quartiles 2 to 5
  315. 1 standard deviation 2 to 5
  316. Deciles 1 to 6
  317. 5 to 95% 0 to 7
  318. 2.5 to 97.5% 0 to 8
  319. 2 to 98% 0 to 8
  320. ]
  321. */
  322. //] [/binomial_quiz_example2]
  323. //[discrete_quantile_real
  324. /*`
  325. Quantiles values are controlled by the __understand_dis_quant quantile policy chosen.
  326. The default is `integer_round_outwards`,
  327. so the lower quantile is rounded down, and the upper quantile is rounded up.
  328. But we might believe that the real values tell us a little more - see __math_discrete.
  329. We could control the policy for *all* distributions by
  330. #define BOOST_MATH_DISCRETE_QUANTILE_POLICY real
  331. at the head of the program would make this policy apply
  332. to this *one, and only*, translation unit.
  333. Or we can now create a (typedef for) policy that has discrete quantiles real
  334. (here avoiding any 'using namespaces ...' statements):
  335. */
  336. using boost::math::policies::policy;
  337. using boost::math::policies::discrete_quantile;
  338. using boost::math::policies::real;
  339. using boost::math::policies::integer_round_outwards; // Default.
  340. typedef boost::math::policies::policy<discrete_quantile<real> > real_quantile_policy;
  341. /*`
  342. Add a custom binomial distribution called ``real_quantile_binomial`` that uses ``real_quantile_policy``
  343. */
  344. using boost::math::binomial_distribution;
  345. typedef binomial_distribution<double, real_quantile_policy> real_quantile_binomial;
  346. /*`
  347. Construct an object of this custom distribution:
  348. */
  349. real_quantile_binomial quiz_real(questions, success_fraction);
  350. /*`
  351. And use this to show some quantiles - that now have real rather than integer values.
  352. */
  353. cout << "Quartiles " << quantile(quiz, 0.25) << " to "
  354. << quantile(complement(quiz_real, 0.25)) << endl; // Quartiles 2 to 4.6212
  355. cout << "1 standard deviation " << quantile(quiz_real, 0.33) << " to "
  356. << quantile(quiz_real, 0.67) << endl; // 1 sd 2.6654 4.194
  357. cout << "Deciles " << quantile(quiz_real, 0.1) << " to "
  358. << quantile(complement(quiz_real, 0.1))<< endl; // Deciles 1.3487 5.7583
  359. cout << "5 to 95% " << quantile(quiz_real, 0.05) << " to "
  360. << quantile(complement(quiz_real, 0.05))<< endl; // 5 to 95% 0.83739 6.4559
  361. cout << "2.5 to 97.5% " << quantile(quiz_real, 0.025) << " to "
  362. << quantile(complement(quiz_real, 0.025)) << endl; // 2.5 to 97.5% 0.42806 7.0688
  363. cout << "2 to 98% " << quantile(quiz_real, 0.02) << " to "
  364. << quantile(complement(quiz_real, 0.02)) << endl; // 2 to 98% 0.31311 7.7880
  365. cout << "If guessing, then percentiles 1 to 99% will get " << quantile(quiz_real, 0.01)
  366. << " to " << quantile(complement(quiz_real, 0.01)) << " right." << endl;
  367. /*`
  368. [pre
  369. Real Quantiles
  370. Quartiles 2 to 4.621
  371. 1 standard deviation 2.665 to 4.194
  372. Deciles 1.349 to 5.758
  373. 5 to 95% 0.8374 to 6.456
  374. 2.5 to 97.5% 0.4281 to 7.069
  375. 2 to 98% 0.3131 to 7.252
  376. If guessing then percentiles 1 to 99% will get 0 to 7.788 right.
  377. ]
  378. */
  379. //] [/discrete_quantile_real]
  380. }
  381. catch(const std::exception& e)
  382. { // Always useful to include try & catch blocks because
  383. // default policies are to throw exceptions on arguments that cause
  384. // errors like underflow, overflow.
  385. // Lacking try & catch blocks, the program will abort without a message below,
  386. // which may give some helpful clues as to the cause of the exception.
  387. std::cout <<
  388. "\n""Message from thrown exception was:\n " << e.what() << std::endl;
  389. }
  390. return 0;
  391. } // int main()
  392. /*
  393. Output is:
  394. BAutorun "i:\boost-06-05-03-1300\libs\math\test\Math_test\debug\binomial_quiz_example.exe"
  395. Binomial distribution example - guessing in a quiz.
  396. In a quiz with 16 questions and with a probability of guessing right of 25 % or 1 in 4
  397. Probability of getting none right is 0.0100226
  398. Probability of getting exactly one right is 0.0534538
  399. Probability of getting exactly two right is 0.133635
  400. Probability of getting exactly 11 answers right by chance is 0.000247132
  401. Probability of getting all 16 answers right by chance is 2.32831e-010
  402. Guessed Probability
  403. 0 0.0100226
  404. 1 0.0534538
  405. 2 0.133635
  406. 3 0.207876
  407. 4 0.225199
  408. 5 0.180159
  409. 6 0.110097
  410. 7 0.0524273
  411. 8 0.0196602
  412. 9 0.00582526
  413. 10 0.00135923
  414. 11 0.000247132
  415. 12 3.43239e-005
  416. 13 3.5204e-006
  417. 14 2.51457e-007
  418. 15 1.11759e-008
  419. 16 2.32831e-010
  420. Probability of getting none or one right is 0.0634764
  421. Probability of getting none or one right is 0.0634764
  422. Probability of getting <= 10 right (to fail) is 0.999715
  423. Probability of getting > 10 right (to pass) is 0.000285239
  424. Probability of getting > 10 right (to pass) is 0.000285239
  425. Probability of getting less than 11 (< 11) answers right by guessing is 0.999715
  426. Probability of getting at least 11(>= 11) answers right by guessing is 0.000285239, only 1 in 3505.83
  427. At most (<=)
  428. Guessed OK Probability
  429. 0 0.01002259576
  430. 1 0.0634764398
  431. 2 0.1971110499
  432. 3 0.4049871101
  433. 4 0.6301861752
  434. 5 0.8103454274
  435. 6 0.9204427481
  436. 7 0.9728700437
  437. 8 0.9925302796
  438. 9 0.9983555346
  439. 10 0.9997147608
  440. 11 0.9999618928
  441. 12 0.9999962167
  442. 13 0.9999997371
  443. 14 0.9999999886
  444. 15 0.9999999998
  445. 16 1
  446. At least (>)
  447. Guessed OK Probability
  448. 0 0.9899774042
  449. 1 0.9365235602
  450. 2 0.8028889501
  451. 3 0.5950128899
  452. 4 0.3698138248
  453. 5 0.1896545726
  454. 6 0.07955725188
  455. 7 0.02712995629
  456. 8 0.00746972044
  457. 9 0.001644465374
  458. 10 0.0002852391917
  459. 11 3.810715862e-005
  460. 12 3.783265129e-006
  461. 13 2.628657967e-007
  462. 14 1.140870154e-008
  463. 15 2.328306437e-010
  464. 16 0
  465. Probability of getting between 3 and 5 answers right by guessing is 0.6132
  466. Probability of getting between 3 and 5 answers right by guessing is 0.6132
  467. Probability of getting between 1 and 6 answers right by guessing is 0.9104
  468. Probability of getting between 1 and 8 answers right by guessing is 0.9825
  469. Probability of getting between 4 and 4 answers right by guessing is 0.2252
  470. By guessing, on average, one can expect to get 4 correct answers.
  471. Standard deviation is 1.732
  472. So about 2/3 will lie within 1 standard deviation and get between 3 and 5 correct.
  473. Mode (the most frequent) is 4
  474. Skewness is 0.2887
  475. Quartiles 2 to 5
  476. 1 standard deviation 2 to 5
  477. Deciles 1 to 6
  478. 5 to 95% 0 to 7
  479. 2.5 to 97.5% 0 to 8
  480. 2 to 98% 0 to 8
  481. If guessing then percentiles 1 to 99% will get 0 to 8 right.
  482. Quartiles 2 to 4.621
  483. 1 standard deviation 2.665 to 4.194
  484. Deciles 1.349 to 5.758
  485. 5 to 95% 0.8374 to 6.456
  486. 2.5 to 97.5% 0.4281 to 7.069
  487. 2 to 98% 0.3131 to 7.252
  488. If guessing, then percentiles 1 to 99% will get 0 to 7.788 right.
  489. */