>> % Loading the data into matlab
>> load temp.dat
>> % Determining the number of data points
>> [n,m] = size(temp)
n = 101
m = 1
>> % Based on the summary statistics and the shape of the histogram,
>> % one possible distribution that might fit the data is the
>> % Normal density - determining the MLEs for the Normal
>> % density parameters.
>> mu = mean(temp)
mu = 5.2876
>> v = var(temp)
v = 22.2008
>> % var uses the (n-1) divisor; the MLE of the variance uses n
>> foo = v * (n-1)/n
foo = 21.9810
>> sigma = sqrt(foo)
sigma = 4.6884
>> % Hence from the data the Normal(5.2876, 4.6884) is the most likely fit
>> % Comparing summary stats
>> % for the data in temp.dat
>> % mean
>> mu
mu = 5.2876
>> % variance
>> v
v = 22.2008
>> % interquartile range
>> irange = iqr(temp)
irange = 6.4281
>> % From the Normal we have
>> mu
mu = 5.2876
>> vn = sigma^2
vn = 21.9810
>> nirange = norminv(.75,mu,sigma) - norminv(.25,mu,sigma)
nirange = 6.3245
>> % Coefficient of variation
>> cov = std(temp)/mu
cov = 0.8911
>> % Normal cov
>> ncov = sigma/mu
ncov = 0.8867
>> % Constructing a P-P plot
>> sorttemp = sort(temp);
>> ptheory = normcdf(sorttemp,mu,sigma);
>> for i = 1:n ptemp(i) = (i-.5)/n; end
>> plot(ptemp,ptheory,ptemp,ptemp)
>> title('P-P plot of Normal versus sample data')
>> xlabel('Sample Data probabilities')
>> ylabel('Normal Distribution probabilities')
>> print p-p.ps
>> % Constructing a Q-Q plot
>> xtheory = norminv(ptemp,mu,sigma);
>> plot(sorttemp,xtheory,sorttemp,sorttemp)
>> title('Q-Q plot of Normal quantiles versus data quantiles')
>> xlabel('Sample Data quantiles')
>> ylabel('Normal Distribution quantiles')
>> print q-q.ps
>> % Notice that both the Q-Q plot and the P-P plot indicate that
>> % the Normal is a pretty good fit to the data
>> % An overlay plot of the Normal CDF and the data CDF is given by
>> plot(sorttemp,ptemp,sorttemp,ptheory)
>> title('Plot of Normal Distribution and Data Distribution')
>> xlabel('x')
>> ylabel('F(x)')
>> print distplot.ps
>> % Plot of f(x) and histogram of the data
>> k = 7;
>> y = normpdf(sorttemp,mu,sigma);
>> [nc,x] = hist(temp,k);
>> nc = nc/n;
>> bar(x,nc)
>> hold on
>> plot(sorttemp,y)
>> print overlay.ps
>> % Plot histograms of the Normal and the data
>> nnc(1) = normcdf(x(1),mu,sigma);
>> for j = 2:k nnc(j) = normcdf(x(j),mu,sigma) - normcdf(x(j-1),mu,sigma); end
>> hold off
>> bar(x,nc)
>> hold on
>> bar(x,nnc,'+')
>> bar(x,nnc,'-')
>> title('Histograms of Normal and Sample Data')
>> xlabel('x')
>> ylabel('Frequencies')
>> print histc2.ps
>> % Performing the Chi-Square test
>> % Determining the number of observations in each cell of the histogram
>> nc = nc*n;
>> nnc = nnc*n;
>> nc
nc = 3.0000    6.0000   16.0000   26.0000   25.0000   18.0000    7.0000
>> nnc
nnc = 0.9073    3.7819   11.4640   22.0936   27.0855   21.1263   10.4817
>> % Note that this is a poor choice of cells for the Chi-Square test,
>> % since there are cells with fewer than 5 expected observations.
>> % I will use this cell width anyway to illustrate the computations.
>> chisum = 0;
>> for i = 1:k chisum = chisum + ((nc(i)-nnc(i))^2)/nnc(i); end
>> chisum
chisum = 10.3931
>> % Comparing with the chi-square critical value;
>> % there are 7 - 2 - 1 = 4 degrees of freedom
>> test = chi2inv(.95,4)
test = 9.4877
>> % Notice that the chi-square critical value is less than the error sum,
>> % thus one WOULD REJECT the hypothesis that the data is
>> % Normally distributed - however the values are close, and
>> % if one goes back and recomputes with different cell widths
>> % it may pass - it would also be a good idea to collect a LARGER
>> % sample of the data and then retest; the larger sample
>> % may pass the Chi-Squared test (for this example it will!)
>> exit
31049 flops.
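
The rejection above follows because the error sum (10.3931) exceeds the 5% critical value (9.4877). An equivalent way to read the same result is through the p-value of the recorded statistic. The short sketch below is not part of the original session; it only uses the Statistics Toolbox functions already used above and restates numbers already computed in the transcript.

% P-value of the recorded chi-square statistic on 4 degrees of freedom;
% rejecting at the 5% level corresponds to pval < 0.05.
chisum = 10.3931;                 % error sum from the session above
df = 7 - 2 - 1;                   % 7 cells minus 2 estimated parameters minus 1
pval = 1 - chi2cdf(chisum, df)    % upper-tail probability under chi-square(4)

For these numbers the p-value comes out just under 0.05, which matches the borderline rejection described in the closing comments.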
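
The session also notes that several expected cell counts fall below 5 and that recomputing with different cells might change the outcome. One common way to do that, sketched below, is to use equal-probability cells under the fitted Normal, so each expected count is n/k and the cells cover the whole real line (the nnc vector above was built from the bin centres returned by hist, which is why it sums to less than n). This sketch is not from the original session: the cell count k2 = 10 and the names edges, obs, expct, chisum2, df2 and pval2 are my own choices, and it assumes temp, n, mu and sigma are still in the workspace.

% Chi-square test with 10 equal-probability cells under the fitted Normal.
k2 = 10;                                 % expected count per cell is n/k2 = 10.1 >= 5
edges = norminv((0:k2)/k2, mu, sigma);   % cell edges at the 0%, 10%, ..., 100% points
                                         % (norminv gives -Inf and Inf at the two ends)
obs = histc(temp, edges);                % observed counts; last entry counts x == Inf
obs = reshape(obs(1:k2), 1, k2);         % keep the k2 cells as a row vector
expct = (n/k2) * ones(1, k2);            % expected counts under the fitted Normal
chisum2 = sum((obs - expct).^2 ./ expct) % chi-square error sum
df2 = k2 - 2 - 1;                        % cells minus 2 estimated parameters minus 1
pval2 = 1 - chi2cdf(chisum2, df2)        % compare with chi2inv(.95, df2) as before

Keeping every expected count well above 5 makes the chi-square approximation more trustworthy; whether the test then passes at the 5% level still has to be checked against chi2inv(.95, df2) rather than assumed.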