%% Import Data
% Build the path with fullfile so the folder separator matches the host OS.
% The original hard-coded '\' separators, which only resolve on Windows; on
% Windows fullfile produces exactly the same string as before.
A = importdata(fullfile('5110','Lesson 6','LahomaTemp.dat')); % A.data holds the numeric block, A.textdata the text block.
%% Replace missing values
% Any reading below -990 is treated as a missing-value sentinel and is
% overwritten with 29.7 (degrees Celsius, per the original author).
% NOTE(review): a fixed fill value biases the statistics computed later;
% consider NaN plus NaN-aware statistics instead -- confirm with the data owner.
A.data(A.data < -990) = 29.7;
%% Mean temperature
% Row-wise average of numeric columns 2 and 3 (presumably the daily maximum
% and minimum air temperature -- TODO confirm against the file header).
AvgT = mean(A.data(:,2:3),2);
%% Transform Date to YYYY, MM, DD, ...
% The first text column holds the date strings; row 1 is skipped
% (presumably the header line). Only year (e.g. 1994) and month (e.g. 7 or
% 10) are requested from datevec.
[yyyy,mm] = datevec(A.textdata(2:end,1));
%% Isolate mean temperature only for July
% Logical indexing: keep the rows of AvgT whose month equals 7.
AvgTJulyDaily = AvgT(mm == 7);
%% Descriptive Stats
% Summary statistics of the daily July mean temperatures.
% If the data contain NaN, check the help for the NaN-aware variants
% (nanmean, nanmax, nanmin, nanmedian, ...) before relying on these values.

% Central tendency
TmeanJuly = mean(AvgTJulyDaily);
TmedianJuly = median(AvgTJulyDaily);
TmodeJuly = mode(AvgTJulyDaily);

% Spread
TstdJuly = std(AvgTJulyDaily);

% Extremes over all July days in the record
TminAbsoluteJul = min(AvgTJulyDaily);
TmaxAbsoluteJul = max(AvgTJulyDaily);
%% Histogram
% The figure is divided into a 2-by-2 grid of plots; this is panel 1.
% A figure and a plot are not the same thing: type "figure" in the command
% window to see the difference. You can split a figure into as many panels
% as you wish, as long as each one remains clearly readable.
subplot(2,2,1)
% histfit overlays a fitted normal pdf on the histogram; consider hist if
% you do not want the pdf.
histfit(AvgTJulyDaily)
title('Lahoma July Mean Air T Histogram','FontSize',14)
%% Calculating the mean for each month.
% Average the daily July values within each calendar year by grouping on
% the year of every July record. This replaces the previous approach (a
% 31-point moving average from filter, sampled at the hard-coded indices
% 31:31:589), which was only valid for exactly 19 complete Julys and would
% silently misalign if a year had fewer than 31 July records. For complete
% data both approaches give the same means up to floating-point rounding.
yyyyJuly = yyyy(mm == 7); % Year of each daily July record, in the same order as AvgTJulyDaily.
% x: sorted distinct years that actually have July data (one per mean).
% yearIdx: for every July record, the position of its year in x.
[x,~,yearIdx] = unique(yyyyJuly);
%% Mean Temperature for the month of July for each year in the dataset.
% accumarray applies @mean to the daily values sharing the same yearIdx, so
% TavgJulyMonthly(k) is the July mean of year x(k) and both are column
% vectors of equal length (as required by fit further below).
TavgJulyMonthly = accumarray(yearIdx(:),AvgTJulyDaily(:),[],@mean);
%% Checkpoint--> Plot Data
%figure(2)
% Plot each yearly July mean against its year in panel 2; hold on keeps the
% axes so the fitted line can be added on top afterwards.
subplot(2,2,2),plot(x,TavgJulyMonthly,'.');hold on
%% Linear Regression using fit, probably the easiest and most popular.
% Model fitted is y = p1*x + p2, where p1 is the slope and p2 is the
% intercept. gof carries the goodness-of-fit statistics used in the label
% below. See the help for fit and fitoptions to select other built-in
% models or to fit a custom model.
[fitObject,gof,output] = fit(x,TavgJulyMonthly,'poly1');
plot(fitObject); % Draw the fitted line on the current axes.
ylim([20 35]); % range of y axis
xlabel(' ') % empty label of x axis
ylabel('Temperature (Celsius)','FontSize',12) % y axis label.
legend boxoff % no box around legend
title('Lahoma July Mean Air T (19-yrs) fit-->poly1','FontSize',14)
% Annotate the plot with the coefficient of determination (R^2, not RMSE).
% The ' = ' separator keeps the value from running into the superscript.
text(1991,34,['r^2 = ',num2str(gof.rsquare)])
%% Second alternative. A different way of creating a linear fit.
% Build a linear regression model object from the same yearly data.
% NOTE(review): newer releases are understood to recommend fitlm over
% LinearModel.fit -- confirm against the installed toolbox version.
mdl = LinearModel.fit(x,TavgJulyMonthly);

% Panel 3: the model's own plot of data and fit.
subplot(2,2,3)
plot(mdl)
ylim([20 35]) % same y range as the panel produced with fit
xlabel(' ') % blank x label
ylabel('Temperature (Celsius)','FontSize',12)
%legend('Location','NorthWest')
legend('boxoff') % no box around legend
title('Lahoma July Mean Air T (19-yrs) LinearModel.fit','FontSize',14)
%% Plot residuals
% Panel 4: normal probability plot of the model residuals, used to check
% how the residuals are distributed.
subplot(2,2,4)
plotResiduals(mdl,'probability','MarkerSize',10)
title('Normal Probability Plot of Residuals','FontSize',14)
xlabel('Residuals','FontSize',12)
ylabel('Probability','FontSize',12)