diff --git a/report/Assignment.tex b/report/Assignment.tex index c99d498..c80c985 100644 --- a/report/Assignment.tex +++ b/report/Assignment.tex @@ -38,6 +38,11 @@ \sectionfont{\color{report_main}} \subsectionfont{\color{report_third}} +%% Add pagebreak before each section +\let\oldsection\section +\renewcommand\section{\clearpage\oldsection} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % This is where the actual document starts % @@ -103,26 +108,36 @@ Hendrik Marcel W Tillemans\\ % You can just write text in here as you would in any other word processor. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Question 1} - -This my answer to question 1. - -\subsection{Example} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\pagebreak - -creates a page break. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \section{Simulation Study} + \subsection{Question 1.2} Are the estimates of $\beta_0$, $\beta_1$ and $\beta_2$ close to their true values? Why (not)? +We investigate a linear model with noise + +\[y=\beta_0 + \beta_1 x_1 + \beta_2 x_2 + u\] + +where + +\[x_1 \sim \mathcal{N}(3,\,6)\] +\[x_2 \sim \mathcal{N}(2,\,5)\] +\[u \sim \mathcal{N}(0,\,3)\] + +In Figure~\ref{fig::plot_1_1} we have a 3D representation of the generated model. + +\begin{figure}[hb] +\includegraphics[width=0.6\paperwidth]{../figures/question_1_1} +\caption{Generated points for Question 1.1.} +\label{fig::plot_1_1} +\end{figure} + + + +\subsection{1.2: Linear Fit on Generated Data} + \begin{table}[h] \input{table_1_2} \caption{Linear Fit on Generated Data} @@ -167,6 +182,12 @@ Linear Regression mode. 
\label{tab::table_1_6} \end{table} +\begin{figure}[hb] +\includegraphics[width=0.6\paperwidth]{../figures/question_1_6} +\caption{Generated points for Question 1.6.} +\label{fig::plot_1_6} +\end{figure} + \section{examples} Some greek letters: diff --git a/report/sample_table.tex b/report/sample_table.tex new file mode 100644 index 0000000..6912874 --- /dev/null +++ b/report/sample_table.tex @@ -0,0 +1,14 @@ +\begin{tabular}{lrrrr} +\toprule + & Petal Width & Petal Length & Sepal Width & Sepal Length \\ +\midrule +count & 150.000000 & 150.000000 & 150.000000 & 150.000000 \\ +mean & 5.843333 & 3.057333 & 3.758000 & 1.199333 \\ +std & 0.828066 & 0.435866 & 1.765298 & 0.762238 \\ +min & 4.300000 & 2.000000 & 1.000000 & 0.100000 \\ +25\% & 5.100000 & 2.800000 & 1.600000 & 0.300000 \\ +50\% & 5.800000 & 3.000000 & 4.350000 & 1.300000 \\ +75\% & 6.400000 & 3.300000 & 5.100000 & 1.800000 \\ +max & 7.900000 & 4.400000 & 6.900000 & 2.500000 \\ +\bottomrule +\end{tabular} diff --git a/scripts/simulation.py b/scripts/simulation.py index b9c0e8d..d656c24 100644 --- a/scripts/simulation.py +++ b/scripts/simulation.py @@ -65,6 +65,22 @@ np.random.seed(group_seed) print_line_length = 90 print_line_start = 5 + +# ----------------------------------------------------------------------------- +# Utility Functions for the Simulation +# ----------------------------------------------------------------------------- + +def results_to_latex_table_file(file_name, results, beta): + """ + This function takes a results object from statsmodels and writes it to a latex table file. 
+ """ + d = {'True': beta, + 'Estimated': results.params, + 'Std Err': results.bse, + 't-Stat': results.tvalues} + df = pd.DataFrame(data = d) + data_frame_to_latex_table_file(REPORT_DIR + file_name, df) + # ----------------------------------------------------------------------------- # 1.1 # ----------------------------------------------------------------------------- @@ -90,6 +106,21 @@ x2 = rng.normal(2, 5, (num_obs,)) # y y = beta[0] + beta[1]*x1 + beta[2]*x2 + u + +# plot the resulting data +fig = plt.figure() +ax = fig.add_subplot(projection='3d') + +ax.scatter(x1, x2, y, marker='o') + +ax.set_xlabel('x1') +ax.set_ylabel('x2') +ax.set_zlabel('y') + +plt.savefig(FIGURE_DIR + "question_1_1.png") +plt.show() + + # ----------------------------------------------------------------------------- # 1.2 # ----------------------------------------------------------------------------- @@ -101,12 +132,7 @@ X = np.array([np.ones(num_obs), x1, x2]).T m = sm.OLS(y, X) # results = results = m.fit() -d = {'True': beta, - 'Estimated': results.params, - 'Std Err': results.bse, - 't-Stat': results.tvalues} -df = pd.DataFrame(data = d) -data_frame_to_latex_table_file(REPORT_DIR + 'table_1_2.tex', df) +results_to_latex_table_file('table_1_2.tex', results, beta) # ----------------------------------------------------------------------------- # 1.3 @@ -119,12 +145,8 @@ X = np.array([np.ones(num_obs), x1]).T m = sm.OLS(y, X) # results = results = m.fit() -d = {'True': beta[0:2], - 'Estimated': results.params, - 'Std Err': results.bse, - 't-Stat': results.tvalues} -df = pd.DataFrame(data = d) -data_frame_to_latex_table_file(REPORT_DIR + 'table_1_3.tex', df) +results_to_latex_table_file('table_1_3.tex', results, beta[0:2]) + # ----------------------------------------------------------------------------- # 1.4 @@ -141,12 +163,7 @@ X = np.array([np.ones(num_obs), x1, x2_new]).T m = sm.OLS(y_new, X) # results = results = m.fit() -d = {'True': beta, - 'Estimated': results.params, - 'Std Err': 
results.bse, - 't-Stat': results.tvalues} -df = pd.DataFrame(data = d) -data_frame_to_latex_table_file(REPORT_DIR + 'table_1_4.tex', df) +results_to_latex_table_file('table_1_4.tex', results, beta) # ----------------------------------------------------------------------------- # 1.5 @@ -159,30 +176,31 @@ X = np.array([np.ones(num_obs), x1]).T m = sm.OLS(y_new, X) # results = results = m.fit() -d = {'True': beta[0:2], - 'Estimated': results.params, - 'Std Err': results.bse, - 't-Stat': results.tvalues} -df = pd.DataFrame(data = d) -data_frame_to_latex_table_file(REPORT_DIR + 'table_1_5.tex', df) +results_to_latex_table_file('table_1_5.tex', results, beta[0:2]) # ----------------------------------------------------------------------------- # 1.6 # ----------------------------------------------------------------------------- -x1 = rng.normal(3, 1, (num_obs,)) -y = beta[0] + beta[1]*x1 + beta[2]*x2 + u +# x1 --> x1_new so we can compare to the original x1 from 1.2 +x1_new = rng.normal(3, 1, (num_obs,)) +y_new = beta[0] + beta[1]*x1_new + beta[2]*x2 + u # X -X = np.array([np.ones(num_obs), x1, x2]).T +X = np.array([np.ones(num_obs), x1_new, x2]).T # m -m = sm.OLS(y, X) +m = sm.OLS(y_new, X) # results = results = m.fit() -d = {'True': beta, - 'Estimated': results.params, - 'Std Err': results.bse, - 't-Stat': results.tvalues} -df = pd.DataFrame(data = d) -data_frame_to_latex_table_file(REPORT_DIR + 'table_1_6.tex', df) \ No newline at end of file +results_to_latex_table_file('table_1_6.tex', results, beta) + +fig = plt.figure() +ax1 = fig.add_subplot(111) + +ax1.scatter(x1, y, c='b', marker="s", label='question 1.1') +ax1.scatter(x1_new, y_new, c='r', marker="o", label='question 1.6') +plt.legend(loc='upper left') +plt.savefig(FIGURE_DIR + "question_1_6.png") +plt.show() +