diff --git a/figures/question_2_2_lwage.png b/figures/question_2_2_lwage.png new file mode 100644 index 0000000..27c0e12 Binary files /dev/null and b/figures/question_2_2_lwage.png differ diff --git a/figures/question_2_2_wage.png b/figures/question_2_2_wage.png new file mode 100644 index 0000000..a25c48e Binary files /dev/null and b/figures/question_2_2_wage.png differ diff --git a/report/Assignment.tex b/report/Assignment.tex index ef4250c..75ffabc 100644 --- a/report/Assignment.tex +++ b/report/Assignment.tex @@ -303,78 +303,45 @@ A \Rightarrow B \subsection{Question 2.1} -For instance, here the file df\_table.tex is used print the actual numbers -in the table. +We retain 2510 observations. \begin{table}[ht] \centering \input{summary_stats} \caption{Generate Data with Small Variance on x1} -\label{tab::table_2_1} +\label{tab::summary_stats} \end{table} -\subsection{Question 4: Some graphs} +\subsection{Question 2.2} \begin{figure} -\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_y} -\caption{This is a Figure coming straight from Python.} -\label{fig::example_data} +\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_wage} +\caption{Histogram wage} +\label{fig::question_2_2_wage} \end{figure} +\begin{figure} +\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_lwage} +\caption{Histogram lwage} +\label{fig::question_2_2_lwage} +\end{figure} -\subsection{Question 5} +The lwage histogram in Figure \ref{fig::question_2_2_lwage} is nicely centered, so there is no need to remove any outliers. It is also close to a normal distribution. The wage histogram in Figure \ref{fig::question_2_2_wage} is not symmetric but leans to the left. It is clearly not normally distributed. 
-Equation example with matrices: +\subsection{Question 2.3} -\begin{equation}\label{eq::wald_test} -H_0: \beta_1 = - \beta_2; \beta_3=0; \beta_2 + 2\beta_4 = 2 -\quad H_1: \neg H_0 -\end{equation} - -can be written in matrix form as: -\begin{equation}\label{eq::matrix_form} -\begin{bmatrix} -1 & 1 & 0 & 0 \\ -0 & 0 & 1 & 0 \\ -0 & 1 & 0 & 2 -\end{bmatrix} -\begin{bmatrix} -\beta_1 \\ -\beta_2 \\ -\beta_3 \\ -\beta_4 -\end{bmatrix} = -\begin{bmatrix} -0 \\ -0 \\ -2 -\end{bmatrix} -\end{equation} - -In equation \eqref{eq::wald_test} we see that... and in equation \eqref{eq::matrix_form} we see that - -\subsection{Question 6} - - -$\beta$ - -\begin{table} -\input{summary} -\caption{This tables has the estimates summary} -\label{tab::estimation_results_summary} +\begin{table}[ht] +\centering +\input{table_2_3} +\caption{Correlation matrix} +\label{tab::table_2_3} \end{table} -Table \ref{tab::estimation_results_summary} has the full summary. +We can see that there is a positive correlation between wage and school. This means that people with more years of schooling tend to earn a higher wage. There is a negative correlation between age and school: the younger generation is more highly educated than the older generation. Chinese citizens are better paid than Malay citizens, while Indian citizens have a negative correlation with wage. -\begin{table} -\input{results_coef} -\caption{This tables has the estimates summary} -\label{tab::estimation_results_coef} -\end{table} +\subsection{Question 2.4} -Table \ref{tab::estimation_results_coef} has the only the coefficient -results. 
\end{document} diff --git a/scripts/empirical.py b/scripts/empirical.py index fb73ebb..0e91d70 100644 --- a/scripts/empirical.py +++ b/scripts/empirical.py @@ -99,6 +99,8 @@ data = data[data['paidwork']==1] data['school'] = data['yprim']+data['ysec'] data['wage'] = np.exp(data['lwage']) data_summary = data.describe() +new_names = ['count', 'mean', 'std', 'min', '25pct', '50pct', '75pct', 'max'] +data_summary.index = new_names # print to screen print(data_summary.T) @@ -113,6 +115,12 @@ data_frame_to_latex_table_file(report_dir + 'summary_stats.tex', print_question('Question 2.2: Plot histogram wage / lwage') +plt.hist(data['wage'],bins=21) +plt.savefig(figure_dir + "question_2_2_wage.png") +plt.show() +plt.hist(data['lwage'],bins=21) +plt.savefig(figure_dir + "question_2_2_lwage.png") +plt.show() # ----------------------------------------------------------------------------- # Question 2.3 @@ -120,6 +128,10 @@ print_question('Question 2.2: Plot histogram wage / lwage') print_question('Question 2.3: Sample correlations') +df = data [['wage', 'age', 'school', 'men', 'malay', 'chinese', 'indian']] +corr = df.corr() +data_frame_to_latex_table_file(report_dir + 'table_2_3.tex', + corr) # ----------------------------------------------------------------------------- # Question 2.4