question 2.3

2024-12-30 22:05:03 +01:00 · 2024-12-30 22:05:03 +01:00 · f5f4d1c3e6
commit f5f4d1c3e6
parent 9d8f09b035
4 changed files with 32 additions and 53 deletions
--- a/figures/question_2_2_lwage.png
+++ b/figures/question_2_2_lwage.png
--- a/figures/question_2_2_wage.png
+++ b/figures/question_2_2_wage.png
--- a/report/Assignment.tex
+++ b/report/Assignment.tex
@ -303,78 +303,45 @@ A \Rightarrow B

 \subsection{Question 2.1}

-For instance, here the file df\_table.tex is used print the actual numbers 
-in the table.
+We retain 2510 observations.

 \begin{table}[ht]
 \centering
 \input{summary_stats}
 \caption{Generate Data with Small Variance on x1}
-\label{tab::table_2_1}
+\label{tab::summary_stats}
 \end{table}


-\subsection{Question 4: Some graphs}
+\subsection{Question 2.2}

 \begin{figure}
-\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_y}
-\caption{This is a Figure coming straight from Python.}
-\label{fig::example_data}
+\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_wage}
+\caption{Histogram of wage}
+\label{fig::question_2_2_wage}
 \end{figure}

+\begin{figure}
+\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_lwage}
+\caption{Histogram of lwage}
+\label{fig::question_2_2_lwage}
+\end{figure}

-\subsection{Question 5}
+The lwage histogram in Figure~\ref{fig::question_2_2_lwage} is nicely centered, so there is no need to remove any outliers. It is also close to a normal distribution. The wage histogram in Figure~\ref{fig::question_2_2_wage} is not symmetric but leans to the left; it is clearly not normally distributed.

-Equation example with matrices:
+\subsection{Question 2.3}

-\begin{equation}\label{eq::wald_test}
-H_0: \beta_1 = - \beta_2; \beta_3=0; \beta_2 + 2\beta_4 = 2
-\quad H_1: \neg H_0
-\end{equation}
-
-can be written in matrix form as:
-\begin{equation}\label{eq::matrix_form}
-\begin{bmatrix}
-1 & 1 & 0 & 0 \\
-0 & 0 & 1 & 0 \\
-0 & 1 & 0 & 2
-\end{bmatrix}
-\begin{bmatrix}
-\beta_1 \\
-\beta_2 \\
-\beta_3 \\
-\beta_4
-\end{bmatrix} = 
-\begin{bmatrix}
-0 \\
-0 \\
-2
-\end{bmatrix}
-\end{equation}
-
-In equation \eqref{eq::wald_test} we see that... and in equation \eqref{eq::matrix_form} we see that
-
-\subsection{Question 6}
-
-
-$\beta$
-
-\begin{table}
-\input{summary}
-\caption{This tables has the estimates summary}
-\label{tab::estimation_results_summary}
+\begin{table}[ht]
+\centering
+\input{table_2_3}
+\caption{Correlation matrix}
+\label{tab::table_2_3}
 \end{table}

-Table \ref{tab::estimation_results_summary} has the full summary.
+We can see that there is a positive correlation between wage and school: people who stay in school longer tend to earn a higher wage. There is a negative correlation between age and school, meaning that the younger generation is more highly educated than the older generation. Chinese citizens are better paid than Malay citizens, and Indian citizens have a negative correlation with wage.

-\begin{table}
-\input{results_coef}
-\caption{This tables has the estimates summary}
-\label{tab::estimation_results_coef}
-\end{table}
+\subsection{Question 2.4}

-Table \ref{tab::estimation_results_coef} has the only the coefficient 
-results.


 \end{document}
--- a/scripts/empirical.py
+++ b/scripts/empirical.py
@ -99,6 +99,8 @@ data = data[data['paidwork']==1]
 data['school'] = data['yprim']+data['ysec']
 data['wage'] = np.exp(data['lwage'])
 data_summary = data.describe() 
+new_names = ['count', 'mean', 'std', 'min', '25pct', '50pct', '75pct', 'max']
+data_summary.index = new_names

 # print to screen
 print(data_summary.T)
@ -113,6 +115,12 @@ data_frame_to_latex_table_file(report_dir + 'summary_stats.tex',

 print_question('Question 2.2: Plot histogram wage / lwage')

+plt.hist(data['wage'],bins=21)
+plt.savefig(figure_dir + "question_2_2_wage.png")
+plt.show()
+plt.hist(data['lwage'],bins=21)
+plt.savefig(figure_dir + "question_2_2_lwage.png")
+plt.show()

 # -----------------------------------------------------------------------------
 # Question 2.3
@ -120,6 +128,10 @@ print_question('Question 2.2: Plot histogram wage / lwage')

 print_question('Question 2.3: Sample correlations')

+df = data [['wage', 'age', 'school', 'men', 'malay', 'chinese', 'indian']]
+corr = df.corr()
+data_frame_to_latex_table_file(report_dir + 'table_2_3.tex',                               
+                               corr)

 # -----------------------------------------------------------------------------
 # Question 2.4