Compare commits
2 commits
9d8f09b035
...
a8233a3075
Author | SHA1 | Date | |
---|---|---|---|
a8233a3075 | |||
f5f4d1c3e6 |
8 changed files with 112 additions and 53 deletions
BIN
figures/question_2_2_lwage.png
Normal file
BIN
figures/question_2_2_lwage.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.5 KiB |
BIN
figures/question_2_2_wage.png
Normal file
BIN
figures/question_2_2_wage.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 11 KiB |
|
@ -303,78 +303,45 @@ A \Rightarrow B
|
||||||
|
|
||||||
\subsection{Question 2.1}
|
\subsection{Question 2.1}
|
||||||
|
|
||||||
For instance, here the file df\_table.tex is used to print the actual numbers
|
We retain 2510 observations.
|
||||||
in the table.
|
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
\input{summary_stats}
|
\input{summary_stats}
|
||||||
\caption{Generate Data with Small Variance on x1}
|
\caption{Summary statistics of the retained sample}
|
||||||
\label{tab::table_2_1}
|
\label{tab::summary_stats}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
|
|
||||||
\subsection{Question 4: Some graphs}
|
\subsection{Question 2.2}
|
||||||
|
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_y}
|
\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_wage}
|
||||||
\caption{This is a Figure coming straight from Python.}
|
\caption{Histogram wage}
|
||||||
\label{fig::example_data}
|
\label{fig::question_2_2_wage}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includegraphics[width=0.6\paperwidth]{../figures/question_2_2_lwage}
|
||||||
|
\caption{Histogram lwage}
|
||||||
|
\label{fig::question_2_2_lwage}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
\subsection{Question 5}
|
The lwage histogram in Figure~\ref{fig::question_2_2_lwage} is nicely centered, so there is no need to remove any outliers. It is also close to a normal distribution. The wage histogram in Figure~\ref{fig::question_2_2_wage} is not symmetrical: most of its mass lies on the left at low wages, with a long tail to the right. It is clearly not normally distributed.
|
||||||
|
|
||||||
Equation example with matrices:
|
\subsection{Question 2.3}
|
||||||
|
|
||||||
\begin{equation}\label{eq::wald_test}
|
\begin{table}[ht]
|
||||||
H_0: \beta_1 = - \beta_2; \beta_3=0; \beta_2 + 2\beta_4 = 2
|
\centering
|
||||||
\quad H_1: \neg H_0
|
\input{table_2_3}
|
||||||
\end{equation}
|
\caption{Correlation matrix}
|
||||||
|
\label{tab::table_2_3}
|
||||||
can be written in matrix form as:
|
|
||||||
\begin{equation}\label{eq::matrix_form}
|
|
||||||
\begin{bmatrix}
|
|
||||||
1 & 1 & 0 & 0 \\
|
|
||||||
0 & 0 & 1 & 0 \\
|
|
||||||
0 & 1 & 0 & 2
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
\beta_1 \\
|
|
||||||
\beta_2 \\
|
|
||||||
\beta_3 \\
|
|
||||||
\beta_4
|
|
||||||
\end{bmatrix} =
|
|
||||||
\begin{bmatrix}
|
|
||||||
0 \\
|
|
||||||
0 \\
|
|
||||||
2
|
|
||||||
\end{bmatrix}
|
|
||||||
\end{equation}
|
|
||||||
|
|
||||||
In equation \eqref{eq::wald_test} we see that... and in equation \eqref{eq::matrix_form} we see that
|
|
||||||
|
|
||||||
\subsection{Question 6}
|
|
||||||
|
|
||||||
|
|
||||||
$\beta$
|
|
||||||
|
|
||||||
\begin{table}
|
|
||||||
\input{summary}
|
|
||||||
\caption{This table has the estimates summary}
|
|
||||||
\label{tab::estimation_results_summary}
|
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
Table \ref{tab::estimation_results_summary} has the full summary.
|
We can see that there is a positive correlation between wage and school. This means that people who spend more years in school tend to earn a higher wage. There is a negative correlation between age and school: the younger generation is more highly educated than the older generation. Chinese citizens are better paid than Malay citizens, and being Indian is negatively correlated with wage.
|
||||||
|
|
||||||
\begin{table}
|
\subsection{Question 2.4}
|
||||||
\input{results_coef}
|
|
||||||
\caption{This table has the estimates summary}
|
|
||||||
\label{tab::estimation_results_coef}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
Table~\ref{tab::estimation_results_coef} has only the coefficient
|
|
||||||
results.
|
|
||||||
|
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
20
report/question_2_3
Normal file
20
report/question_2_3
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
\begin{tabular}{lrrrrrrrr}
|
||||||
|
\toprule
|
||||||
|
& count & mean & std & min & 25\% & 50\% & 75\% & max \\
|
||||||
|
\midrule
|
||||||
|
paidwork & 2510.000000 & 1.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
lwage & 2510.000000 & 0.780391 & 0.737255 & -3.336058 & 0.299333 & 0.766255 & 1.241741 & 4.208274 \\
|
||||||
|
men & 2510.000000 & 0.624303 & 0.484398 & 0.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
malay & 2510.000000 & 0.435458 & 0.495919 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
chinese & 2510.000000 & 0.272510 & 0.445334 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
indian & 2510.000000 & 0.292032 & 0.454793 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
age & 2510.000000 & 33.025101 & 10.699703 & 15.000000 & 25.000000 & 31.000000 & 39.000000 & 65.000000 \\
|
||||||
|
agesq & 2510.000000 & 12.050953 & 7.977792 & 2.250000 & 6.250000 & 9.610000 & 15.210000 & 42.250000 \\
|
||||||
|
gexpr & 2510.000000 & 18.933865 & 12.482897 & 0.000000 & 9.000000 & 16.000000 & 26.000000 & 59.000000 \\
|
||||||
|
gexprsq & 2510.000000 & 5.142518 & 6.277625 & 0.000000 & 0.810000 & 2.560000 & 6.760000 & 34.810001 \\
|
||||||
|
yprim & 2510.000000 & 5.277291 & 1.711691 & 0.000000 & 6.000000 & 6.000000 & 6.000000 & 6.000000 \\
|
||||||
|
ysec & 2510.000000 & 2.813944 & 2.704680 & 0.000000 & 0.000000 & 3.000000 & 5.000000 & 14.000000 \\
|
||||||
|
school & 2510.000000 & 8.091235 & 3.783405 & 0.000000 & 6.000000 & 9.000000 & 11.000000 & 20.000000 \\
|
||||||
|
wage & 2510.000000 & 2.903078 & 2.886990 & 0.035577 & 1.348958 & 2.151692 & 3.461635 & 67.240379 \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
20
report/summary_stats.tex
Normal file
20
report/summary_stats.tex
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
\begin{tabular}{lrrrrrrrr}
|
||||||
|
\toprule
|
||||||
|
& count & mean & std & min & 25pct & 50pct & 75pct & max \\
|
||||||
|
\midrule
|
||||||
|
paidwork & 2510.000000 & 1.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
lwage & 2510.000000 & 0.780391 & 0.737255 & -3.336058 & 0.299333 & 0.766255 & 1.241741 & 4.208274 \\
|
||||||
|
men & 2510.000000 & 0.624303 & 0.484398 & 0.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
malay & 2510.000000 & 0.435458 & 0.495919 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
chinese & 2510.000000 & 0.272510 & 0.445334 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
indian & 2510.000000 & 0.292032 & 0.454793 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
age & 2510.000000 & 33.025101 & 10.699703 & 15.000000 & 25.000000 & 31.000000 & 39.000000 & 65.000000 \\
|
||||||
|
agesq & 2510.000000 & 12.050953 & 7.977792 & 2.250000 & 6.250000 & 9.610000 & 15.210000 & 42.250000 \\
|
||||||
|
gexpr & 2510.000000 & 18.933865 & 12.482897 & 0.000000 & 9.000000 & 16.000000 & 26.000000 & 59.000000 \\
|
||||||
|
gexprsq & 2510.000000 & 5.142518 & 6.277625 & 0.000000 & 0.810000 & 2.560000 & 6.760000 & 34.810001 \\
|
||||||
|
yprim & 2510.000000 & 5.277291 & 1.711691 & 0.000000 & 6.000000 & 6.000000 & 6.000000 & 6.000000 \\
|
||||||
|
ysec & 2510.000000 & 2.813944 & 2.704680 & 0.000000 & 0.000000 & 3.000000 & 5.000000 & 14.000000 \\
|
||||||
|
school & 2510.000000 & 8.091235 & 3.783405 & 0.000000 & 6.000000 & 9.000000 & 11.000000 & 20.000000 \\
|
||||||
|
wage & 2510.000000 & 2.903078 & 2.886990 & 0.035577 & 1.348958 & 2.151692 & 3.461635 & 67.240379 \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
20
report/table_2_3
Normal file
20
report/table_2_3
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
\begin{tabular}{lrrrrrrrr}
|
||||||
|
\toprule
|
||||||
|
& count & mean & std & min & 25\% & 50\% & 75\% & max \\
|
||||||
|
\midrule
|
||||||
|
paidwork & 2510.000000 & 1.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
lwage & 2510.000000 & 0.780391 & 0.737255 & -3.336058 & 0.299333 & 0.766255 & 1.241741 & 4.208274 \\
|
||||||
|
men & 2510.000000 & 0.624303 & 0.484398 & 0.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
malay & 2510.000000 & 0.435458 & 0.495919 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
chinese & 2510.000000 & 0.272510 & 0.445334 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
indian & 2510.000000 & 0.292032 & 0.454793 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
age & 2510.000000 & 33.025101 & 10.699703 & 15.000000 & 25.000000 & 31.000000 & 39.000000 & 65.000000 \\
|
||||||
|
agesq & 2510.000000 & 12.050953 & 7.977792 & 2.250000 & 6.250000 & 9.610000 & 15.210000 & 42.250000 \\
|
||||||
|
gexpr & 2510.000000 & 18.933865 & 12.482897 & 0.000000 & 9.000000 & 16.000000 & 26.000000 & 59.000000 \\
|
||||||
|
gexprsq & 2510.000000 & 5.142518 & 6.277625 & 0.000000 & 0.810000 & 2.560000 & 6.760000 & 34.810001 \\
|
||||||
|
yprim & 2510.000000 & 5.277291 & 1.711691 & 0.000000 & 6.000000 & 6.000000 & 6.000000 & 6.000000 \\
|
||||||
|
ysec & 2510.000000 & 2.813944 & 2.704680 & 0.000000 & 0.000000 & 3.000000 & 5.000000 & 14.000000 \\
|
||||||
|
school & 2510.000000 & 8.091235 & 3.783405 & 0.000000 & 6.000000 & 9.000000 & 11.000000 & 20.000000 \\
|
||||||
|
wage & 2510.000000 & 2.903078 & 2.886990 & 0.035577 & 1.348958 & 2.151692 & 3.461635 & 67.240379 \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
20
report/table_2_3.tex
Normal file
20
report/table_2_3.tex
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
\begin{tabular}{lrrrrrrrr}
|
||||||
|
\toprule
|
||||||
|
& count & mean & std & min & 25\% & 50\% & 75\% & max \\
|
||||||
|
\midrule
|
||||||
|
paidwork & 2510.000000 & 1.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
lwage & 2510.000000 & 0.780391 & 0.737255 & -3.336058 & 0.299333 & 0.766255 & 1.241741 & 4.208274 \\
|
||||||
|
men & 2510.000000 & 0.624303 & 0.484398 & 0.000000 & 0.000000 & 1.000000 & 1.000000 & 1.000000 \\
|
||||||
|
malay & 2510.000000 & 0.435458 & 0.495919 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
chinese & 2510.000000 & 0.272510 & 0.445334 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
indian & 2510.000000 & 0.292032 & 0.454793 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 1.000000 \\
|
||||||
|
age & 2510.000000 & 33.025101 & 10.699703 & 15.000000 & 25.000000 & 31.000000 & 39.000000 & 65.000000 \\
|
||||||
|
agesq & 2510.000000 & 12.050953 & 7.977792 & 2.250000 & 6.250000 & 9.610000 & 15.210000 & 42.250000 \\
|
||||||
|
gexpr & 2510.000000 & 18.933865 & 12.482897 & 0.000000 & 9.000000 & 16.000000 & 26.000000 & 59.000000 \\
|
||||||
|
gexprsq & 2510.000000 & 5.142518 & 6.277625 & 0.000000 & 0.810000 & 2.560000 & 6.760000 & 34.810001 \\
|
||||||
|
yprim & 2510.000000 & 5.277291 & 1.711691 & 0.000000 & 6.000000 & 6.000000 & 6.000000 & 6.000000 \\
|
||||||
|
ysec & 2510.000000 & 2.813944 & 2.704680 & 0.000000 & 0.000000 & 3.000000 & 5.000000 & 14.000000 \\
|
||||||
|
school & 2510.000000 & 8.091235 & 3.783405 & 0.000000 & 6.000000 & 9.000000 & 11.000000 & 20.000000 \\
|
||||||
|
wage & 2510.000000 & 2.903078 & 2.886990 & 0.035577 & 1.348958 & 2.151692 & 3.461635 & 67.240379 \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
|
@ -99,6 +99,8 @@ data = data[data['paidwork']==1]
|
||||||
data['school'] = data['yprim']+data['ysec']
|
data['school'] = data['yprim']+data['ysec']
|
||||||
data['wage'] = np.exp(data['lwage'])
|
data['wage'] = np.exp(data['lwage'])
|
||||||
data_summary = data.describe()
|
data_summary = data.describe()
|
||||||
|
new_names = ['count', 'mean', 'std', 'min', '25pct', '50pct', '75pct', 'max']
|
||||||
|
data_summary.index = new_names
|
||||||
|
|
||||||
# print to screen
|
# print to screen
|
||||||
print(data_summary.T)
|
print(data_summary.T)
|
||||||
|
@ -113,6 +115,12 @@ data_frame_to_latex_table_file(report_dir + 'summary_stats.tex',
|
||||||
|
|
||||||
print_question('Question 2.2: Plot histogram wage / lwage')
|
print_question('Question 2.2: Plot histogram wage / lwage')
|
||||||
|
|
||||||
|
plt.hist(data['wage'],bins=21)
|
||||||
|
plt.savefig(figure_dir + "question_2_2_wage.png")
|
||||||
|
plt.show()
|
||||||
|
plt.hist(data['lwage'],bins=21)
|
||||||
|
plt.savefig(figure_dir + "question_2_2_lwage.png")
|
||||||
|
plt.show()
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Question 2.3
|
# Question 2.3
|
||||||
|
@ -120,6 +128,10 @@ print_question('Question 2.2: Plot histogram wage / lwage')
|
||||||
|
|
||||||
print_question('Question 2.3: Sample correlations')
|
print_question('Question 2.3: Sample correlations')
|
||||||
|
|
||||||
|
df = data [['wage', 'age', 'school', 'men', 'malay', 'chinese', 'indian']]
|
||||||
|
corr = df.corr()
|
||||||
|
data_frame_to_latex_table_file(report_dir + 'table_2_3.tex',
|
||||||
|
corr)
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Question 2.4
|
# Question 2.4
|
||||||
|
|
Loading…
Reference in a new issue