simulation code done

This commit is contained in:
Hendrik Tillemans 2024-12-30 00:35:42 +01:00
parent 1de99aa3f9
commit 3ff77c880c
22 changed files with 1638 additions and 27 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
figures/vub.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

303
report/Assignment.tex Normal file
View file

@ -0,0 +1,303 @@
\documentclass[12pt]{article}
\usepackage{natbib}
\usepackage{url}
\usepackage[utf8x]{inputenc}
\usepackage{mathtools}%
\usepackage{graphicx}
\usepackage{parskip}
\usepackage{xcolor}%
\usepackage{fancyhdr}
\usepackage{vmargin}
\usepackage{booktabs}%
\usepackage{sectsty}% for coloring sections
\setmarginsrb{3 cm}{2.5 cm}{3 cm}{2.5 cm}{1 cm}{1.5 cm}{1 cm}{1.5 cm}
% define your own custom colors
% If you want to change the colors you would need to update the RGB code in the
% last brackets. Better not change the name of the color as it is used elsewhere
\definecolor{report_main}{HTML}{200045}
\definecolor{report_second}{HTML}{F39912}
\definecolor{report_third}{HTML}{8B0010}
\title{\color{report_main}{Assignment Econometrics 2024}} % Title
\author{Hendrik Marcel W Tillemans} % Author
\date{\today} % Date
\makeatletter
\let\thetitle\@title
\let\theauthor\@author
\let\thedate\@date
\makeatother
\pagestyle{fancy}
\fancyhf{}
\rhead{\theauthor} % header on the right
\lhead{\thetitle} % header on the left
\cfoot{\thepage} % footer in the center
\sectionfont{\color{report_main}}
\subsectionfont{\color{report_third}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is where the actual document starts
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This section details the group information
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{titlepage}
\centering
\vspace*{0.5 cm}
\includegraphics[scale = 0.95]{../figures/vub.png}\\[1.0 cm] % University Logo
\textsc{\LARGE \newline\newline Free University Brussels}\\[2.0 cm] % University Name
\textsc{\Large \color{report_main}{Class: Econometrics}}\\[0.5 cm] % Course Code
\rule{\linewidth}{0.2 mm} \\[0.4 cm]
{ \huge \bfseries \thetitle}\\
\rule{\linewidth}{0.2 mm} \\[1.5 cm]
\begin{minipage}{0.5\textwidth}
\begin{flushleft} \large
\emph{Professor:}\\
Jeroen Kerkhof\\
Faculty of Economic Sciences\\
\end{flushleft}
\end{minipage}~
\begin{minipage}{0.4\textwidth}
\begin{flushright} \large
\emph{Group:} \\
Hendrik Marcel W Tillemans\\
\end{flushright}
\end{minipage}\\[2 cm]
% takes the current date
\thedate
\end{titlepage}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This details the inclusion (or not) of the table of contents
% and list of figures and tables.
% You can add/remove page breaks as you see fit.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\tableofcontents
\pagebreak
\listoffigures
\listoftables
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is the start of the actual document content
% You can just write text in here as you would in any other word processor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Question 1}
This is my answer to question 1.
\subsection{Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\pagebreak
creates a page break.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Simulation Study}
\subsection{1.2: Linear Fit on Generated Data}
\begin{table}[h]
\input{table_1_2}
\caption{Linear Fit on Generated Data}
\label{tab::table_1_2}
\end{table}
\subsection{1.3: Linear Fit with 1 Variable}
\begin{table}[h]
\input{table_1_3}
\caption{Linear Fit with 1 Variable}
\label{tab::table_1_3}
\end{table}
\subsection{1.4: New Linear Fit on Generated Data}
\begin{table}[h]
\input{table_1_4}
\caption{New Linear Fit on Generated Data}
\label{tab::table_1_4}
\end{table}
\subsection{1.5: New Linear Fit with 1 Variable}
\begin{table}[h]
\input{table_1_5}
\caption{New Linear Fit with 1 Variable}
\label{tab::table_1_5}
\end{table}
\subsection{1.6: Generate Data with Small Variance on x1}
\begin{table}[h]
\input{table_1_6}
\caption{Generate Data with Small Variance on x1}
\label{tab::table_1_6}
\end{table}
\section{examples}
Some greek letters:
$\alpha$
$\beta$
$\gamma$
$\theta$
$\varepsilon$
$\pi$
$\lambda$
$\tau$
$x=x+27$
x=x+27
$A \Longrightarrow B$
$\underbrace{abs}_{test}$
sub and superscript
$\beta_0$
$\sum_{i=1}^{n} i$
In an equation:
\begin{equation}
\sum_{j=1}^{n} j^2 \beta
\end{equation}
Equation without number
\begin{equation*}
A \Rightarrow B
\end{equation*}
\section{Empirical Investigation}
Here is some example code to create tables and graphs from the
Python script. In order for this to work you would first need
to run the script non\_linear\_models\_example\_report.py. Running that
file (using the recommended file structure) creates some figures
in the figures folder and some tables in .tex files in the report folder.
\subsection{Question 3}
For instance, here the file df\_table.tex is used to print the actual numbers
in the table.
\begin{table}[h]
\input{df_table}
\caption{This table has the estimates for ...}
\label{tab::estimation_results}
\end{table}
\subsection{Question 4: Some graphs}
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_y}
\caption{This is a Figure coming straight from Python.}
\label{fig::example_data}
\end{figure}
In Figure \ref{fig::example_data} we see the data.
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_linear}
\caption{This is a linear fit on a quadratic model.}
\label{fig::example_quadratic_linear}
\end{figure}
In Figure \ref{fig::example_quadratic_linear} we see a linear fit.
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_quadratic}
\caption{This is a quadratic fit on a quadratic model.}
\label{fig::example_quadratic_quadratic}
\end{figure}
In Figure \ref{fig::example_quadratic_quadratic} we see that
\subsection{Question 5}
Equation example with matrices:
\begin{equation}\label{eq::wald_test}
H_0: \beta_1 = - \beta_2; \beta_3=0; \beta_2 + 2\beta_4 = 2
\quad H_1: \neg H_0
\end{equation}
can be written in matrix form as:
\begin{equation}\label{eq::matrix_form}
\begin{bmatrix}
1 & 1 & 0 & 0 \\
0 & 0 & 1 & 0 \\
0 & 1 & 0 & 2
\end{bmatrix}
\begin{bmatrix}
\beta_1 \\
\beta_2 \\
\beta_3 \\
\beta_4
\end{bmatrix} =
\begin{bmatrix}
0 \\
0 \\
2
\end{bmatrix}
\end{equation}
In equation \eqref{eq::wald_test} we see that... and in equation \eqref{eq::matrix_form} we see that
\subsection{Question 6}
$\beta$
\begin{table}
\input{summary}
\caption{This table has the full estimation summary}
\label{tab::estimation_results_summary}
\end{table}
Table \ref{tab::estimation_results_summary} has the full summary.
\begin{table}
\input{results_coef}
\caption{This table has only the coefficient estimates}
\label{tab::estimation_results_coef}
\end{table}
Table \ref{tab::estimation_results_coef} has only the coefficient
results.
\end{document}

9
report/df_table.tex Normal file
View file

@ -0,0 +1,9 @@
\begin{tabular}{lrrrrr}
\toprule
& coeff & tvals & pvals & conf\_lower & conf\_higher \\
\midrule
0 & 17.880000 & 1.730000 & 0.090000 & -2.800000 & 38.560000 \\
1 & 0.390000 & 0.560000 & 0.580000 & -0.990000 & 1.770000 \\
2 & -0.490000 & -18.290000 & 0.000000 & -0.550000 & -0.440000 \\
\bottomrule
\end{tabular}

View file

@ -0,0 +1,263 @@
\documentclass[12pt]{article}
\usepackage{natbib}
\usepackage{url}
\usepackage[utf8x]{inputenc}
\usepackage{mathtools}%
\usepackage{graphicx}
\usepackage{parskip}
\usepackage{xcolor}%
\usepackage{fancyhdr}
\usepackage{vmargin}
\usepackage{booktabs}%
\usepackage{sectsty}% for coloring sections
\setmarginsrb{3 cm}{2.5 cm}{3 cm}{2.5 cm}{1 cm}{1.5 cm}{1 cm}{1.5 cm}
% define your own custom colors
% If you want to change the colors you would need to update the RGB code in the
% last brackets. Better not change the name of the color as it is used elsewhere
\definecolor{report_main}{HTML}{200045}
\definecolor{report_second}{HTML}{F39912}
\definecolor{report_third}{HTML}{8B0010}
\title{\color{report_main}{Assignment Econometrics 2024}} % Title
\author{Hendrik Marcel W Tillemans} % Author
\date{\today} % Date
\makeatletter
\let\thetitle\@title
\let\theauthor\@author
\let\thedate\@date
\makeatother
\pagestyle{fancy}
\fancyhf{}
\rhead{\theauthor} % header on the right
\lhead{\thetitle} % header on the left
\cfoot{\thepage} % footer in the center
\sectionfont{\color{report_main}}
\subsectionfont{\color{report_third}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is where the actual document starts
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This section details the group information
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{titlepage}
\centering
\vspace*{0.5 cm}
\includegraphics[scale = 0.95]{../figures/vub.png}\\[1.0 cm] % University Logo
\textsc{\LARGE \newline\newline Free University Brussels}\\[2.0 cm] % University Name
\textsc{\Large \color{report_main}{Class: Econometrics}}\\[0.5 cm] % Course Code
\rule{\linewidth}{0.2 mm} \\[0.4 cm]
{ \huge \bfseries \thetitle}\\
\rule{\linewidth}{0.2 mm} \\[1.5 cm]
\begin{minipage}{0.5\textwidth}
\begin{flushleft} \large
\emph{Professor:}\\
Jeroen Kerkhof\\
Faculty of Economic Sciences\\
\end{flushleft}
\end{minipage}~
\begin{minipage}{0.4\textwidth}
\begin{flushright} \large
\emph{Group:} \\
Hendrik Marcel W Tillemans
\end{flushright}
\end{minipage}\\[2 cm]
% takes the current date
\thedate
\end{titlepage}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This details the inclusion (or not) of the table of contents
% and list of figures and tables.
% You can add/remove page breaks as you see fit.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\tableofcontents
\pagebreak
\listoffigures
\listoftables
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is the start of the actual document content
% You can just write text in here as you would in any other word processor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Question 1}
This is my answer to question 1.
\subsection{Example}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\pagebreak
creates a page break.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Simulation Study}
Here
Some greek letters:
$\alpha$
$\beta$
$\gamma$
$\theta$
$\varepsilon$
$\pi$
$\lambda$
$\tau$
$x=x+27$
x=x+27
$A \Longrightarrow B$
$\underbrace{abs}_{test}$
sub and superscript
$\beta_0$
$\sum_{i=1}^{n} i$
In an equation:
\begin{equation}
\sum_{j=1}^{n} j^2 \beta
\end{equation}
Equation without number
\begin{equation*}
A \Rightarrow B
\end{equation*}
\section{Empirical Investigation}
Here is some example code to create tables and graphs from the
Python script. In order for this to work you would first need
to run the script non\_linear\_models\_example\_report.py. Running that
file (using the recommended file structure) creates some figures
in the figures folder and some tables in .tex files in the report folder.
\subsection{Question 3}
For instance, here the file df\_table.tex is used to print the actual numbers
in the table.
\begin{table}[h]
\input{df_table}
\caption{This table has the estimates for ...}
\label{tab::estimation_results}
\end{table}
\subsection{Question 4: Some graphs}
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_y}
\caption{This is a Figure coming straight from Python.}
\label{fig::example_data}
\end{figure}
In Figure \ref{fig::example_data} we see the data.
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_linear}
\caption{This is a linear fit on a quadratic model.}
\label{fig::example_quadratic_linear}
\end{figure}
In Figure \ref{fig::example_quadratic_linear} we see a linear fit.
\begin{figure}
\includegraphics[width=0.6\paperwidth]{../figures/quadratic_model_quadratic}
\caption{This is a quadratic fit on a quadratic model.}
\label{fig::example_quadratic_quadratic}
\end{figure}
In Figure \ref{fig::example_quadratic_quadratic} we see that
\subsection{Question 5}
Equation example with matrices:
\begin{equation}\label{eq::wald_test}
H_0: \beta_1 = - \beta_2; \beta_3=0; \beta_2 + 2\beta_4 = 2
\quad H_1: \neg H_0
\end{equation}
can be written in matrix form as:
\begin{equation}\label{eq::matrix_form}
\begin{bmatrix}
1 & 1 & 0 & 0 \\
0 & 0 & 1 & 0 \\
0 & 1 & 0 & 2
\end{bmatrix}
\begin{bmatrix}
\beta_1 \\
\beta_2 \\
\beta_3 \\
\beta_4
\end{bmatrix} =
\begin{bmatrix}
0 \\
0 \\
2
\end{bmatrix}
\end{equation}
In equation \eqref{eq::wald_test} we see that... and in equation \eqref{eq::matrix_form} we see that
\subsection{Question 6}
$\beta$
\begin{table}
\input{summary}
\caption{This table has the full estimation summary}
\label{tab::estimation_results_summary}
\end{table}
Table \ref{tab::estimation_results_summary} has the full summary.
\begin{table}
\input{results_coef}
\caption{This table has only the coefficient estimates}
\label{tab::estimation_results_coef}
\end{table}
Table \ref{tab::estimation_results_coef} has only the coefficient
results.
\end{document}

9
report/results_coef.tex Normal file
View file

@ -0,0 +1,9 @@
\begin{tabular}{lrrrrrr}
\toprule
& Coef. & Std.Err. & t & P>|t| & [0.025 & 0.975] \\
\midrule
const & 17.880598 & 10.325727 & 1.731655 & 0.088744 & -2.796314 & 38.557509 \\
x1 & 0.387451 & 0.688193 & 0.562997 & 0.575645 & -0.990632 & 1.765533 \\
x2 & -0.494529 & 0.027036 & -18.291331 & 0.000000 & -0.548668 & -0.440390 \\
\bottomrule
\end{tabular}

34
report/summary.tex Normal file
View file

@ -0,0 +1,34 @@
\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:} & y & \textbf{ R-squared: } & 0.936 \\
\textbf{Model:} & OLS & \textbf{ Adj. R-squared: } & 0.934 \\
\textbf{Method:} & Least Squares & \textbf{ F-statistic: } & 416.4 \\
\textbf{Date:} & Sun, 29 Dec 2024 & \textbf{ Prob (F-statistic):} & 9.69e-35 \\
\textbf{Time:} & 21:32:34 & \textbf{ Log-Likelihood: } & -327.30 \\
\textbf{No. Observations:} & 60 & \textbf{ AIC: } & 660.6 \\
\textbf{Df Residuals:} & 57 & \textbf{ BIC: } & 666.9 \\
\textbf{Df Model:} & 2 & \textbf{ } & \\
\textbf{Covariance Type:} & nonrobust & \textbf{ } & \\
\bottomrule
\end{tabular}
\begin{tabular}{lcccccc}
& \textbf{coef} & \textbf{std err} & \textbf{t} & \textbf{P$> |$t$|$} & \textbf{[0.025} & \textbf{0.975]} \\
\midrule
\textbf{const} & 17.8806 & 10.326 & 1.732 & 0.089 & -2.796 & 38.558 \\
\textbf{x1} & 0.3875 & 0.688 & 0.563 & 0.576 & -0.991 & 1.766 \\
\textbf{x2} & -0.4945 & 0.027 & -18.291 & 0.000 & -0.549 & -0.440 \\
\bottomrule
\end{tabular}
\begin{tabular}{lclc}
\textbf{Omnibus:} & 0.223 & \textbf{ Durbin-Watson: } & 2.169 \\
\textbf{Prob(Omnibus):} & 0.894 & \textbf{ Jarque-Bera (JB): } & 0.397 \\
\textbf{Skew:} & -0.106 & \textbf{ Prob(JB): } & 0.820 \\
\textbf{Kurtosis:} & 2.662 & \textbf{ Cond. No. } & 837. \\
\bottomrule
\end{tabular}
%\caption{OLS Regression Results}
\end{center}
Notes: \newline
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

9
report/table_1_2.tex Normal file
View file

@ -0,0 +1,9 @@
\begin{tabular}{lrrrr}
\toprule
& True & Estimated & Std Err & t-Stat \\
\midrule
0 & 3.000000 & 2.962797 & 0.117377 & 25.241713 \\
1 & -4.000000 & -4.018728 & 0.015964 & -251.729125 \\
2 & 2.000000 & 1.985917 & 0.020033 & 99.132074 \\
\bottomrule
\end{tabular}

8
report/table_1_3.tex Normal file
View file

@ -0,0 +1,8 @@
\begin{tabular}{lrrrr}
\toprule
& True & Estimated & Std Err & t-Stat \\
\midrule
0 & 3.000000 & 7.040489 & 0.362044 & 19.446476 \\
1 & -4.000000 & -3.986195 & 0.052565 & -75.833855 \\
\bottomrule
\end{tabular}

9
report/table_1_4.tex Normal file
View file

@ -0,0 +1,9 @@
\begin{tabular}{lrrrr}
\toprule
& True & Estimated & Std Err & t-Stat \\
\midrule
0 & 3.000000 & 3.062084 & 0.164930 & 18.565919 \\
1 & -4.000000 & -4.006083 & 0.020178 & -198.535392 \\
2 & 2.000000 & 1.974439 & 0.024519 & 80.526341 \\
\bottomrule
\end{tabular}

8
report/table_1_5.tex Normal file
View file

@ -0,0 +1,8 @@
\begin{tabular}{lrrrr}
\toprule
& True & Estimated & Std Err & t-Stat \\
\midrule
0 & 3.000000 & 12.965134 & 0.300905 & 43.087097 \\
1 & -4.000000 & -3.011484 & 0.043688 & -68.931429 \\
\bottomrule
\end{tabular}

9
report/table_1_6.tex Normal file
View file

@ -0,0 +1,9 @@
\begin{tabular}{lrrrr}
\toprule
& True & Estimated & Std Err & t-Stat \\
\midrule
0 & 3.000000 & 3.107269 & 0.320425 & 9.697336 \\
1 & -4.000000 & -4.067205 & 0.100288 & -40.555170 \\
2 & 2.000000 & 1.985469 & 0.020038 & 99.084266 \\
\bottomrule
\end{tabular}

View file

@ -0,0 +1,207 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empirical assignment 2024 - 2025
STARTER FILE
"""
import os
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import pandas as pd
from helper import print_question, data_frame_to_latex_table_file
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Empirical assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Input/output folders, relative to the directory the script is run from.
# exist_ok=True creates a missing folder and is a no-op for an existing one,
# which avoids the separate check-then-create step.
# for the data (the Stata file is read from here further down)
data_dir = '../data/'
os.makedirs(data_dir, exist_ok=True)
# for the figures
figure_dir = '../figures/'
os.makedirs(figure_dir, exist_ok=True)
# for the latex document
report_dir = '../report/'
os.makedirs(report_dir, exist_ok=True)
# -----------------------------------------------------------------------------
# Group seed: the product of the two birthday numbers below
# -----------------------------------------------------------------------------
# first birthday
bd_1 = 3112
# second birthday
bd_2 = 3112
group_seed = bd_1 * bd_2
# seed NumPy's legacy global random state (affects np.random.* calls only)
np.random.seed(group_seed)
# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------
# separate Generator seeded with the same value, for reproducible results;
# it is independent of the np.random.seed call above and is the generator
# used for the subsampling below
rng = np.random.default_rng(group_seed)
# settings for output printing
print_line_length = 90
print_line_start = 5
# number of x points
num_points = 60
# -----------------------------------------------------------------------------
# Load data 2.1
# -----------------------------------------------------------------------------
# read the full Stata data set from the data folder
data_full = pd.read_stata(data_dir + 'assignment2025.dta')
# sample size: 75% of the rows, truncated to an integer
num_obs = int(0.75 * data_full.shape[0])
# draw num_obs distinct row positions at random (replace=False means no row
# is picked twice); the rng uses the group seed
observations = rng.choice (len(data_full), num_obs , replace =False)
# select the observations for your group, as a copy of the sampled rows
data = data_full.iloc[observations , :].copy()
# alternative kept for reference: work on the full data set instead
#data = data_full
# -----------------------------------------------------------------------------
# Descriptive statistics 2.1
# -----------------------------------------------------------------------------
# Scaffold for questions 2.1 - 2.15: every section below calls print_question
# with the title of its question; the commented lines are templates that
# still have to be completed and uncommented.
print_question('Question 2.1: Descriptive Statistics')
# compute the summary statistics
# data_summary = TODO
# print to screen
# print(data_summary.T) [uncomment]
# export the summary statistics to a file
# NOTE(review): 'summmary_stats.tex' is spelled with three m's - confirm the
# intended file name before uncommenting.
# data_frame_to_latex_table_file(report_dir + 'summmary_stats.tex',
# data_summary.T) [uncomment]
# -----------------------------------------------------------------------------
# Question 2.2
# -----------------------------------------------------------------------------
print_question('Question 2.2: Plot histogram wage / lwage')
# -----------------------------------------------------------------------------
# Question 2.3
# -----------------------------------------------------------------------------
print_question('Question 2.3: Sample correlations')
# -----------------------------------------------------------------------------
# Question 2.4
# -----------------------------------------------------------------------------
print_question('Question 2.4: Estimate lwage model')
# explanatory variables for question 2.4
# x_vars_24 = data[['smcity', 'AA']] # TODO
# add a constant
# X_24 = sm.add_constant(x_vars_24) [uncomment]
# set-up model
# model_24 = sm.OLS(,) #TODO
# estimate the model
# results_24 = model_24. #TODO
# print the OLS output
# print(results_24.summary()) [uncomment]
# export the coefficients part of the summary to a table
# data_frame_to_latex_table_file(report_dir + 'results_24.tex',
# results_24.summary2().tables[1])
# -----------------------------------------------------------------------------
# Question 2.5
# -----------------------------------------------------------------------------
print_question('Question 2.5: Adding school')
# -----------------------------------------------------------------------------
# Question 2.6
# -----------------------------------------------------------------------------
print_question('Question 2.6: Adding age')
# -----------------------------------------------------------------------------
# Question 2.7
# -----------------------------------------------------------------------------
print_question('Question 2.7: Create the woman variable')
# -----------------------------------------------------------------------------
# Question 2.8
# -----------------------------------------------------------------------------
print_question('Question 2.8: lwage model')
# -----------------------------------------------------------------------------
# Question 2.9
# -----------------------------------------------------------------------------
print_question('Question 2.9: Test ethnicity')
# -----------------------------------------------------------------------------
# Question 2.10
# -----------------------------------------------------------------------------
print_question('Question 2.10: Estimate models separately')
# -----------------------------------------------------------------------------
# Question 2.11
# -----------------------------------------------------------------------------
print_question('Question 2.11: Predict lwage')
# -----------------------------------------------------------------------------
# Question 2.12
# -----------------------------------------------------------------------------
print_question('Question 2.12: Estimate model with squared terms')
# -----------------------------------------------------------------------------
# Question 2.13
# -----------------------------------------------------------------------------
print_question('Question 2.13: Squared terms')
# -----------------------------------------------------------------------------
# Question 2.14
# -----------------------------------------------------------------------------
print_question('Question 2.14: Add interaction terms')
# -----------------------------------------------------------------------------
# Question 2.15
# -----------------------------------------------------------------------------
print_question('Question 2.15: Find your favourite model')

View file

@ -0,0 +1,93 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation assignment 2024 - 2025
STARTER FILE
"""
import os
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
#import varplot as vpl
#plt.style.use("finthabo")
# this imports all functions from the helper file
from helper import data_frame_to_latex_table_file, print_question
#colors = vpl.color_list()
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Simulation assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Output folders, relative to the directory the script is run from.
# exist_ok=True creates a missing folder and is a no-op for an existing one,
# which avoids the separate check-then-create step.
# for the figures
FIGURE_DIR = "../figures/"
os.makedirs(FIGURE_DIR, exist_ok=True)
# for the latex document
REPORT_DIR = "../report/"
os.makedirs(REPORT_DIR, exist_ok=True)
# -----------------------------------------------------------------------------
# Group seed: the product of the two birthday numbers below
# -----------------------------------------------------------------------------
# first birthday
bd_1 = 3112
# second birthday
bd_2 = 3112
group_seed = bd_1 * bd_2
# seed NumPy's legacy global random state (affects np.random.* calls only)
np.random.seed(group_seed)
# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------
# settings for output printing
print_line_length = 90
print_line_start = 5
# number of draws generated for each of u, x1 and x2 below
# NOTE(review): the original comment called this "the number of data sets";
# the name and its use as the array length suggest the number of observations
num_obs = 2**10
# set the seed and the random number generator for reproducible results
# this ensures that every time you run this code you get exactly the same random numbers.
# changing the seed would change the random numbers.
# (this Generator is independent of the np.random.seed call above)
rng = np.random.default_rng(group_seed)
# the true value of the parameters.
# Known in your role as creator, unknown to the modeller.
beta = np.array([3.0, -4.0, 2.0])
# error terms: normal draws with mean 0 and standard deviation 3
u = rng.normal(0, 3, (num_obs,))
# regressors: normal draws with mean 3 / std 6 and mean 2 / std 5
x1 = rng.normal(3, 6, (num_obs,))
x2 = rng.normal(2, 5, (num_obs,))
# templates still to be completed
# y = TODO
# X = TODO
# m = TODO
# results = #TODO

207
scripts/empirical.py Normal file
View file

@ -0,0 +1,207 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empirical assignment 2024 - 2025
STARTER FILE
"""
import os
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import pandas as pd
from helper import print_question, data_frame_to_latex_table_file
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Empirical assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Input/output folders, relative to the directory the script is run from.
# exist_ok=True creates a missing folder and is a no-op for an existing one,
# which avoids the separate check-then-create step.
# for the data (the Stata file is read from here further down)
data_dir = '../data/'
os.makedirs(data_dir, exist_ok=True)
# for the figures
figure_dir = '../figures/'
os.makedirs(figure_dir, exist_ok=True)
# for the latex document
report_dir = '../report/'
os.makedirs(report_dir, exist_ok=True)
# -----------------------------------------------------------------------------
# Group seed: the product of the two birthday numbers below
# -----------------------------------------------------------------------------
# first birthday
bd_1 = 3112
# second birthday
bd_2 = 3112
group_seed = bd_1 * bd_2
# seed NumPy's legacy global random state (affects np.random.* calls only)
np.random.seed(group_seed)
# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------
# separate Generator seeded with the same value, for reproducible results;
# it is independent of the np.random.seed call above and is the generator
# used for the subsampling below
rng = np.random.default_rng(group_seed)
# settings for output printing
print_line_length = 90
print_line_start = 5
# number of x points
num_points = 60
# -----------------------------------------------------------------------------
# Load data 2.1
# -----------------------------------------------------------------------------
# read the full Stata data set from the data folder
data_full = pd.read_stata(data_dir + 'assignment2025.dta')
# sample size: 75% of the rows, truncated to an integer
num_obs = int(0.75 * data_full.shape[0])
# draw num_obs distinct row positions at random (replace=False means no row
# is picked twice); the rng uses the group seed
observations = rng.choice (len(data_full), num_obs , replace =False)
# select the observations for your group, as a copy of the sampled rows
data = data_full.iloc[observations , :].copy()
# alternative kept for reference: work on the full data set instead
#data = data_full
# -----------------------------------------------------------------------------
# Descriptive statistics 2.1
# -----------------------------------------------------------------------------
# Scaffold for questions 2.1 - 2.15: every section below calls print_question
# with the title of its question; the commented lines are templates that
# still have to be completed and uncommented.
print_question('Question 2.1: Descriptive Statistics')
# compute the summary statistics
# data_summary = TODO
# print to screen
# print(data_summary.T) [uncomment]
# export the summary statistics to a file
# NOTE(review): 'summmary_stats.tex' is spelled with three m's - confirm the
# intended file name before uncommenting.
# data_frame_to_latex_table_file(report_dir + 'summmary_stats.tex',
# data_summary.T) [uncomment]
# -----------------------------------------------------------------------------
# Question 2.2
# -----------------------------------------------------------------------------
print_question('Question 2.2: Plot histogram wage / lwage')
# -----------------------------------------------------------------------------
# Question 2.3
# -----------------------------------------------------------------------------
print_question('Question 2.3: Sample correlations')
# -----------------------------------------------------------------------------
# Question 2.4
# -----------------------------------------------------------------------------
print_question('Question 2.4: Estimate lwage model')
# explanatory variables for question 2.4
# x_vars_24 = data[['smcity', 'AA']] # TODO
# add a constant
# X_24 = sm.add_constant(x_vars_24) [uncomment]
# set-up model
# model_24 = sm.OLS(,) #TODO
# estimate the model
# results_24 = model_24. #TODO
# print the OLS output
# print(results_24.summary()) [uncomment]
# export the coefficients part of the summary to a table
# data_frame_to_latex_table_file(report_dir + 'results_24.tex',
# results_24.summary2().tables[1])
# -----------------------------------------------------------------------------
# Question 2.5
# -----------------------------------------------------------------------------
print_question('Question 2.5: Adding school')
# -----------------------------------------------------------------------------
# Question 2.6
# -----------------------------------------------------------------------------
print_question('Question 2.6: Adding age')
# -----------------------------------------------------------------------------
# Question 2.7
# -----------------------------------------------------------------------------
print_question('Question 2.7: Create the woman variable')
# -----------------------------------------------------------------------------
# Question 2.8
# -----------------------------------------------------------------------------
print_question('Question 2.8: lwage model')
# -----------------------------------------------------------------------------
# Question 2.9
# -----------------------------------------------------------------------------
print_question('Question 2.9: Test ethnicity')
# -----------------------------------------------------------------------------
# Question 2.10
# -----------------------------------------------------------------------------
print_question('Question 2.10: Estimate models separately')
# -----------------------------------------------------------------------------
# Question 2.11
# -----------------------------------------------------------------------------
print_question('Question 2.11: Predict lwage')
# -----------------------------------------------------------------------------
# Question 2.12
# -----------------------------------------------------------------------------
print_question('Question 2.12: Estimate model with squared terms')
# -----------------------------------------------------------------------------
# Question 2.13
# -----------------------------------------------------------------------------
print_question('Question 2.13: Squared terms')
# -----------------------------------------------------------------------------
# Question 2.14
# -----------------------------------------------------------------------------
print_question('Question 2.14: Add interaction terms')
# -----------------------------------------------------------------------------
# Question 2.15
# -----------------------------------------------------------------------------
print_question('Question 2.15: Find your favourite model')

84
scripts/helper.py Normal file
View file

@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helper functions for assignment
"""
import os
import numpy as np
import pandas as pd
# -----------------------------------------------------------------------------
# ALL FUNCTIONS ARE ON TOP
#
# THE SCRIPT IS BELOW THE FUNCTIONS
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Helper function to transform results summary into a dataFrame
# -----------------------------------------------------------------------------
def results_summary_to_dataframe(results, rounding=2):
    """Collect the main estimation output of a fitted model in a DataFrame.

    Args:
        results: fitted statsmodels results object; it must expose
            ``params``, ``tvalues``, ``pvalues`` and ``conf_int()``.
        rounding (int): number of decimals kept in every column.

    Returns:
        pandas.DataFrame: one row per coefficient with the columns
        ``coeff``, ``tvals``, ``pvals``, ``conf_lower`` and ``conf_higher``
        (in that order).
    """
    # conf_int() returns an ndarray when the model was fitted on arrays but a
    # DataFrame when it was fitted on pandas objects. np.asarray gives one
    # representation that supports the [:, k] column slicing in both cases.
    bounds = np.asarray(results.conf_int())
    # The dictionary order is the column order of the resulting table.
    columns = {
        "coeff": results.params,
        "tvals": results.tvalues,
        "pvals": results.pvalues,
        "conf_lower": bounds[:, 0],
        "conf_higher": bounds[:, 1],
    }
    return pd.DataFrame(
        {name: np.round(values, rounding) for name, values in columns.items()}
    )
# -----------------------------------------------------------------------------
def data_frame_to_latex_table_file(file_name, df):
    """Write ``df`` as a LaTeX table to the file ``file_name``.

    Args:
        file_name (str): path of the file to write. It is used exactly as
            given (no extension is appended) and is overwritten if present.
        df (pandas.DataFrame): table to export; rendered with
            ``df.to_latex()``.

    Returns:
        None.
    """
    # Render before opening the file: if to_latex() fails, an existing table
    # is not truncated and no empty file is left behind.
    df_latex = df.to_latex()
    # The context manager closes the handle even when the write fails; UTF-8
    # matches the input encoding declared by the report.
    with open(file_name, "w", encoding="utf-8") as text_file:
        text_file.write(df_latex)
# -----------------------------------------------------------------------------
def print_question(statement, print_line_start=5, print_line_length=90):
    """
    Print a banner line announcing a question.

    The line consists of ``print_line_start`` hash characters, a space, the
    statement, a space and as many hash characters as are needed to reach
    ``print_line_length`` columns (none when the statement is too long).

    Args:
        statement (string): Question description.
        print_line_start (int): number of '#' printed before the statement.
        print_line_length (int): total width of the banner in columns.

    Returns:
        None.
    """
    leading = '#' * print_line_start
    # two columns are taken by the spaces around the statement
    remaining = print_line_length - len(statement) - print_line_start - 2
    trailing = '#' * remaining
    print(' '.join((leading, statement, trailing)))

View file

@ -0,0 +1,198 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Examples of non-linear models
"""
import os
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
# -----------------------------------------------------------------------------
# ALL FUNCTIONS ARE ON TOP
#
# THE SCRIPT IS BELOW THE FUNCTIONS
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Helper function to transform results summary into a dataFrame
# -----------------------------------------------------------------------------
def results_summary_to_dataframe(results, rounding=2):
    """Collect the main estimation output of a fitted model in a DataFrame.

    The names of the two confidence-bound columns carry a LaTeX-escaped
    underscore (a backslash followed by ``_``).

    Args:
        results: fitted statsmodels results object; it must expose
            ``params``, ``tvalues``, ``pvalues`` and ``conf_int()``.
        rounding (int): number of decimals kept in every column.

    Returns:
        pandas.DataFrame: one row per coefficient with the coefficient,
        t-value, p-value and the lower/upper confidence bound, in that order.
    """
    # conf_int() returns an ndarray when the model was fitted on arrays but a
    # DataFrame when it was fitted on pandas objects. np.asarray gives one
    # representation that supports the [:, k] column slicing in both cases.
    bounds = np.asarray(results.conf_int())
    # "\\_" is a literal backslash plus underscore. Writing it with a single
    # backslash is an invalid escape sequence that recent Python versions
    # warn about.
    lower_name = "conf\\_lower"
    upper_name = "conf\\_higher"
    # The dictionary order is the column order of the resulting table.
    columns = {
        "coeff": results.params,
        "tvals": results.tvalues,
        "pvals": results.pvalues,
        lower_name: bounds[:, 0],
        upper_name: bounds[:, 1],
    }
    return pd.DataFrame(
        {name: np.round(values, rounding) for name, values in columns.items()}
    )
# -----------------------------------------------------------------------------
def data_frame_to_latex_table_file(file_name, df):
    """Write ``df`` as a LaTeX table to the file ``file_name``.

    Args:
        file_name (str): path of the file to write. It is used exactly as
            given (no extension is appended) and is overwritten if present.
        df (pandas.DataFrame): table to export; rendered with
            ``df.to_latex()``.

    Returns:
        None.
    """
    # Render before opening the file: if to_latex() fails, an existing table
    # is not truncated and no empty file is left behind.
    df_latex = df.to_latex()
    # The context manager closes the handle even when the write fails.
    with open(file_name, "w", encoding="utf-8") as text_file:
        text_file.write(df_latex)
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Both folders are given relative to the current working directory.
# exist_ok=True creates a folder when it is missing and does nothing when it
# is already there, without a separate check-then-create step.
# for the figures
FIGURE_DIR = "../figures/"
os.makedirs(FIGURE_DIR, exist_ok=True)
# for the latex document
REPORT_DIR = "../report/"
os.makedirs(REPORT_DIR, exist_ok=True)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------
# a seeded generator returns the same random numbers on every run
seed = 425246524
rng = np.random.default_rng(seed)
# number of x points
num_points = 60
# -----------------------------------------------------------------------------
# Quadratic
# -----------------------------------------------------------------------------
# true parameters of the Data Generating Process (DGP): constant, x and x**2
beta = np.array([20, 0.5, -0.5])
# equally spaced values for x
x = np.linspace(-20, 40, num_points)
# standard deviation of the error term
sigma_eps = 59
# draw the error terms
eps = rng.normal(0, sigma_eps, size=(num_points,))
# y values implied by the DGP
y = beta[0] + beta[1] * x + beta[2] * x**2 + eps
# misspecified model: constant and x only
linear_results = sm.OLS(y, sm.add_constant(x)).fit()
# correctly specified model: constant, x and x**2
X = sm.add_constant(np.array([x, x**2]).T)
results = sm.OLS(y, X).fit()
# one figure per entry: (file name, fitted values to draw or None)
figure_specs = (
    ("quadratic_model_y.png", None),
    ("quadratic_model_linear.png", linear_results.fittedvalues),
    ("quadratic_model_quadratic.png", results.fittedvalues),
)
fig_num = 1
for figure_file, fitted_values in figure_specs:
    fig = plt.figure(num=fig_num)
    ax = fig.add_subplot(111)
    # the fitted line (if any) is drawn first, the raw data on top of it
    if fitted_values is not None:
        ax.plot(x, fitted_values, label=r"$\hat{y}$")
    ax.grid(ls=":")
    ax.plot(x, y, "o", color="tab:brown", label="$y$")
    ax.legend(loc="best")
    ax.set_title("Quadratic model")
    # save to disk before showing the figure
    plt.savefig(FIGURE_DIR + figure_file)
    plt.show()
    fig_num += 1
# print a summary of the quadratic model
print(results.summary())
# The results can also be exported to LaTeX:
# either the complete summary in one string ...
latex_summary = results.summary().as_latex()
# ... or every table of the summary separately
for summary_table in results.summary().tables:
    print(summary_table.as_latex_tabular())
###############################################################################
# Generate LaTeX tables
###############################################################################
# write the complete summary to a file
summary_path = REPORT_DIR + "summary.tex"
with open(summary_path, "w") as summary_file:
    summary_file.write(latex_summary)
# DataFrame with the estimation results, exported as a LaTeX table
estimation_results_df = results_summary_to_dataframe(results)
table_data_file = REPORT_DIR + "df_table.tex"
data_frame_to_latex_table_file(table_data_file, estimation_results_df)
# alternative: export only the coefficient table of the summary
coefficient_table = results.summary2().tables[1]
data_frame_to_latex_table_file(REPORT_DIR + "results_coef.tex", coefficient_table)

View file

@ -1,3 +0,0 @@
import util
print(util.group_seed())

188
scripts/simulation.py Normal file
View file

@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation assignment 2024 - 2025
STARTER FILE
"""
import os
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
#import varplot as vpl
#plt.style.use("finthabo")
# import the helper functions used in this script from helper.py
from helper import data_frame_to_latex_table_file, print_question
#colors = vpl.color_list()
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Simulation assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Both folders are given relative to the current working directory.
# exist_ok=True creates a folder when it is missing and does nothing when it
# is already there, without a separate check-then-create step.
# for the figures
FIGURE_DIR = "../figures/"
os.makedirs(FIGURE_DIR, exist_ok=True)
# for the latex document
REPORT_DIR = "../report/"
os.makedirs(REPORT_DIR, exist_ok=True)
# -----------------------------------------------------------------------------
# Here we set the seed for our group to your group number
# -----------------------------------------------------------------------------
# first birthday
bd_1 = 303
# second birthday
bd_2 = 309
# the group seed is the product of the two birthdays
group_seed = bd_1 * bd_2
# seed NumPy's legacy global generator; every draw below comes from `rng`
np.random.seed(group_seed)
# -----------------------------------------------------------------------------
# settings for output printing
# -----------------------------------------------------------------------------
print_line_length = 90
print_line_start = 5


# -----------------------------------------------------------------------------
# Helper shared by questions 1.2 - 1.6
# -----------------------------------------------------------------------------
def _estimate_and_export(y_values, regressors, true_beta, table_name):
    """Fit an OLS model and export a table comparing it with the truth.

    Args:
        y_values: dependent variable, one value per observation.
        regressors: sequence of 1-D arrays used as explanatory variables;
            a constant is added as the first column.
        true_beta: true coefficients of the constant and of the included
            regressors, shown next to the estimates.
        table_name (str): file name of the LaTeX table inside REPORT_DIR.

    Returns:
        The fitted statsmodels results object.
    """
    # design matrix: a column of ones followed by the regressors
    design = np.array([np.ones(len(y_values)), *regressors]).T
    fit = sm.OLS(y_values, design).fit()
    table = pd.DataFrame(
        data={
            'True': true_beta,
            'Estimated': fit.params,
            'Std Err': fit.bse,
            't-Stat': fit.tvalues,
        }
    )
    data_frame_to_latex_table_file(REPORT_DIR + table_name, table)
    return fit


# -----------------------------------------------------------------------------
# 1.1
# -----------------------------------------------------------------------------
# the number of observations in the simulated sample
num_obs = 1000
# set the seed and the random number generator for reproducible results
# this ensures that every time you run this code you get exactly the same random numbers.
# changing the seed would change the random numbers.
rng = np.random.default_rng(group_seed)
# the true value of the parameters.
# Known in your role as creator, unknown to the modeller.
beta = np.array([3.0, -4.0, 2.0])
# error term: mean 0, standard deviation 3
u = rng.normal(0, 3, (num_obs,))
# regressors: x1 has mean 3 and standard deviation 6,
# x2 has mean 2 and standard deviation 5
x1 = rng.normal(3, 6, (num_obs,))
x2 = rng.normal(2, 5, (num_obs,))
# y
y = beta[0] + beta[1]*x1 + beta[2]*x2 + u
# -----------------------------------------------------------------------------
# 1.2
# -----------------------------------------------------------------------------
# regress y on a constant, x1 and x2
results = _estimate_and_export(y, [x1, x2], beta, 'table_1_2.tex')
# -----------------------------------------------------------------------------
# 1.3
# -----------------------------------------------------------------------------
# same y, but x2 is left out of the regression
results = _estimate_and_export(y, [x1], beta[0:2], 'table_1_3.tex')
# -----------------------------------------------------------------------------
# 1.4
# -----------------------------------------------------------------------------
# new second regressor that depends on x1 by construction
x2_new = 0.5*x1 + rng.normal(5, 4, (num_obs,))
y_new = beta[0] + beta[1]*x1 + beta[2]*x2_new + u
results = _estimate_and_export(y_new, [x1, x2_new], beta, 'table_1_4.tex')
# -----------------------------------------------------------------------------
# 1.5
# -----------------------------------------------------------------------------
# same y_new, but x2_new is left out of the regression
results = _estimate_and_export(y_new, [x1], beta[0:2], 'table_1_5.tex')
# -----------------------------------------------------------------------------
# 1.6
# -----------------------------------------------------------------------------
# redraw x1 with a smaller standard deviation (1 instead of 6)
x1 = rng.normal(3, 1, (num_obs,))
y = beta[0] + beta[1]*x1 + beta[2]*x2 + u
results = _estimate_and_export(y, [x1, x2], beta, 'table_1_6.tex')

View file

@ -1,24 +0,0 @@
import os
import numpy as np
def group_seed():
    """Return the group seed: the product of the group's birthdays.

    The birthdays are read from the environment variable ``BIRTHDAYS`` as a
    comma-separated list of integers, e.g. ``BIRTHDAYS=303,309``.

    Returns:
        int: product of all birthdays.

    Raises:
        SystemExit: with exit code -1 when ``BIRTHDAYS`` is unset or empty.
        ValueError: when an entry cannot be converted to an integer.
    """
    bd_env_var = os.getenv("BIRTHDAYS")
    # If no birthdays are set, complain and exit.
    if not bd_env_var:
        print("Environment variable BIRTHDAYS is missing")
        # exit() is only injected by the `site` module for interactive use;
        # raising SystemExit gives the same exit code without relying on it.
        raise SystemExit(-1)
    # multiply the birthdays to get the group seed
    seed = 1
    for bd in bd_env_var.split(","):
        seed *= int(bd)
    return seed
rng = np.random.default_rng(group_seed())