applied-econometrics-2024/scripts/simulation.py

206 lines
5.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simulation assignment 2024 - 2025
STARTER FILE
"""
import os
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
#import varplot as vpl
#plt.style.use("finthabo")
# this imports all functions from the helper file
from helper import data_frame_to_latex_table_file, print_question
#colors = vpl.color_list()
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Simulation assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------
# Output folders, relative to the current working directory.
# Both paths end with "/" because file names are appended with "+" later on.
# exist_ok=True creates a folder only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.

# for the figures
FIGURE_DIR = "../figures/"
os.makedirs(FIGURE_DIR, exist_ok=True)

# for the latex document
REPORT_DIR = "../report/"
os.makedirs(REPORT_DIR, exist_ok=True)
# -----------------------------------------------------------------------------
# Set the random seed for our group (the product of the two birthday numbers)
# -----------------------------------------------------------------------------
# the two birthday numbers that identify the group
bd_1, bd_2 = 303, 309
# the group seed is their product
group_seed = bd_1 * bd_2
# seed numpy's legacy global random state
np.random.seed(group_seed)
# -----------------------------------------------------------------------------
# Settings for printed output
# -----------------------------------------------------------------------------
# layout settings for printed output: line length and line start
print_line_length, print_line_start = 90, 5
# -----------------------------------------------------------------------------
# Utility Functions for the Simulation
# -----------------------------------------------------------------------------
def results_to_latex_table_file(file_name, results, beta):
    """
    Write a true-versus-estimated coefficient table to a latex table file.

    The table has the columns 'True', 'Estimated', 'Std Err' and 't-Stat'.

    Parameters
    ----------
    file_name : str
        Name of the output file; it is appended to REPORT_DIR.
    results : object
        Fitted results object providing ``params``, ``bse`` and ``tvalues``
        (in this script: the value returned by ``sm.OLS(...).fit()``).
    beta : array-like
        True parameter values, placed next to ``results.params`` in the
        table, so it should have one entry per estimated parameter.
    """
    columns = {
        'True': beta,
        'Estimated': results.params,
        'Std Err': results.bse,
        't-Stat': results.tvalues,
    }
    df = pd.DataFrame(data=columns)
    # the actual file writing is delegated to the project helper
    data_frame_to_latex_table_file(REPORT_DIR + file_name, df)
# -----------------------------------------------------------------------------
# 1.1
# -----------------------------------------------------------------------------
# number of observations in the simulated sample
num_obs = 1000
# Seeded random number generator: every run of this code produces exactly
# the same random numbers; a different seed would give different numbers.
rng = np.random.default_rng(group_seed)
# True parameter values: intercept, coefficient on x1, coefficient on x2.
# Known in your role as creator, unknown to the modeller.
beta = np.array([3.0, -4.0, 2.0])
# NOTE: the draw order (u, x1, x2) fixes which random numbers each series gets.
# error term: mean 0, standard deviation 3
u = rng.normal(loc=0, scale=3, size=(num_obs,))
# regressors: x1 has mean 3 and standard deviation 6,
#             x2 has mean 2 and standard deviation 5
x1 = rng.normal(loc=3, scale=6, size=(num_obs,))
x2 = rng.normal(loc=2, scale=5, size=(num_obs,))
# dependent variable generated from the true linear model
y = beta[0] + beta[1] * x1 + beta[2] * x2 + u
# 3D scatter plot of the simulated data, saved to the figure folder
fig = plt.figure()
ax = fig.add_subplot(projection="3d")
ax.scatter(x1, x2, y, marker="o")
ax.set(xlabel="x1", ylabel="x2", zlabel="y")
plt.savefig(FIGURE_DIR + "question_1_1.png")
plt.show()
# -----------------------------------------------------------------------------
# 1.2
# -----------------------------------------------------------------------------
# design matrix: a column of ones (intercept), then x1 and x2
X = np.vstack((np.ones(num_obs), x1, x2)).T
# OLS regression of y on the full set of regressors
m = sm.OLS(endog=y, exog=X)
results = m.fit()
# table comparing the estimates with all three true parameters
results_to_latex_table_file(file_name='table_1_2.tex', results=results, beta=beta)
# -----------------------------------------------------------------------------
# 1.3
# -----------------------------------------------------------------------------
# design matrix without x2: a column of ones (intercept) and x1 only
X = np.vstack((np.ones(num_obs), x1)).T
# OLS regression of y on the reduced set of regressors
m = sm.OLS(endog=y, exog=X)
results = m.fit()
# only the first two true parameters have an estimated counterpart here
results_to_latex_table_file(file_name='table_1_3.tex', results=results, beta=beta[:2])
# -----------------------------------------------------------------------------
# 1.4
# -----------------------------------------------------------------------------
# x2_new depends on x1 by construction: half of x1 plus a fresh normal draw
# with mean 5 and standard deviation 4
x2_new = 0.5 * x1 + rng.normal(loc=5, scale=4, size=(num_obs,))
# new dependent variable: same true parameters and error term, x2_new replaces x2
y_new = beta[0] + beta[1] * x1 + beta[2] * x2_new + u
# design matrix: a column of ones (intercept), then x1 and x2_new
X = np.vstack((np.ones(num_obs), x1, x2_new)).T
# OLS regression of y_new on the full set of regressors
m = sm.OLS(endog=y_new, exog=X)
results = m.fit()
results_to_latex_table_file(file_name='table_1_4.tex', results=results, beta=beta)
# -----------------------------------------------------------------------------
# 1.5
# -----------------------------------------------------------------------------
# design matrix without x2_new, although y_new was generated using it
X = np.vstack((np.ones(num_obs), x1)).T
# OLS regression of y_new on the reduced set of regressors
m = sm.OLS(endog=y_new, exog=X)
results = m.fit()
# only the first two true parameters have an estimated counterpart here
results_to_latex_table_file(file_name='table_1_5.tex', results=results, beta=beta[:2])
# -----------------------------------------------------------------------------
# 1.6
# -----------------------------------------------------------------------------
# A separate x1_new (standard deviation 1 instead of 6) keeps the original
# x1 from 1.2 available for comparison.
x1_new = rng.normal(loc=3, scale=1, size=(num_obs,))
# NOTE: y_new is overwritten here; from now on it is built from x1_new and x2.
y_new = beta[0] + beta[1] * x1_new + beta[2] * x2 + u
# design matrix: a column of ones (intercept), then x1_new and x2
X = np.vstack((np.ones(num_obs), x1_new, x2)).T
# OLS regression of y_new on the full set of regressors
m = sm.OLS(endog=y_new, exog=X)
results = m.fit()
results_to_latex_table_file(file_name='table_1_6.tex', results=results, beta=beta)
# scatter plot of both data sets in one figure:
# blue squares = (x1, y) from 1.1, red circles = (x1_new, y_new) from 1.6
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
ax1.scatter(x1, y, c='b', marker="s", label='question 1.1')
ax1.scatter(x1_new, y_new, c='r', marker="o", label='question 1.6')
plt.legend(loc='upper left')
plt.savefig(FIGURE_DIR + "question_1_6.png")
plt.show()