198 lines
6 KiB
Python
198 lines
6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Examples of non-linear models
|
|
"""
|
|
|
|
import os
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import numpy.linalg as la
|
|
import pandas as pd
|
|
import scipy.stats as stats
|
|
import statsmodels.api as sm
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# ALL FUNCTIONS ARE ON TOP
|
|
#
|
|
# THE SCRIPT IS BELOW THE FUNCTIONS
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Helper function to transform results summary into a dataFrame
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
def results_summary_to_dataframe(results, rounding=2):
|
|
"""take the result of an statsmodel results table
|
|
and transforms it into a dataframe"""
|
|
|
|
# get the values from results
|
|
# if you want, you can of course generalize this.
|
|
# e.g. if you don't have normal error terms
|
|
# you could change the pvalues and confidence bounds
|
|
# see exercise session 9?!
|
|
pvals = results.pvalues
|
|
tvals = results.tvalues
|
|
coeff = results.params
|
|
conf_lower = results.conf_int()[:, 0]
|
|
conf_higher = results.conf_int()[:, 1]
|
|
|
|
# create a pandas DataFrame from a dictionary
|
|
results_df = pd.DataFrame(
|
|
{
|
|
"pvals": np.round(pvals, rounding),
|
|
"tvals": np.round(tvals, rounding),
|
|
"coeff": np.round(coeff, rounding),
|
|
"conf\_lower": np.round(conf_lower, rounding),
|
|
"conf\_higher": np.round(conf_higher, rounding),
|
|
}
|
|
)
|
|
# This is just to show you how to re-order if needed
|
|
# Typically you should put them in the order you like straigh away
|
|
# Reordering...
|
|
results_df = results_df[["coeff", "tvals", "pvals", "conf\_lower", "conf\_higher"]]
|
|
|
|
return results_df
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
def data_frame_to_latex_table_file(file_name, df):
|
|
"""takes a DataFrame and creates file_name.tex with LaTeX table data."""
|
|
# create and open file
|
|
text_file = open(file_name, "w")
|
|
# data frame to LaTeX
|
|
df_latex = df.to_latex()
|
|
# Consider extensions (see later in class)
|
|
# write latex string to file
|
|
text_file.write(df_latex)
|
|
# close file
|
|
text_file.close()
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Set the folders for output of graphs and tables
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# for the figures
|
|
FIGURE_DIR = "../figures/"
|
|
if not os.path.exists(FIGURE_DIR):
|
|
os.makedirs(FIGURE_DIR)
|
|
# for the latex document
|
|
REPORT_DIR = "../report/"
|
|
if not os.path.exists(REPORT_DIR):
|
|
os.makedirs(REPORT_DIR)
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# -----------------------------------------------------------------------------
|
|
# Start of Script
|
|
# -----------------------------------------------------------------------------
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# set the random number generator and seed
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# set the seed and the random number generator for reproducible results
|
|
seed = 425246524
|
|
rng = np.random.default_rng(seed)
|
|
|
|
# number of x points
|
|
num_points = 60
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Quadratic
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# the true parameters of the Data Generating process (DGP)
|
|
beta = np.array([20, 0.5, -0.5])
|
|
|
|
# values for x
|
|
x = np.linspace(-20, 40, num_points)
|
|
|
|
# error term
|
|
sigma_eps = 59
|
|
# generate random numbers
|
|
eps = rng.normal(0, sigma_eps, (num_points,))
|
|
|
|
# create y values for the DGP
|
|
y = beta[0] + beta[1] * x + beta[2] * x**2 + eps
|
|
|
|
# estimate the model
|
|
results = sm.OLS(y, sm.add_constant(x)).fit()
|
|
|
|
# generate a figure and save it to disk
|
|
fig_num = 1
|
|
fig = plt.figure(num=fig_num)
|
|
ax = fig.add_subplot(111)
|
|
ax.grid(ls=":")
|
|
ax.plot(x, y, "o", color="tab:brown", label="$y$")
|
|
ax.legend(loc="best")
|
|
ax.set_title("Quadratic model")
|
|
plt.savefig(FIGURE_DIR + "quadratic_model_y.png")
|
|
plt.show()
|
|
fig_num += 1
|
|
|
|
|
|
fig = plt.figure(num=fig_num)
|
|
ax = fig.add_subplot(111)
|
|
ax.plot(x, results.fittedvalues, label=r"$\hat{y}$")
|
|
ax.grid(ls=":")
|
|
ax.plot(x, y, "o", color="tab:brown", label="$y$")
|
|
ax.legend(loc="best")
|
|
ax.set_title("Quadratic model")
|
|
plt.savefig(FIGURE_DIR + "quadratic_model_linear.png")
|
|
plt.show()
|
|
fig_num += 1
|
|
|
|
X = sm.add_constant(np.array([x, x**2]).T)
|
|
|
|
results = sm.OLS(y, X).fit()
|
|
|
|
fig = plt.figure(num=fig_num)
|
|
ax = fig.add_subplot(111)
|
|
ax.plot(x, results.fittedvalues, label=r"$\hat{y}$")
|
|
ax.grid(ls=":")
|
|
ax.plot(x, y, "o", color="tab:brown", label="$y$")
|
|
ax.legend(loc="best")
|
|
ax.set_title("Quadratic model")
|
|
plt.savefig(FIGURE_DIR + "quadratic_model_quadratic.png")
|
|
plt.show()
|
|
fig_num += 1
|
|
|
|
# print a summary
|
|
print(results.summary())
|
|
|
|
# The results can also be exported to LaTeX.
|
|
# do either
|
|
latex_summary = results.summary().as_latex()
|
|
|
|
# alternatively for each group separately
|
|
for table in results.summary().tables:
|
|
print(table.as_latex_tabular())
|
|
|
|
###############################################################################
|
|
# Generate LateX tables
|
|
###############################################################################
|
|
|
|
# write a string to a file
|
|
with open(REPORT_DIR + "summary.tex", "w") as f:
|
|
f.write(latex_summary)
|
|
|
|
# create a DataFrame for the results
|
|
estimation_results_df = results_summary_to_dataframe(results)
|
|
|
|
# give a name to the table
|
|
table_data_file = REPORT_DIR + "df_table.tex"
|
|
# create a latex file with the table information
|
|
data_frame_to_latex_table_file(table_data_file, estimation_results_df)
|
|
|
|
# an alternative to only send the coefficients part of the results
|
|
# to a LaTeX table
|
|
data_frame_to_latex_table_file(
|
|
REPORT_DIR + "results_coef.tex", results.summary2().tables[1]
|
|
)
|