# applied-econometrics-2024/scripts/non_linear_models_example_report.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Examples of non-linear models
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm

# -----------------------------------------------------------------------------
# ALL FUNCTIONS ARE ON TOP
#
# THE SCRIPT IS BELOW THE FUNCTIONS
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Helper function to transform results summary into a dataFrame
# -----------------------------------------------------------------------------


def results_summary_to_dataframe(results, rounding=2):
    """Collect the main numbers of a statsmodels results object in a DataFrame.

    Parameters
    ----------
    results : fitted statsmodels results object
        Only ``params``, ``tvalues``, ``pvalues`` and ``conf_int()`` are used.
    rounding : int, optional
        Number of decimals every entry is rounded to (default 2).

    Returns
    -------
    pandas.DataFrame
        One row per coefficient with five columns, in this order:
        coefficient, t-value, p-value, lower and upper confidence bound.
        The names of the two bound columns contain a backslash-escaped
        underscore.
    """
    # If you want, you can of course generalize this, e.g. if you don't have
    # normal error terms you could change the p-values and confidence bounds
    # (see exercise session 9?!).

    # conf_int() returns an ndarray for array inputs and a DataFrame for
    # pandas inputs; np.asarray makes the positional column slicing work for
    # both, and the interval is computed only once.
    conf = np.asarray(results.conf_int())

    # Raw strings keep the backslash in the column name without relying on
    # the invalid escape sequence "\_" (presumably the underscore is escaped
    # for the LaTeX export of the table).
    lower_name = r"conf\_lower"
    higher_name = r"conf\_higher"

    # The dictionary is already written in the final column order, so no
    # re-ordering of the DataFrame is needed afterwards.
    columns = {
        "coeff": results.params,
        "tvals": results.tvalues,
        "pvals": results.pvalues,
        lower_name: conf[:, 0],
        higher_name: conf[:, 1],
    }
    return pd.DataFrame(
        {name: np.round(values, rounding) for name, values in columns.items()}
    )


# -----------------------------------------------------------------------------


def data_frame_to_latex_table_file(file_name, df):
    """Write the LaTeX table representation of a DataFrame to a file.

    Parameters
    ----------
    file_name : str or path-like
        Path of the file to write. It is used exactly as given (no
        extension is appended) and an existing file is overwritten.
    df : pandas.DataFrame
        Table to export; only its ``to_latex()`` method is used.
    """
    # data frame to LaTeX; done before the file is opened so that a failing
    # conversion does not leave an empty or truncated file behind
    # Consider extensions (see later in class)
    df_latex = df.to_latex()
    # write latex string to file; the context manager closes the file even
    # when writing raises
    with open(file_name, "w") as text_file:
        text_file.write(df_latex)


# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------

# for the figures
FIGURE_DIR = "../figures/"
# exist_ok=True creates the folder only if it is missing, without a separate
# existence check that could be outdated by the time the folder is created
os.makedirs(FIGURE_DIR, exist_ok=True)
# for the latex document
REPORT_DIR = "../report/"
os.makedirs(REPORT_DIR, exist_ok=True)


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------

# fix the seed so that every run draws the same random numbers
seed = 425246524
rng = np.random.default_rng(seed)

# sample size (number of x points)
num_points = 60

# -----------------------------------------------------------------------------
# Quadratic
# -----------------------------------------------------------------------------

# true parameters of the Data Generating Process (DGP):
# intercept, coefficient on x and coefficient on x**2
beta = np.array([20, 0.5, -0.5])
beta_0, beta_1, beta_2 = beta

# equally spaced values for x between -20 and 40
x = np.linspace(start=-20, stop=40, num=num_points)

# standard deviation of the error term
sigma_eps = 59
# draw the normally distributed errors with mean zero
eps = rng.normal(loc=0, scale=sigma_eps, size=(num_points,))

# y values implied by the DGP
y = beta_0 + beta_1 * x + beta_2 * x**2 + eps

# first estimate a model that is linear in x (constant + x)
linear_design = sm.add_constant(x)
results = sm.OLS(y, linear_design).fit()

# figure 1: the simulated data only, saved to disk and shown
fig_num = 1
fig, ax = plt.subplots(num=fig_num)
ax.grid(ls=":")
ax.plot(x, y, "o", color="tab:brown", label="$y$")
ax.legend(loc="best")
ax.set_title("Quadratic model")
fig.savefig(os.path.join(FIGURE_DIR, "quadratic_model_y.png"))
plt.show()
fig_num += 1

# figure 2: the data together with the fitted values of the linear model
fig, ax = plt.subplots(num=fig_num)
ax.plot(x, results.fittedvalues, label=r"$\hat{y}$")
ax.grid(ls=":")
ax.plot(x, y, "o", color="tab:brown", label="$y$")
ax.legend(loc="best")
ax.set_title("Quadratic model")
fig.savefig(os.path.join(FIGURE_DIR, "quadratic_model_linear.png"))
plt.show()
fig_num += 1

# now estimate the model with regressors constant, x and x**2
regressors = np.array([x, x**2]).T
X = sm.add_constant(regressors)
results = sm.OLS(y, X).fit()

# figure 3: the data together with the fitted values of the quadratic model
fig, ax = plt.subplots(num=fig_num)
ax.plot(x, results.fittedvalues, label=r"$\hat{y}$")
ax.grid(ls=":")
ax.plot(x, y, "o", color="tab:brown", label="$y$")
ax.legend(loc="best")
ax.set_title("Quadratic model")
fig.savefig(os.path.join(FIGURE_DIR, "quadratic_model_quadratic.png"))
plt.show()
fig_num += 1

# show the estimation summary on the console
print(results.summary())

# The summary can also be exported to LaTeX:
# either as one string for the complete summary ...
latex_summary = results.summary().as_latex()

# ... or separately for each table of the summary
for table in results.summary().tables:
    print(table.as_latex_tabular())

###############################################################################
# Generate LaTeX tables
###############################################################################

# store the LaTeX string of the complete summary in a file
summary_path = os.path.join(REPORT_DIR, "summary.tex")
with open(summary_path, "w") as summary_file:
    summary_file.write(latex_summary)

# DataFrame with the estimation results
estimation_results_df = results_summary_to_dataframe(results)

# name of the file that receives the table
table_data_file = os.path.join(REPORT_DIR, "df_table.tex")
# write the table information to that file
data_frame_to_latex_table_file(table_data_file, estimation_results_df)

# alternative: send only the coefficients part of the results
# to a LaTeX table
coefficient_table = results.summary2().tables[1]
data_frame_to_latex_table_file(
    os.path.join(REPORT_DIR, "results_coef.tex"), coefficient_table
)