#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Examples of non-linear models """ import os import matplotlib.pyplot as plt import numpy as np import numpy.linalg as la import pandas as pd import scipy.stats as stats import statsmodels.api as sm # ----------------------------------------------------------------------------- # ALL FUNCTIONS ARE ON TOP # # THE SCRIPT IS BELOW THE FUNCTIONS # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # Helper function to transform results summary into a dataFrame # ----------------------------------------------------------------------------- def results_summary_to_dataframe(results, rounding=2): """take the result of an statsmodel results table and transforms it into a dataframe""" # get the values from results # if you want, you can of course generalize this. # e.g. if you don't have normal error terms # you could change the pvalues and confidence bounds # see exercise session 9?! pvals = results.pvalues tvals = results.tvalues coeff = results.params conf_lower = results.conf_int()[:, 0] conf_higher = results.conf_int()[:, 1] # create a pandas DataFrame from a dictionary results_df = pd.DataFrame( { "pvals": np.round(pvals, rounding), "tvals": np.round(tvals, rounding), "coeff": np.round(coeff, rounding), "conf\_lower": np.round(conf_lower, rounding), "conf\_higher": np.round(conf_higher, rounding), } ) # This is just to show you how to re-order if needed # Typically you should put them in the order you like straigh away # Reordering... results_df = results_df[["coeff", "tvals", "pvals", "conf\_lower", "conf\_higher"]] return results_df # ----------------------------------------------------------------------------- def data_frame_to_latex_table_file(file_name, df): """takes a DataFrame and creates file_name.tex with LaTeX table data.""" # create and open file text_file = open(file_name, "w") # data frame to LaTeX df_latex = df.to_latex() # Consider extensions (see later in class) # write latex string to file text_file.write(df_latex) # close file text_file.close() # ----------------------------------------------------------------------------- # Set the folders for output of graphs and tables # ----------------------------------------------------------------------------- # for the figures FIGURE_DIR = "../figures/" if not os.path.exists(FIGURE_DIR): os.makedirs(FIGURE_DIR) # for the latex document REPORT_DIR = "../report/" if not os.path.exists(REPORT_DIR): os.makedirs(REPORT_DIR) # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # Start of Script # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- # set the random number generator and seed # ----------------------------------------------------------------------------- # set the seed and the random number generator for reproducible results seed = 425246524 rng = np.random.default_rng(seed) # number of x points num_points = 60 # ----------------------------------------------------------------------------- # Quadratic # ----------------------------------------------------------------------------- # the true parameters of the Data Generating process (DGP) beta = np.array([20, 0.5, -0.5]) # values for x x = np.linspace(-20, 40, num_points) # error term sigma_eps = 59 # generate random numbers eps = rng.normal(0, sigma_eps, (num_points,)) # create y values for the DGP y = beta[0] + beta[1] * x + beta[2] * x**2 + eps # estimate the model results = sm.OLS(y, sm.add_constant(x)).fit() # generate a figure and save it to disk fig_num = 1 fig = plt.figure(num=fig_num) ax = fig.add_subplot(111) ax.grid(ls=":") ax.plot(x, y, "o", color="tab:brown", label="$y$") ax.legend(loc="best") ax.set_title("Quadratic model") plt.savefig(FIGURE_DIR + "quadratic_model_y.png") plt.show() fig_num += 1 fig = plt.figure(num=fig_num) ax = fig.add_subplot(111) ax.plot(x, results.fittedvalues, label=r"$\hat{y}$") ax.grid(ls=":") ax.plot(x, y, "o", color="tab:brown", label="$y$") ax.legend(loc="best") ax.set_title("Quadratic model") plt.savefig(FIGURE_DIR + "quadratic_model_linear.png") plt.show() fig_num += 1 X = sm.add_constant(np.array([x, x**2]).T) results = sm.OLS(y, X).fit() fig = plt.figure(num=fig_num) ax = fig.add_subplot(111) ax.plot(x, results.fittedvalues, label=r"$\hat{y}$") ax.grid(ls=":") ax.plot(x, y, "o", color="tab:brown", label="$y$") ax.legend(loc="best") ax.set_title("Quadratic model") plt.savefig(FIGURE_DIR + "quadratic_model_quadratic.png") plt.show() fig_num += 1 # print a summary print(results.summary()) # The results can also be exported to LaTeX. # do either latex_summary = results.summary().as_latex() # alternatively for each group separately for table in results.summary().tables: print(table.as_latex_tabular()) ############################################################################### # Generate LateX tables ############################################################################### # write a string to a file with open(REPORT_DIR + "summary.tex", "w") as f: f.write(latex_summary) # create a DataFrame for the results estimation_results_df = results_summary_to_dataframe(results) # give a name to the table table_data_file = REPORT_DIR + "df_table.tex" # create a latex file with the table information data_frame_to_latex_table_file(table_data_file, estimation_results_df) # an alternative to only send the coefficients part of the results # to a LaTeX table data_frame_to_latex_table_file( REPORT_DIR + "results_coef.tex", results.summary2().tables[1] )