#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empirical assignment 2024 - 2025

STARTER FILE

Loads the assignment data set, draws a reproducible 75% subsample using a
group-specific seed, writes summary statistics for the selected rows to a
LaTeX table, and prints a header for each assignment question.
"""

import os

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import pandas as pd

from helper import print_question, data_frame_to_latex_table_file

# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Empirical assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Set the folders for output of graphs and tables
# -----------------------------------------------------------------------------

# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.

# for the data
data_dir = '../data/'
os.makedirs(data_dir, exist_ok=True)

# for the figures
figure_dir = '../figures/'
os.makedirs(figure_dir, exist_ok=True)

# for the latex document
report_dir = '../report/'
os.makedirs(report_dir, exist_ok=True)

# -----------------------------------------------------------------------------
# Here we set the seed for our group to your group number
# -----------------------------------------------------------------------------

# first birthday
bd_1 = 303

# second birthday
bd_2 = 309

group_seed = bd_1 * bd_2

# set the seed of the legacy global numpy random state
np.random.seed(group_seed)

# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------

# set the seed and the random number generator for reproducible results
rng = np.random.default_rng(group_seed)

# setting for output printing
print_line_length = 90
print_line_start = 5

# number of x points
num_points = 60

# -----------------------------------------------------------------------------
# Load data 2.1
# -----------------------------------------------------------------------------

data_full = pd.read_stata(data_dir + 'assignment2025.dta')

num_obs = int(0.75 * data_full.shape[0])

# select 75% of the observations randomly, without replacement
# (the rng uses your seed)
observations = rng.choice(len(data_full), num_obs, replace=False)

# select the observations for your group
data = data_full.iloc[observations, :].copy()
# data = data_full

# -----------------------------------------------------------------------------
# Descriptive statistics 2.1
# -----------------------------------------------------------------------------
print_question('Question 2.1: Descriptive Statistics')

# remove the columns that are left out of the summary statistics
data = data.drop(
    columns=['fail', 'urban', 'unearn', 'househ', 'amtland', 'unearnx']
)

# keep only the rows with paidwork == 1; the explicit copy makes the column
# assignments below write to an independent frame instead of a slice
# (avoids pandas' SettingWithCopyWarning)
data = data.loc[data['paidwork'] == 1].copy()

# school is the sum of yprim and ysec
data['school'] = data['yprim'] + data['ysec']

# wage is recovered from lwage by exponentiating
data['wage'] = np.exp(data['lwage'])

# compute the summary statistics
data_summary = data.describe()

# print to screen
print(data_summary.T)

# export the summary statistics to a file
data_frame_to_latex_table_file(report_dir + 'summary_stats.tex',
                               data_summary.T)

# -----------------------------------------------------------------------------
# Question 2.2
# -----------------------------------------------------------------------------
print_question('Question 2.2: Plot histogram wage / lwage')

# -----------------------------------------------------------------------------
# Question 2.3
# -----------------------------------------------------------------------------
print_question('Question 2.3: Sample correlations')

# -----------------------------------------------------------------------------
# Question 2.4
# -----------------------------------------------------------------------------
print_question('Question 2.4: Estimate lwage model')

# explanatory variables for question 2.4
# x_vars_24 = data[['smcity', 'AA']]  # TODO

# add a constant
# X_24 = sm.add_constant(x_vars_24)  [uncomment]

# set-up model
# model_24 = sm.OLS(,)  #TODO

# estimate the model
# results_24 = model_24.  #TODO

# print the OLS output
# print(results_24.summary())  [uncomment]

# export the coefficients part of the summary to a table
# data_frame_to_latex_table_file(report_dir + 'results_24.tex',
#                                results_24.summary2().tables[1])

# -----------------------------------------------------------------------------
# Question 2.5
# -----------------------------------------------------------------------------
print_question('Question 2.5: Adding school')

# -----------------------------------------------------------------------------
# Question 2.6
# -----------------------------------------------------------------------------
print_question('Question 2.6: Adding age')

# -----------------------------------------------------------------------------
# Question 2.7
# -----------------------------------------------------------------------------
print_question('Question 2.7: Create the woman variable')

# -----------------------------------------------------------------------------
# Question 2.8
# -----------------------------------------------------------------------------
print_question('Question 2.8: lwage model')

# -----------------------------------------------------------------------------
# Question 2.9
# -----------------------------------------------------------------------------
print_question('Question 2.9: Test ethnicity')

# -----------------------------------------------------------------------------
# Question 2.10
# -----------------------------------------------------------------------------
print_question('Question 2.10: Estimate models separately')

# -----------------------------------------------------------------------------
# Question 2.11
# -----------------------------------------------------------------------------
print_question('Question 2.11: Predict lwage')

# -----------------------------------------------------------------------------
# Question 2.12
# -----------------------------------------------------------------------------
print_question('Question 2.12: Estimate model with squared terms')

# -----------------------------------------------------------------------------
# Question 2.13
# -----------------------------------------------------------------------------
print_question('Question 2.13: Squared terms')

# -----------------------------------------------------------------------------
# Question 2.14
# -----------------------------------------------------------------------------
print_question('Question 2.14: Add interaction terms')

# -----------------------------------------------------------------------------
# Question 2.15
# -----------------------------------------------------------------------------
print_question('Question 2.15: Find your favourite model')