#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Empirical assignment 2024 - 2025

STARTER FILE
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm

from helper import print_question, data_frame_to_latex_table_file

# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Start of Script for Empirical assignment Econometrics
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Set the folders for the data and for output of graphs and tables
# -----------------------------------------------------------------------------

# Each folder is created when it is missing. exist_ok=True turns the call into
# a no-op when the folder already exists, so no separate os.path.exists()
# check (and no check-then-create race) is needed.

# for the data
data_dir = '../data/'
os.makedirs(data_dir, exist_ok=True)

# for the figures
figure_dir = '../figures/'
os.makedirs(figure_dir, exist_ok=True)

# for the latex document
report_dir = '../report/'
os.makedirs(report_dir, exist_ok=True)

# -----------------------------------------------------------------------------
# Here we set the seed for your group
# -----------------------------------------------------------------------------


# first birthday
bd_1 = 3112
# second birthday
bd_2 = 3112

# the group seed is the product of the two birthday numbers
group_seed = bd_1 * bd_2

# set the seed of NumPy's global (legacy) random state
np.random.seed(group_seed)

# -----------------------------------------------------------------------------
# set the random number generator and seed
# -----------------------------------------------------------------------------

# random number generator seeded with the group seed for reproducible results
rng = np.random.default_rng(group_seed)

# settings for output printing
print_line_length = 90
print_line_start = 5

# number of x points
num_points = 60

# -----------------------------------------------------------------------------
# Load data 2.1
# -----------------------------------------------------------------------------

# read the complete Stata data set from the data folder
data_full = pd.read_stata(os.path.join(data_dir, 'assignment2025.dta'))

# size of the group sample: 75% of all observations, rounded down
num_obs = int(0.75 * data_full.shape[0])
# select 75% of the observations randomly, without replacement
# (the rng uses your seed)
observations = rng.choice(len(data_full), num_obs, replace=False)
# select the observations for your group; the copy keeps later changes to
# `data` from touching `data_full`
data = data_full.iloc[observations, :].copy()

# to work with the full data set instead, uncomment the next line
# data = data_full

# -----------------------------------------------------------------------------
# Descriptive statistics 2.1
# -----------------------------------------------------------------------------

print_question('Question 2.1: Descriptive Statistics')

# compute the summary statistics
# data_summary = TODO

# print to screen
# print(data_summary.T)  [uncomment]

# export the summary statistics to a file
# data_frame_to_latex_table_file(report_dir + 'summmary_stats.tex',
#                                data_summary.T)  [uncomment]

# -----------------------------------------------------------------------------
# Question 2.2
# -----------------------------------------------------------------------------

print_question('Question 2.2: Plot histogram wage / lwage')


# -----------------------------------------------------------------------------
# Question 2.3
# -----------------------------------------------------------------------------

print_question('Question 2.3: Sample correlations')


# -----------------------------------------------------------------------------
# Question 2.4
# -----------------------------------------------------------------------------

print_question('Question 2.4: Estimate lwage model')

# The lines below are a commented-out template: complete every TODO and
# remove the leading '#' from the lines marked [uncomment].

# explanatory variables for question 2.4
# x_vars_24 = data[['smcity', 'AA']]  # TODO

# add a constant
# X_24 = sm.add_constant(x_vars_24)  [uncomment]

# set-up model
# model_24 = sm.OLS(,)  # TODO

# estimate the model
# results_24 = model_24.  # TODO

# print the OLS output
# print(results_24.summary())  [uncomment]

# export the coefficients part of the summary to a table
# data_frame_to_latex_table_file(report_dir + 'results_24.tex',
#                                results_24.summary2().tables[1])

# -----------------------------------------------------------------------------
# Question 2.5
# -----------------------------------------------------------------------------

print_question('Question 2.5: Adding school')

# -----------------------------------------------------------------------------
# Question 2.6
# -----------------------------------------------------------------------------

print_question('Question 2.6: Adding age')

# -----------------------------------------------------------------------------
# Question 2.7
# -----------------------------------------------------------------------------

print_question('Question 2.7: Create the woman variable')

# -----------------------------------------------------------------------------
# Question 2.8
# -----------------------------------------------------------------------------

print_question('Question 2.8: lwage model')

# -----------------------------------------------------------------------------
# Question 2.9
# -----------------------------------------------------------------------------

print_question('Question 2.9: Test ethnicity')

# -----------------------------------------------------------------------------
# Question 2.10
# -----------------------------------------------------------------------------

print_question('Question 2.10: Estimate models separately')

# -----------------------------------------------------------------------------
# Question 2.11
# -----------------------------------------------------------------------------

print_question('Question 2.11: Predict lwage')

# -----------------------------------------------------------------------------
# Question 2.12
# -----------------------------------------------------------------------------

print_question('Question 2.12: Estimate model with squared terms')

# -----------------------------------------------------------------------------
# Question 2.13
# -----------------------------------------------------------------------------

print_question('Question 2.13: Squared terms')


# -----------------------------------------------------------------------------
# Question 2.14
# -----------------------------------------------------------------------------

print_question('Question 2.14: Add interaction terms')


# -----------------------------------------------------------------------------
# Question 2.15
# -----------------------------------------------------------------------------

print_question('Question 2.15: Find your favourite model')