Professional Documents
Culture Documents
% A, B and C are not defined in this excerpt; they must already exist in
% the workspace.
% A non-scalar condition counts as true only when every element is true,
% so this branch runs only if A and B agree element by element.
if (A==B)
fprintf('A==B\n');
end
% For arrays, (A==C) and (A~=C) can both fail the all-elements rule (some
% elements equal, some not), which is how the final else branch is reached.
if (A==C)
fprintf('A==C\n');
elseif (A~=C)
fprintf('A~=C\n');
else
fprintf('~(A==C) && ~(A~=C) !?\n');
end
%% Explanation
% When the expression in an if statement is not a scalar, Matlab treats
% it as true only if all elements of the array are true.
Efficiency.m
%% Demonstrating Efficiency
% Compare the CPU time of an explicit loop with the vectorized inner
% product x'*y. Both variants are repeated reps times.
reps = 100;
x = rand(100000,1);
y = rand(size(x));

% Loop version: accumulate the inner product one element at a time.
tloop = cputime;
for r = 1:reps
    s = 0;
    for i = 1:numel(x)
        s = s + x(i)*y(i);
    end
end
% Take the elapsed time once, after all repetitions. Doing this inside the
% repetition loop would re-subtract the previous result on every pass and
% leave a meaningless value in tloop.
tloop = cputime - tloop;

% Vectorized version: a single matrix product per repetition.
tvec = cputime;
for r = 1:reps
    svec = x'*y;
end
tvec = cputime - tvec;
%% Preallocate arrays
reps = 500;
% Version 1: rows are assigned into a one at a time without allocating it
% first, so (if a does not already exist) it is enlarged on every pass.
tic
for r=1:reps
a(r,:) = rand(1,1000);
end
toc
clear a;
% Version 2: same fill, but a is created at its final size before the loop.
tic
a = zeros(reps,1000);
for r=1:reps
a(r,:) = rand(1,1000);
end
toc
% Third timing: multiplies each row of x by y inside a double loop.
% NOTE(review): x and y are not defined in this section; if they are still
% the 100000-by-1 vectors from the efficiency demo, each x(i,:)*y is a
% scalar times a long vector and this loop is extremely slow. Confirm which
% x and y were intended.
tic
for r=1:reps
for i=1:size(x,1)
s = x(i,:)*y;
end
end
toc
funWithInv.m
% Solve the normal equations xx*b = xy four different ways; column j of
% bhat holds the solution from method j. K, x, xx and xy must already be
% in the workspace.
bhat = zeros(K,4);

% Methods 1-3, from slow to fast: matrix power, inv(), backslash.
tic;
bhat(:,1) = (xx)^(-1)*xy;
toc;

tic;
bhat(:,2) = inv(xx)*xy;
toc;

tic;
bhat(:,3) = xx \ xy;
toc;

% Method 4, the qr-way: slow but can be more accurate. It builds the
% inverse of x'*x from the triangular factor of x.
tic;
[q, r] = qr(x, 0);
s = inv(r);
ixx = s*s';
bhat(:,4) = ixx*xy;
toc;

% Summed absolute residual of the normal equations for each method
% (notice that \ gives the smallest error).
sum(abs(xx*bhat - repmat(xy, 1, 4)))
clear bhat;
%% Illustration of usefulness of QR
% make inv and \ fail because of inaccuracy
% Nearly collinear design: the two columns of x differ only through d.
d = 1e-8;
x = [1, 1; d, 0; 0, d]
y = [1; 2; 3];

% Normal-equation pieces.
xx = x'*x;
xy = x'*y;

% Three solutions that all go through xx.
bbad1 = xx^(-1)*xy
bbad2 = inv(xx)*xy
bbad3 = xx \ xy

% QR-based solution: factor x itself and build the inverse from r.
[q, r] = qr(x, 0);
s = inv(r);
ixx = s*s';
bbad4 = ixx*xy

%% Explanation
% The problem with using inv() and \ is that they require first computing
% x'*x, and computing x'*x involves adding a relatively big number, 1, to
% a tiny number, 1e-16. Because of rounding, 1 + 1e-16 = 1, and x'*x
% becomes singular.
xx
%%
% Making d slightly bigger reduces the problem: d^2 is now large enough to
% survive being added to 1.
d = 2e-8;
x = [1, 1; d, 0; 0, d]
y = [1; 2; 3];

% Normal-equation pieces (xx is displayed this time).
xx = x'*x
xy = x'*y;

% Solutions that go through xx.
bbad1 = xx^(-1)*xy
bbad2 = inv(xx)*xy
bbad3 = xx \ xy

% QR-based solution.
[q, r] = qr(x, 0);
s = inv(r);
ixx = s*s';
bbad4 = ixx*xy
Diffs-in-diff
Main-script
%% 14.382 Problem Set 2, Question 4
% Paul Schrimpf, March 2008
%
% Recreates main results of Bertrand, Duflo, and Mullainathan (2004).
% Also, applies bias and size corrected estimator of Hausman and
% Kuersteiner (2007) to this setup.
%
% See also SIMULATE, OLSDD, FGLSDD, AGGDATA
%% Load Data
% Data is from the CPS-MORG. Some pre-processing (mainly merging of
% various years into single file) was done in stata.
% Here, we read the data from a .csv file and save it as a structure.
% The structure that we choose to save our data in is meant to be a
% generic diff in diff setup with 2 level clustering. The fields of data
% are:
%
% * y = outcome variable, size number of observations by 1
% * x = regressors, size number of observations by number of regressors
% * t = time index, size number of observations by 1
% * s = state index, size number of observations by 1
% * p = policy indicator, size number of observations by 1
%
% Start from an empty workspace and turn the profiler on for the whole run.
clear;
profile on;
% Whenever importing data, an easy solution is to use "Import Data" under
% the file menu. It gives you a GUI to import data and will
% automatically generate code.
% Read the comma-delimited file, treating the first line as a header.
cps = importdata('cpsSample.csv',',',1);
% break data into nicely named variables
% Each column of cps.data becomes a workspace variable named after its
% column header.
% NOTE(review): eval executes whatever text is in the CSV header row. That
% is fine for a trusted local file, but a header that is not a valid
% identifier will fail or run arbitrary code; consider a struct with
% dynamic field names if the input is not fully trusted.
for c=1:size(cps.data,2)
eval([cps.colheaders{c} '= cps.data(:,c);']);
end
%% Run Simulations
% Just loops through simulations. All combinations of N number of states
% with T time periods are run S times. Each simulation is run with both
% a 0 and 2% policy effect. Each time, the estimated policy effect, its
% variance, and whether or not the null hypothesis of 0 effect is
% rejected at a 5% level is saved. For each simulation we estimate the
% effect using OLSDD with and without clustering and using FGLS with and
% without bias correction.
% NOTE(review): the simulation loop described above does not appear in this
% excerpt; only the profiler report call remains.
profile viewer;
%% Print Results
% Prints the results in latex formatted tables.
% print results
% Write the simulated rejection rates to sim.tex as rows of a LaTeX table.
% N, T, testOLS and testGLS must already be in the workspace; they are
% produced by the simulation step, which is not part of this section.
of = fopen('sim.tex','w');
if of < 0
    error('could not open sim.tex for writing');
end

% Header row. The percent sign is written as \\%% so that fprintf emits a
% literal "\%": a bare % would be read by fprintf as the start of a
% conversion and, in the .tex file, as the start of a comment.
fprintf(of,['Estimator & N & T & Rejection Rate No Effect & ' ...
            'Rejection Rate 2\\%% Effect \\\\ \\hline\n']);

estOLS = {'OLS' ...
          'OLS state-year clustered' ...
          'OLS state clustered'};
estGLS = {'FGLS' ...
          'FGLS bias-corrected' ...
          'FGLS bias and size corrected' ...
          'OLS aggregate'};

% One table row: estimator name, N, T, rejection rate without a policy
% effect, rejection rate with the policy effect.
rowFmt = '%s & %d & %d & %7.3g & %7.3g \\\\ \n';

for n=length(N):(-1):1
    for t=length(T):(-1):1
        if (T(t)>N(n)-1) % too few T for N
            continue;
        end
        % OLS variants
        for i=1:3
            fprintf(of,rowFmt, ...
                    estOLS{i},N(n),T(t),mean(testOLS(1,n,t,:,i)), ...
                    mean(testOLS(2,n,t,:,i)));
        end
        % 'OLS aggregate' is stored in slot 4 of the GLS results but is
        % printed next to the other OLS rows.
        i = 4;
        fprintf(of,rowFmt, ...
                estGLS{i},N(n),T(t),mean(testGLS(1,n,t,:,i)), ...
                mean(testGLS(2,n,t,:,i)));
        % FGLS variants
        for i=1:3
            fprintf(of,rowFmt, ...
                    estGLS{i},N(n),T(t),mean(testGLS(1,n,t,:,i)), ...
                    mean(testGLS(2,n,t,:,i)));
        end
        fprintf(of,'\\hline \n');
    end
end

% Release the file handle so the table is flushed to disk.
fclose(of);
olsDD.m
%% OLS for diff-in-diff
% Estimates diff in diff by OLS. Calculates the variance and reports
% whether we reject
%
% $$H_0: \beta=0$$
%
% at 5% level in three ways:
%
% # usual homoskedastic OLS variances
% # clustered on data.t,data.s
% # clustered on data.s
%
% Returns the variance and test results in vectors test and var
%
% Takes as input a data structure with fields:
%
% * y = outcome variable, size number of observations by 1
% * x = regressors, size number of observations by number of regressors
% * t = time index, size number of observations by 1
% * s = state index, size number of observations by 1
% * p = policy indicator, size number of observations by 1
%
% See also FGLSDD
function [b, test, var] = olsDD(data)
  % Inputs:
  %   data - struct; this function reads data.y, data.p, data.x, data.s,
  %          data.t, and also data.T and data.S.
  % Outputs:
  %   b    - first OLS coefficient (the one on data.p)
  %   test - 1x3 logical, true where |b/se| exceeds the 5% critical value
  %          (homoskedastic, state-time clustered, state clustered)
  %   var  - 1x3 variance of b under the same three estimators

  % Two-sided 5% critical value of the standard normal distribution.
  crit = 1.959963984540054;

  test = false(1,3);
  var = zeros(1,3);

  % form X
  % NOTE(review): data.T and data.S are not among the documented fields;
  % presumably they hold time and state dummies -- confirm with the caller.
  X = [data.p data.x data.T data.S];
  %%
  % Note that inv should be avoided whenever possible.
  % See
  % <http://blogs.mathworks.com/loren/2007/05/16/purpose-of-inv/
  % purpose of inv> regarding the perils of using inv. Here we need
  % inv(X'*X) to compute the variance of beta, but maybe there's a better
  % way.
  iXX = inv(X'*X);
  % Check every element: any() on a matrix works column by column, and an
  % if on the resulting vector is true only when all columns contain a NaN.
  if any(isnan(iXX(:)))
    warning('olsDD:singularXX', ...
            'inv(X''*X) contains NaN; estimates are not reliable.');
  end
  % compute b
  beta = iXX*(X'*data.y);
  b = beta(1);
  % compute residuals
  e = data.y - X*beta;
  % ols test
  n = length(data.y);
  k = length(beta);
  V = e'*e*iXX/(n - k);
  test(1) = abs(b/sqrt(V(1,1)))>crit;
  var(1) = V(1,1);
  %% Clustered on state, year
  %
  % $$ V = (X'X)^{-1} \left( \sum_{s,t} X_{s,t}'\hat{e}_{s,t}
  % \hat{e}_{s,t}' X_{s,t} \right) (X'X)^{-1} $$
  %
  state=unique(data.s);
  time=unique(data.t);
  V = zeros(size(iXX));
  for s=1:length(state)
    for t=1:length(time)
      g = data.s==state(s) & data.t==time(t);
      % score of the cell; its outer product is the cell's contribution
      u = X(g,:)'*e(g);
      V = V + u*u';
    end
  end
  V = iXX*V*iXX;
  test(2) = abs(b/sqrt(V(1,1)))>crit;
  var(2) = V(1,1);
  %% Clustered on state
  %
  % $$ V = (X'X)^{-1} \left( \sum_{s} X_{s}'\hat{e}_{s} \hat{e}_{s}'
  % X_{s} \right) (X'X)^{-1} $$
  %
  V = zeros(size(iXX));
  for s=1:length(state)
    g = data.s==state(s);
    u = X(g,:)'*e(g);
    V = V + u*u';
  end
  V = iXX*V*iXX;
  test(3) = abs(b/sqrt(V(1,1))) > crit;
  var(3) = V(1,1);
end
fglsDD.m
%% FGLS for diffs in diffs.
% Estimates using FGLS and reports whether we
% reject
%
% $$H_0: \beta=0$$
%
% at 5% level in three ways:
%
% # plain FGLS allowing arbitrary autocorrelation
% # Hausman and Kuersteiner bias correction
% # bias corrected with size corrected test
%
% Takes as input a data structure with fields:
%
% * y = outcome variable, size number of observations by 1
% * x = regressors, size number of observations by number of regressors
% * t = time index, size number of observations by 1
% * s = state index, size number of observations by 1
% * p = policy indicator, size number of observations by 1
%
% As in BDM and HK, we work with data aggregated to state-time cells,
% this makes the computations simpler. The justification is that there
% are so many within state-time observations that we can ignore
% uncertainty from that. We require that the input data has already been
% aggregated. This can be accomplished with aggData()
%
% See also AGGDATA, OLSDD
%% ols
% NOTE(review): this is an excerpt. The function line and the definitions
% of x, y, T, S, crit, M1t, xf and data are not visible here.
% Plain OLS of y on x. Its results go in slot 4 of bGLS/vGLS/testGLS, the
% slot the main script labels 'OLS aggregate'.
ixx = inv(x'*x);
betaP = ixx*x'*y;
u = y - x*betaP;
bGLS(4) = betaP(1);
% Residual variance uses length(u) as the divisor (no degrees-of-freedom
% adjustment).
V = u'*u/length(u)*ixx;
vGLS(4) = V(1);
% Slots not assigned below keep the value 0 from this initialization.
testGLS = zeros(1,4);
testGLS(4) = abs(bGLS(4)/sqrt(vGLS(4))) > crit;
%% Bias-corrected FGLS
% see Hausman and Kuersteiner
% Column j of yt holds the j-th run of T consecutive entries of y
% (presumably one state's time series -- confirm the ordering of y).
yt = reshape(y,T,S);
v = zeros(S,2);
% must be careful about defining v to make it full rank
% Row s of v is [1, flag], where flag is 1 if any row of x belonging to
% state s has a 1 in its first column.
for s=1:S
v(s,:) = [1 any(x(data.s==s,1)==1)];
end
% Mv is the residual-maker (annihilator) matrix of v.
Mv = eye(S) - v*inv(v'*v)*v';
Stilde = yt*Mv*yt'/trace(Mv);
% B is the T-by-T identity with its first row removed.
B = eye(T);
B = B(2:T,:);
SigHat = B*M1t*Stilde*M1t*B';
% Apply the same per-block transformation B*M1t to every one of the S
% blocks of the stacked data.
tran = kron(eye(S),B*M1t);
% First column of xf is kept separate (ups); the remaining columns are z.
z = tran*xf(:,2:size(xf,2));
ups = tran*xf(:,1);
ybc = tran*y;
iSig = inv(SigHat);
iO = kron(eye(S),iSig);
% Weighting matrix with z partialled out.
Oz = iO - iO*z*inv(z'*iO*z)*z'*iO;
V = inv(ups'*Oz*ups);
% The same estimate and variance are stored in slots 2 and 3.
bGLS(2:3) = V*ups'*Oz*ybc;
t1 = bGLS(2)/sqrt(V);
testGLS(2) = abs(t1) > crit;
vGLS(2:3) = V;
% NOTE(review): bGLS(1), vGLS(1), testGLS(1) and testGLS(3) are not set in
% the visible lines.
simulate.m
%% Simulate a diff in diff data set
% Draws a sample of N states with T time periods from original data orig.
% assigns half the states to be treated with a policy effect of gamma.
%
% The data structure is meant to be a
% generic diff in diff setup with 2 level clustering. The fields of data
% are:
%
% * y = outcome variable, size number of observations by 1
% * x = regressors, size number of observations by number of regressors
% * t = time index, size number of observations by 1
% * s = state index, size number of observations by 1
% * p = policy indicator, size number of observations by 1
%
% Input arguments are:
%
% * orig = original data structure
% * N = number of states to draw in sample
% * T = number of periods to draw in sample
% * gamma = effect of policy
%
% States are drawn with replacement. Time is drawn as a contiguous block
% of length T. The policy is randomly chosen to affect half of the
% states beginning at t ~ U(0.2*T,0.8*T). When the policy is in effect,
% y is changed to (1+gamma)*y
%
% See also OLSDD, FGLSDD, AGGDATA
function simd = simulate(orig,N,T,gamma)
  % Only the input validation is present in this listing: the time index
  % of the original data must consist of exactly the values 1, 2, ..., up
  % to the number of distinct periods.
  stateIds = unique(orig.s);
  periodIds = sort(unique(orig.t));
  expectedIds = (1:length(periodIds))';
  periodsAreConsecutive = isequal(expectedIds, periodIds);
  if ~periodsAreConsecutive
    error('simulate requires t go from 1 to T in the original data');
  end
end
Metropolis-Hastings code
%% A simple Markov Chain
% Uses Metropolis-Hastings to draw from a mixture of bivariate normal
% densities.
% Uses a N(0,tau I) candidate density. Runs the chain for various
% values of tau to compare results.
% From 14.384 2008 ps5
clear;
% set parameters
mu = {[1.5 1.5], [-1.5 -1.5]};            % component means
K = length(mu{1});                         % dimension of the target
sig = {[1 0.5; 0.5 1], [1 0.5; 0.5 1]};    % component covariances
p = [0.5,0.5];                             % mixture weights
% target density: two-component mixture of bivariate normals
f = @(x) mvnpdf(x,mu{1},sig{1})*p(1) + mvnpdf(x,mu{2},sig{2})*p(2);
n = 2000;                                  % chain length
tau = [0.1^2, 1, 8^2];                     % candidate variances to compare
theta = zeros(length(tau),n,K);            % theta(t,i,:) = draw i of chain t
for t=1:length(tau)
    % Running mean and variance of the chain, one row per iteration.
    % Allocated at full size for every chain instead of being grown row
    % by row and carried over from the previous value of tau.
    muTheta = zeros(n,K);
    varTheta = zeros(n,K);

    % starting point of the chain
    theta(t,1,:) = [10,-10];
    fold = f(squeeze(theta(t,1,:))');
    muTheta(1,:) = reshape(mean(theta(t,1:1,:),2),1,K);
    varTheta(1,:) = reshape(var(theta(t,1:1,:),0,2),1,K);
    accept = 0;
    for i=2:n
        % draw candidate centred on the current point
        x = mvnrnd(squeeze(theta(t,i-1,:))',eye(K)*tau(t));
        u = rand();
        fnew = f(x);
        % accept with probability min(1, fnew/fold); the candidate density
        % is symmetric, so no proposal ratio appears
        if (u<fnew/fold)
            theta(t,i,:) = x;
            fold = fnew;
            accept = accept+1;
        else
            theta(t,i,:) = theta(t,i-1,:);
        end
        muTheta(i,:) = reshape(mean(theta(t,1:i,:),2),1,K);
        varTheta(i,:) = reshape(var(theta(t,1:i,:),0,2),1,K);
    end

    % contour plot of the target with the draws overlaid on a second,
    % transparent set of axes that shares the same limits
    figure;
    ezcontour(@(t0,t1) f([t0 t1]),[-4,4])
    h1 = gca;
    h2 = axes('Position',get(h1,'Position'));
    plot(squeeze(theta(t,:,1)),squeeze(theta(t,:,2)),'.')
    set(h2,'Ylim',get(h1,'YLim'),'Color','none')
    set(h2,'XLim',get(h1,'XLim'),'Layer','top','Color','none')

    % n-1 candidates were proposed (iterations 2..n), so that is the
    % denominator of the acceptance rate
    fprintf('acceptance rate=%g\n',accept/(n-1));

    figure;
    plot(muTheta)
    figure;
    plot(varTheta)
end
https://ocw.mit.edu/courses/economics/14-384-time-series-analysis-fall-2013/recitations/
AMPL Code for simulating and estimating Aguirregabiria and Mira style entry game:
# fixed parameters
set markets;
# NOTE(review): M is declared but not referenced in the visible model;
# presumably the number of markets -- confirm.
param M;
# number of distinct market-size levels, and the size value of each level
param nSize;
param sizes{1..nSize};
# number of firms in each market
param nFirm{markets};
# number of states in market m: every size level combined with every 0/1
# pattern over the market's firms
param nState{m in markets} := nSize*2^nFirm[m];
# firm and state index sets are sized for the largest market
set firms := 1..(max{m in markets} nFirm[m]);
set states := 1..(max{m in markets} nState[m]);
# state[m,s,i] is a 0/1 value for firm i in state s of market m; it has no
# defining expression here and is assigned from outside the model
param state{m in markets,s in 1..nState[m],i in 1..nFirm[m]};
# state[m,s,:] = s/nSize in binary
# market size in state s: size levels cycle fastest as s increases
param size{m in markets,s in 1..nState[m]} :=
sizes[((s-1) mod nSize)+1];
set actions := {0,1};
param eulerMascheroni := 0.577215665;
# observed states and actions
param T;
set time := 1..T;
param oState{time, markets};
# NOTE(review): the AMPL reference lists "in <set>" for restricting param
# values and "within" for set declarations; confirm this line is accepted.
param oAction{time, markets, firms} within actions;
# estimated parameters
# P[m,i,s,a]: P(firm i in state s takes action a)
# (kept on its own comment line: when this note wrapped onto a second,
# uncommented line the model no longer parsed)
var P{markets,firms,states,actions} >=0, <=1;
var V{markets,firms,states}; # Value of state s for firm i
var FP{markets,states,states}>=0,<=1; # P(state i|state j)
# transition probabilities between market-size levels
var sizeTran{markets,1..nSize,1..nSize}>=0,<=1;
# profit parameters
var entryCost;
var fixedCost;
var thetaR;
var sigma;
var beta>=0,<=1;
# profit function
# pi is a defined variable. It is multiplied by a, so it is 0 for action 0.
# For action 1 it is: minus the entry cost if firm i is not active in
# state s (state[m,s,i]==0), minus the fixed cost, plus a revenue term.
# The revenue term sums over states o in which firm i is active; each term
# is the product over the other firms j of P[m,j,s,state[m,o,j]], divided
# by (2 + number of other active firms in o)^2.
var pi{m in markets,i in 1..nFirm[m],s in 1..nState[m],a in actions} =
a*( (if state[m,s,i]==0 then -entryCost else 0)
-fixedCost +
thetaR*size[m,s]*sum{o in 1..nState[m]: state[m,o,i]=1}
(prod{j in 1..nFirm[m] diff {i}} P[m,j,s,state[m,o,j]]) /
(2 + sum{j in 1..nFirm[m] diff {i}} state[m,o,j])^2 );
# E[epsilon|action,state]
# The max(...,1e-307) keeps log() away from a zero probability.
# NOTE(review): sigma scales only the log term, not eulerMascheroni;
# confirm that is intended when sigma is not 1.
var ep{m in markets,i in 1..nFirm[m],s in 1..nState[m],a in actions} =
a*(eulerMascheroni - sigma*log(max(P[m,i,s,a],1e-307)));
# Log-likelihood of the observed state transitions, using
# FP[m, state at t, state at t-1].
# NOTE(review): the summand does not depend on i, so each transition is
# counted nFirm[m] times, and oAction does not enter; confirm against the
# full model.
maximize likelihood:
sum{t in 2..T, m in markets, i in 1..nFirm[m]}
log(max(FP[m,oState[t,m],oState[t-1,m]] , 1e-307) );
Data_file
param M := 1;
# two market-size levels
param nSize := 2;
# a single market, labelled 1
set markets := 1;
# index/value pairs: sizes[1] = 2, sizes[2] = 4
param sizes := 1 2 2 4;
# index/value pair: market 1 has 2 firms
param nFirm :=
1 2;
# number of time periods
param T := 50;
Command file:
/* Script for solving, simulating, and estimating.
*
*/
# start from a clean session, then load the model and data files
reset;
model ag.mod;
data ag.dat;
option solver "./snopt"; # snopt,minos, donlp2, loqo
option snopt_options 'timing 1 outlev=3 iterations=100000';
option randseed 0; # set random seed based on system time
#option solver minos;
#option minos_options 'timing 1 outlev 3';
# set up state
# Fill state[m,s,i] with the binary digits of (s-1) div nSize: firm 1 gets
# the lowest bit, firm 2 the next, and so on. temp is scratch storage.
param temp;
for{m in markets, s in 1..nState[m]}
{
let temp:= (s-1) div nSize;
for{i in 1..nFirm[m]}
{
let state[m,s,i] := temp mod 2;
let temp := temp div 2;
}
}
display state,size;
# set parameters & solve for equilibrium
# Hold the structural parameters at known values for the equilibrium solve.
fix fixedCost := 1;
fix entryCost := 1;
fix sigma := 1;
fix thetaR := 1;
fix beta := 0.8;
#fix{m in markets, z in 1..nSize-1, w in 1..nSize} sizeTran[m,z,w] := 1/nSize;
# Only the two diagonal size-transition entries of market 1 are fixed here.
# NOTE(review): the off-diagonal entries are not set in the visible lines
# (the commented-out statement below is cut off); presumably a constraint
# in the full model determines them -- confirm.
fix sizeTran[1,1,1] := 0.75;
fix sizeTran[1,2,2] := 0.75;
#let{m in markets, w in 1..nSize} sizeTran[m,nSize,w] := 1 - sum
# Solve with the likelihood objective dropped.
drop likelihood;
solve;
# Pause: wait for a number typed by the user before continuing.
printf "\nEnter any number to continue\n";
read temp <-;
display P;
display state;
display V,size,pi,ep;
# update size
# Inverse-CDF draw of the next size level: step through the candidate
# levels, accumulating sizeTran[m, candidate, current level], until the
# running total reaches the uniform draw. The current level is recovered
# from the previous state index slast.
let temp := Uniform(0,1);
let csum := 0;
let sizenew := 0;
repeat while (csum < temp) {
    let sizenew := sizenew + 1;
    let csum := csum + sizeTran[m,sizenew,((slast-1) mod nSize)+1];
};
# update state
# Rebuild the state index from this period's actions (firm i contributes
# bit i-1) and the new size level, matching the encoding used for state[].
let slast := 0;
for{i in 1..nFirm[m]}
{
    let slast := slast + oAction[t,m,i]*2^(i-1);
}
# this statement needs its terminating semicolon before the closing brace
let slast := slast*nSize+sizenew;
} # end loop over time
} # end loop over markets
# check simulations
# pState[m,s]: share of the T periods in which market m was observed in
# state s.
param pState{m in markets, s in states} :=
1/T * sum{t in time} (if oState[t,m]==s then 1 else 0);
display pState;
# Psim[m,i,s,a]: among the periods spent in state s, the share in which
# firm i took action a. The 1.0e-307 in the denominator avoids a division
# by zero for states that were never visited.
param Psim{m in markets, i in firms, s in states, a in actions} :=
(sum{t in time} (if (oAction[t,m,i]==a && oState[t,m]==s) then 1 else 0))
/ (1.0e-307 + sum{t in time} (if oState[t,m]==s then 1 else 0));
# compare the solved choice probabilities with the simulated frequencies
display P,Psim;
######################################################################
# estimation
# Free the parameters to be estimated; sigma and beta stay fixed.
unfix entryCost;
unfix fixedCost;
unfix thetaR;
#unfix sigma;
#unfix beta;
unfix sizeTran;
# bring back the likelihood objective that was dropped for the
# equilibrium solve
restore likelihood;
# make initial values wrong
#let beta := 0.9;
let fixedCost := 1;
let entryCost := 1;
let thetaR := 1;
# initial guesses - equal prob
# (every line of the disabled statements below carries its own '#';
# a continuation line left uncommented would be parsed as a statement)
#let{m in markets, s in 1..nState[m], r in 1..nState[m]-1} FP[m,s,r] :=
# 1/nState[m];
#let{m in markets, s in 1..nState[m], i in 1..nFirm[m], a in {1}}
# P[m,i,s,a] := Uniform(0.4,0.6);
#let{m in markets, s in 1..nState[m], i in 1..nFirm[m], a in {1}}
# P[m,i,s,0] := 1-P[m,i,s,1];
solve;
display entryCost,fixedCost,thetaR,sigma,beta;
# Pause: wait for a number typed by the user before showing the rest.
printf "\nEnter any number to continue\n";
read temp <-;
display sizeTran;
display V,P;