CFA for Applied Research in R package lavaan

What is lavaan?

lavaan is a free, open source R package for latent variable analysis. You can use lavaan to estimate a large variety of multivariate statistical models, including path analysis, confirmatory factor analysis, structural equation modeling and growth curve models.

What is “Confirmatory Factor Analysis for Applied Research”?

It is a very comprehensive book by Timothy A. Brown emphasizing practical and theoretical aspects of Confirmatory Factor Analysis (CFA), a well-established multivariate statistics method. CFA/SEM is mostly performed via commercial software, such as Mplus, EQS, Lisrel and Amos. The R package lavaan offers a very promising open source alternative. The companion website of Brown’s book on CFA currently contains the syntax code for running the book examples in most (if not all) of the above commercial packages. So what’s missing? A lavaan implementation. Here it is, following the chapters of the book.

Chapter 4 - Specification and Interpretation of CFA Models

Contains two examples a) Two-Factor CFA (Neuroticism, Extraversion) and b) CFA with Single Indicators: Health Status

#############################################################
# Run this code at your own risk! 
############################################################# 
# Chapter 4: Specification and Interpretation of CFA Models

install.packages("lavaan")
library(lavaan)

# Two Factor CFA Model of Neuroticism and Extraversion (Tables 4.2, 4.3, 4.4)

# input: the sample variance-covariance matrix of page 116
covmat <- '32.49
 24.4826 31.36
 26.6669 25.4106 40.96
 25.2772 23.557 27.7978 32.49
 -12.0042 -10.1472 -13.6704 -10.8756 36
 -11.1674 -9.7216 -11.904 -9.4358 25.11 38.44
 -9.617 -9.2249 -10.8346 -9.617 21.6828 23.0063 32.49
 -9.0014 -7.9654 -10.4653 -7.8204 17.9424 20.589 18.0667 31.36'

neodat.cov <- getCov(covmat, names=c("N1","N2","N3","N4","E1","E2","E3","E4"))

#set up model
new.model <- ' neuro =~ N1 + N2 + N3 + N4
extrav =~ E1 + E2 + E3 + E4'

# fit model
fit <- cfa(new.model, sample.cov=neodat.cov, sample.nobs=250, mimic="Mplus")

#summary statistics
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

parameterEstimates(fit)   #coefficients with se, 95%CI and z values etc. 
fitted(fit)               #the model implied variance-covariance matrix, slightly different values from Table 4.2 p. 117
resid(fit) # unstandardized residuals matrix
resid(fit,type="standardized") #standardized residuals matrix, there are differences with the values found in Table 4.2 p. 117

#############################################################
#CFA with Single Indicators: Health Status
#############################################################

# read Health Status data from the book's companion website
health <- read.table("http://people.bu.edu/tabrown/Ch4/fig4.3.dat")
# remove id variable
health <- health[,-1]
# assign variable names
names(health) <- c("ACTIV","SOMA","PAIN","MENTH","SOCF","VITAL","GENHLTH","AGE")
head(health)

#set up model
model <- '
# measurement model
psysfun =~ ACTIV + SOMA + PAIN
mentfun =~ MENTH + SOCF + VITAL
AGEF =~ AGE
GWB =~ GENHLTH
#fix variances of single indicators
AGE ~~ 0*AGE
GENHLTH ~~ 7.88*GENHLTH
# residual correlations
ACTIV ~~ SOMA
'

# fit model
fit <- cfa(model, data=health,mimic="Mplus")
# summary statistics
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

Chapter 5 - CFA Model Revision and Comparison

Contains two examples a) a Three-Factor CFA Model (Alcohol Motives) and b) E/CFA (EFA in the CFA framework)

################################################
# Chapter 5: CFA Model Revision and Comparison #
################################################

library(lavaan)

####################################################################
# Three-Factor CFA (Alcohol Motives QN) model (Figure 1, page 168) #
####################################################################

#input: the lower part of a correlation matrix, see p.169, Table 5.2 (the lisrel syntax)
cormat <- '
1.000
0.300 1.000
0.229 0.261 1.000
0.411 0.406 0.429 1.000
0.172 0.252 0.218 0.481 1.000
0.214 0.268 0.267 0.579 0.484 1.000
0.200 0.214 0.241 0.543 0.426 0.492 1.000
0.185 0.230 0.185 0.545 0.463 0.548 0.522 1.000
0.134 0.146 0.108 0.186 0.122 0.131 0.108 0.151 1.000
0.134 0.099 0.061 0.223 0.133 0.188 0.105 0.170 0.448 1.000
0.160 0.131 0.158 0.161 0.044 0.124 0.066 0.061 0.370 0.350 1.000
0.087 0.088 0.101 0.198 0.077 0.177 0.128 0.112 0.356 0.359 0.507 1.000'

#convert it to a square matrix
Cmat <- getCov(cormat)

# the standard deviations of the variables (Table 5.2)
neodat.sdev = c(2.06, 1.52, 1.92, 1.41, 1.73, 1.77, 2.49, 2.27, 2.68, 1.75, 2.57, 2.66)

# convert the correlation matrix to a covariance matrix (lavaan needs a covariance matrix as input)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("X1","X2","X3","X4","X5","X6","X7","X8","X9","X10","X11","X12")
colnames(covmat)=rownames(covmat)               

#model set up - this is the model presented in Figure 1, p.168 
# you can play around with this model following the text of Ch.5
new.model <- '#X4 loads on both coping and social factors
              coping =~ X1 + X2 + X3 + X4   
              social =~ X5 + X6 + X7 + X8 + X4
              enhance =~ X9 + X10 + X11 + X12
              # residual correlation
              X11 ~~ X12'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=500)

#results are almost identical due to rounding errors in the matrix conversion step
fit

summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


########################################################
#E/CFA (EFA in the CFA framework) page 193 - Table 5.8 #
########################################################

# read Alcohol Motives data from the book's companion website
alcohol <- read.table("http://people.bu.edu/tabrown/Ch5/efa.dat")
#remove last column
alcohol <- alcohol[,-13]
# assign variable names
names(alcohol) <- c("X1","X2","X3","X4","X5","X6","X7","X8","X9","X10","X11","X12")
head(alcohol)

#set up model
new.model <- '#Force the loadings of the first indicators to be free 
              coping =~ NA*X1 + X2+ X3 + X4 + X5 + X6 + X7 + X9 + X10 + X11  
              social =~ NA*X8 + X2 + X3 + X4 + X5 + X6 + X7 + X9 + X10 + X11 
              enhance =~ NA*X12 + X2 + X3 + X4 + X5 + X6 + X7 + X9 + X10 + X11 
              #factor variances fixed to 1.0
              coping ~~ 1*coping
              social ~~ 1*social
              enhance ~~ 1*enhance'

#fit model
fit <- sem(new.model, data=alcohol)

summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

Chapter 6 - CFA of Multitrait-Multimethod Matrices

Contains two examples a) a Correlated Methods Model of a Multitrait-Multimethod Matrix (MTMM) and b) a Correlated Uniqueness Model of a MTMM matrix

#####################################################
# Chapter 6: CFA of Multitrait-Multimethod Matrices #
#####################################################

library(lavaan)

######################################################
# Correlated Methods Model of the MTMM Matrix, p.218 #
######################################################

#Input matrix: the correlation matrix of Table 6.2 (Lisrel syntax)
cormat <- '
1.000
0.290  1.000
0.372  0.478  1.000
0.587  0.238  0.209  1.000
0.201  0.586  0.126  0.213  1.000
0.218  0.281  0.681  0.195  0.096  1.000
0.557  0.228  0.195  0.664  0.242  0.232  1.000
0.196  0.644  0.146  0.261  0.641  0.248  0.383  1.000
0.219  0.241  0.676  0.290  0.168  0.749  0.361  0.342  1.000'


Cmat <- getCov(cormat)

neodat.sdev = c(3.61, 3.66, 3.59, 2.94, 3.03, 2.85, 2.22, 2.42, 2.04)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("PARI","SZTI","SZDI","PARC","SZTC","SZDC","PARO","SZTO","SZDO")
colnames(covmat)= rownames(covmat)

#Correlated Methods Model p.218
#model set up - this is the model presented in Figure 6.1, p.218 
new.model <- 'paranoid =~ PARI +  PARC + PARO   
              schizotypal =~ SZTI + SZTC + SZTO
              schizoid =~ SZDI + SZDC + SZDO
              inventory =~ SZTI + PARI + SZDI
              clininter =~ PARC + SZTC + SZDC
              obsrating =~ PARO + SZTO + SZDO'
            
#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=500)

#The model has NOT converged! And this is exactly what we expected. 
# Brown does not present the results of this model for the same reason.
fit


#########################################################
# Correlated Uniqueness Model of the MTMM Matrix, p.220 #
#########################################################


#Input matrix: the correlation matrix of Table 6.3 (Lisrel syntax)
cormat <- '
1.000
0.290  1.000
0.372  0.478  1.000
0.587  0.238  0.209  1.000
0.201  0.586  0.126  0.213  1.000
0.218  0.281  0.681  0.195  0.096  1.000
0.557  0.228  0.195  0.664  0.242  0.232  1.000
0.196  0.644  0.146  0.261  0.641  0.248  0.383  1.000
0.219  0.241  0.676  0.290  0.168  0.749  0.361  0.342  1.000'


Cmat <- getCov(cormat)

neodat.sdev = c(3.61, 3.66, 3.59, 2.94, 3.03, 2.85, 2.22, 2.42, 2.04)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("PARI","SZTI","SZDI","PARC","SZTC","SZDC","PARO","SZTO","SZDO")
colnames(covmat)= rownames(covmat)

#Correlated Uniqueness Model of the MTMM Matrix, p.228
#model set up - this is the model presented in Figure 6.2, p.220 
new.model <- 'paranoid =~ PARI +  PARC + PARO   
              schizotypal =~ SZTI + SZTC + SZTO
              schizoid =~ SZDI + SZDC + SZDO
              # residual correlation
              PARI ~~ SZTI
              SZTI ~~ SZDI
              PARI ~~ SZDI
              PARC ~~ SZTC
              SZTC ~~ SZDC
              PARC ~~ SZDC
              PARO ~~ SZTO
              SZTO ~~ SZDO
              PARO ~~ SZDO'
#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=500)

#results same as in Table 6.4, page 226
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

Chapter 7 - CFA with Equality Constraints, Multiple Groups, and Mean Structures

Contains examples for a) Tau Equivalent and Parallel Indicators b) Longitudinal Measurement Invariance c) Multiple Groups CFA (Depression example) d) MIMIC Models (Phobia example)

#####################################################
# Chapter 7: CFA with Equality Constraints,         #
#            Multiple Groups, and Mean Structures   #
#####################################################

library(lavaan)

#####################################
# Equality Constraints p.236 - 252  #
#####################################

#Input matrix: the correlation matrix of Figure 7.1
cormat <- '
1.000
0.661  1.000
0.630  0.643  1.000
0.270  0.300  0.268  1.000
0.297  0.265  0.225  0.805  1.000
0.290  0.287  0.248  0.796  0.779  1.000'

Cmat <- getCov(cormat)

neodat.sdev = c(2.610,  2.660,  2.590,  1.940,  2.030,  2.050)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("x1","x2","x3","x4","x5","x6")
colnames(covmat)= rownames(covmat)

############ Congeneric Model #############
#model set up - this is the model presented in Figure 7.1, p.238 
new.model <- 'auditory =~ x1 +  x2 + x3   
              visual =~ x4 + x5 + x6'            


#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=200)

#results same as in Table 7.1, page 241
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


############# Tau-equivalent Model - Auditory Memory only ############
#model set up - this is the model presented in Figure 7.1, p.238 
new.model <- 'auditory =~ v1*x1 +  v1*x2 + v1*x3
              visual =~ x4 + x5 + x6'            

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=200)

#results same as in Table 7.3, page 246 (only model fit)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


############# Tau-equivalent Model - Auditory & Visual memory ############
#model set up - this is the model presented in Figure 7.1, p.238 
new.model <- 'auditory =~ v1*x1 +  v1*x2 + v1*x3
              visual =~ v2*x4 + v2*x5 + v2*x6'
              
#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=200)

#results same as in Table 7.3, page 246 (only model fit)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


############# Parallel Model - Auditory Memory only ############
#model set up - this is the model presented in Figure 7.1, p.238 
new.model <- 'auditory =~ v1*x1 +  v1*x2 + v1*x3
              visual =~ v2*x4 + v2*x5 + v2*x6
              #equality constraints of error variances (Auditory)
              f1*x1 ~~ f1*x1
              f1*x2 ~~ f1*x2
              f1*x3 ~~ f1*x3'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=200)

#results same as in Table 7.3, page 246 (only model fit)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


############# Parallel Model - Auditory & Visual memory ############
#model set up - this is the model presented in Figure 7.1, p.238 
new.model <- 'auditory =~ v1*x1 +  v1*x2 + v1*x3
              visual =~ v2*x4 + v2*x5 + v2*x6
              #equality constraints of error variances (Auditory & Visual)
              f1*x1 ~~ f1*x1
              f1*x2 ~~ f1*x2
              f1*x3 ~~ f1*x3
              f2*x4 ~~ f2*x4
              f2*x5 ~~ f2*x5
              f2*x6 ~~ f2*x6'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=200)

#results same as in Table 7.4, page 248
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#to see the internal representation of the model (this helps understand the model definition)
inspect(fit,what="list")

########################################
# Longitudinal Invariance p.252 - 268  #
########################################

#Input matrix: the correlation matrix of Figure 7.2
cormat <- '
1.000
0.736  1.000
0.731  0.648  1.000
0.771  0.694  0.700  1.000
0.685  0.512  0.496  0.508  1.000
0.481  0.638  0.431  0.449  0.726  1.000
0.485  0.442  0.635  0.456  0.743  0.672  1.000
0.508  0.469  0.453  0.627  0.759  0.689  0.695  1.000'

Cmat <- getCov(cormat)

neodat.sdev = c(1.940,  2.030,  2.050,  1.990,  2.610,  2.660, 2.590,  2.550)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("A1","B1","C1","D1","A2","B2","C2","D2")
colnames(covmat)= rownames(covmat)

#Means: 1.500  1.320  1.450  1.410  6.600  6.420  6.560  6.310
#I couldn't find a way to bring in the indicator means, 
#so I used the meanstructure = TRUE argument in cfa call (which is not the same thing). 

##################################################################
#Results of the Equal Form Longitudinal Model of Job Satisfaction
##################################################################
#model set up - this is the model presented in Figure 7.2, p.254 
new.model <- 'SATIS1 =~ A1 + B1 + C1 + D1
              SATIS2 =~ A2 + B2 + C2 + D2
              A1 ~~ A2
              B1 ~~ B2
              C1 ~~ C2
              D1 ~~ D2'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=250, meanstructure = TRUE)

#results same as in Table 7.6, page 260 and fit same as in Table 7.7
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

##############################################################################
#Results of the Equal Factor Loadings Longitudinal Model of Job Satisfaction #
##############################################################################
#model set up - this is the model presented in Figure 7.2, p.254 
# loadings constrained to be equal using custom labels
new.model <- 'SATIS1 =~ v1*A1 +  v2*B1 + v3*C1 + v4*D1
              SATIS2 =~ v1*A2 + v2*B2 + v3*C2 + v4*D2
              A1 ~~ A2
              B1 ~~ B2
              C1 ~~ C2
              D1 ~~ D2'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=250,meanstructure = TRUE)

#Fit same as in Table 7.7
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


##############################################################################
#Equal indicator intercepts Longitudinal Model of Job Satisfaction #
##############################################################################
#model set up - this is the model presented in Figure 7.2, p.254 
# loadings constrained to be equal using custom labels
new.model <- 'SATIS1 =~ v1*A1 +  v2*B1 + v3*C1 + v4*D1
              SATIS2 =~ v1*A2 + v2*B2 + v3*C2 + v4*D2
              A1 ~~ A2
              B1 ~~ B2
              C1 ~~ C2
              D1 ~~ D2
              # intercepts constrained to be equal
              # using custom labels
              A1 ~ int2 * 1
              A2 ~ int2 * 1
              B1 ~ int3 * 1
              B2 ~ int3 * 1
              C1 ~ int4 * 1
              C2 ~ int4 * 1
              D1 ~ int5 * 1
              D2 ~ int5 * 1'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=250,meanstructure=TRUE)

#results are not the same as in Table 7.7 - no way to bring in means in lavaan (?)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


##############################################################################
#Equal indicator error variances Longitudinal Model of Job Satisfaction #
##############################################################################
#model set up - this is the model presented in Figure 7.2, p.254
new.model <- 'SATIS1 =~ v1*A1 +  v2*B1 + v3*C1 + v4*D1
              SATIS2 =~ v1*A2 + v2*B2 + v3*C2 + v4*D2
              A1 ~~ A2
              B1 ~~ B2
              C1 ~~ C2
              D1 ~~ D2
              # intercepts constrained to be equal
              # using custom labels
              A1 ~ int2 * 1
              A2 ~ int2 * 1
              B1 ~ int3 * 1
              B2 ~ int3 * 1
              C1 ~ int4 * 1
              C2 ~ int4 * 1
              D1 ~ int5 * 1
              D2 ~ int5 * 1
              #error variances constrained to be equal
              #using custom labels
              f1*A1 ~~ f1*A1
              f1*A2 ~~ f1*A2
              f2*B1 ~~ f2*B1
              f2*B2 ~~ f2*B2
              f3*C1 ~~ f3*C1
              f3*C2 ~~ f3*C2
              f4*D1 ~~ f4*D1
              f4*D2 ~~ f4*D2'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=250,meanstructure=TRUE)

#results are not the same as in Table 7.7 - no way to bring in means in lavaan (?)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#to see the internal representation of the model (this helps understand the model definition)
inspect(fit,what="list")

#############################################
#### Multiple Group Analysis pp. 268-299 ####
#############################################

# read Depression example from the book's companion website
depres <- read.table("http://people.bu.edu/tabrown/Ch7/MDDALL.dat")

# assign variable names
names(depres) <- c("Sex","M1","M2","M3","M4","M5","M6","M7","M8","M9")
head(depres)

#model set up - this is the model presented in Figure 7.2, p.238 
new.model <- 'majdep =~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9
              M1 ~~ M2'

### Separate analysis of males and females to inspect model fit

#model fit for Female
fit <- cfa(new.model, data=depres[Sex==0,-1])
#results are not the same as in Table xxxx
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#model fit for Male
fit2 <- cfa(new.model, data=depres[Sex==1,-1])
#results are not the same as in Table xxxx
summary(fit2,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#Tests of Measurement Invariance and Population Heterogeneity

#Measurement Invariance - same model fit as in Table 7.9 
# model 1: configural invariance
fit1 <- cfa(new.model, data=depres, group="Sex")
# model 2: weak invariance
fit2 <- cfa(new.model, data=depres, group="Sex",
            group.equal="loadings")
# model 3: strong invariance
fit3 <- cfa(new.model, data=depres, group="Sex",
            group.equal=c("loadings", "intercepts"))
# model 4: equal loadings + intercepts + residuals
fit4 <- cfa(new.model, data=depres, group="Sex",
            group.equal=c("loadings", "intercepts", "residuals"))
#Population heterogeneity- # same model fit as in Table 7.9 
# model 5: equal loadings + intercepts + residuals + factor variances
fit5 <- cfa(new.model, data=depres, group="Sex",
            group.equal=c("loadings", "intercepts", "residuals","lv.variances"))
# model 6: equal loadings + intercepts + residuals + factor variances + factor means
fit6 <- cfa(new.model, data=depres, group="Sex",
            group.equal=c("loadings", "intercepts", "residuals","lv.variances","means"))

############## Alternatively for Models 1-3 and equal loadings + intercepts + means ##########
############## with chi-square tests of differences ##########
install.packages("semTools")
library(semTools)
measurementInvariance(new.model, data=depres, group="Sex", strict=FALSE)
###########################################

###################################
##### MIMIC Models pp.304-316 #####
###################################

cormat <- '
1.000
0.705   1.000
0.724   0.646   1.000
0.213   0.195   0.190   1.000
0.149   0.142   0.128   0.521   1.000
0.155   0.162   0.135   0.557   0.479   1.000'

Cmat <- getCov(cormat)

neodat.sdev = c(2.26, 2.73, 2.11, 2.32, 2.61, 2.44)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("S1","S2","S3","A1","A2","A3")
colnames(covmat)= rownames(covmat)

# Step 1. Ensure that the two-factor model of Social Phobia
# and Agoraphobia is reasonable and good fitting in the full sample (N = 730).

#model set up - this is the CFA model of the correlation matrix presented in Figure 7.5, p.308 
new.model <- 'socialphobia =~ S1 + S2 + S3   
              agoraphobia =~ A1 + A2 + A3
              socialphobia ~~ agoraphobia'            

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=730)

#results same as p. 308
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


# Step 2. MIMIC Model of Fig. 7.5, p. 308.

#Introduce sex
cormat <- '
1.000
0.705   1.000
0.724   0.646   1.000
0.213   0.195   0.190   1.000
0.149   0.142   0.128   0.521   1.000
0.155   0.162   0.135   0.557   0.479   1.000
-0.019  -0.024  -0.029  -0.110  -0.074  -0.291   1.000'


Cmat <- getCov(cormat)

neodat.sdev = c(2.26, 2.73, 2.11, 2.32, 2.61, 2.44, 0.50)
Dmat = diag(neodat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("S1","S2","S3","A1","A2","A3","sex")
colnames(covmat)= rownames(covmat)

new.model <- 'socialphobia =~ S1 + S2 + S3   
              agoraphobia =~ A1 + A2 + A3
              socialphobia ~~ agoraphobia
              #regressions
              socialphobia ~ sex
              agoraphobia ~ sex'            

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=730)

#results same as p. 315 - TABLE 7.15. Results of MIMIC Model of Social Phobia and Agoraphobia
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)


### Extra: Evaluation of measurement invariance by fixing all direct effects 
### between the covariate and the indicators to zero and then inspecting modification indices.
### pp.314-316

new.model <- 'socialphobia =~ S1 + S2 + S3   
              agoraphobia =~ A1 + A2 + A3
              socialphobia ~~ agoraphobia
              #regressions
              socialphobia ~ sex
              agoraphobia ~ sex
              #fix direct effects between sex and the indicators to zero
              A3 ~ 0*sex
              A2 ~ 0*sex
              A1 ~ 0*sex'            

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=730)

#results same as p. 315 - TABLE 7.15. Results of MIMIC Model of Social Phobia and Agoraphobia
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

Chapter 8 - Higher-Order CFA, Scale Reliability Evaluation, Formative Indicators

Contains examples for a) a Higher Order model CFA b) estimating a scale’s reliability using the semtools package c) a Formative Indicators model

################################################
# Chapter 8: Other Types of CFA Models,        #
#            Higher-Order Factor Analysis      #
#            Scale Reliability and             #
#            Formative Indicators              #
################################################

library(lavaan)

#############################################
# Higher-Order Factor Analysis p.320 - 337  #
#############################################

#Input matrix: the correlation matrix of Figure 8.1
cormat <- '
1.00
0.78  1.00
0.80  0.77  1.00
0.56  0.51  0.48  1.00
0.52  0.51  0.46  0.78  1.00
0.59  0.51  0.51  0.80  0.79  1.00
0.16  0.15  0.17  0.14  0.18  0.16  1.00
0.19  0.13  0.18  0.14  0.16  0.16  0.81  1.00
0.12  0.17  0.17  0.17  0.20  0.16  0.75  0.80  1.00
0.16  0.13  0.17  0.15  0.16  0.18  0.56  0.52  0.50  1.00
0.16  0.14  0.18  0.15  0.16  0.18  0.51  0.58  0.51  0.81  1.00
0.16  0.15  0.14  0.16  0.16  0.14  0.52  0.57  0.52  0.80  0.79  1.00'

Cmat <- getCov(cormat)

solvedat.sdev = c(1.40, 2.10, 1.30, 2.30, 2.40, 1.90, 2.00, 2.90, 2.30, 3.10, 2.20, 1.20)

Dmat = diag(solvedat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("P1","P2","P3","C1","C2","C3","E1","E2","E3","S1","S2","S3")
colnames(covmat)= rownames(covmat)

#model set up - this is the model presented in Figure 8.1, p.322 
new.model <- 'PROBSLV =~ P1 + P2 + P3   
              COGRES =~ C1 + C2 + C3
              EXPREMOT =~ E1 + E2 + E3
              SOCSPT =~ S1 + S2 + S3
              PROBFOC =~ PROBSLV + COGRES
              EMOTFOC =~ EXPREMOT + SOCSPT'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=275)

#results very close to Table 8.3, page 333 
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

###########################################
# Scale Reliability Estimation p. 337-351 #
###########################################

#Input matrix: the correlation matrix of Figure 8.1
cormat <- '
1.000
0.594  1.000
0.607  0.613  1.000
0.736  0.765  0.717  1.000
0.378  0.321  0.360  0.414  1.000
0.314  0.301  0.345  0.363  0.732  1.000
0.310  0.262  0.323  0.337  0.665  0.583  1.000
0.317  0.235  0.276  0.302  0.632  0.557  0.796  1.000'

Cmat <- getCov(cormat)

solvedat.sdev = c(1.150,  1.200,  1.570,  2.820,  1.310,  1.240,  1.330,  1.290)

Dmat = diag(solvedat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("Y1","Y2","Y3","Y4","Y5","Y6","Y7","Y8")
colnames(covmat)= rownames(covmat)

#model set up - this is the model presented in Figure 8.2, p.340 
new.model <- 'INTRUS =~ Y1 + Y2 + Y3 + Y4
              AVOID =~ Y5 + Y6 + Y7 + Y8
              Y7 ~~ Y8'

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=500)

#model fit is close but not identical to p.339 
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#EXTRA: For scale reliability estimation you can use the reliability function
#of the semTools package
install.packages("semTools")
library(semTools)
reliability(fit) 
#OUTPUT
#       INTRUS     AVOID
#alpha  0.8399990 0.8865353
#omega  0.9193497 0.8232744
#omega2 0.9193497 0.8232744
#omega3 0.9195232 0.8234095
# the omega values equal the point estimates of scale reliability
# shown in p.344, but standard errors or confidence intervals are not calculated.

#For handmade calculations of omega and omega_h you can visit 
#https://bearspace.baylor.edu/Alex_Beaujean/R/Omega.pdf
#for an example

#######################################################
##### Models with Formative Indicators pp.351-362 #####
#######################################################

#Input matrix: the correlation matrix of Figure 8.5
cormat <- '
1.000
0.700  1.000
0.713  0.636  1.000
0.079  0.066  0.076  1.000
0.088  0.058  0.070  0.681  1.000
0.084  0.056  0.074  0.712  0.633  1.000
0.279  0.248  0.240  0.177  0.155  0.170  1.000
0.250  0.214  0.222  0.157  0.143  0.152  0.373  1.000
0.280  0.236  0.251  0.173  0.178  0.171  0.448  0.344  1.000'

Cmat <- getCov(cormat)

solvedat.sdev = c(2.5, 2.1, 3.0, 4.1, 3.9, 4.4, 1.2, 1.0, 1.2)

Dmat = diag(solvedat.sdev)
covmat = Dmat%*%Cmat%*%Dmat

# assign row and column names to the covariance matrix
rownames(covmat) = c("Y1","Y2","Y3","Y4","Y5","Y6","X1","X2","X3")
colnames(covmat)= rownames(covmat)

#Life Stress example
#model set up - this is the model presented in Figure 8.5, p.359 
new.model <- 'SATIS =~ Y1 + Y2 + Y3
OPTIM =~ Y4 + Y5 + Y6
STRESS ~ 1*X1 + X2 +X3  
STRESS =~ NA*SATIS + OPTIM
X1 ~~ X2
X1 ~~ X3
X2 ~~ X3'            

#model fit
fit <- cfa(new.model, sample.cov=covmat, sample.nobs=500)

#results same as in Table 8.9, page 360-361 (only model fit)
summary(fit,standardized=TRUE, fit.measures=TRUE, rsq=TRUE, modindices=TRUE)

#### EXTRA: Visualize this model with semPlot ####
install.packages("semPlot")
library(semPlot)
semPaths(fit, "std", title = FALSE, curvePivot = TRUE)

Chapter 9 - Data Issues in CFA: Missing, Non-normal, and Categorical Data (coming soon)

Chapter 10 - Statistical Power and Sample Size (coming soon)

Useful links:

A discussion group for the R package lavaan
Y. Rosseel’s paper on lavaan in the Journal of Statistical Software

Angelos Markos