paste0("There is a total of ", nrow(dat_raw), " eligible participants, with ", sum(dat_raw$CCA), " (", round(sum(dat_raw$CCA)/nrow(dat_raw)*100), "%) participants with no missing data at our key variables")
## [1] "There is a total of 2035 eligible participants, with 1742 (86%) participants with no missing data at our key variables"
VIM::aggr(dat_raw %>%
dplyr::select(
UCLA3_TOT, UCLA20_TOT, PHQ_TOT, GAD_TOT,
Age_dim, Gender, Financ_diff, Marital_status),
numbers = TRUE, prop = c(TRUE, FALSE),
cex.axis = 0.7)
tab1 = table1::table1(~ Age_cat +
Marital_status + Year_stud_cat+
Financ_diff_cat | Gender,
data = dat)
t1flex(tab1) %>%
save_as_docx(path="tab1_UCLA3.docx")
tab1
Man (N=47) |
Women (N=1695) |
Overall (N=1742) |
|
---|---|---|---|
Age_cat | |||
18-19 | 4 (8.5%) | 260 (15.3%) | 264 (15.2%) |
20-21 | 19 (40.4%) | 595 (35.1%) | 614 (35.2%) |
22-23 | 20 (42.6%) | 636 (37.5%) | 656 (37.7%) |
24-25 | 4 (8.5%) | 97 (5.7%) | 101 (5.8%) |
>25 | 0 (0%) | 107 (6.3%) | 107 (6.1%) |
Marital_status | |||
Couple | 15 (31.9%) | 695 (41.0%) | 710 (40.8%) |
Single | 32 (68.1%) | 1000 (59.0%) | 1032 (59.2%) |
Year_stud_cat | |||
2nd year | 8 (17.0%) | 377 (22.2%) | 385 (22.1%) |
3rd year | 8 (17.0%) | 385 (22.7%) | 393 (22.6%) |
4th year | 16 (34.0%) | 469 (27.7%) | 485 (27.8%) |
5th year | 13 (27.7%) | 458 (27.0%) | 471 (27.0%) |
Other | 2 (4.3%) | 6 (0.4%) | 8 (0.5%) |
Financ_diff_cat | |||
Little importance | 14 (29.8%) | 850 (50.1%) | 864 (49.6%) |
Moderately important | 17 (36.2%) | 345 (20.4%) | 362 (20.8%) |
No difficulties | 15 (31.9%) | 464 (27.4%) | 479 (27.5%) |
Very important | 1 (2.1%) | 36 (2.1%) | 37 (2.1%) |
dat_pairs = dat[,c(
"UCLA3_TOT", "UCLA20_TOT",
"PHQ_TOT","GAD_TOT", "Marital_status",
"Financ_diff_cat", "Age_cat")]
GGally::ggpairs(dat_pairs,
lower = list(
continuous=GGally::wrap("points",
position=position_jitter(height=1, width=1),
alpha = 0.1)))
heat = data.frame(apply(dat[,c(
"PHQ_TOT","GAD_TOT",
"Age_dim", "Financ_diff",
"Marital_status",
"UCLA3_TOT", "UCLA20_TOT")], 2, as.numeric))
## Warning in apply(dat[, c("PHQ_TOT", "GAD_TOT", "Age_dim", "Financ_diff", : NAs
## introduced by coercion
ggstatsplot::ggcorrmat(heat)
res = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
res
##
## Pearson's product-moment correlation
##
## data: UCLA3_TOT and UCLA20_TOT
## t = 38.179, df = 1740, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6487738 0.6999377
## sample estimates:
## cor
## 0.675167
alpha_UCLA3 = dat[, c("UCLA1", "UCLA2", "UCLA3")] %>%
psych::alpha()
alpha_UCLA20 = dat %>%
select(starts_with("UCLA20")) %>%
select(-UCLA20_TOT) %>%
psych::alpha()
CTT::disattenuated.cor(res$estimate,
c(alpha_UCLA3$total$raw_alpha,
alpha_UCLA20$total$raw_alpha))
## cor
## [1,] 0.7813001
library(ggExtra)
ggscatterstats(dat, UCLA3_TOT, UCLA20_TOT,
bf.message = FALSE,
ylab = "UCLA-20 scale",
xlab = "UCLA-3 scale")
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg GGally
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
UCLA-3
dat[, c("UCLA1", "UCLA2", "UCLA3")] %>%
psych::alpha()
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.8 0.8 0.74 0.58 4.1 0.0082 1.7 0.59 0.56
##
## lower alpha upper 95% confidence boundaries
## 0.79 0.8 0.82
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## UCLA1 0.81 0.81 0.68 0.68 4.2 0.0093 NA 0.68
## UCLA2 0.72 0.72 0.56 0.56 2.5 0.0136 NA 0.56
## UCLA3 0.66 0.66 0.50 0.50 2.0 0.0161 NA 0.50
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## UCLA1 1742 0.81 0.81 0.64 0.58 1.9 0.69
## UCLA2 1742 0.85 0.85 0.75 0.67 1.6 0.67
## UCLA3 1742 0.88 0.88 0.80 0.71 1.7 0.71
##
## Non missing response frequency for each item
## 1 2 3 miss
## UCLA1 0.30 0.51 0.18 0
## UCLA2 0.47 0.42 0.11 0
## UCLA3 0.43 0.42 0.15 0
om3 = dat[, c("UCLA1", "UCLA2", "UCLA3")] %>%
psych::omega(nfactors=1)
## Loading required namespace: GPArotation
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
om3$omega.tot
## [1] 0.8095527
UCLA-20
dat %>%
select(starts_with("UCLA20")) %>%
select(-UCLA20_TOT) %>%
psych::alpha()
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.93 0.93 0.94 0.41 14 0.0024 1.8 0.57 0.41
##
## lower alpha upper 95% confidence boundaries
## 0.92 0.93 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## UCLA20_1 0.93 0.93 0.94 0.41 13 0.0025 0.014 0.42
## UCLA20_2 0.93 0.93 0.94 0.41 13 0.0025 0.013 0.41
## UCLA20_3 0.93 0.93 0.94 0.42 13 0.0025 0.013 0.41
## UCLA20_4 0.93 0.93 0.94 0.42 14 0.0025 0.013 0.42
## UCLA20_5 0.93 0.93 0.94 0.41 13 0.0025 0.013 0.41
## UCLA20_6 0.92 0.93 0.94 0.41 13 0.0026 0.013 0.40
## UCLA20_7 0.93 0.93 0.94 0.43 14 0.0023 0.011 0.43
## UCLA20_8 0.93 0.93 0.94 0.42 14 0.0024 0.013 0.43
## UCLA20_9 0.93 0.93 0.94 0.43 14 0.0024 0.012 0.43
## UCLA20_10 0.93 0.93 0.94 0.41 13 0.0025 0.013 0.41
## UCLA20_11 0.92 0.93 0.94 0.41 13 0.0026 0.013 0.41
## UCLA20_12 0.93 0.93 0.94 0.41 13 0.0025 0.013 0.41
## UCLA20_13 0.92 0.93 0.94 0.41 13 0.0026 0.013 0.41
## UCLA20_14 0.92 0.93 0.94 0.40 13 0.0026 0.012 0.40
## UCLA20_15 0.93 0.93 0.94 0.41 13 0.0025 0.014 0.41
## UCLA20_16 0.92 0.93 0.94 0.41 13 0.0026 0.013 0.40
## UCLA20_17 0.92 0.93 0.94 0.40 13 0.0026 0.013 0.40
## UCLA20_18 0.92 0.93 0.94 0.40 13 0.0026 0.013 0.40
## UCLA20_19 0.93 0.93 0.94 0.41 13 0.0025 0.012 0.41
## UCLA20_20 0.93 0.93 0.94 0.41 13 0.0025 0.012 0.41
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## UCLA20_1 1742 0.62 0.63 0.60 0.57 1.9 0.79
## UCLA20_2 1742 0.64 0.62 0.60 0.58 2.3 0.97
## UCLA20_3 1742 0.62 0.62 0.59 0.56 1.6 0.89
## UCLA20_4 1742 0.59 0.58 0.55 0.53 1.9 0.97
## UCLA20_5 1742 0.70 0.71 0.69 0.66 1.5 0.77
## UCLA20_6 1742 0.72 0.73 0.72 0.69 1.7 0.79
## UCLA20_7 1742 0.46 0.44 0.39 0.38 2.0 1.04
## UCLA20_8 1742 0.53 0.53 0.49 0.48 2.1 0.88
## UCLA20_9 1742 0.48 0.48 0.44 0.42 1.8 0.83
## UCLA20_10 1742 0.68 0.70 0.68 0.65 1.3 0.60
## UCLA20_11 1742 0.73 0.72 0.71 0.69 2.0 0.88
## UCLA20_12 1742 0.69 0.68 0.66 0.64 2.0 0.93
## UCLA20_13 1742 0.72 0.71 0.69 0.67 2.1 1.07
## UCLA20_14 1742 0.79 0.78 0.78 0.76 2.0 0.94
## UCLA20_15 1742 0.67 0.67 0.65 0.62 1.6 0.78
## UCLA20_16 1742 0.73 0.74 0.73 0.69 1.6 0.77
## UCLA20_17 1742 0.78 0.76 0.76 0.74 1.9 0.99
## UCLA20_18 1742 0.76 0.75 0.74 0.72 2.1 0.97
## UCLA20_19 1742 0.68 0.71 0.71 0.65 1.3 0.60
## UCLA20_20 1742 0.70 0.72 0.72 0.66 1.3 0.63
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## UCLA20_1 0.37 0.42 0.18 0.02 0
## UCLA20_2 0.25 0.32 0.32 0.12 0
## UCLA20_3 0.61 0.22 0.12 0.05 0
## UCLA20_4 0.47 0.26 0.20 0.07 0
## UCLA20_5 0.67 0.22 0.08 0.03 0
## UCLA20_6 0.51 0.33 0.14 0.02 0
## UCLA20_7 0.41 0.27 0.21 0.12 0
## UCLA20_8 0.29 0.40 0.25 0.06 0
## UCLA20_9 0.44 0.39 0.13 0.04 0
## UCLA20_10 0.72 0.23 0.04 0.01 0
## UCLA20_11 0.36 0.38 0.21 0.05 0
## UCLA20_12 0.37 0.35 0.20 0.08 0
## UCLA20_13 0.37 0.26 0.23 0.14 0
## UCLA20_14 0.38 0.33 0.22 0.07 0
## UCLA20_15 0.56 0.30 0.12 0.02 0
## UCLA20_16 0.56 0.31 0.11 0.02 0
## UCLA20_17 0.48 0.26 0.18 0.08 0
## UCLA20_18 0.35 0.32 0.24 0.09 0
## UCLA20_19 0.75 0.19 0.05 0.01 0
## UCLA20_20 0.73 0.20 0.06 0.01 0
om20 = dat %>%
select(starts_with("UCLA20")) %>%
select(-UCLA20_TOT) %>%
psych::omega(nfactors=1)
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
om20$omega.tot
## [1] 0.9345274
Comparison
cocron(list(dat[, c("UCLA1", "UCLA2", "UCLA3")],
dat %>%
select(starts_with("UCLA20")) %>%
select(-UCLA20_TOT)),
dep=TRUE,
standardized=TRUE)
##
## Compare n alpha coefficients
##
## Comparison between: a1 = 0.8033, a2 = 0.9332
## The coefficients are based on dependent groups
## 95% confidence intervals: CI1 = 0.7867 0.8187, CI2 = 0.9286 0.9377
## Group sizes: n1 = 1742, n2 = 1742
## Item count: i1 = 3, i2 = 20
## Null hypothesis: a1 and a2 are equal
## Alternative hypothesis: a1 and a2 are not equal
## Level of significance: 0.05
##
## chisq = 571.6193, df = 1, p-value = 0.0000
## Null hypothesis rejected
PHQ-9
alpha_PHQ = dat %>%
select(starts_with("PHQ")) %>%
select(-c(PHQ_TOT, PHQ9_10)) %>%
psych::alpha()
alpha_PHQ
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.85 0.39 5.8 0.0052 2.1 0.6 0.38
##
## lower alpha upper 95% confidence boundaries
## 0.84 0.85 0.86
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## PHQ9_1 0.83 0.83 0.82 0.38 5.0 0.0059 0.0065 0.37
## PHQ9_2 0.82 0.83 0.82 0.37 4.7 0.0061 0.0053 0.36
## PHQ9_3 0.83 0.84 0.83 0.39 5.1 0.0059 0.0063 0.38
## PHQ9_4 0.83 0.83 0.82 0.38 5.0 0.0060 0.0061 0.37
## PHQ9_5 0.83 0.84 0.83 0.39 5.1 0.0059 0.0073 0.37
## PHQ9_6 0.83 0.83 0.83 0.38 5.0 0.0059 0.0072 0.37
## PHQ9_7 0.84 0.84 0.83 0.40 5.2 0.0057 0.0078 0.39
## PHQ9_8 0.84 0.84 0.84 0.40 5.4 0.0056 0.0077 0.41
## PHQ9_9 0.85 0.85 0.84 0.41 5.6 0.0055 0.0053 0.40
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## PHQ9_1 1742 0.70 0.71 0.66 0.60 1.9 0.83
## PHQ9_2 1742 0.75 0.76 0.74 0.67 2.1 0.85
## PHQ9_3 1742 0.70 0.68 0.63 0.59 2.4 1.03
## PHQ9_4 1742 0.72 0.71 0.67 0.63 2.9 0.90
## PHQ9_5 1742 0.71 0.68 0.63 0.59 2.2 1.05
## PHQ9_6 1742 0.71 0.71 0.66 0.60 2.3 1.01
## PHQ9_7 1742 0.67 0.65 0.59 0.54 2.1 0.99
## PHQ9_8 1742 0.59 0.61 0.53 0.49 1.4 0.73
## PHQ9_9 1742 0.53 0.58 0.50 0.45 1.2 0.55
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## PHQ9_1 0.34 0.49 0.11 0.06 0
## PHQ9_2 0.24 0.52 0.16 0.08 0
## PHQ9_3 0.21 0.37 0.22 0.20 0
## PHQ9_4 0.05 0.34 0.31 0.30 0
## PHQ9_5 0.31 0.32 0.21 0.16 0
## PHQ9_6 0.25 0.39 0.20 0.16 0
## PHQ9_7 0.31 0.37 0.19 0.12 0
## PHQ9_8 0.73 0.18 0.06 0.03 0
## PHQ9_9 0.83 0.13 0.03 0.01 0
GAD-7
alpha_GAD = dat %>%
select(starts_with("GAD")) %>%
select(-c(GAD_TOT))%>%
psych::alpha()
alpha_GAD
##
## Reliability analysis
## Call: psych::alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.9 0.9 0.9 0.56 9 0.0035 2.2 0.76 0.53
##
## lower alpha upper 95% confidence boundaries
## 0.9 0.9 0.91
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## GAD7_1 0.88 0.88 0.87 0.55 7.4 0.0043 0.0125 0.53
## GAD7_2 0.87 0.87 0.86 0.53 6.8 0.0046 0.0092 0.50
## GAD7_3 0.87 0.87 0.86 0.53 6.9 0.0046 0.0098 0.50
## GAD7_4 0.88 0.88 0.87 0.55 7.2 0.0043 0.0156 0.50
## GAD7_5 0.90 0.90 0.90 0.61 9.2 0.0036 0.0130 0.57
## GAD7_6 0.90 0.89 0.89 0.59 8.5 0.0037 0.0175 0.57
## GAD7_7 0.90 0.89 0.89 0.59 8.5 0.0037 0.0173 0.53
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## GAD7_1 1742 0.83 0.82 0.80 0.76 2.7 0.96
## GAD7_2 1742 0.88 0.87 0.88 0.82 2.3 1.04
## GAD7_3 1742 0.88 0.87 0.87 0.82 2.3 1.04
## GAD7_4 1742 0.84 0.83 0.81 0.77 2.4 0.96
## GAD7_5 1742 0.66 0.68 0.59 0.56 1.5 0.81
## GAD7_6 1742 0.73 0.73 0.65 0.62 2.3 0.95
## GAD7_7 1742 0.73 0.73 0.66 0.63 1.8 0.96
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## GAD7_1 0.09 0.40 0.26 0.25 0
## GAD7_2 0.25 0.34 0.23 0.17 0
## GAD7_3 0.26 0.34 0.22 0.17 0
## GAD7_4 0.16 0.42 0.24 0.18 0
## GAD7_5 0.63 0.24 0.09 0.04 0
## GAD7_6 0.20 0.44 0.21 0.14 0
## GAD7_7 0.50 0.29 0.13 0.08 0
dat_single = subset(dat, Marital_status == "Single")
dat_couple = subset(dat, Marital_status != "Single")
metaConvert::es_from_means_sd(
mean_exp = mean(dat_single$UCLA3_TOT),
mean_sd_exp = sd(dat_single$UCLA3_TOT),
n_exp = nrow(dat_single),
mean_nexp = mean(dat_couple$UCLA3_TOT),
mean_sd_nexp = sd(dat_couple$UCLA3_TOT),
n_nexp = nrow(dat_couple)
)
## d d_se d_ci_lo d_ci_up g g_se g_ci_lo
## 1 0.16039 0.04883469 0.06460913 0.2561708 0.1603208 0.04881363 0.06458127
## g_ci_up r r_se r_ci_lo r_ci_up z z_se
## 1 0.2560604 0.09951305 0.03010088 0.0402962 0.1581794 0.07882024 0.02396628
## z_ci_lo z_ci_up logor logor_se logor_ci_lo logor_ci_up info_used
## 1 0.0318472 0.1257933 0.2909152 0.08857632 0.1173088 0.4645217 means_sd
## md md_se md_ci_lo md_ci_up
## 1 0.2811524 0.08535987 0.1137337 0.4485711
metaConvert::es_from_means_sd(
mean_exp = mean(dat_single$UCLA20_TOT),
mean_sd_exp = sd(dat_single$UCLA20_TOT),
n_exp = nrow(dat_single),
mean_nexp = mean(dat_couple$UCLA20_TOT),
mean_sd_nexp = sd(dat_couple$UCLA20_TOT),
n_nexp = nrow(dat_couple)
)
## d d_se d_ci_lo d_ci_up g g_se g_ci_lo
## 1 0.1303699 0.04880903 0.03463932 0.2261004 0.1303137 0.04878799 0.03462439
## g_ci_up r r_se r_ci_lo r_ci_up z z_se
## 1 0.2260029 0.08097218 0.03018083 0.0216494 0.1398458 0.06407796 0.02396628
## z_ci_lo z_ci_up logor logor_se logor_ci_lo logor_ci_up info_used
## 1 0.01710491 0.111051 0.2364648 0.08852979 0.06294957 0.40998 means_sd
## md md_se md_ci_lo md_ci_up
## 1 1.47164 0.5437662 0.4051359 2.538144
dat_long = dat %>%
pivot_longer(cols = c(UCLA3_TOT, UCLA20_TOT),
names_to = "scale",
values_to = "values")
grouped_ggbetweenstats(
bf.message = FALSE,
data = dat_long,
x = Marital_status,
y = values,
grouping.var = scale,
pairwise.comparisons = FALSE,
results.subtitle = FALSE
)
lm_phq = lm(PHQ_TOT ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_phq <- calc.yhat(lm_phq)
boot.out.phq <- boot(dat,
boot.yhat, n_sim,
lmOut=lm_phq,
regrout0=regrOut_phq)
result_phq <- booteval.yhat(regrOut_phq, boot.out.phq,
bty="perc")
result_phq$combCIpm
## b Beta r
## UCLA3_TOT 1.099(0.915,1.282) 0.356(0.298,0.414) 0.524(0.486,0.561)
## UCLA20_TOT 0.119(0.091,0.147) 0.248(0.189,0.307) 0.489(0.449,0.527)
## rs rs2 Unique
## UCLA3_TOT 0.944(0.914,0.968) 0.891(0.836,0.937) 0.069(0.048,0.094)
## UCLA20_TOT 0.881(0.838,0.917) 0.776(0.702,0.841) 0.034(0.019,0.051)
## Common CD:0 CD:1
## UCLA3_TOT 0.205(0.176,0.236) 0.275(0.236,0.315) 0.069(0.048,0.094)
## UCLA20_TOT 0.205(0.176,0.236) 0.239(0.201,0.278) 0.034(0.019,0.051)
## GenDom Pratt RLW
## UCLA3_TOT 0.172(0.145,0.201) 0.187(0.148,0.229) 0.172(0.145,0.201)
## UCLA20_TOT 0.136(0.112,0.162) 0.121(0.087,0.159) 0.136(0.112,0.162)
result_phq$combCIpmDiff
## b Beta
## UCLA3_TOT-UCLA20_TOT 0.980(0.778,1.184)* 0.108(-0.002,0.220)
## r rs
## UCLA3_TOT-UCLA20_TOT 0.035(-0.001,0.072) 0.063(-0.001,0.128)
## rs2 Unique
## UCLA3_TOT-UCLA20_TOT 0.115(-0.002,0.232) 0.035(-0.001,0.073)
## Common CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.036(-0.001,0.073)
## CD:1 GenDom
## UCLA3_TOT-UCLA20_TOT 0.035(-0.001,0.073) 0.036(-0.001,0.073)
## Pratt RLW
## UCLA3_TOT-UCLA20_TOT 0.066(-0.001,0.133) 0.036(-0.001,0.073)
## var_tot var_com var_uniq
## [1,] "UCLA-3" "27.4576" "74.6605675659926" "25.1296544490414"
## [2,] "UCLA-20" "23.9121" "85.7306551913048" "14.2187428122164"
resUCLA3_UCLA20 = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
resPHQ_UCLA3 = cor.test(~UCLA3_TOT + PHQ_TOT, dat)
resPHQ_UCLA20 = cor.test(~UCLA20_TOT + PHQ_TOT, dat)
PHQ_UCLA3 = CTT::disattenuated.cor(resPHQ_UCLA3$estimate,
c(alpha_UCLA3$total$raw_alpha,
alpha_PHQ$total$raw_alpha))
PHQ_UCLA20 = CTT::disattenuated.cor(resPHQ_UCLA20$estimate,
c(alpha_UCLA20$total$raw_alpha,
alpha_PHQ$total$raw_alpha))
UCLA3_UCLA20 = CTT::disattenuated.cor(resUCLA3_UCLA20$estimate,
c(alpha_UCLA3$total$raw_alpha,
alpha_UCLA20$total$raw_alpha))
PHQ_UCLA3; PHQ_UCLA20
## cor
## [1,] 0.6343675
## cor
## [1,] 0.5502963
cocor::cocor.dep.groups.overlap(
r.jk = as.numeric(PHQ_UCLA3),
r.jh = as.numeric(PHQ_UCLA20),
r.kh = as.numeric(UCLA3_UCLA20),
n = nrow(dat),
var.labels=c("Anxiety",
"UCLA3",
"UCLA20"))
##
## Results of a comparison of two overlapping correlations based on dependent groups
##
## Comparison between r.jk (Anxiety, UCLA3) = 0.6344 and r.jh (Anxiety, UCLA20) = 0.5503
## Difference: r.jk - r.jh = 0.0841
## Related correlation: r.kh = 0.7813
## Data: j = Anxiety, k = UCLA3, h = UCLA20
## Group size: n = 1742
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
##
## pearson1898: Pearson and Filon's z (1898)
## z = 6.7234, p-value = 0.0000
## Null hypothesis rejected
##
## hotelling1940: Hotelling's t (1940)
## t = 6.9019, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## williams1959: Williams' t (1959)
## t = 6.8745, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## olkin1967: Olkin's z (1967)
## z = 6.7234, p-value = 0.0000
## Null hypothesis rejected
##
## dunn1969: Dunn and Clark's z (1969)
## z = 6.8242, p-value = 0.0000
## Null hypothesis rejected
##
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
## t = 6.9019, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
## z = 6.8025, p-value = 0.0000
## Null hypothesis rejected
##
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
## z = 6.7949, p-value = 0.0000
## Null hypothesis rejected
## 95% confidence interval for r.jk - r.jh: 0.0924 0.1673
## Null hypothesis rejected (Interval does not include 0)
##
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
## z = 6.7942, p-value = 0.0000
## Null hypothesis rejected
##
## zou2007: Zou's (2007) confidence interval
## 95% confidence interval for r.jk - r.jh: 0.0598 0.1089
## Null hypothesis rejected (Interval does not include 0)
lm_gad = lm(GAD_TOT ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_gad <- calc.yhat(lm_gad)
boot.out_gad <- boot(dat,
boot.yhat, n_sim,
lmOut=lm_gad,
regrout0=regrOut_gad)
result_gad <- booteval.yhat(regrOut_gad, boot.out_gad, bty="perc")
result_gad$combCIpm
## b Beta r
## UCLA3_TOT 0.906(0.729,1.086) 0.298(0.240,0.356) 0.434(0.392,0.473)
## UCLA20_TOT 0.095(0.067,0.122) 0.200(0.142,0.259) 0.402(0.360,0.442)
## rs rs2 Unique
## UCLA3_TOT 0.947(0.912,0.973) 0.896(0.831,0.946) 0.048(0.031,0.069)
## UCLA20_TOT 0.877(0.825,0.920) 0.769(0.681,0.846) 0.022(0.011,0.036)
## Common CD:0 CD:1
## UCLA3_TOT 0.140(0.114,0.166) 0.188(0.154,0.223) 0.048(0.031,0.069)
## UCLA20_TOT 0.140(0.114,0.166) 0.161(0.130,0.195) 0.022(0.011,0.036)
## GenDom Pratt RLW
## UCLA3_TOT 0.118(0.094,0.144) 0.129(0.096,0.166) 0.118(0.094,0.144)
## UCLA20_TOT 0.092(0.072,0.114) 0.080(0.053,0.112) 0.092(0.072,0.114)
result_gad$combCIpmDiff
## b Beta
## UCLA3_TOT-UCLA20_TOT 0.811(0.615,1.012)* 0.098(-0.010,0.206)
## r rs
## UCLA3_TOT-UCLA20_TOT 0.032(-0.003,0.067) 0.070(-0.007,0.146)
## rs2 Unique
## UCLA3_TOT-UCLA20_TOT 0.127(-0.014,0.263) 0.026(-0.003,0.057)
## Common CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.027(-0.003,0.057)
## CD:1 GenDom
## UCLA3_TOT-UCLA20_TOT 0.026(-0.003,0.057) 0.026(-0.003,0.057)
## Pratt RLW
## UCLA3_TOT-UCLA20_TOT 0.049(-0.005,0.103) 0.026(-0.003,0.057)
resUCLA3_UCLA20 = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
resGAD_UCLA3 = cor.test(~UCLA3_TOT + GAD_TOT, dat)
resGAD_UCLA20 = cor.test(~UCLA20_TOT + GAD_TOT, dat)
GAD_UCLA3 = CTT::disattenuated.cor(resGAD_UCLA3$estimate,
c(alpha_UCLA3$total$raw_alpha,
alpha_GAD$total$raw_alpha))
GAD_UCLA20 = CTT::disattenuated.cor(resGAD_UCLA20$estimate,
c(alpha_UCLA20$total$raw_alpha,
alpha_GAD$total$raw_alpha))
UCLA3_UCLA20 = CTT::disattenuated.cor(resUCLA3_UCLA20$estimate,
c(alpha_UCLA3$total$raw_alpha,
alpha_UCLA20$total$raw_alpha))
GAD_UCLA3; GAD_UCLA20
## cor
## [1,] 0.5092153
## cor
## [1,] 0.4387058
cocor::cocor.dep.groups.overlap(
r.jk = as.numeric(GAD_UCLA3),
r.jh = as.numeric(GAD_UCLA20),
r.kh = as.numeric(UCLA3_UCLA20),
n = nrow(dat),
var.labels=c("Anxiety",
"UCLA3",
"UCLA20"))
##
## Results of a comparison of two overlapping correlations based on dependent groups
##
## Comparison between r.jk (Anxiety, UCLA3) = 0.5092 and r.jh (Anxiety, UCLA20) = 0.4387
## Difference: r.jk - r.jh = 0.0705
## Related correlation: r.kh = 0.7813
## Data: j = Anxiety, k = UCLA3, h = UCLA20
## Group size: n = 1742
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
##
## pearson1898: Pearson and Filon's z (1898)
## z = 5.1215, p-value = 0.0000
## Null hypothesis rejected
##
## hotelling1940: Hotelling's t (1940)
## t = 5.1808, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## williams1959: Williams' t (1959)
## t = 5.1702, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## olkin1967: Olkin's z (1967)
## z = 5.1215, p-value = 0.0000
## Null hypothesis rejected
##
## dunn1969: Dunn and Clark's z (1969)
## z = 5.1449, p-value = 0.0000
## Null hypothesis rejected
##
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
## t = 5.1808, df = 1739, p-value = 0.0000
## Null hypothesis rejected
##
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
## z = 5.1369, p-value = 0.0000
## Null hypothesis rejected
##
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
## z = 5.1334, p-value = 0.0000
## Null hypothesis rejected
## 95% confidence interval for r.jk - r.jh: 0.0563 0.1258
## Null hypothesis rejected (Interval does not include 0)
##
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
## z = 5.1348, p-value = 0.0000
## Null hypothesis rejected
##
## zou2007: Zou's (2007) confidence interval
## 95% confidence interval for r.jk - r.jh: 0.0436 0.0977
## Null hypothesis rejected (Interval does not include 0)
cols = c("Unique", "Common", "CD.0")
res_plot1 = data.frame(regrOut_phq$PredictorMetrics[-nrow(regrOut_phq$PredictorMetrics),])[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot1.lo = data.frame(result_phq$lowerCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot1.lo) = paste0(names(res_plot1.lo), "_ci_lo")
res_plot1.up = data.frame(result_phq$upperCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance")%>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot1.up) = paste0(names(res_plot1.up), "_ci_up")
res_plot1 = cbind(res_plot1, res_plot1.lo, res_plot1.up)
res_plot1$variance[res_plot1$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot2 = data.frame(regrOut_gad$PredictorMetrics[-nrow(regrOut_gad$PredictorMetrics),])[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot2.lo = data.frame(result_gad$lowerCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot2.lo) = paste0(names(res_plot2.lo), "_ci_lo")
res_plot2.up = data.frame(result_gad$upperCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance")%>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot2.up) = paste0(names(res_plot2.up), "_ci_up")
res_plot2 = cbind(res_plot2, res_plot2.lo, res_plot2.up)
res_plot2$variance[res_plot2$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot1$Measure = "PHQ-9"
res_plot2$Measure = "GAD-7"
res_plotA = rbind(res_plot1, res_plot2)
ggplot(res_plotA, aes(x=variance, y = values, color = Measure)) +
geom_point() +
facet_wrap(~scale) +
theme_bw() +
geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=.2)
lm_age = lm(Age_dim ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_age <- calc.yhat(lm_age)
boot.out.age <- boot(dat,
boot.yhat, n_sim,
lmOut=lm_age,
regrout0=regrOut_age)
result_age <- booteval.yhat(regrOut_age,
boot.out.age, bty="perc")
result_age$combCIpm
## b Beta r
## UCLA3_TOT 0.030(-0.006,0.066) 0.052(-0.011,0.114) 0.057(0.009,0.106)
## UCLA20_TOT 0.001(-0.005,0.006) 0.007(-0.053,0.071) 0.043(-0.006,0.091)
## rs rs2 Unique
## UCLA3_TOT 0.995(0.364,1.000) 0.991(0.171,1.000) 0.001(0.000,0.007)
## UCLA20_TOT 0.743(-0.184,0.998) 0.551(0.008,0.995) 0.000(0.000,0.003)
## Common CD:0 CD:1
## UCLA3_TOT 0.002(-0.001,0.007) 0.003(0.000,0.011) 0.001(0.000,0.007)
## UCLA20_TOT 0.002(-0.001,0.007) 0.002(0.000,0.008) 0.000(0.000,0.003)
## GenDom Pratt RLW
## UCLA3_TOT 0.002(0.000,0.009) 0.003(0.000,0.011) 0.002(0.000,0.009)
## UCLA20_TOT 0.001(0.000,0.005) 0.000(-0.001,0.006) 0.001(0.000,0.005)
result_age$combCIpmDiff
## b Beta
## UCLA3_TOT-UCLA20_TOT 0.029(-0.011,0.069) 0.045(-0.070,0.155)
## r rs
## UCLA3_TOT-UCLA20_TOT 0.014(-0.022,0.051) 0.252(-0.446,0.763)
## rs2 Unique
## UCLA3_TOT-UCLA20_TOT 0.440(-0.649,0.741) 0.001(-0.002,0.006)
## Common CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.001(-0.002,0.006)
## CD:1 GenDom
## UCLA3_TOT-UCLA20_TOT 0.001(-0.002,0.006) 0.001(-0.002,0.006)
## Pratt RLW
## UCLA3_TOT-UCLA20_TOT 0.003(-0.004,0.012) 0.001(-0.002,0.006)
lm_fdiff = lm(Financ_diff ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_fdiff <- calc.yhat(lm_fdiff)
boot.out.fdiff <- boot(dat,
boot.yhat, n_sim,
lmOut=lm_fdiff,
regrout0=regrOut_fdiff)
result_fdiff <- booteval.yhat(regrOut_fdiff, boot.out.fdiff, bty="perc")
result_fdiff$combCIpm
## b Beta r
## UCLA3_TOT 0.034(0.005,0.062) 0.079(0.013,0.146) 0.197(0.149,0.244)
## UCLA20_TOT 0.012(0.007,0.016) 0.175(0.109,0.240) 0.228(0.180,0.274)
## rs rs2 Unique
## UCLA3_TOT 0.836(0.702,0.933) 0.700(0.493,0.871) 0.003(0.000,0.012)
## UCLA20_TOT 0.969(0.895,0.999) 0.939(0.801,0.998) 0.017(0.006,0.031)
## Common CD:0 CD:1
## UCLA3_TOT 0.035(0.021,0.051) 0.039(0.022,0.059) 0.003(0.000,0.012)
## UCLA20_TOT 0.035(0.021,0.051) 0.052(0.033,0.075) 0.017(0.006,0.031)
## GenDom Pratt RLW
## UCLA3_TOT 0.021(0.011,0.034) 0.015(0.002,0.034) 0.021(0.011,0.034)
## UCLA20_TOT 0.034(0.020,0.052) 0.040(0.021,0.064) 0.034(0.020,0.052)
result_fdiff$combCIpmDiff
## b Beta
## UCLA3_TOT-UCLA20_TOT 0.022(-0.009,0.054) -0.096(-0.218,0.025)
## r rs
## UCLA3_TOT-UCLA20_TOT -0.031(-0.071,0.009) -0.133(-0.296,0.037)
## rs2 Unique
## UCLA3_TOT-UCLA20_TOT -0.239(-0.504,0.068) -0.014(-0.030,0.004)
## Common CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* -0.013(-0.030,0.004)
## CD:1 GenDom
## UCLA3_TOT-UCLA20_TOT -0.014(-0.030,0.004) -0.013(-0.030,0.004)
## Pratt RLW
## UCLA3_TOT-UCLA20_TOT -0.025(-0.056,0.007) -0.013(-0.030,0.004)
res_plot3 = data.frame(regrOut_age$PredictorMetrics[-nrow(regrOut_age$PredictorMetrics),])[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot3.lo = data.frame(result_age$lowerCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot3.lo) = paste0(names(res_plot3.lo), "_ci_lo")
res_plot3.up = data.frame(result_age$upperCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance")%>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot3.up) = paste0(names(res_plot3.up), "_ci_up")
res_plot3 = cbind(res_plot3, res_plot3.lo, res_plot3.up)
res_plot3$variance[res_plot3$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot4 = data.frame(regrOut_fdiff$PredictorMetrics[-nrow(regrOut_fdiff$PredictorMetrics),])[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot4.lo = data.frame(result_fdiff$lowerCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance") %>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot4.lo) = paste0(names(res_plot4.lo), "_ci_lo")
res_plot4.up = data.frame(result_fdiff$upperCIpm)[,cols] %>%
pivot_longer(cols=everything(),
values_to="values",
names_to="variance")%>%
mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot4.up) = paste0(names(res_plot4.up), "_ci_up")
res_plot4 = cbind(res_plot4, res_plot4.lo, res_plot4.up)
res_plot4$variance[res_plot4$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot3$Measure = "Age"
res_plot4$Measure = "Financial"
res_plotB = rbind(res_plot3, res_plot4)
ggplot(res_plotB, aes(x=variance, y = values, color = Measure)) +
geom_point() +
facet_wrap(~scale) +
theme_bw() +
geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=.2)
predictors = c("UCLA3_TOT", "UCLA20_TOT")
outcomes = c("PHQ_TOT","GAD_TOT",
"Age_dim", "Financ_diff")
res = NULL
for (pred in predictors) {
for (out in outcomes) {
res_cor = NA
res_cor = cbind(pred, out,
broom::tidy(
cor.test(~ dat[, pred] + dat[, out])))
res = rbind(res, res_cor)
}
}
res$se = (res$conf.high - res$conf.low) / qnorm(.975)
# res$name = paste0(res$pred, "_", res$out)
res$group = ifelse(res$out %in% c("PHQ_TOT", "GAD_TOT"), "Convergent validity", "Discriminant validity")
resplot = res
# resplot = dplyr::bind_rows(res, rb)
# resplot = data.frame(resplot[,intersect(names(res), names(rb))])
resplot$name = resplot$out
forestplot(
resplot,
colour = pred,
estimate = estimate,
se = se,
xlab = "Association of the UCLA-3/UCLA-20\n with various variables") +
ggforce::facet_col(
facets = ~group,
scales = "free_y",
space = "free"
)
res_plotC = rbind(res_plot1, res_plot2, res_plot3, res_plot4)
ggplot(res_plotC, aes(x=variance, y = values,
color = Measure)) +
geom_point() +
facet_wrap(~scale) +
theme_bw() +
geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=0) +
scale_y_continuous(labels = scales::percent) +
labs(x = "", y = "Percentage of variance explained")
dat$ID = 1:nrow(dat)
dat$UCLA20_DIAG_38 = ifelse(dat$UCLA20_TOT > 38,
1, 0)
dat$UCLA20_DIAG_38txt = ifelse(dat$UCLA20_TOT > 38,
"yes", "no")
dat$UCLA20_DIAG_42 = ifelse(dat$UCLA20_TOT > 42,
1, 0)
dat$UCLA20_DIAG_42txt = ifelse(dat$UCLA20_TOT > 42,
"yes", "no")
dat$UCLA20_DIAG_52 = ifelse(dat$UCLA20_TOT > 52,
1, 0)
dat$UCLA20_DIAG_52txt = ifelse(dat$UCLA20_TOT > 52,
"yes", "no")
dat$UCLA3_DIAG7 = ifelse(dat$UCLA3_TOT > 6, 1, 0)
dat$UCLA3_DIAG6 = ifelse(dat$UCLA3_TOT > 5, 1, 0)
dat$UCLA3_DIAG_SEV = apply(dat[, c("UCLA1", "UCLA2", "UCLA3")], 1, function(x) ifelse(
length(x[x==3]) > 0, 1, 0))
suffixes <- c("42", "38", "52")
# Loop through the suffixes and create the objects
for (suffix in suffixes) {
assign(paste0("SENS_6_", suffix),
round(with(subset(dat,
dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"),
sum(UCLA3_DIAG6)/length(UCLA3_DIAG6)), 4)*100)
assign(paste0("SPE_6_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG6)/length(UCLA3_DIAG6)), 4)*100)
assign(paste0("SENS_7_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"), sum(UCLA3_DIAG7)/length(UCLA3_DIAG7)), 4)*100)
assign(paste0("SPE_7_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG7)/length(UCLA3_DIAG7)), 4)*100)
assign(paste0("SENS_SEV_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"), sum(UCLA3_DIAG_SEV)/length(UCLA3_DIAG_SEV)), 4)*100)
assign(paste0("SPE_SEV_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG_SEV)/length(UCLA3_DIAG_SEV)), 4)*100)
assign(paste0("dat", suffix),
rbind(mget(paste0("SENS_6_", suffix))[[1]],
mget(paste0("SPE_6_", suffix))[[1]],
mget(paste0("SENS_7_", suffix))[[1]],
mget(paste0("SPE_7_", suffix))[[1]],
mget(paste0("SENS_SEV_", suffix))[[1]],
mget(paste0("SPE_SEV_", suffix))[[1]]))
}
dat_prev = data.frame(rbind(dat38, dat42, dat52))
dat_prev$type = rep(c("Sensitivity", "Specificity"), nrow(dat_prev)/2)
dat_prev$co_LS20 = rep(c("UCLA-LS-20 > 38", "UCLA-LS-20 > 42",
"UCLA-LS-20 > 52"), each=nrow(dat_prev)/3)
dat_prev$co_LS3 = rep(c("UCLA-LS-3 >= 6", "UCLA-LS-3 >= 7", ">=1 severe UCLA-LS-3"), each=2, times = 3)
names(dat_prev)[1] = "value"
dat_prev$value = dat_prev$value/100
dat_prev_p = dat_prev
dat_prev_p$value = NA
dat_prev_p$type = rep(c("Prevalence UCLA-LS-20",
"Prevalence UCLA-LS-3"), times = 9)
meanR = function(x) {
round(mean(x), digits=4) * 100
}
PRE_UCLA3_DIAG6 = meanR(dat$UCLA3_DIAG6)
PRE_UCLA3_DIAG7 = meanR(dat$UCLA3_DIAG7)
PRE_UCLA3_DIAG_SEV = meanR(dat$UCLA3_DIAG_SEV)
PRE_UCLA20_DIAG38 = meanR(dat$UCLA20_DIAG_38)
PRE_UCLA20_DIAG42 = meanR(dat$UCLA20_DIAG_42)
PRE_UCLA20_DIAG52 = meanR(dat$UCLA20_DIAG_52)
dat_prev_p$value = value = c(
PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG6,
PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG7,
PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG_SEV,
PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG6,
PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG7,
PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG_SEV,
PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG6,
PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG7,
PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG_SEV)
dat_prev_p$value = dat_prev_p$value/100
dat_P = dplyr::bind_rows(dat_prev, dat_prev_p)
ggplot(dat_P, aes(x = type, y = value, fill = type)) +
geom_bar(stat = "identity", position = position_dodge2(),
alpha = 0.5) +
facet_grid(co_LS3~co_LS20) +
scale_y_continuous(labels = scales::percent) +
coord_flip() +
scale_fill_manual(values = rep(c("#8C6661", "#C09791", "#839BBB", "#4B6990"))) +
theme_bw() +
xlab("") + ylab("") + theme(legend.position="none")
roc1 = roc(UCLA20_DIAG_42txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res1 = data.frame(cbind(roc1$thresholds, roc1$sensitivities, roc1$specificities))
names(res1) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res1
## UCLA3 threshold Sensitivity Specificity
## 1 -Inf 1.0000000 0.0000000
## 2 3.5 0.9837587 0.2723112
## 3 4.5 0.9489559 0.5125858
## 4 5.5 0.8375870 0.6727689
## 5 6.5 0.5754060 0.8893974
## 6 7.5 0.3573086 0.9633867
## 7 8.5 0.2064965 0.9893211
## 8 Inf 0.0000000 1.0000000
cp1 <- cutpointr(dat,
UCLA3_TOT,
UCLA20_DIAG_42txt,
direction = ">=",
pos_class = "yes",
method = maximize_metric,
metric = sum_sens_spec)
paste0("the optimal cut-off point is >= ", cp1$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 6"
cp1$AUC
## [1] 0.8413443
plot_metric(cp1)
dat$UCLA3_DIAG42_OPTI= ifelse(
dat$UCLA3_TOT >= cp1$optimal_cutpoint, 1, 0)
roc2 = roc(UCLA20_DIAG_38txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res2 = data.frame(cbind(roc2$thresholds, roc2$sensitivities, roc2$specificities))
names(res2) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res2
## UCLA3 threshold Sensitivity Specificity
## 1 -Inf 1.0000000 0.0000000
## 2 3.5 0.9672131 0.3038869
## 3 4.5 0.9032787 0.5609541
## 4 5.5 0.7721311 0.7181979
## 5 6.5 0.4803279 0.9116608
## 6 7.5 0.2803279 0.9726148
## 7 8.5 0.1524590 0.9911661
## 8 Inf 0.0000000 1.0000000
cp2 <- cutpointr(dat,
UCLA3_TOT,
UCLA20_DIAG_38txt,
direction = ">=",
pos_class = "yes",
method = maximize_metric,
metric = sum_sens_spec)
paste0("the optimal cut-off point is >= ", cp2$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 6"
cp2$AUC
## [1] 0.8200726
plot_metric(cp2)
dat$UCLA3_DIAG38_OPTI= ifelse(
dat$UCLA3_TOT >= cp2$optimal_cutpoint, 1, 0)
roc3 = roc(UCLA20_DIAG_52txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res3 = data.frame(cbind(roc3$thresholds, roc3$sensitivities, roc3$specificities))
names(res3) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res3
## UCLA3 threshold Sensitivity Specificity
## 1 -Inf 1.0000000 0.0000000
## 2 3.5 0.9945946 0.2331407
## 3 4.5 0.9783784 0.4431599
## 4 5.5 0.9351351 0.6037251
## 5 6.5 0.7405405 0.8355812
## 6 7.5 0.5567568 0.9364162
## 7 8.5 0.3567568 0.9762364
## 8 Inf 0.0000000 1.0000000
cp3 <- cutpointr(dat,
UCLA3_TOT,
UCLA20_DIAG_52txt,
direction = ">=",
pos_class = "yes",
method = maximize_metric,
metric = sum_sens_spec)
paste0("the optimal cut-off point is >= ", cp3$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 7"
cp3$AUC
## [1] 0.8754049
plot_metric(cp3)
Comparisons of prevalence for various cut-offs of UCLA-3 and the standard cut-off for UCLA-20
mcnemar.test(table(dat$UCLA20_DIAG_42,
dat$UCLA3_DIAG6))
##
## McNemar's Chi-squared test with continuity correction
##
## data: table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG6)
## McNemar's chi-squared = 256.84, df = 1, p-value < 2.2e-16
mcnemar.test(table(dat$UCLA20_DIAG_42,
dat$UCLA3_DIAG7))
##
## McNemar's Chi-squared test with continuity correction
##
## data: table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG7)
## McNemar's chi-squared = 4.1738, df = 1, p-value = 0.04105
mcnemar.test(table(dat$UCLA20_DIAG_42,
dat$UCLA3_DIAG_SEV))
##
## McNemar's Chi-squared test with continuity correction
##
## data: table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG_SEV)
## McNemar's chi-squared = 3.4745, df = 1, p-value = 0.06232