S1. Demographic

paste0("There is a total of ", nrow(dat_raw), " eligible participants, with ", sum(dat_raw$CCA), " (", round(sum(dat_raw$CCA)/nrow(dat_raw)*100), "%) participants with no missing data at our key variables")
## [1] "There is a total of 2035 eligible participants, with 1742 (86%) participants with no missing data at our key variables"
VIM::aggr(dat_raw %>%
            dplyr::select(
              UCLA3_TOT, UCLA20_TOT, PHQ_TOT, GAD_TOT,
              Age_dim, Gender, Financ_diff, Marital_status),
          numbers = TRUE, prop = c(TRUE, FALSE),
          cex.axis = 0.7)

tab1 = table1::table1(~ Age_cat + 
                 Marital_status + Year_stud_cat+
                 Financ_diff_cat | Gender,
                 data = dat)
t1flex(tab1) %>% 
  save_as_docx(path="tab1_UCLA3.docx")  
tab1
Man
(N=47)
Women
(N=1695)
Overall
(N=1742)
Age_cat
18-19 4 (8.5%) 260 (15.3%) 264 (15.2%)
20-21 19 (40.4%) 595 (35.1%) 614 (35.2%)
22-23 20 (42.6%) 636 (37.5%) 656 (37.7%)
24-25 4 (8.5%) 97 (5.7%) 101 (5.8%)
>25 0 (0%) 107 (6.3%) 107 (6.1%)
Marital_status
Couple 15 (31.9%) 695 (41.0%) 710 (40.8%)
Single 32 (68.1%) 1000 (59.0%) 1032 (59.2%)
Year_stud_cat
2nd year 8 (17.0%) 377 (22.2%) 385 (22.1%)
3rd year 8 (17.0%) 385 (22.7%) 393 (22.6%)
4th year 16 (34.0%) 469 (27.7%) 485 (27.8%)
5th year 13 (27.7%) 458 (27.0%) 471 (27.0%)
Other 2 (4.3%) 6 (0.4%) 8 (0.5%)
Financ_diff_cat
Little importance 14 (29.8%) 850 (50.1%) 864 (49.6%)
Moderately important 17 (36.2%) 345 (20.4%) 362 (20.8%)
No difficulties 15 (31.9%) 464 (27.4%) 479 (27.5%)
Very important 1 (2.1%) 36 (2.1%) 37 (2.1%)

S2. Preliminary analyses

Distribution of the variables

dat_pairs = dat[,c(
  "UCLA3_TOT", "UCLA20_TOT",
  "PHQ_TOT","GAD_TOT", "Marital_status",
  "Financ_diff_cat", "Age_cat")]
GGally::ggpairs(dat_pairs,
    lower = list(
      continuous=GGally::wrap("points",
          position=position_jitter(height=1, width=1),
          alpha = 0.1)))

Heatmap

heat = data.frame(apply(dat[,c(
               "PHQ_TOT","GAD_TOT",
               "Age_dim", "Financ_diff",
               "Marital_status", 
               "UCLA3_TOT", "UCLA20_TOT")], 2, as.numeric))
## Warning in apply(dat[, c("PHQ_TOT", "GAD_TOT", "Age_dim", "Financ_diff", : NAs
## introduced by coercion
ggstatsplot::ggcorrmat(heat)

Association UCLA-20 with UCLA-3

res = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
res
## 
##  Pearson's product-moment correlation
## 
## data:  UCLA3_TOT and UCLA20_TOT
## t = 38.179, df = 1740, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6487738 0.6999377
## sample estimates:
##      cor 
## 0.675167
alpha_UCLA3 = dat[, c("UCLA1",  "UCLA2",    "UCLA3")] %>%
  psych::alpha()
alpha_UCLA20 = dat %>%
  select(starts_with("UCLA20")) %>%
  select(-UCLA20_TOT) %>%
  psych::alpha()

CTT::disattenuated.cor(res$estimate,
      c(alpha_UCLA3$total$raw_alpha,
        alpha_UCLA20$total$raw_alpha))
##            cor
## [1,] 0.7813001
library(ggExtra)
 
ggscatterstats(dat, UCLA3_TOT, UCLA20_TOT,
             bf.message = FALSE,
             ylab = "UCLA-20 scale",
             xlab = "UCLA-3 scale")
## Registered S3 method overwritten by 'ggside':
##   method from  
##   +.gg   GGally
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

S3. Internal reliability

UCLA-3

dat[, c("UCLA1",    "UCLA2",    "UCLA3")] %>%
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##        0.8       0.8    0.74      0.58 4.1 0.0082  1.7 0.59     0.56
## 
##  lower alpha upper     95% confidence boundaries
## 0.79 0.8 0.82 
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## UCLA1      0.81      0.81    0.68      0.68 4.2   0.0093    NA  0.68
## UCLA2      0.72      0.72    0.56      0.56 2.5   0.0136    NA  0.56
## UCLA3      0.66      0.66    0.50      0.50 2.0   0.0161    NA  0.50
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## UCLA1 1742  0.81  0.81  0.64   0.58  1.9 0.69
## UCLA2 1742  0.85  0.85  0.75   0.67  1.6 0.67
## UCLA3 1742  0.88  0.88  0.80   0.71  1.7 0.71
## 
## Non missing response frequency for each item
##          1    2    3 miss
## UCLA1 0.30 0.51 0.18    0
## UCLA2 0.47 0.42 0.11    0
## UCLA3 0.43 0.42 0.15    0
om3 = dat[, c("UCLA1",  "UCLA2",    "UCLA3")] %>%
  psych::omega(nfactors=1)
## Loading required namespace: GPArotation
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
om3$omega.tot
## [1] 0.8095527

UCLA-20

dat %>%
  select(starts_with("UCLA20")) %>%
  select(-UCLA20_TOT) %>%
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.93      0.93    0.94      0.41  14 0.0024  1.8 0.57     0.41
## 
##  lower alpha upper     95% confidence boundaries
## 0.92 0.93 0.93 
## 
##  Reliability if an item is dropped:
##           raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## UCLA20_1       0.93      0.93    0.94      0.41  13   0.0025 0.014  0.42
## UCLA20_2       0.93      0.93    0.94      0.41  13   0.0025 0.013  0.41
## UCLA20_3       0.93      0.93    0.94      0.42  13   0.0025 0.013  0.41
## UCLA20_4       0.93      0.93    0.94      0.42  14   0.0025 0.013  0.42
## UCLA20_5       0.93      0.93    0.94      0.41  13   0.0025 0.013  0.41
## UCLA20_6       0.92      0.93    0.94      0.41  13   0.0026 0.013  0.40
## UCLA20_7       0.93      0.93    0.94      0.43  14   0.0023 0.011  0.43
## UCLA20_8       0.93      0.93    0.94      0.42  14   0.0024 0.013  0.43
## UCLA20_9       0.93      0.93    0.94      0.43  14   0.0024 0.012  0.43
## UCLA20_10      0.93      0.93    0.94      0.41  13   0.0025 0.013  0.41
## UCLA20_11      0.92      0.93    0.94      0.41  13   0.0026 0.013  0.41
## UCLA20_12      0.93      0.93    0.94      0.41  13   0.0025 0.013  0.41
## UCLA20_13      0.92      0.93    0.94      0.41  13   0.0026 0.013  0.41
## UCLA20_14      0.92      0.93    0.94      0.40  13   0.0026 0.012  0.40
## UCLA20_15      0.93      0.93    0.94      0.41  13   0.0025 0.014  0.41
## UCLA20_16      0.92      0.93    0.94      0.41  13   0.0026 0.013  0.40
## UCLA20_17      0.92      0.93    0.94      0.40  13   0.0026 0.013  0.40
## UCLA20_18      0.92      0.93    0.94      0.40  13   0.0026 0.013  0.40
## UCLA20_19      0.93      0.93    0.94      0.41  13   0.0025 0.012  0.41
## UCLA20_20      0.93      0.93    0.94      0.41  13   0.0025 0.012  0.41
## 
##  Item statistics 
##              n raw.r std.r r.cor r.drop mean   sd
## UCLA20_1  1742  0.62  0.63  0.60   0.57  1.9 0.79
## UCLA20_2  1742  0.64  0.62  0.60   0.58  2.3 0.97
## UCLA20_3  1742  0.62  0.62  0.59   0.56  1.6 0.89
## UCLA20_4  1742  0.59  0.58  0.55   0.53  1.9 0.97
## UCLA20_5  1742  0.70  0.71  0.69   0.66  1.5 0.77
## UCLA20_6  1742  0.72  0.73  0.72   0.69  1.7 0.79
## UCLA20_7  1742  0.46  0.44  0.39   0.38  2.0 1.04
## UCLA20_8  1742  0.53  0.53  0.49   0.48  2.1 0.88
## UCLA20_9  1742  0.48  0.48  0.44   0.42  1.8 0.83
## UCLA20_10 1742  0.68  0.70  0.68   0.65  1.3 0.60
## UCLA20_11 1742  0.73  0.72  0.71   0.69  2.0 0.88
## UCLA20_12 1742  0.69  0.68  0.66   0.64  2.0 0.93
## UCLA20_13 1742  0.72  0.71  0.69   0.67  2.1 1.07
## UCLA20_14 1742  0.79  0.78  0.78   0.76  2.0 0.94
## UCLA20_15 1742  0.67  0.67  0.65   0.62  1.6 0.78
## UCLA20_16 1742  0.73  0.74  0.73   0.69  1.6 0.77
## UCLA20_17 1742  0.78  0.76  0.76   0.74  1.9 0.99
## UCLA20_18 1742  0.76  0.75  0.74   0.72  2.1 0.97
## UCLA20_19 1742  0.68  0.71  0.71   0.65  1.3 0.60
## UCLA20_20 1742  0.70  0.72  0.72   0.66  1.3 0.63
## 
## Non missing response frequency for each item
##              1    2    3    4 miss
## UCLA20_1  0.37 0.42 0.18 0.02    0
## UCLA20_2  0.25 0.32 0.32 0.12    0
## UCLA20_3  0.61 0.22 0.12 0.05    0
## UCLA20_4  0.47 0.26 0.20 0.07    0
## UCLA20_5  0.67 0.22 0.08 0.03    0
## UCLA20_6  0.51 0.33 0.14 0.02    0
## UCLA20_7  0.41 0.27 0.21 0.12    0
## UCLA20_8  0.29 0.40 0.25 0.06    0
## UCLA20_9  0.44 0.39 0.13 0.04    0
## UCLA20_10 0.72 0.23 0.04 0.01    0
## UCLA20_11 0.36 0.38 0.21 0.05    0
## UCLA20_12 0.37 0.35 0.20 0.08    0
## UCLA20_13 0.37 0.26 0.23 0.14    0
## UCLA20_14 0.38 0.33 0.22 0.07    0
## UCLA20_15 0.56 0.30 0.12 0.02    0
## UCLA20_16 0.56 0.31 0.11 0.02    0
## UCLA20_17 0.48 0.26 0.18 0.08    0
## UCLA20_18 0.35 0.32 0.24 0.09    0
## UCLA20_19 0.75 0.19 0.05 0.01    0
## UCLA20_20 0.73 0.20 0.06 0.01    0
om20 = dat %>%
  select(starts_with("UCLA20")) %>%
  select(-UCLA20_TOT) %>%
  psych::omega(nfactors=1) 
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
om20$omega.tot
## [1] 0.9345274

Comparison

cocron(list(dat[, c("UCLA1",    "UCLA2",    "UCLA3")], 
            dat %>%
  select(starts_with("UCLA20")) %>%
  select(-UCLA20_TOT)),
  dep=TRUE, 
  standardized=TRUE)
## 
##   Compare n alpha coefficients
## 
## Comparison between: a1 = 0.8033, a2 = 0.9332
## The coefficients are based on dependent groups
## 95% confidence intervals: CI1 = 0.7867 0.8187, CI2 = 0.9286 0.9377
## Group sizes: n1 = 1742, n2 = 1742
## Item count: i1 = 3, i2 = 20
## Null hypothesis: a1 and a2 are equal
## Alternative hypothesis: a1 and a2 are not equal
## Level of significance: 0.05
## 
## chisq = 571.6193, df = 1, p-value = 0.0000
## Null hypothesis rejected

PHQ-9

alpha_PHQ = dat %>%
  select(starts_with("PHQ")) %>%
  select(-c(PHQ_TOT, PHQ9_10)) %>%
  psych::alpha()
alpha_PHQ
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.85      0.85    0.85      0.39 5.8 0.0052  2.1 0.6     0.38
## 
##  lower alpha upper     95% confidence boundaries
## 0.84 0.85 0.86 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## PHQ9_1      0.83      0.83    0.82      0.38 5.0   0.0059 0.0065  0.37
## PHQ9_2      0.82      0.83    0.82      0.37 4.7   0.0061 0.0053  0.36
## PHQ9_3      0.83      0.84    0.83      0.39 5.1   0.0059 0.0063  0.38
## PHQ9_4      0.83      0.83    0.82      0.38 5.0   0.0060 0.0061  0.37
## PHQ9_5      0.83      0.84    0.83      0.39 5.1   0.0059 0.0073  0.37
## PHQ9_6      0.83      0.83    0.83      0.38 5.0   0.0059 0.0072  0.37
## PHQ9_7      0.84      0.84    0.83      0.40 5.2   0.0057 0.0078  0.39
## PHQ9_8      0.84      0.84    0.84      0.40 5.4   0.0056 0.0077  0.41
## PHQ9_9      0.85      0.85    0.84      0.41 5.6   0.0055 0.0053  0.40
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## PHQ9_1 1742  0.70  0.71  0.66   0.60  1.9 0.83
## PHQ9_2 1742  0.75  0.76  0.74   0.67  2.1 0.85
## PHQ9_3 1742  0.70  0.68  0.63   0.59  2.4 1.03
## PHQ9_4 1742  0.72  0.71  0.67   0.63  2.9 0.90
## PHQ9_5 1742  0.71  0.68  0.63   0.59  2.2 1.05
## PHQ9_6 1742  0.71  0.71  0.66   0.60  2.3 1.01
## PHQ9_7 1742  0.67  0.65  0.59   0.54  2.1 0.99
## PHQ9_8 1742  0.59  0.61  0.53   0.49  1.4 0.73
## PHQ9_9 1742  0.53  0.58  0.50   0.45  1.2 0.55
## 
## Non missing response frequency for each item
##           1    2    3    4 miss
## PHQ9_1 0.34 0.49 0.11 0.06    0
## PHQ9_2 0.24 0.52 0.16 0.08    0
## PHQ9_3 0.21 0.37 0.22 0.20    0
## PHQ9_4 0.05 0.34 0.31 0.30    0
## PHQ9_5 0.31 0.32 0.21 0.16    0
## PHQ9_6 0.25 0.39 0.20 0.16    0
## PHQ9_7 0.31 0.37 0.19 0.12    0
## PHQ9_8 0.73 0.18 0.06 0.03    0
## PHQ9_9 0.83 0.13 0.03 0.01    0

GAD-7

alpha_GAD = dat %>%
  select(starts_with("GAD")) %>%
  select(-c(GAD_TOT))%>%
  psych::alpha()
alpha_GAD
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##        0.9       0.9     0.9      0.56   9 0.0035  2.2 0.76     0.53
## 
##  lower alpha upper     95% confidence boundaries
## 0.9 0.9 0.91 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## GAD7_1      0.88      0.88    0.87      0.55 7.4   0.0043 0.0125  0.53
## GAD7_2      0.87      0.87    0.86      0.53 6.8   0.0046 0.0092  0.50
## GAD7_3      0.87      0.87    0.86      0.53 6.9   0.0046 0.0098  0.50
## GAD7_4      0.88      0.88    0.87      0.55 7.2   0.0043 0.0156  0.50
## GAD7_5      0.90      0.90    0.90      0.61 9.2   0.0036 0.0130  0.57
## GAD7_6      0.90      0.89    0.89      0.59 8.5   0.0037 0.0175  0.57
## GAD7_7      0.90      0.89    0.89      0.59 8.5   0.0037 0.0173  0.53
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## GAD7_1 1742  0.83  0.82  0.80   0.76  2.7 0.96
## GAD7_2 1742  0.88  0.87  0.88   0.82  2.3 1.04
## GAD7_3 1742  0.88  0.87  0.87   0.82  2.3 1.04
## GAD7_4 1742  0.84  0.83  0.81   0.77  2.4 0.96
## GAD7_5 1742  0.66  0.68  0.59   0.56  1.5 0.81
## GAD7_6 1742  0.73  0.73  0.65   0.62  2.3 0.95
## GAD7_7 1742  0.73  0.73  0.66   0.63  1.8 0.96
## 
## Non missing response frequency for each item
##           1    2    3    4 miss
## GAD7_1 0.09 0.40 0.26 0.25    0
## GAD7_2 0.25 0.34 0.23 0.17    0
## GAD7_3 0.26 0.34 0.22 0.17    0
## GAD7_4 0.16 0.42 0.24 0.18    0
## GAD7_5 0.63 0.24 0.09 0.04    0
## GAD7_6 0.20 0.44 0.21 0.14    0
## GAD7_7 0.50 0.29 0.13 0.08    0

S4. Known-groups validity

dat_single = subset(dat, Marital_status == "Single")
dat_couple = subset(dat, Marital_status != "Single")

metaConvert::es_from_means_sd(
  mean_exp = mean(dat_single$UCLA3_TOT), 
  mean_sd_exp = sd(dat_single$UCLA3_TOT),  
  n_exp = nrow(dat_single), 
  mean_nexp = mean(dat_couple$UCLA3_TOT), 
  mean_sd_nexp = sd(dat_couple$UCLA3_TOT),
  n_nexp = nrow(dat_couple)
)
##         d       d_se    d_ci_lo   d_ci_up         g       g_se    g_ci_lo
## 1 0.16039 0.04883469 0.06460913 0.2561708 0.1603208 0.04881363 0.06458127
##     g_ci_up          r       r_se   r_ci_lo   r_ci_up          z       z_se
## 1 0.2560604 0.09951305 0.03010088 0.0402962 0.1581794 0.07882024 0.02396628
##     z_ci_lo   z_ci_up     logor   logor_se logor_ci_lo logor_ci_up info_used
## 1 0.0318472 0.1257933 0.2909152 0.08857632   0.1173088   0.4645217  means_sd
##          md      md_se  md_ci_lo  md_ci_up
## 1 0.2811524 0.08535987 0.1137337 0.4485711
metaConvert::es_from_means_sd(
  mean_exp = mean(dat_single$UCLA20_TOT), 
  mean_sd_exp = sd(dat_single$UCLA20_TOT),  
  n_exp = nrow(dat_single), 
  mean_nexp = mean(dat_couple$UCLA20_TOT), 
  mean_sd_nexp = sd(dat_couple$UCLA20_TOT),
  n_nexp = nrow(dat_couple)
)
##           d       d_se    d_ci_lo   d_ci_up         g       g_se    g_ci_lo
## 1 0.1303699 0.04880903 0.03463932 0.2261004 0.1303137 0.04878799 0.03462439
##     g_ci_up          r       r_se   r_ci_lo   r_ci_up          z       z_se
## 1 0.2260029 0.08097218 0.03018083 0.0216494 0.1398458 0.06407796 0.02396628
##      z_ci_lo  z_ci_up     logor   logor_se logor_ci_lo logor_ci_up info_used
## 1 0.01710491 0.111051 0.2364648 0.08852979  0.06294957     0.40998  means_sd
##        md     md_se  md_ci_lo md_ci_up
## 1 1.47164 0.5437662 0.4051359 2.538144
dat_long = dat %>%
  pivot_longer(cols = c(UCLA3_TOT, UCLA20_TOT),
               names_to = "scale",
               values_to = "values")

grouped_ggbetweenstats(
  bf.message = FALSE,
  data = dat_long,
  x = Marital_status,
  y = values,
  grouping.var = scale,
  pairwise.comparisons = FALSE,
  results.subtitle = FALSE
)

S5. Convergent validity

PHQ-9

lm_phq = lm(PHQ_TOT ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_phq <- calc.yhat(lm_phq)

boot.out.phq <- boot(dat,
                 boot.yhat, n_sim,
                 lmOut=lm_phq,
                 regrout0=regrOut_phq)
result_phq <- booteval.yhat(regrOut_phq, boot.out.phq,
                            bty="perc")

result_phq$combCIpm
##                             b               Beta                  r
## UCLA3_TOT  1.099(0.915,1.282) 0.356(0.298,0.414) 0.524(0.486,0.561)
## UCLA20_TOT 0.119(0.091,0.147) 0.248(0.189,0.307) 0.489(0.449,0.527)
##                            rs                rs2             Unique
## UCLA3_TOT  0.944(0.914,0.968) 0.891(0.836,0.937) 0.069(0.048,0.094)
## UCLA20_TOT 0.881(0.838,0.917) 0.776(0.702,0.841) 0.034(0.019,0.051)
##                        Common               CD:0               CD:1
## UCLA3_TOT  0.205(0.176,0.236) 0.275(0.236,0.315) 0.069(0.048,0.094)
## UCLA20_TOT 0.205(0.176,0.236) 0.239(0.201,0.278) 0.034(0.019,0.051)
##                        GenDom              Pratt                RLW
## UCLA3_TOT  0.172(0.145,0.201) 0.187(0.148,0.229) 0.172(0.145,0.201)
## UCLA20_TOT 0.136(0.112,0.162) 0.121(0.087,0.159) 0.136(0.112,0.162)
result_phq$combCIpmDiff
##                                        b                 Beta
## UCLA3_TOT-UCLA20_TOT 0.980(0.778,1.184)* 0.108(-0.002,0.220) 
##                                         r                   rs
## UCLA3_TOT-UCLA20_TOT 0.035(-0.001,0.072)  0.063(-0.001,0.128) 
##                                       rs2               Unique
## UCLA3_TOT-UCLA20_TOT 0.115(-0.002,0.232)  0.035(-0.001,0.073) 
##                                   Common                 CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.036(-0.001,0.073) 
##                                      CD:1               GenDom
## UCLA3_TOT-UCLA20_TOT 0.035(-0.001,0.073)  0.036(-0.001,0.073) 
##                                     Pratt                  RLW
## UCLA3_TOT-UCLA20_TOT 0.066(-0.001,0.133)  0.036(-0.001,0.073)
##                var_tot   var_com            var_uniq          
## [1,] "UCLA-3"  "27.4576" "74.6605675659926" "25.1296544490414"
## [2,] "UCLA-20" "23.9121" "85.7306551913048" "14.2187428122164"
resUCLA3_UCLA20 = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
resPHQ_UCLA3 = cor.test(~UCLA3_TOT + PHQ_TOT, dat)
resPHQ_UCLA20 = cor.test(~UCLA20_TOT + PHQ_TOT, dat)

PHQ_UCLA3 = CTT::disattenuated.cor(resPHQ_UCLA3$estimate,
      c(alpha_UCLA3$total$raw_alpha,
        alpha_PHQ$total$raw_alpha))
PHQ_UCLA20 = CTT::disattenuated.cor(resPHQ_UCLA20$estimate,
      c(alpha_UCLA20$total$raw_alpha,
        alpha_PHQ$total$raw_alpha))
UCLA3_UCLA20 = CTT::disattenuated.cor(resUCLA3_UCLA20$estimate,
      c(alpha_UCLA3$total$raw_alpha,
        alpha_UCLA20$total$raw_alpha))

PHQ_UCLA3; PHQ_UCLA20
##            cor
## [1,] 0.6343675
##            cor
## [1,] 0.5502963
cocor::cocor.dep.groups.overlap(
  r.jk = as.numeric(PHQ_UCLA3), 
  r.jh = as.numeric(PHQ_UCLA20), 
  r.kh = as.numeric(UCLA3_UCLA20), 
  n = nrow(dat), 
  var.labels=c("Anxiety",
               "UCLA3",
               "UCLA20"))
## 
##   Results of a comparison of two overlapping correlations based on dependent groups
## 
## Comparison between r.jk (Anxiety, UCLA3) = 0.6344 and r.jh (Anxiety, UCLA20) = 0.5503
## Difference: r.jk - r.jh = 0.0841
## Related correlation: r.kh = 0.7813
## Data: j = Anxiety, k = UCLA3, h = UCLA20
## Group size: n = 1742
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
## 
## pearson1898: Pearson and Filon's z (1898)
##   z = 6.7234, p-value = 0.0000
##   Null hypothesis rejected
## 
## hotelling1940: Hotelling's t (1940)
##   t = 6.9019, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## williams1959: Williams' t (1959)
##   t = 6.8745, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## olkin1967: Olkin's z (1967)
##   z = 6.7234, p-value = 0.0000
##   Null hypothesis rejected
## 
## dunn1969: Dunn and Clark's z (1969)
##   z = 6.8242, p-value = 0.0000
##   Null hypothesis rejected
## 
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
##   t = 6.9019, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
##   z = 6.8025, p-value = 0.0000
##   Null hypothesis rejected
## 
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
##   z = 6.7949, p-value = 0.0000
##   Null hypothesis rejected
##   95% confidence interval for r.jk - r.jh: 0.0924 0.1673
##   Null hypothesis rejected (Interval does not include 0)
## 
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
##   z = 6.7942, p-value = 0.0000
##   Null hypothesis rejected
## 
## zou2007: Zou's (2007) confidence interval
##   95% confidence interval for r.jk - r.jh: 0.0598 0.1089
##   Null hypothesis rejected (Interval does not include 0)

GAD-7

lm_gad = lm(GAD_TOT ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_gad <- calc.yhat(lm_gad)

boot.out_gad <- boot(dat,
                 boot.yhat, n_sim,
                 lmOut=lm_gad,
                 regrout0=regrOut_gad)
result_gad <- booteval.yhat(regrOut_gad, boot.out_gad, bty="perc")

result_gad$combCIpm
##                             b               Beta                  r
## UCLA3_TOT  0.906(0.729,1.086) 0.298(0.240,0.356) 0.434(0.392,0.473)
## UCLA20_TOT 0.095(0.067,0.122) 0.200(0.142,0.259) 0.402(0.360,0.442)
##                            rs                rs2             Unique
## UCLA3_TOT  0.947(0.912,0.973) 0.896(0.831,0.946) 0.048(0.031,0.069)
## UCLA20_TOT 0.877(0.825,0.920) 0.769(0.681,0.846) 0.022(0.011,0.036)
##                        Common               CD:0               CD:1
## UCLA3_TOT  0.140(0.114,0.166) 0.188(0.154,0.223) 0.048(0.031,0.069)
## UCLA20_TOT 0.140(0.114,0.166) 0.161(0.130,0.195) 0.022(0.011,0.036)
##                        GenDom              Pratt                RLW
## UCLA3_TOT  0.118(0.094,0.144) 0.129(0.096,0.166) 0.118(0.094,0.144)
## UCLA20_TOT 0.092(0.072,0.114) 0.080(0.053,0.112) 0.092(0.072,0.114)
result_gad$combCIpmDiff
##                                        b                 Beta
## UCLA3_TOT-UCLA20_TOT 0.811(0.615,1.012)* 0.098(-0.010,0.206) 
##                                         r                   rs
## UCLA3_TOT-UCLA20_TOT 0.032(-0.003,0.067)  0.070(-0.007,0.146) 
##                                       rs2               Unique
## UCLA3_TOT-UCLA20_TOT 0.127(-0.014,0.263)  0.026(-0.003,0.057) 
##                                   Common                 CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.027(-0.003,0.057) 
##                                      CD:1               GenDom
## UCLA3_TOT-UCLA20_TOT 0.026(-0.003,0.057)  0.026(-0.003,0.057) 
##                                     Pratt                  RLW
## UCLA3_TOT-UCLA20_TOT 0.049(-0.005,0.103)  0.026(-0.003,0.057)
resUCLA3_UCLA20 = cor.test(~UCLA3_TOT + UCLA20_TOT, dat)
resGAD_UCLA3 = cor.test(~UCLA3_TOT + GAD_TOT, dat)
resGAD_UCLA20 = cor.test(~UCLA20_TOT + GAD_TOT, dat)

GAD_UCLA3 = CTT::disattenuated.cor(resGAD_UCLA3$estimate,
      c(alpha_UCLA3$total$raw_alpha,
        alpha_GAD$total$raw_alpha))
GAD_UCLA20 = CTT::disattenuated.cor(resGAD_UCLA20$estimate,
      c(alpha_UCLA20$total$raw_alpha,
        alpha_GAD$total$raw_alpha))
UCLA3_UCLA20 = CTT::disattenuated.cor(resUCLA3_UCLA20$estimate,
      c(alpha_UCLA3$total$raw_alpha,
        alpha_UCLA20$total$raw_alpha))

GAD_UCLA3; GAD_UCLA20
##            cor
## [1,] 0.5092153
##            cor
## [1,] 0.4387058
cocor::cocor.dep.groups.overlap(
  r.jk = as.numeric(GAD_UCLA3), 
  r.jh = as.numeric(GAD_UCLA20), 
  r.kh = as.numeric(UCLA3_UCLA20), 
  n = nrow(dat), 
  var.labels=c("Anxiety",
               "UCLA3",
               "UCLA20"))
## 
##   Results of a comparison of two overlapping correlations based on dependent groups
## 
## Comparison between r.jk (Anxiety, UCLA3) = 0.5092 and r.jh (Anxiety, UCLA20) = 0.4387
## Difference: r.jk - r.jh = 0.0705
## Related correlation: r.kh = 0.7813
## Data: j = Anxiety, k = UCLA3, h = UCLA20
## Group size: n = 1742
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
## 
## pearson1898: Pearson and Filon's z (1898)
##   z = 5.1215, p-value = 0.0000
##   Null hypothesis rejected
## 
## hotelling1940: Hotelling's t (1940)
##   t = 5.1808, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## williams1959: Williams' t (1959)
##   t = 5.1702, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## olkin1967: Olkin's z (1967)
##   z = 5.1215, p-value = 0.0000
##   Null hypothesis rejected
## 
## dunn1969: Dunn and Clark's z (1969)
##   z = 5.1449, p-value = 0.0000
##   Null hypothesis rejected
## 
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
##   t = 5.1808, df = 1739, p-value = 0.0000
##   Null hypothesis rejected
## 
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
##   z = 5.1369, p-value = 0.0000
##   Null hypothesis rejected
## 
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
##   z = 5.1334, p-value = 0.0000
##   Null hypothesis rejected
##   95% confidence interval for r.jk - r.jh: 0.0563 0.1258
##   Null hypothesis rejected (Interval does not include 0)
## 
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
##   z = 5.1348, p-value = 0.0000
##   Null hypothesis rejected
## 
## zou2007: Zou's (2007) confidence interval
##   95% confidence interval for r.jk - r.jh: 0.0436 0.0977
##   Null hypothesis rejected (Interval does not include 0)

Plot

cols = c("Unique", "Common", "CD.0")
res_plot1 = data.frame(regrOut_phq$PredictorMetrics[-nrow(regrOut_phq$PredictorMetrics),])[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot1.lo = data.frame(result_phq$lowerCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot1.lo) = paste0(names(res_plot1.lo), "_ci_lo")
res_plot1.up = data.frame(result_phq$upperCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance")%>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot1.up) = paste0(names(res_plot1.up), "_ci_up")
res_plot1 = cbind(res_plot1, res_plot1.lo, res_plot1.up)

res_plot1$variance[res_plot1$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot2 = data.frame(regrOut_gad$PredictorMetrics[-nrow(regrOut_gad$PredictorMetrics),])[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot2.lo = data.frame(result_gad$lowerCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot2.lo) = paste0(names(res_plot2.lo), "_ci_lo")
res_plot2.up = data.frame(result_gad$upperCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance")%>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot2.up) = paste0(names(res_plot2.up), "_ci_up")
res_plot2 = cbind(res_plot2, res_plot2.lo, res_plot2.up)
res_plot2$variance[res_plot2$variance =="CD.0"] <- "Total\n(unique+common)"

res_plot1$Measure = "PHQ-9"
res_plot2$Measure = "GAD-7"
res_plotA = rbind(res_plot1, res_plot2)
ggplot(res_plotA, aes(x=variance, y = values, color = Measure)) + 
    geom_point() +
    facet_wrap(~scale) + 
    theme_bw() + 
    geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=.2)

S6. Discriminant validity

Age

lm_age = lm(Age_dim ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_age <- calc.yhat(lm_age)

boot.out.age <- boot(dat,
                 boot.yhat, n_sim,
                 lmOut=lm_age,
                 regrout0=regrOut_age)
result_age <- booteval.yhat(regrOut_age, 
                        boot.out.age, bty="perc")

result_age$combCIpm
##                              b                Beta                   r
## UCLA3_TOT  0.030(-0.006,0.066) 0.052(-0.011,0.114)  0.057(0.009,0.106)
## UCLA20_TOT 0.001(-0.005,0.006) 0.007(-0.053,0.071) 0.043(-0.006,0.091)
##                             rs                rs2             Unique
## UCLA3_TOT   0.995(0.364,1.000) 0.991(0.171,1.000) 0.001(0.000,0.007)
## UCLA20_TOT 0.743(-0.184,0.998) 0.551(0.008,0.995) 0.000(0.000,0.003)
##                         Common               CD:0               CD:1
## UCLA3_TOT  0.002(-0.001,0.007) 0.003(0.000,0.011) 0.001(0.000,0.007)
## UCLA20_TOT 0.002(-0.001,0.007) 0.002(0.000,0.008) 0.000(0.000,0.003)
##                        GenDom               Pratt                RLW
## UCLA3_TOT  0.002(0.000,0.009)  0.003(0.000,0.011) 0.002(0.000,0.009)
## UCLA20_TOT 0.001(0.000,0.005) 0.000(-0.001,0.006) 0.001(0.000,0.005)
result_age$combCIpmDiff
##                                         b                 Beta
## UCLA3_TOT-UCLA20_TOT 0.029(-0.011,0.069)  0.045(-0.070,0.155) 
##                                         r                   rs
## UCLA3_TOT-UCLA20_TOT 0.014(-0.022,0.051)  0.252(-0.446,0.763) 
##                                       rs2               Unique
## UCLA3_TOT-UCLA20_TOT 0.440(-0.649,0.741)  0.001(-0.002,0.006) 
##                                   Common                 CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* 0.001(-0.002,0.006) 
##                                      CD:1               GenDom
## UCLA3_TOT-UCLA20_TOT 0.001(-0.002,0.006)  0.001(-0.002,0.006) 
##                                     Pratt                  RLW
## UCLA3_TOT-UCLA20_TOT 0.003(-0.004,0.012)  0.001(-0.002,0.006)

Financial difficulties

lm_fdiff = lm(Financ_diff ~ UCLA3_TOT + UCLA20_TOT, dat)
regrOut_fdiff <- calc.yhat(lm_fdiff)

boot.out.fdiff <- boot(dat,
                 boot.yhat, n_sim,
                 lmOut=lm_fdiff,
                 regrout0=regrOut_fdiff)
result_fdiff <- booteval.yhat(regrOut_fdiff, boot.out.fdiff, bty="perc")

result_fdiff$combCIpm
##                             b               Beta                  r
## UCLA3_TOT  0.034(0.005,0.062) 0.079(0.013,0.146) 0.197(0.149,0.244)
## UCLA20_TOT 0.012(0.007,0.016) 0.175(0.109,0.240) 0.228(0.180,0.274)
##                            rs                rs2             Unique
## UCLA3_TOT  0.836(0.702,0.933) 0.700(0.493,0.871) 0.003(0.000,0.012)
## UCLA20_TOT 0.969(0.895,0.999) 0.939(0.801,0.998) 0.017(0.006,0.031)
##                        Common               CD:0               CD:1
## UCLA3_TOT  0.035(0.021,0.051) 0.039(0.022,0.059) 0.003(0.000,0.012)
## UCLA20_TOT 0.035(0.021,0.051) 0.052(0.033,0.075) 0.017(0.006,0.031)
##                        GenDom              Pratt                RLW
## UCLA3_TOT  0.021(0.011,0.034) 0.015(0.002,0.034) 0.021(0.011,0.034)
## UCLA20_TOT 0.034(0.020,0.052) 0.040(0.021,0.064) 0.034(0.020,0.052)
result_fdiff$combCIpmDiff
##                                         b                  Beta
## UCLA3_TOT-UCLA20_TOT 0.022(-0.009,0.054)  -0.096(-0.218,0.025) 
##                                          r                    rs
## UCLA3_TOT-UCLA20_TOT -0.031(-0.071,0.009)  -0.133(-0.296,0.037) 
##                                        rs2                Unique
## UCLA3_TOT-UCLA20_TOT -0.239(-0.504,0.068)  -0.014(-0.030,0.004) 
##                                   Common                  CD:0
## UCLA3_TOT-UCLA20_TOT 0.000(0.000,0.000)* -0.013(-0.030,0.004) 
##                                       CD:1                GenDom
## UCLA3_TOT-UCLA20_TOT -0.014(-0.030,0.004)  -0.013(-0.030,0.004) 
##                                      Pratt                   RLW
## UCLA3_TOT-UCLA20_TOT -0.025(-0.056,0.007)  -0.013(-0.030,0.004)

Plot

res_plot3 = data.frame(regrOut_age$PredictorMetrics[-nrow(regrOut_age$PredictorMetrics),])[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot3.lo = data.frame(result_age$lowerCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot3.lo) = paste0(names(res_plot3.lo), "_ci_lo")
res_plot3.up = data.frame(result_age$upperCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance")%>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot3.up) = paste0(names(res_plot3.up), "_ci_up")
res_plot3 = cbind(res_plot3, res_plot3.lo, res_plot3.up)
res_plot3$variance[res_plot3$variance =="CD.0"] <- "Total\n(unique+common)"
res_plot4 = data.frame(regrOut_fdiff$PredictorMetrics[-nrow(regrOut_fdiff$PredictorMetrics),])[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
res_plot4.lo = data.frame(result_fdiff$lowerCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance") %>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot4.lo) = paste0(names(res_plot4.lo), "_ci_lo")
res_plot4.up = data.frame(result_fdiff$upperCIpm)[,cols] %>%
  pivot_longer(cols=everything(),
               values_to="values",
               names_to="variance")%>%
  mutate(scale = rep(c("UCLA-LS-3", "UCLA-LS-20"), each=3))
names(res_plot4.up) = paste0(names(res_plot4.up), "_ci_up")
res_plot4 = cbind(res_plot4, res_plot4.lo, res_plot4.up)
res_plot4$variance[res_plot4$variance =="CD.0"] <- "Total\n(unique+common)"

res_plot3$Measure = "Age"
res_plot4$Measure = "Financial"
res_plotB = rbind(res_plot3, res_plot4)
ggplot(res_plotB, aes(x=variance, y = values, color = Measure)) + 
    geom_point() +
    facet_wrap(~scale) + 
    theme_bw() + 
    geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=.2)

Nomological net

predictors = c("UCLA3_TOT", "UCLA20_TOT")
outcomes = c("PHQ_TOT","GAD_TOT",
             "Age_dim", "Financ_diff")
res = NULL
for (pred in predictors) {
  for (out in outcomes) {
    res_cor = NA
    res_cor = cbind(pred, out,
        broom::tidy(
          cor.test(~ dat[, pred] + dat[, out])))
    res = rbind(res, res_cor)
  }
}
res$se = (res$conf.high - res$conf.low) / qnorm(.975)
# res$name = paste0(res$pred, "_", res$out)
res$group = ifelse(res$out %in% c("PHQ_TOT", "GAD_TOT"), "Convergent validity", "Discriminant validity")
resplot = res
# resplot = dplyr::bind_rows(res, rb)
# resplot = data.frame(resplot[,intersect(names(res), names(rb))])
resplot$name = resplot$out


forestplot(
  resplot,
  colour = pred,
  estimate = estimate,
  se = se,
  xlab = "Association of the UCLA-3/UCLA-20\n with various variables") +
  ggforce::facet_col(
    facets = ~group,
    scales = "free_y",
    space = "free"
  )

res_plotC = rbind(res_plot1, res_plot2, res_plot3, res_plot4)
ggplot(res_plotC, aes(x=variance, y = values, 
                      color = Measure)) + 
    geom_point() +
    facet_wrap(~scale) + 
    theme_bw() + 
    geom_errorbar(aes(ymin=values_ci_lo, ymax=values_ci_up), width=0) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "", y = "Percentage of variance explained") 

S7. Prevalence

UCLA-20 (>42)

dat$ID = 1:nrow(dat)
dat$UCLA20_DIAG_38 = ifelse(dat$UCLA20_TOT > 38,
                        1, 0)
dat$UCLA20_DIAG_38txt = ifelse(dat$UCLA20_TOT > 38,
                        "yes", "no")
dat$UCLA20_DIAG_42 = ifelse(dat$UCLA20_TOT > 42,
                        1, 0)
dat$UCLA20_DIAG_42txt = ifelse(dat$UCLA20_TOT > 42,
                        "yes", "no")
dat$UCLA20_DIAG_52 = ifelse(dat$UCLA20_TOT > 52,
                        1, 0)
dat$UCLA20_DIAG_52txt = ifelse(dat$UCLA20_TOT > 52,
                        "yes", "no")
dat$UCLA3_DIAG7 = ifelse(dat$UCLA3_TOT > 6, 1, 0)
dat$UCLA3_DIAG6 = ifelse(dat$UCLA3_TOT > 5, 1, 0)
dat$UCLA3_DIAG_SEV = apply(dat[, c("UCLA1", "UCLA2",    "UCLA3")], 1, function(x) ifelse(
  length(x[x==3]) > 0, 1, 0))

suffixes <- c("42", "38", "52")


# Loop through the suffixes and create the objects
for (suffix in suffixes) {

  assign(paste0("SENS_6_", suffix), 
         round(with(subset(dat, 
  dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"),
  sum(UCLA3_DIAG6)/length(UCLA3_DIAG6)), 4)*100)
  
    assign(paste0("SPE_6_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG6)/length(UCLA3_DIAG6)), 4)*100)

      assign(paste0("SENS_7_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"), sum(UCLA3_DIAG7)/length(UCLA3_DIAG7)), 4)*100)
  
    assign(paste0("SPE_7_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG7)/length(UCLA3_DIAG7)), 4)*100)

      assign(paste0("SENS_SEV_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "yes"), sum(UCLA3_DIAG_SEV)/length(UCLA3_DIAG_SEV)), 4)*100)
  
    assign(paste0("SPE_SEV_", suffix), round(with(subset(dat, dat[, paste0("UCLA20_DIAG_", suffix, "txt")] == "no"), 1 - sum(UCLA3_DIAG_SEV)/length(UCLA3_DIAG_SEV)), 4)*100)

    assign(paste0("dat", suffix),
        rbind(mget(paste0("SENS_6_", suffix))[[1]], 
          mget(paste0("SPE_6_", suffix))[[1]], 
          mget(paste0("SENS_7_", suffix))[[1]], 
          mget(paste0("SPE_7_", suffix))[[1]],
          mget(paste0("SENS_SEV_", suffix))[[1]], 
          mget(paste0("SPE_SEV_", suffix))[[1]]))
}
    
dat_prev = data.frame(rbind(dat38, dat42, dat52))
dat_prev$type = rep(c("Sensitivity", "Specificity"), nrow(dat_prev)/2)
dat_prev$co_LS20 = rep(c("UCLA-LS-20 > 38", "UCLA-LS-20 > 42",
                         "UCLA-LS-20 > 52"), each=nrow(dat_prev)/3)
dat_prev$co_LS3 = rep(c("UCLA-LS-3 >= 6", "UCLA-LS-3 >= 7", ">=1 severe UCLA-LS-3"), each=2, times = 3)
names(dat_prev)[1] = "value"
dat_prev$value = dat_prev$value/100


dat_prev_p = dat_prev
dat_prev_p$value = NA
dat_prev_p$type = rep(c("Prevalence UCLA-LS-20", 
                        "Prevalence UCLA-LS-3"), times = 9)
meanR = function(x) {
  round(mean(x), digits=4) * 100
}

PRE_UCLA3_DIAG6 = meanR(dat$UCLA3_DIAG6)
PRE_UCLA3_DIAG7 = meanR(dat$UCLA3_DIAG7)
PRE_UCLA3_DIAG_SEV = meanR(dat$UCLA3_DIAG_SEV)
PRE_UCLA20_DIAG38 = meanR(dat$UCLA20_DIAG_38)
PRE_UCLA20_DIAG42 = meanR(dat$UCLA20_DIAG_42)
PRE_UCLA20_DIAG52 = meanR(dat$UCLA20_DIAG_52)

dat_prev_p$value = value = c(
  PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG6,
  PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG7, 
  PRE_UCLA20_DIAG38, PRE_UCLA3_DIAG_SEV, 
  PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG6,
  PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG7, 
  PRE_UCLA20_DIAG42, PRE_UCLA3_DIAG_SEV, 
  PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG6, 
  PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG7, 
  PRE_UCLA20_DIAG52, PRE_UCLA3_DIAG_SEV)
dat_prev_p$value = dat_prev_p$value/100

dat_P = dplyr::bind_rows(dat_prev, dat_prev_p)

Summary plot

ggplot(dat_P, aes(x = type, y = value, fill = type)) +
  geom_bar(stat = "identity", position = position_dodge2(),
           alpha = 0.5) +
  facet_grid(co_LS3~co_LS20) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() + 
  scale_fill_manual(values = rep(c("#8C6661", "#C09791", "#839BBB", "#4B6990"))) +
  theme_bw() + 
  xlab("") + ylab("") + theme(legend.position="none")

ROC

UCLA-20 (>42)

roc1 = roc(UCLA20_DIAG_42txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res1 = data.frame(cbind(roc1$thresholds, roc1$sensitivities, roc1$specificities))
names(res1) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res1
##   UCLA3 threshold Sensitivity Specificity
## 1            -Inf   1.0000000   0.0000000
## 2             3.5   0.9837587   0.2723112
## 3             4.5   0.9489559   0.5125858
## 4             5.5   0.8375870   0.6727689
## 5             6.5   0.5754060   0.8893974
## 6             7.5   0.3573086   0.9633867
## 7             8.5   0.2064965   0.9893211
## 8             Inf   0.0000000   1.0000000
cp1 <- cutpointr(dat, 
                UCLA3_TOT,
                UCLA20_DIAG_42txt, 
                direction = ">=", 
                pos_class = "yes",
                method = maximize_metric,
                metric = sum_sens_spec)

paste0("the optimal cut-off point is >= ", cp1$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 6"
cp1$AUC
## [1] 0.8413443
plot_metric(cp1)

dat$UCLA3_DIAG42_OPTI= ifelse(
  dat$UCLA3_TOT >= cp1$optimal_cutpoint, 1, 0)

UCLA-20 (>38)

roc2 = roc(UCLA20_DIAG_38txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res2 = data.frame(cbind(roc2$thresholds, roc2$sensitivities, roc2$specificities))
names(res2) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res2
##   UCLA3 threshold Sensitivity Specificity
## 1            -Inf   1.0000000   0.0000000
## 2             3.5   0.9672131   0.3038869
## 3             4.5   0.9032787   0.5609541
## 4             5.5   0.7721311   0.7181979
## 5             6.5   0.4803279   0.9116608
## 6             7.5   0.2803279   0.9726148
## 7             8.5   0.1524590   0.9911661
## 8             Inf   0.0000000   1.0000000
cp2 <- cutpointr(dat, 
                UCLA3_TOT,
                UCLA20_DIAG_38txt, 
                direction = ">=", 
                pos_class = "yes",
                method = maximize_metric,
                metric = sum_sens_spec)
paste0("the optimal cut-off point is >= ", cp2$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 6"
cp2$AUC
## [1] 0.8200726
plot_metric(cp2)

dat$UCLA3_DIAG38_OPTI= ifelse(
  dat$UCLA3_TOT >= cp2$optimal_cutpoint, 1, 0)

UCLA-20 (>52)

roc3 = roc(UCLA20_DIAG_52txt~UCLA3_TOT, data=dat)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
res3 = data.frame(cbind(roc3$thresholds, roc3$sensitivities, roc3$specificities))
names(res3) = c("UCLA3 threshold", "Sensitivity", "Specificity")
res3
##   UCLA3 threshold Sensitivity Specificity
## 1            -Inf   1.0000000   0.0000000
## 2             3.5   0.9945946   0.2331407
## 3             4.5   0.9783784   0.4431599
## 4             5.5   0.9351351   0.6037251
## 5             6.5   0.7405405   0.8355812
## 6             7.5   0.5567568   0.9364162
## 7             8.5   0.3567568   0.9762364
## 8             Inf   0.0000000   1.0000000
cp3 <- cutpointr(dat, 
                UCLA3_TOT,
                UCLA20_DIAG_52txt, 
                direction = ">=", 
                pos_class = "yes",
                method = maximize_metric,
                metric = sum_sens_spec)

paste0("the optimal cut-off point is >= ", cp3$optimal_cutpoint)
## [1] "the optimal cut-off point is >= 7"
cp3$AUC
## [1] 0.8754049
plot_metric(cp3)

Comparisons

Comparisons of prevalence for various cut-offs of UCLA-3 and the standard cut-off for UCLA-20

UCLA-LS-3 (opti - 6)

mcnemar.test(table(dat$UCLA20_DIAG_42,
                   dat$UCLA3_DIAG6))
## 
##  McNemar's Chi-squared test with continuity correction
## 
## data:  table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG6)
## McNemar's chi-squared = 256.84, df = 1, p-value < 2.2e-16

UCLA-LS-3 (>=7)

mcnemar.test(table(dat$UCLA20_DIAG_42,
                   dat$UCLA3_DIAG7))
## 
##  McNemar's Chi-squared test with continuity correction
## 
## data:  table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG7)
## McNemar's chi-squared = 4.1738, df = 1, p-value = 0.04105

UCLA-LS-3 (severe)

mcnemar.test(table(dat$UCLA20_DIAG_42,
                   dat$UCLA3_DIAG_SEV))
## 
##  McNemar's Chi-squared test with continuity correction
## 
## data:  table(dat$UCLA20_DIAG_42, dat$UCLA3_DIAG_SEV)
## McNemar's chi-squared = 3.4745, df = 1, p-value = 0.06232