Skip to contents

Generate survival data with a constant baseline hazard

We simulate 100 participants, all entering at age 50 and administratively censored at age 70. The baseline hazard is constant at 0.005, with a two covariates and log-hazard ratio 0.5.

set.seed(0)
n <- 1000
entry_age <- 50
censor_age <- 70
beta <- c(0.5, 0.5)
p <- length(beta)
covariates <- matrix(rnorm(n * p), ncol = p)

sim <- HDccAnalysis::sim_cox_age_data(
  n = n,
  entry_age = entry_age,
  censor_age = censor_age,
  beta = beta,
  covariates = covariates,
  baseline_hazard = 0.01
)

head(sim)
# # A tibble: 6 × 6
#      id entry_age censor_age event   Exp1    Exp2
#   <int>     <dbl>      <dbl> <int>  <dbl>   <dbl>
# 1     1        50       70       0  1.26  -0.287 
# 2     2        50       70       0 -0.326  1.84  
# 3     3        50       50.5     1  1.33  -0.157 
# 4     4        50       70       0  1.27  -1.39  
# 5     5        50       70       0  0.415 -1.47  
# 6     6        50       70       0 -1.54  -0.0695

Quick checks

fit <- survival::coxph(survival::Surv(entry_age, censor_age,event) ~ Exp1 + Exp2, data = sim)
print(fit)
# Call:
# survival::coxph(formula = survival::Surv(entry_age, censor_age, 
#     event) ~ Exp1 + Exp2, data = sim)

#         coef exp(coef) se(coef)     z        p
# Exp1 0.50790   1.66180  0.06965 7.293 3.04e-13
# Exp2 0.61586   1.85124  0.06960 8.849  < 2e-16

# Likelihood ratio test=128  on 2 df, p=< 2.2e-16
# n= 1000, number of events= 207 

The achieved incidence rate is stored as an attribute (events / person-time):

print(attr(sim, "achieved_incidence_rate"))
# [1] 0.01163777

adjust the baseline hazard to acquire a target incidence rate

We can rerun the previous simulation with the target_avg_baseline_hazard to obtain a required incidence rate. This feature is mainly used when you wish to vary beta but adjust teh baseline hazard to maintain the incidence rate:

set.seed(0)
sim <- HDccAnalysis::sim_cox_age_data(
  n = n,
  entry_age = entry_age,
  censor_age = censor_age,
  beta = beta,
  covariates = covariates,
  baseline_hazard = 0.01,
  target_avg_baseline_hazard = 0.02
)

fit <- survival::coxph(survival::Surv(entry_age, censor_age,event) ~ Exp1 + Exp2, data = sim)
print(fit)
# Call:
# survival::coxph(formula = survival::Surv(entry_age, censor_age, 
#     event) ~ Exp1 + Exp2, data = sim)

#         coef exp(coef) se(coef)     z        p
# Exp1 0.46686   1.59498  0.05846 7.986 1.39e-15
# Exp2 0.46808   1.59692  0.05649 8.286  < 2e-16

# Likelihood ratio test=126.7  on 2 df, p=< 2.2e-16
# n= 1000, number of events= 316 
print(attr(sim, "achieved_incidence_rate"))
# [1] 0.01893861

beta_2 <- c(1.5, 1.5)
sim_2 <- HDccAnalysis::sim_cox_age_data(
  n = n,
  entry_age = entry_age,
  censor_age = censor_age,
  beta = beta_2,
  covariates = covariates,
  baseline_hazard = 0.01,
  target_avg_baseline_hazard = 0.02
)

fit_2 <- survival::coxph(survival::Surv(entry_age, censor_age,event) ~ Exp1 + Exp2, data = sim_2)
print(fit_2)
# Call:
# survival::coxph(formula = survival::Surv(entry_age, censor_age, 
#     event) ~ Exp1 + Exp2, data = sim_2)

#         coef exp(coef) se(coef)     z      p
# Exp1 1.46640   4.33360  0.07387 19.85 <2e-16
# Exp2 1.50335   4.49673  0.07297 20.60 <2e-16

# Likelihood ratio test=744.9  on 2 df, p=< 2.2e-16
# n= 1000, number of events= 331
print(attr(sim_2, "achieved_incidence_rate"))
# [1] 0.02102166