You are on page 1of 2

import numpy as np

import pandas as pd

# Setting the parameters


n = 100 # Total number of participants
t = 3 # Number of time points
n_per_group = n // 2 # Number of participants per treatment condition

# Generating the time and treatment variables


time = np.tile([0, 1, 2], n) # Time points for each participant
tx = np.repeat([1, 0], repeats=n_per_group * t) # Treatment condition

# Coefficients for the model


beta_10 = 0 # Overall intercept for depression
beta_11 = 0 # Rate of change in depression for control
beta_12 = 0 # Mean difference in depression at baseline between CBT and control
beta_13 = -0.5 # Treatment effect for depression

beta_20 = 0 # Overall intercept for quality of life


beta_21 = 0 # Rate of change in quality of life for control
beta_22 = 0 # Mean difference in quality of life at baseline between CBT and control
beta_23 = 0 # Treatment effect for quality of life (none)

# Random effects variance-covariance matrix (multivariate model)


sigma_G = np.array([
[0.2, 0.05, 0.1, 0.05], # Covariances between u1, v1, u2, v2
[0.05, 0.2, 0.05, 0.1],
[0.1, 0.05, 0.2, 0.05],
[0.05, 0.1, 0.05, 0.2]
])

# Residual errors variance-covariance matrix


sigma_R = np.array([
[0.1, 0.05], # Covariance between e1 and e2
[0.05, 0.1]
])

# Simulating random effects


random_effects = np.random.multivariate_normal([0, 0, 0, 0], sigma_G, n)

# Simulating residual errors


residual_errors = np.random.multivariate_normal([0, 0], sigma_R, n * t)

# Calculating the outcomes


y1 = beta_10 + beta_11 * time + beta_12 * tx + beta_13 * time * tx + random_effects.repeat(t,
axis=0)[:, 0] + residual_errors[:, 0]
y2 = beta_20 + beta_21 * time + beta_22 * tx + beta_23 * time * tx + random_effects.repeat(t,
axis=0)[:, 2] + residual_errors[:, 1]

# Creating a DataFrame
data = pd.DataFrame({
'ParticipantID': np.repeat(np.arange(n), t),
'Time': time,
'Treatment': tx,
'Depression': y1,
'QualityOfLife': y2
})

data.head() # Displaying the first few rows of the dataset

You might also like