I managaged to align the y-axis title using the margin
argument instead of hjust
. hjust
did not work as expected, due to using the argument angle = 0
. This is because justification works relative to the text direction.
To add the p-value labels I then used geom_text
.
Plot produced:
Code used to produce the plot:
###################################################################################
# CREATE PLOTS-------------------------------------------------------------------#
###################################################################################
# Packages
library(tidyverse) # for maipulating data frames
# Amended forest plot function
forestplot_trend <- function (df, name = name, estimate = estimate, se = se, pvalue = NULL,
ptrend = NULL, colour = NULL, shape = NULL, logodds = FALSE,
psignif = 0.05, facet_var = NULL, ci = 0.95, ...)
{
# Ensure input is a data frame and logodds is a logical value
stopifnot(is.data.frame(df))
stopifnot(is.logical(logodds))
# Convert input column names to quosures for tidy evaluation
name <- enquo(name)
estimate <- enquo(estimate)
se <- enquo(se)
pvalue <- enquo(pvalue)
ptrend <- enquo(ptrend)
facet_var <- enquo(facet_var)
colour <- enquo(colour)
shape <- enquo(shape)
# Capture additional arguments passed to the function
args <- list(...)
# Compute the confidence interval constant (Z-score for normal distribution)
const <- stats::qnorm(1 - (1 - ci)/2)
# Prepare data frame: factorize `name` column, compute confidence intervals, and format labels
df <- df %>%
dplyr::mutate(
`:=`(!!name, factor(!!name, levels = !!name %>% unique() %>% rev(), ordered = TRUE)), # Reverse factor levels
.xmin = !!estimate - const * !!se, # Lower CI bound
.xmax = !!estimate + const * !!se, # Upper CI bound
.filled = TRUE, # Default filled state
.label = sprintf("%.2f", !!estimate) # Format estimate values as strings
)
# If logodds is TRUE, exponentiate the estimates and confidence intervals
if (logodds) {
df <- df %>% mutate(
.xmin = exp(.data$.xmin),
.xmax = exp(.data$.xmax),
`:=`(!!estimate, exp(!!estimate))
)
}
# If p-value is provided, mark significant values based on threshold (psignif)
if (!rlang::quo_is_null(pvalue)) {
df <- df %>% dplyr::mutate(.filled = !!pvalue < !!psignif)
}
# Initialize ggplot with estimate on x-axis and name on y-axis and ptrend values
g <- ggplot2::ggplot(df, aes(x = !!estimate, y = !!name)) +
geom_text(
aes(x = Inf, y = !!name, label = sprintf("%.3f", !!ptrend)),
hjust = 1.3, vjust = 0.5, family = "Arial"
)
# Apply a forest plot theme and add background elements
g <- g + theme_forest() + scale_colour_ng_d() + scale_fill_ng_d() +
geom_stripes() +
geom_vline(xintercept = ifelse(logodds, 1, 0), linetype = "solid", size = 0.4, colour = "black") # Reference line
# Adjust the x-axis to give more space for the labels
g <- g + coord_cartesian(xlim = c(min(df[[as_label(estimate)]]) - 0.5, max(df[[as_label(estimate)]]) + 0.5)) # Increase xlim range
# Add effect estimates with confidence intervals
g <- g + geom_effect(
ggplot2::aes(
xmin = .data$.xmin, xmax = .data$.xmax,
colour = !!colour, shape = !!shape, filled = .data$.filled
),
position = ggstance::position_dodgev(height = 0.5)
) +
ggplot2::scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
guides(colour = guide_legend(reverse = TRUE), shape = guide_legend(reverse = TRUE))
# Add optional title, subtitle, caption, x-label, and y-label if provided
if ("title" %in% names(args)) {
g <- g + labs(title = args$title)
}
if ("subtitle" %in% names(args)) {
g <- g + labs(subtitle = args$subtitle)
}
if ("caption" %in% names(args)) {
g <- g + labs(caption = args$caption)
}
if ("xlab" %in% names(args)) {
g <- g + labs(x = args$xlab)
}
if (!"ylab" %in% names(args)) {
args$ylab <- ""
}
g <- g + labs(y = args$ylab)
# Set axis limits if provided
if ("xlim" %in% names(args)) {
g <- g + coord_cartesian(xlim = args$xlim)
}
if ("ylim" %in% names(args)) {
g <- g + ylim(args$ylim)
}
# Adjust x-axis tick marks if provided and logodds is FALSE
if ("xtickbreaks" %in% names(args) & !logodds) {
g <- g + scale_x_continuous(breaks = args$xtickbreaks)
}
return(g)
}
# Create the forest plot function with added labels
forest_plot_data <- function(proteins, data, main_comparison) {
# Define the desired order for Carrier_comparison based on main_comparison
comparison_levels <- if (main_comparison == "C") {
c("CvsB", "AvsB")
} else {
c("AvsB", "CvsB")
}
# Filter data for selected proteins
data <- data %>%
filter(Assay %in% proteins)
# Ensure the Carrier_comparison is a factor with the desired level order
data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
# Plot
plot <- forestplot_trend(
df = data,
name = Assay, # Use combined label for y-axis
estimate = BETA,
se = SE,
pvalue = P_uncorrected,
ptrend = QM_P_trend, # Add QM_P_trend values
psignif = 0.01, # fill estimate if significant at this level
ci = 0.95,
logodds = FALSE,
xlab = "Beta (95% confidence intervals)",
ylab = "Analyte",
colour = Tertile,
shape = Tertile,
position = position_dodge(width = 0.7) # Increase dodge width for better separation
) +
facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
theme(
text = element_text(family = "Arial"), # Change "Arial" to your preferred font
strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 5.5, margin = margin(t = 15, b = 15)),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
axis.title.x = element_text(size = 14, color = "black", vjust = 0.5),
axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.00, margin = margin(r = -50)), # Rotated y-axis title
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(size = 12, color = "black"), # Customize legend labels
legend.key.size = unit(1.5, "lines")
)
# Add "p-value" label above p-values in each panel
plot <- plot +
geom_text(
data = data.frame(Carrier_comparison = comparison_levels,
label = "bolditalic('p') * bold('-value')"), # Correct expression for bold and italic
aes(x = Inf, y = Inf, label = label),
hjust = 1.1, vjust = 1.5, size = 4, family = "Arial", color = "black",
parse = TRUE # Use parse = TRUE to interpret the label as an expression
)
return(plot)
}
# Create list of proteins
proteins <- levels(data$Assay)
# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins, data, main_comparison = "C")
# Display the plot
CvsB_forest_plot