Tutorial 4: Analysis of Starmap hippocampus
In this tutorial, we will conduct a downstream analysis of the reconstructed data obtained after training the stCAMBL model. The trained data can be obtained from the network disk. Please note that this is an R tutorial file, please run Celina analysis in the R environment.
Read the data
[ ]:
# read raw data
load('/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/hippocampus_scRNA_reference.RData')
load('/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/starmap_plus_data_use.RData')
# data trained by stCAMBL
load('/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/Hippocampus_Analysis/hip_stCAMBL.RData')
names(data_use_stCAMBL)
expr_use <- data_use_stCAMBL$X_recon
location_use <- data_use_stCAMBL$location
celltype_use <- data_use$celltype_proportion
- 'count'
- 'meta'
- 'location'
- 'X_emb'
- 'X_recon'
Perform deconvolution
[ ]:
options(warn = -1)
library(CARD)
location_use2 <- location_use
colnames(location_use2) <- c("x", "y")
CARD_obj <- createCARDObject(
sc_count = scRNA_count_subset,
sc_meta = sc_meta_in_subset,
spatial_count = as.matrix(expr_use),
spatial_location = location_use2,
ct.varname = "cellType",
ct.select = unique(sc_meta_in_subset$cellType),
sample.varname = "sampleInfo",
minCountGene = 100,
minCountSpot = 20
)
CARD_obj <- CARD_deconvolution(CARD_object = CARD_obj)
Registered S3 methods overwritten by 'registry':
method from
print.registry_field proxy
print.registry_entry proxy
## QC on scRNASeq dataset! ...
## QC on spatially-resolved dataset! ...
## create reference matrix from scRNASeq...
载入需要的程序包:nnls
载入需要的程序包:ggplot2
载入需要的程序包:TOAST
载入需要的程序包:EpiDISH
载入需要的程序包:limma
载入程序包:‘limma’
The following object is masked from ‘package:BiocGenerics’:
plotMA
载入需要的程序包:quadprog
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
MuSiC v1.0.0 support SingleCellExperiment! See Tutorial: https://xuranw.github.io/MuSiC/articles/MuSiC.html
MuSiC2 for multi-condition bulk RNA-seq data is also available!
## Select Informative Genes! ...
## Deconvolution Starts! ...
## Deconvolution Finish! ...
Perform CELINA
[ ]:
library(CELINA)
Obj <- Create_Celina_Object(
celltype_mat = t(celltype_use), # spot × celltype
gene_expression_mat = as.matrix(expr_use), # gene × spot
location = as.matrix(location_use), # spot × 2, class must be data.frame!
covariates = NULL,
project = "starmap"
)
# if you want to further filter cell types based on their total proportion across spots, or you only want to test a subset of cell types, you can select the cell types here:
filtered_cell_types = colnames(data_use$celltype_proportion)[which(colSums(data_use$celltype_proportion) >150)]
Obj = preprocess_input(Obj,
# Celina object
cell_types_to_test = filtered_cell_types,
# a vector of cell types to be used for testing
scRNA_count = as.matrix(scRNA_count_subset),
# a gene x cell expression matrix of reference scRNA-seq data
sc_cell_type_labels = sc_meta_in_subset$cellType,
# a vector of cell type labels for each cell in scRNA_count
threshold = 5e-5)
print(names(Obj@genes_list))
Obj <- Calculate_Kernel(Obj, approximation = FALSE)
Obj = Testing_interaction_all(Obj,celltype_to_test = 'Microglia', num_cores=10)
save(Obj, file = "/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/Hippocampus_Analysis/CELINA_Obj_workspace.RData")
filtering genes based on threshold = 5e-05
Get gene list for each cell type.
[1] "Interneuron" "Entorhinal cortex"
[3] "Deep layer subiculum" "Entorhinal cortex (IEG)"
[5] "Subiculum" "Dentate Principal cells"
[7] "Medial entorrhinal cortex" "Medial entorrhinal cortexm"
[9] "Postsubiculum" "CA3 Principal cells"
[11] "Astrocyte" "Oligodendrocyte"
[13] "MyelinProcesses" "Microglia"
[15] "Resident macrophage" "Ependymal"
[17] "Endothelial_Stalk"
Screen genes
[ ]:
library(CELINA)
tmp_env <- new.env()
load("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/Hippocampus_Analysis/CELINA_Obj_workspace.RData", envir = tmp_env)
Obj <- tmp_env$Obj
names(Obj@result)
for (celltype in names(Obj@result)) {
colnames(Obj@result[[celltype]]) <- "pval"
}
Significant_gene_list_Celina <- list()
fdr_threshold <- 0.00005
for (celltype in names(Obj@result)) {
df <- Obj@result[[celltype]]
if (is.null(df) || nrow(df) == 0) next
pvals <- df[[1]]
fdr <- p.adjust(pvals, method = "BH")
sig_genes <- rownames(df)[which(fdr < fdr_threshold)]
Significant_gene_list_Celina[[celltype]] <- sig_genes
}
[ ]:
Significant_gene_list_Celina[['Microglia']]
- 'Abl1'
- 'Actr2'
- 'Adam10'
- 'Anpep'
- 'Arhgap39'
- 'Arhgdia'
- 'Arhgef2'
- 'Arrb2'
- 'Atp6v1f'
- 'Baz1a'
- 'Bin1'
- 'C1qa'
- 'Ccl6'
- 'Ccnd1'
- 'Ccr2'
- 'Cd81'
- 'Cd83'
- 'Cd9'
- 'Cdc42'
- 'Cfl1'
- 'Clcf1'
- 'Creb1'
- 'Csrnp1'
- 'Cst3'
- 'Cst7'
- 'Ctsl'
- 'Ctss'
- 'Cul1'
- 'Cxcl10'
- 'Dbnl'
- 'Dnm2'
- 'Dock10'
- 'Dtnbp1'
- 'Ebf3'
- 'Eef2k'
- 'Elavl1'
- 'Fcgr1'
- 'Fcrls'
- 'Ftl1'
- 'Fubp1'
- 'Gdf15'
- 'Gdi2'
- 'Golm1'
- 'Grn'
- 'Hexb'
- 'Hnrnpa3'
- 'Hnrnpab'
- 'Hnrnph1'
- 'Hnrnpk'
- 'Hsd17b4'
- 'Itgb5'
- 'Itm2b'
- 'Itpr2'
- 'Kansl1'
- 'Kcnj2'
- 'Kctd12'
- 'Lhfpl2'
- 'Lpcat2'
- 'Lrpap1'
- 'Lrpprc'
- 'Ly86'
- 'Lyn'
- 'Maml3'
- 'Manf'
- 'Mapk14'
- 'Marcks'
- 'Mertk'
- 'Nav3'
- 'Nf1'
- 'Nr3c1'
- 'Nufip1'
- 'Numb'
- 'Olfml3'
- 'Ophn1'
- 'P2ry12'
- 'Pabpc1'
- 'Pacsin2'
- 'Pak2'
- 'Picalm'
- 'Pld1'
- 'Plekho1'
- 'Ppp1ca'
- 'Ppp1cc'
- 'Ptpra'
- 'Ptpro'
- 'Pum2'
- 'Rab14'
- 'Rab1b'
- 'Rab5a'
- 'Rac1'
- 'Rap1a'
- 'Rela'
- 'Rgs2'
- 'Rin3'
- 'Rinl'
- 'Sdcbp'
- 'Sdhb'
- 'Selplg'
- 'Sema4d'
- 'Serpine2'
- 'Sfpq'
- 'Smad7'
- 'Sparc'
- 'Srrm2'
- 'Stag1'
- 'Stat3'
- 'Stau1'
- 'Stk40'
- 'Tapbp'
- 'Tbxas1'
- 'Tfap2c'
- 'Timp2'
- 'Tmem176b'
- 'Tmsb4x'
- 'Trem2'
- 'Ttr'
- 'Ubc'
- 'Vapa'
- 'Vcp'
- 'Vps16'
- 'Vps33b'
- 'Whrn'
- 'Xbp1'
- 'Zc3h7a'
- 'Zcwpw1'
- 'Zdhhc5'
Visualize cell type distribution, gene expression, and Aβ plaques
[ ]:
library(ggplot2)
library(scater)
library(ggpubr)
library(pdist)
library(viridis)
# Data preprocessing
expr_use_scater_norm <- normalizeCounts(expr_use)
plaque <- read.csv("/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/plaque_13months-disease-replicate_1.csv")
# Calculate distance from each cell to the nearest plaque
calculate_plaque_distance <- function(location_data, plaque_data) {
dist_p <- c()
for(cc in 1:nrow(location_data)){
dist_p[cc] <- min(pdist(location_data[cc,], plaque_data[,1:2])@dist)
}
return(dist_p)
}
# Plotting function
plot_factor_value <- function(location, feature, textmethod, pointsize = 2, textsize = 15,
shape = 15, lowcolor = "#05a8aa", midcolor = "#edede9",
highcolor = "#bc412b", featurename = "Feature") {
location <- as.data.frame(location)
datt <- data.frame(feature = feature, x = location[, 1], y = location[, 2])
p <- ggplot(datt, aes(x = x, y = y, color = feature)) +
geom_point(size = pointsize, alpha = 0.9, shape = shape) +
scale_color_gradient2(low = lowcolor, mid = midcolor, high = highcolor) +
ggtitle(textmethod) +
theme_void() +
theme(plot.title = element_text(size = textsize),
text = element_text(size = textsize),
legend.position = "right",
legend.title = element_text(family = "serif"),
legend.text = element_text(family = "serif")) +
labs(color = featurename)
return(p)
}
# Plaque plotting function
plot_plaque_cells <- function(location_data, plaque_data, textsize = 15) {
# Prepare plaque data
datt_plaque <- plaque_data[, c("m.cx", "m.cy", "s.radius.mean")]
colnames(datt_plaque) <- c("x", "y", "radius")
datt_plaque$Type <- "Plaque"
datt_plaque$Size <- (plaque_data$s.radius.mean - min(plaque_data$s.radius.mean)) /
(max(plaque_data$s.radius.mean) - min(plaque_data$s.radius.mean)) * 4
# Prepare cell data
dattt_cells <- location_data
dattt_cells$radius <- 0.5
dattt_cells$Size <- 0.5
colnames(dattt_cells) <- c("x", "y", "radius", "Size")
dattt_cells$Type <- "Cell"
# Combine data
dat_fig <- rbind(dattt_cells, datt_plaque)
dat_fig$Type <- factor(dat_fig$Type, levels = c("Plaque", "Cell"), ordered = TRUE)
# Plot
p <- ggplot() +
scale_color_viridis(discrete = TRUE) +
geom_point(data = dat_fig, aes(x = x, y = y, color = Type, shape = Type),
alpha = 0.9, size = dat_fig$Size) +
ggtitle("Plaques & Cells Distribution") +
theme_void() +
theme(plot.title = element_text(size = textsize, family = 'serif'),
text = element_text(size = textsize, family = 'serif'),
legend.position = "right",
legend.title = element_text(family = "serif"),
legend.text = element_text(family = "serif")) +
labs(color = NULL, shape = NULL) +
guides(colour = guide_legend(override.aes = list(size = 4)))
return(p)
}
# Comprehensive plotting function
plot_comprehensive_spatial <- function(celltype, gene_name,
plaque_file_path = NULL,
output_file = "Data/hippocampus/comprehensive_spatial.pdf",
width = 18, height = 6) {
# Check if cell type exists
if (!celltype %in% colnames(data_use$celltype_proportion)) {
stop(paste("Cell type", celltype, "does not exist in the data"))
}
# Check if gene exists
if (!gene_name %in% rownames(expr_use_scater_norm)) {
stop(paste("Gene", gene_name, "does not exist in the expression matrix"))
}
message("Plotting cell type:", celltype)
message("Plotting gene:", gene_name)
# Plot cell type proportion
prop <- data_use$celltype_proportion[, celltype]
prop_size <- ((prop - min(prop)) / (max(prop) - min(prop))) * 3
p1 <- plot_factor_value(location_use, prop,
textmethod = paste(celltype, "Proportion"),
pointsize = prop_size,
textsize = 14,
shape = 16,
lowcolor = "#05a8aa",
midcolor = "#edede9",
highcolor = "#bc412b",
featurename = "Proportion") +
scale_color_viridis(option = "C") +
labs(color = NULL)
# Plot gene expression
expr <- expr_use_scater_norm[gene_name, ]
expr_size <- ((expr - min(expr)) / (max(expr) - min(expr))) * 3
p2 <- plot_factor_value(location_use, expr,
textmethod = paste(gene_name, "Expression"),
pointsize = expr_size,
textsize = 14,
shape = 16,
lowcolor = "#faedcd",
midcolor = "#edede9",
highcolor = "#0077b6",
featurename = "Expression") +
scale_color_viridis(option = "D") +
labs(color = NULL)
# Plot plaque distribution
if (!is.null(plaque_file_path)) {
plaque_data <- read.csv(plaque_file_path)
} else {
# Use global variable plaque
if (exists("plaque")) {
plaque_data <- plaque
} else {
stop("Please provide plaque data file path or ensure plaque data is loaded")
}
}
p3 <- plot_plaque_cells(data_use$location_use, plaque_data, textsize = 14)
# Combine plots
combined_plot <- ggarrange(p1, p2, p3, ncol = 3, nrow = 1)
# Save PDF
ggsave(output_file, combined_plot,
width = width, height = height,
device = "pdf")
message("Plot saved to:", output_file)
# Calculate and display distance information
dist_p <- calculate_plaque_distance(data_use$location_use, plaque_data[,1:2])
message("Average distance from cells to nearest plaque:", round(mean(dist_p), 2))
message("Median distance from cells to nearest plaque:", round(median(dist_p), 2))
return(list(plot = combined_plot, distances = dist_p))
}
# Simplified usage function
quick_comprehensive_plot <- function(celltype, gene, filename = NULL, plaque_path = NULL) {
# Auto-generate filename if not specified
if (is.null(filename)) {
filename <- paste0("comprehensive_", celltype, "_", gene, ".pdf")
}
result <- plot_comprehensive_spatial(
celltype = celltype,
gene_name = gene,
plaque_file_path = plaque_path,
output_file = filename,
width = 18,
height = 6
)
return(result)
}
# Call function
# Basic usage
result <- plot_comprehensive_spatial(
celltype = "Microglia",
gene_name = "P2ry12",
plaque_file_path = "/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/plaque_13months-disease-replicate_1.csv",
output_file = "/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_P2ry12_comprehensive.pdf"
)
载入需要的程序包:SingleCellExperiment
载入需要的程序包:SummarizedExperiment
载入需要的程序包:MatrixGenerics
载入需要的程序包:matrixStats
载入程序包:‘matrixStats’
The following objects are masked from ‘package:wrMisc’:
colSds, rowSds
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
载入程序包:‘MatrixGenerics’
The following objects are masked from ‘package:matrixStats’:
colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
colWeightedMeans, colWeightedMedians, colWeightedSds,
colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
rowWeightedSds, rowWeightedVars
The following objects are masked from ‘package:wrMisc’:
colSds, rowSds
The following object is masked from ‘package:Biobase’:
rowMedians
载入需要的程序包:GenomicRanges
载入需要的程序包:stats4
载入需要的程序包:S4Vectors
载入程序包:‘S4Vectors’
The following object is masked from ‘package:utils’:
findMatches
The following objects are masked from ‘package:base’:
expand.grid, I, unname
载入需要的程序包:IRanges
载入需要的程序包:GenomeInfoDb
载入需要的程序包:scuttle
载入程序包:‘scater’
The following object is masked from ‘package:limma’:
plotMDS
载入需要的程序包:viridisLite
Plotting cell type:Microglia
Plotting gene:P2ry12
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Plot saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_P2ry12_comprehensive.pdf
Average distance from cells to nearest plaque:327.26
Median distance from cells to nearest plaque:261.5
Plot the distribution of stCAMBL reconstruction data at a distance from the plaque for specific cell types
Microglia often cluster around amyloid-β plaques, including so-called disease-associated microglia (DAMs), which are highly expressive of genes associated with phagocytosis and inflammation, such as Grn. You can also replace gene names to analyze other genes.
[ ]:
library(ggplot2)
library(scater)
library(ggpubr)
library(pdist)
library(viridis)
library(dplyr)
library(RColorBrewer)
PLAQUE_COLOR <- "#8B4B9F"
plaque = read.csv("/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/plaque_13months-disease-replicate_1.csv")
# Data preprocessing
expr_use_scater_norm <- normalizeCounts(expr_use)
# Cell distance classification function
classify_cells_by_plaque_distance_celltype <- function(location_data, plaque_data,
celltype_proportion, celltype_name,
celltype_threshold = 0.5, # Cell type threshold
distance_groups = list(c(10, 20), c(50, Inf)),
group_names = NULL) {
# Check if cell type exists
if (!celltype_name %in% colnames(celltype_proportion)) {
stop(paste("Cell type", celltype_name, "does not exist in cell type proportion matrix"))
}
cell_info <- data.frame(
x = location_data[,1],
y = location_data[,2],
cell_id = 1:nrow(location_data),
min_distance = NA,
nearest_plaque_id = NA,
distance_group = "Other",
celltype_prop = celltype_proportion[, celltype_name], # Cell type proportion
is_target_celltype = FALSE
)
# Filter target cell types
cell_info$is_target_celltype <- cell_info$celltype_prop >= celltype_threshold
message(paste("Cell type threshold:", celltype_threshold))
message(paste("Number of qualified cells:", sum(cell_info$is_target_celltype)))
message(paste("Total number of cells:", nrow(cell_info)))
message(paste("Cell type proportion range:", round(min(cell_info$celltype_prop), 3), "-", round(max(cell_info$celltype_prop), 3)))
# Calculate distance from each cell to all plaques
for(i in 1:nrow(cell_info)) {
distances <- pdist(location_data[i,], plaque_data[,1:2])@dist
cell_info$min_distance[i] <- min(distances)
cell_info$nearest_plaque_id[i] <- which.min(distances)
}
# Distance grouping for target cell types
target_cells <- which(cell_info$is_target_celltype)
# Group by distance intervals
for(i in 1:length(distance_groups)) {
group_range <- distance_groups[[i]]
lower_bound <- group_range[1]
upper_bound <- group_range[2]
# Create group names
if(!is.null(group_names) && length(group_names) >= i) {
group_name <- group_names[i]
} else {
if(is.infinite(upper_bound)) {
group_name <- paste0(">", lower_bound, "um")
} else if(lower_bound == 0) {
group_name <- paste0("≤", upper_bound, "um")
} else {
group_name <- paste0(lower_bound, "-", upper_bound, "um")
}
}
# Group target cell types
if(is.infinite(upper_bound)) {
in_group <- target_cells[cell_info$min_distance[target_cells] > lower_bound]
} else {
in_group <- target_cells[cell_info$min_distance[target_cells] >= lower_bound &
cell_info$min_distance[target_cells] <= upper_bound]
}
cell_info$distance_group[in_group] <- group_name
}
# Add numeric group labels for subsequent analysis
unique_groups <- unique(cell_info$distance_group[cell_info$is_target_celltype])
cell_info$group_numeric <- NA
cell_info$group_numeric[cell_info$is_target_celltype] <- as.numeric(factor(
cell_info$distance_group[cell_info$is_target_celltype],
levels = unique_groups
))
return(cell_info)
}
# Create plaque boundary circle data
create_plaque_circles <- function(plaque_data, n_points = 100) {
circle_data <- data.frame()
for(i in 1:nrow(plaque_data)) {
# Create circle points
theta <- seq(0, 2*pi, length.out = n_points)
x_circle <- plaque_data$m.cx[i] + plaque_data$s.radius.mean[i] * cos(theta)
y_circle <- plaque_data$m.cy[i] + plaque_data$s.radius.mean[i] * sin(theta)
temp_circle <- data.frame(
x = x_circle,
y = y_circle,
plaque_id = i,
radius = plaque_data$s.radius.mean[i]
)
circle_data <- rbind(circle_data, temp_circle)
}
return(circle_data)
}
# Modified main plotting function - adding cell type analysis
plot_plaque_celltype_distance_expression <- function(gene_name,
celltype_name, # Cell type name
celltype_threshold = 0.5, # Cell type threshold
plaque_file_path = NULL,
distance_groups = list(c(0, 20), c(50, Inf)),
group_names = NULL,
output_file = NULL,
save_individual = TRUE,
show_non_target_cells = TRUE,
width = 14, height = 10) {
# Check if gene exists
if (!gene_name %in% rownames(expr_use_scater_norm)) {
stop(paste("Gene", gene_name, "does not exist in expression matrix"))
}
# Check if cell type exists
if (!celltype_name %in% colnames(data_use$celltype_proportion)) {
available_types <- colnames(data_use$celltype_proportion)
stop(paste("Cell type", celltype_name, "does not exist in cell type proportion matrix.\nAvailable cell types:",
paste(available_types, collapse = ", ")))
}
# Read plaque data
if (!is.null(plaque_file_path)) {
plaque_data <- read.csv(plaque_file_path)
} else {
if (exists("plaque")) {
plaque_data <- plaque
} else {
stop("Please provide plaque data file path or ensure plaque data is loaded")
}
}
message("Analyzing stCAMBL data - Cell type specific analysis")
message("Analyzing gene:", gene_name)
message("Cell type:", celltype_name)
message("Distance groups:", paste(sapply(distance_groups, function(x) {
if(is.infinite(x[2])) paste0(">", x[1]) else paste0(x[1], "-", x[2])
}), collapse = ", "))
# Classify cells
cell_info <- classify_cells_by_plaque_distance_celltype(
data_use$location_use,
plaque_data,
data_use$celltype_proportion,
celltype_name,
celltype_threshold,
distance_groups,
group_names
)
# Get gene expression data
gene_expr <- expr_use_scater_norm[gene_name, ]
cell_info$gene_expression <- gene_expr
# Create plaque boundary circles
plaque_circles <- create_plaque_circles(plaque_data)
# Prepare plotting data
if (show_non_target_cells) {
plot_data <- cell_info
# Set different transparency and size for different cell types
plot_data$alpha_val <- ifelse(!plot_data$is_target_celltype, 0.1,
ifelse(plot_data$distance_group == "Other", 0.3, 0.8))
plot_data$size_val <- ifelse(!plot_data$is_target_celltype, 0.3,
ifelse(plot_data$distance_group == "Other", 0.8, 1.5))
} else {
# Only show target cell types
plot_data <- cell_info[cell_info$is_target_celltype, ]
plot_data$alpha_val <- ifelse(plot_data$distance_group == "Other", 0.5, 0.9)
plot_data$size_val <- ifelse(plot_data$distance_group == "Other", 1.0, 1.8)
}
# Get all groups in target cell type and set colors
target_cell_data <- cell_info[cell_info$is_target_celltype, ]
all_groups <- unique(target_cell_data$distance_group)
n_groups <- length(all_groups)
# Set colors for different groups
if(n_groups <= 8) {
group_colors <- brewer.pal(max(3, n_groups), "Set2")[1:n_groups]
} else {
group_colors <- rainbow(n_groups)
}
names(group_colors) <- all_groups
# Add gray for non-target cells if showing them
if (show_non_target_cells) {
group_colors["Non_Target"] <- "lightgray"
plot_data$display_group <- ifelse(plot_data$is_target_celltype,
plot_data$distance_group, "Non_Target")
} else {
plot_data$display_group <- plot_data$distance_group
}
# Statistics
group_stats <- target_cell_data %>%
group_by(distance_group) %>%
summarise(
count = n(),
mean_expr = mean(gene_expression, na.rm = TRUE),
median_expr = median(gene_expression, na.rm = TRUE),
mean_celltype_prop = mean(celltype_prop, na.rm = TRUE),
.groups = 'drop'
)
message("Target cell type group statistics:")
print(group_stats)
# Create main plot 1: Cell type proportion plot
p1 <- ggplot() +
# Plot cell points colored by cell type proportion
geom_point(data = plot_data,
aes(x = x, y = y, color = celltype_prop, size = display_group, alpha = display_group)) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color and size settings
scale_color_viridis_c(option = "A", name = paste(celltype_name, "\nProportion")) +
scale_size_manual(values = setNames(plot_data$size_val[match(unique(plot_data$display_group), plot_data$display_group)],
unique(plot_data$display_group)), guide = "none") +
scale_alpha_manual(values = setNames(plot_data$alpha_val[match(unique(plot_data$display_group), plot_data$display_group)],
unique(plot_data$display_group)), guide = "none") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(1.0, "cm")
) +
ggtitle(paste0(celltype_name, " Distribution")) +
coord_fixed()
# Create main plot 2: Distance group plot
target_plot_data <- plot_data[plot_data$is_target_celltype, ]
p2 <- ggplot() +
# Plot cell points colored by distance group
geom_point(data = target_plot_data,
aes(x = x, y = y, color = distance_group),
size = 1.5, alpha = 0.8) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color settings
scale_color_manual(values = group_colors, name = "Distance Group") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(1.0, "cm")
) +
ggtitle(paste0(celltype_name, " Distance Groups")) +
coord_fixed()
# Create spatial gene expression plot
p3 <- ggplot() +
# Plot cell points colored by gene expression
geom_point(data = target_plot_data,
aes(x = x, y = y, color = gene_expression, shape = distance_group),
size = 1.8, alpha = 0.8) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color settings
scale_color_viridis_c(option = "D", name = paste(gene_name, "\nExpression")) +
scale_shape_manual(values = c(16, 17, 15, 18, 19, 8, 11, 12)[1:n_groups], name = "Distance Group") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(0.8, "cm")
) +
ggtitle(paste0(gene_name, " in ", celltype_name)) +
coord_fixed()
# Create expression comparison boxplot
plot_data_for_box <- target_cell_data[target_cell_data$distance_group != "Other", ]
p4 <- ggplot(plot_data_for_box, aes(x = distance_group, y = gene_expression, fill = distance_group)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.6, size = 1.0) +
scale_fill_manual(values = group_colors) +
labs(x = "Distance Group", y = paste(gene_name, "Expression"),
title = paste(gene_name, "in", celltype_name)) +
theme_minimal() +
theme(
text = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26),
axis.text.x = element_text(size = 26, angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 26),
legend.position = "none"
) +
stat_compare_means(method = "kruskal.test", label = "p.format",
size = 5)
# Combine plots
combined_plot <- ggarrange(
ggarrange(p1, p2, ncol = 2, widths = c(1, 1)),
ggarrange(p3, p4, ncol = 2, widths = c(1, 1)),
nrow = 2, heights = c(1, 1)
)
# Save plots
if (!is.null(output_file)) {
ggsave(output_file, combined_plot,
width = width, height = height,
device = "pdf")
message("Combined plot saved to:", output_file)
if (save_individual) {
file_parts <- tools::file_path_sans_ext(output_file)
file_ext <- tools::file_ext(output_file)
if (file_ext == "") file_ext <- "pdf"
# Save individual subplots
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_celltype_distribution.", "_no_csl", file_ext), p1,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_distance_groups.","_no_csl", file_ext), p2,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_expression_spatial.","_no_csl", file_ext), p3,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_expression_comparison.","_no_csl", file_ext), p4,
width = width * 0.5, height = height * 0.5, device = "pdf")
}
}
# Return results
return(list(
combined_plot = combined_plot,
celltype_distribution_plot = p1,
distance_groups_plot = p2,
expression_spatial_plot = p3,
expression_comparison_plot = p4,
cell_info = cell_info,
target_cell_stats = group_stats,
celltype_summary = list(
total_cells = nrow(cell_info),
target_cells = sum(cell_info$is_target_celltype),
target_percentage = round(sum(cell_info$is_target_celltype) / nrow(cell_info) * 100, 2)
)
))
}
# Set cell type name
cc <- "Microglia"
result_celltype_detailed <- plot_plaque_celltype_distance_expression(
# Picalm, Grn, Cfl1 ,Trem2
gene_name = "Grn",
celltype_name = cc,
celltype_threshold = 0.1,
distance_groups = list(c(0, 15), c(20, 40), c(50, 80), c(500, Inf)),
output_file = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Grn_", cc, "_detailed.pdf"),
save_individual = FALSE,
show_non_target_cells = FALSE,
width = 16,
height = 12
)
Analyzing stCAMBL data - Cell type specific analysis
Analyzing gene:Trem2
Cell type:Microglia
Distance groups:0-15, 20-40, 50-80, >500
Cell type threshold: 0.1
Number of qualified cells: 1768
Total number of cells: 9244
Cell type proportion range: 0 - 0.933
Target cell type group statistics:
# A tibble: 5 × 5
distance_group count mean_expr median_expr mean_celltype_prop
<chr> <int> <dbl> <dbl> <dbl>
1 20-40um 131 0.688 0.672 0.475
2 50-80um 177 0.678 0.653 0.393
3 >500um 251 0.556 0.537 0.343
4 Other 1195 0.591 0.575 0.303
5 ≤15um 14 0.661 0.675 0.553
Combined plot saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Trem2_Microglia_detailed.pdf
For comparison, we do the same analysis of the raw data
[ ]:
library(ggplot2)
library(scater)
library(ggpubr)
library(pdist)
library(viridis)
library(dplyr)
library(RColorBrewer)
PLAQUE_COLOR <- "#8B4B9F"
plaque = read.csv("/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/plaque_13months-disease-replicate_1.csv")
# Data preprocessing
expr_use_scater_norm_raw <- normalizeCounts(data_use$count_use)
# Cell distance classification function
classify_cells_by_plaque_distance_celltype <- function(location_data, plaque_data,
celltype_proportion, celltype_name,
celltype_threshold = 0.5,
distance_groups = list(c(10, 20), c(50, Inf)),
group_names = NULL) {
# Check if cell type exists
if (!celltype_name %in% colnames(celltype_proportion)) {
stop(paste("Cell type", celltype_name, "does not exist in cell type proportion matrix"))
}
cell_info <- data.frame(
x = location_data[,1],
y = location_data[,2],
cell_id = 1:nrow(location_data),
min_distance = NA,
nearest_plaque_id = NA,
distance_group = "Other",
celltype_prop = celltype_proportion[, celltype_name],
is_target_celltype = FALSE # Whether it is target cell type
)
# Filter target cell types
cell_info$is_target_celltype <- cell_info$celltype_prop >= celltype_threshold
message(paste("Cell type threshold:", celltype_threshold))
message(paste("Number of qualified cells:", sum(cell_info$is_target_celltype)))
message(paste("Total number of cells:", nrow(cell_info)))
message(paste("Cell type proportion range:", round(min(cell_info$celltype_prop), 3), "-", round(max(cell_info$celltype_prop), 3)))
# Calculate distance from each cell to all plaques
for(i in 1:nrow(cell_info)) {
distances <- pdist(location_data[i,], plaque_data[,1:2])@dist
cell_info$min_distance[i] <- min(distances)
cell_info$nearest_plaque_id[i] <- which.min(distances)
}
# Distance grouping for target cell types
target_cells <- which(cell_info$is_target_celltype)
# Group by distance intervals
for(i in 1:length(distance_groups)) {
group_range <- distance_groups[[i]]
lower_bound <- group_range[1]
upper_bound <- group_range[2]
# Create group names
if(!is.null(group_names) && length(group_names) >= i) {
group_name <- group_names[i]
} else {
if(is.infinite(upper_bound)) {
group_name <- paste0(">", lower_bound, "um")
} else if(lower_bound == 0) {
group_name <- paste0("≤", upper_bound, "um")
} else {
group_name <- paste0(lower_bound, "-", upper_bound, "um")
}
}
# Group target cell types
if(is.infinite(upper_bound)) {
in_group <- target_cells[cell_info$min_distance[target_cells] > lower_bound]
} else {
in_group <- target_cells[cell_info$min_distance[target_cells] >= lower_bound &
cell_info$min_distance[target_cells] <= upper_bound]
}
cell_info$distance_group[in_group] <- group_name
}
# Add numeric group labels for subsequent analysis
unique_groups <- unique(cell_info$distance_group[cell_info$is_target_celltype])
cell_info$group_numeric <- NA
cell_info$group_numeric[cell_info$is_target_celltype] <- as.numeric(factor(
cell_info$distance_group[cell_info$is_target_celltype],
levels = unique_groups
))
return(cell_info)
}
# Create plaque boundary circle data
create_plaque_circles <- function(plaque_data, n_points = 100) {
circle_data <- data.frame()
for(i in 1:nrow(plaque_data)) {
# Create circle points
theta <- seq(0, 2*pi, length.out = n_points)
x_circle <- plaque_data$m.cx[i] + plaque_data$s.radius.mean[i] * cos(theta)
y_circle <- plaque_data$m.cy[i] + plaque_data$s.radius.mean[i] * sin(theta)
temp_circle <- data.frame(
x = x_circle,
y = y_circle,
plaque_id = i,
radius = plaque_data$s.radius.mean[i]
)
circle_data <- rbind(circle_data, temp_circle)
}
return(circle_data)
}
# Plotting function
plot_plaque_celltype_distance_expression <- function(gene_name,
celltype_name, # Cell type name
celltype_threshold = 0.5, # Cell type threshold
plaque_file_path = NULL,
distance_groups = list(c(0, 20), c(50, Inf)),
group_names = NULL,
output_file = NULL,
save_individual = TRUE,
show_non_target_cells = TRUE, # Whether to show non-target cell types
width = 14, height = 10) {
# Check if gene exists
if (!gene_name %in% rownames(expr_use_scater_norm_raw)) {
stop(paste("Gene", gene_name, "does not exist in expression matrix"))
}
# Check if cell type exists
if (!celltype_name %in% colnames(data_use$celltype_proportion)) {
available_types <- colnames(data_use$celltype_proportion)
stop(paste("Cell type", celltype_name, "does not exist in cell type proportion matrix.\nAvailable cell types:",
paste(available_types, collapse = ", ")))
}
# Read plaque data
if (!is.null(plaque_file_path)) {
plaque_data <- read.csv(plaque_file_path)
} else {
if (exists("plaque")) {
plaque_data <- plaque
} else {
stop("Please provide plaque data file path or ensure plaque data is loaded")
}
}
message("Analyzing Raw data - Cell type specific analysis")
message("Analyzing gene:", gene_name)
message("Cell type:", celltype_name)
message("Distance groups:", paste(sapply(distance_groups, function(x) {
if(is.infinite(x[2])) paste0(">", x[1]) else paste0(x[1], "-", x[2])
}), collapse = ", "))
# Classify cells
cell_info <- classify_cells_by_plaque_distance_celltype(
data_use$location_use,
plaque_data,
data_use$celltype_proportion,
celltype_name,
celltype_threshold,
distance_groups,
group_names
)
# Get gene expression data
gene_expr <- expr_use_scater_norm_raw[gene_name, ]
cell_info$gene_expression <- gene_expr
# Create plaque boundary circles
plaque_circles <- create_plaque_circles(plaque_data)
# Prepare plotting data
if (show_non_target_cells) {
plot_data <- cell_info
# Set different transparency and size for different cell types
plot_data$alpha_val <- ifelse(!plot_data$is_target_celltype, 0.1,
ifelse(plot_data$distance_group == "Other", 0.3, 0.8))
plot_data$size_val <- ifelse(!plot_data$is_target_celltype, 0.3,
ifelse(plot_data$distance_group == "Other", 0.8, 1.5))
} else {
# Show target cell types
plot_data <- cell_info[cell_info$is_target_celltype, ]
plot_data$alpha_val <- ifelse(plot_data$distance_group == "Other", 0.5, 0.9)
plot_data$size_val <- ifelse(plot_data$distance_group == "Other", 1.0, 1.8)
}
# Get all groups in target cell type and set colors
target_cell_data <- cell_info[cell_info$is_target_celltype, ]
all_groups <- unique(target_cell_data$distance_group)
n_groups <- length(all_groups)
# Set colors for different groups
if(n_groups <= 8) {
group_colors <- brewer.pal(max(3, n_groups), "Set2")[1:n_groups]
} else {
group_colors <- rainbow(n_groups)
}
names(group_colors) <- all_groups
# Add gray for non-target cells if showing them
if (show_non_target_cells) {
group_colors["Non_Target"] <- "lightgray"
plot_data$display_group <- ifelse(plot_data$is_target_celltype,
plot_data$distance_group, "Non_Target")
} else {
plot_data$display_group <- plot_data$distance_group
}
# Statistics
group_stats <- target_cell_data %>%
group_by(distance_group) %>%
summarise(
count = n(),
mean_expr = mean(gene_expression, na.rm = TRUE),
median_expr = median(gene_expression, na.rm = TRUE),
mean_celltype_prop = mean(celltype_prop, na.rm = TRUE),
.groups = 'drop'
)
message("Target cell type group statistics:")
print(group_stats)
# Create main plot 1: Cell type proportion plot
p1 <- ggplot() +
# Plot cell points colored by cell type proportion
geom_point(data = plot_data,
aes(x = x, y = y, color = celltype_prop, size = display_group, alpha = display_group)) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color and size settings
scale_color_viridis_c(option = "A", name = paste(celltype_name, "\nProportion")) +
scale_size_manual(values = setNames(plot_data$size_val[match(unique(plot_data$display_group), plot_data$display_group)],
unique(plot_data$display_group)), guide = "none") +
scale_alpha_manual(values = setNames(plot_data$alpha_val[match(unique(plot_data$display_group), plot_data$display_group)],
unique(plot_data$display_group)), guide = "none") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(1.0, "cm")
) +
ggtitle(paste0(celltype_name, " Distribution")) +
coord_fixed()
# Create main plot 2: Distance group plot
target_plot_data <- plot_data[plot_data$is_target_celltype, ]
p2 <- ggplot() +
# Plot cell points colored by distance group
geom_point(data = target_plot_data,
aes(x = x, y = y, color = distance_group),
size = 1.5, alpha = 0.8) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color settings
scale_color_manual(values = group_colors, name = "Distance Group") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(1.0, "cm")
) +
ggtitle(paste0(celltype_name, " Distance Groups")) +
coord_fixed()
# Create spatial gene expression plot
p3 <- ggplot() +
# Plot cell points colored by gene expression
geom_point(data = target_plot_data,
aes(x = x, y = y, color = gene_expression, shape = distance_group),
size = 1.8, alpha = 0.8) +
# Plot plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = PLAQUE_COLOR, size = 1.2, alpha = 0.8) +
# Color settings
scale_color_viridis_c(option = "D", name = paste(gene_name, "\nExpression")) +
scale_shape_manual(values = c(16, 17, 15, 18, 19, 8, 11, 12)[1:n_groups], name = "Distance Group") +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.key.size = unit(0.8, "cm")
) +
ggtitle(paste0(gene_name, " in ", celltype_name)) +
coord_fixed()
# Create expression comparison boxplot
plot_data_for_box <- target_cell_data[target_cell_data$distance_group != "Other", ]
p4 <- ggplot(plot_data_for_box, aes(x = distance_group, y = gene_expression, fill = distance_group)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.6, size = 1.0) +
scale_fill_manual(values = group_colors) +
labs(x = "Distance Group", y = paste(gene_name, "Expression"),
title = paste(gene_name, "in", celltype_name)) +
theme_minimal() +
theme(
text = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26),
axis.text.x = element_text(size = 26, angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 26),
legend.position = "none"
) +
stat_compare_means(method = "kruskal.test", label = "p.format",
size = 5)
# Combine plots
combined_plot <- ggarrange(
ggarrange(p1, p2, ncol = 2, widths = c(1, 1)),
ggarrange(p3, p4, ncol = 2, widths = c(1, 1)),
nrow = 2, heights = c(1, 1)
)
# Save plots
if (!is.null(output_file)) {
ggsave(output_file, combined_plot,
width = width, height = height,
device = "pdf")
message("Combined plot saved to:", output_file)
if (save_individual) {
file_parts <- tools::file_path_sans_ext(output_file)
file_ext <- tools::file_ext(output_file)
if (file_ext == "") file_ext <- "pdf"
# Save individual subplots
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_celltype_distribution_raw.", file_ext), p1,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_distance_groups_raw.", file_ext), p2,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_expression_spatial_raw.", file_ext), p3,
width = width * 0.5, height = height * 0.5, device = "pdf")
ggsave(paste0(file_parts, "_", gsub("[^A-Za-z0-9]", "_", celltype_name), "_expression_comparison_raw.", file_ext), p4,
width = width * 0.5, height = height * 0.5, device = "pdf")
}
}
# Return results
return(list(
combined_plot = combined_plot,
celltype_distribution_plot = p1,
distance_groups_plot = p2,
expression_spatial_plot = p3,
expression_comparison_plot = p4,
cell_info = cell_info,
target_cell_stats = group_stats,
celltype_summary = list(
total_cells = nrow(cell_info),
target_cells = sum(cell_info$is_target_celltype),
target_percentage = round(sum(cell_info$is_target_celltype) / nrow(cell_info) * 100, 2)
)
))
}
# Call function
result_celltype_detailed <- plot_plaque_celltype_distance_expression(
# Picalm, Grn, Cfl1 ,Trem2
gene_name = "Grn",
celltype_name = cc,
celltype_threshold = 0.1,
distance_groups = list(c(0, 15), c(20, 40), c(50, 80), c(100, Inf)),
output_file = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Grn_", cc, "_detailed_raw.pdf"),
save_individual = FALSE,
show_non_target_cells = FALSE,
width = 16,
height = 12
)
Analyzing Raw data - Cell type specific analysis
Analyzing gene:Trem2
Cell type:Microglia
Distance groups:0-15, 20-40, 50-80, >100
Cell type threshold: 0.1
Number of qualified cells: 1768
Total number of cells: 9244
Cell type proportion range: 0 - 0.933
Target cell type group statistics:
# A tibble: 5 × 5
distance_group count mean_expr median_expr mean_celltype_prop
<chr> <int> <dbl> <dbl> <dbl>
1 20-40um 131 1.21 1.28 0.475
2 50-80um 177 0.801 0 0.393
3 >100um 1292 0.241 0 0.299
4 Other 154 0.678 0 0.398
5 ≤15um 14 1.64 1.61 0.553
Combined plot saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Trem2_Microglia_detailed_raw.pdf
Analysis of the relationship between gene expression and distance to Aβ plaques
[ ]:
library(ggplot2)
library(scater)
library(ggpubr)
library(pdist)
library(viridis)
library(dplyr)
library(corrplot)
library(ggcorrplot)
# Data preprocessing
expr_use_scater_norm <- normalizeCounts(expr_use)
plaque <- read.csv("/media/data1/ai4bread_lab/lyfu/experiment/Celina/Data/plaque_13months-disease-replicate_1.csv")
# Gene expression heatmap based on cell type
plot_gene_expression_heatmap_celltype <- function(gene_name,
celltype_name = NULL,
celltype_threshold = 0.5,
grid_size = 80,
smooth_radius = 40,
output_file = NULL,
width = 10, height = 8) {
message("Creating expression heatmap for", gene_name, "in", ifelse(is.null(celltype_name), "all cells", celltype_name), "...")
# Check if gene exists
if (!gene_name %in% rownames(expr_use_scater_norm)) {
stop(paste("Gene", gene_name, "does not exist in expression matrix"))
}
# Filter cell types
if (!is.null(celltype_name)) {
if (!celltype_name %in% colnames(data_use$celltype_proportion)) {
available_types <- colnames(data_use$celltype_proportion)
stop(paste("Cell type", celltype_name, "does not exist. Available types:", paste(available_types, collapse = ", ")))
}
# Filter target cell types
celltype_prop <- data_use$celltype_proportion[, celltype_name]
target_cells <- which(celltype_prop >= celltype_threshold)
if (length(target_cells) == 0) {
stop(paste("No cells meet the threshold", celltype_threshold, "for", celltype_name))
}
message(paste("Using", length(target_cells), celltype_name, "cells (threshold:", celltype_threshold, ")"))
# Use only target cell type data
location_data <- data_use$location_use[target_cells, ]
gene_expr <- expr_use_scater_norm[gene_name, target_cells]
} else {
# Use all cells
location_data <- data_use$location_use
gene_expr <- expr_use_scater_norm[gene_name, ]
message("Using all", nrow(location_data), "cells")
}
# Create grid
x_range <- range(location_data[,1])
y_range <- range(location_data[,2])
x_seq <- seq(x_range[1], x_range[2], length.out = grid_size)
y_seq <- seq(y_range[1], y_range[2], length.out = grid_size)
grid_data <- expand.grid(x = x_seq, y = y_seq)
grid_data$expression <- NA
# Calculate weighted average expression for each grid point
message("Calculating grid expression values...")
for(i in 1:nrow(grid_data)) {
distances <- sqrt((location_data[,1] - grid_data$x[i])^2 +
(location_data[,2] - grid_data$y[i])^2)
weights <- exp(-distances^2 / (2 * smooth_radius^2))
weights <- weights / sum(weights)
grid_data$expression[i] <- sum(gene_expr * weights, na.rm = TRUE)
}
# Read plaque data
if (exists("plaque")) {
plaque_data <- plaque
} else {
stop("Please ensure plaque data is loaded")
}
# Create plaque circles
create_plaque_circles <- function(plaque_data, n_points = 100) {
circle_data <- data.frame()
for(i in 1:nrow(plaque_data)) {
theta <- seq(0, 2*pi, length.out = n_points)
x_circle <- plaque_data$m.cx[i] + plaque_data$s.radius.mean[i] * cos(theta)
y_circle <- plaque_data$m.cy[i] + plaque_data$s.radius.mean[i] * sin(theta)
temp_circle <- data.frame(
x = x_circle, y = y_circle, plaque_id = i,
radius = plaque_data$s.radius.mean[i]
)
circle_data <- rbind(circle_data, temp_circle)
}
return(circle_data)
}
plaque_circles <- create_plaque_circles(plaque_data)
# Create cell scatter layer
cell_layer <- NULL
if (!is.null(celltype_name)) {
cell_data <- data.frame(
x = location_data[,1],
y = location_data[,2],
expression = gene_expr
)
cell_layer <- geom_point(data = cell_data,
aes(x = x, y = y, color = expression),
alpha = 0.6, size = 0.8)
}
# Set title
title_text <- if (!is.null(celltype_name)) {
paste0(gene_name, " in ", celltype_name, " Cells")
} else {
paste0(gene_name, " Expression Heatmap")
}
# Plot heatmap
p <- ggplot() +
# Gene expression heatmap
geom_raster(data = grid_data, aes(x = x, y = y, fill = expression),
alpha = 0.75, interpolate = TRUE) +
scale_fill_viridis_c(option = "D", name = paste(gene_name, "\nExpression")) +
# Add cell scatter
cell_layer +
{if (!is.null(celltype_name)) scale_color_viridis_c(option = "D", guide = "none")} +
# Plaque boundaries
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = "white", size = 3, alpha = 0.9) +
geom_path(data = plaque_circles,
aes(x = x, y = y, group = plaque_id),
color = "red", size = 2, alpha = 0.8) +
# Theme settings
theme_void() +
theme(
plot.title = element_text(size = 26, hjust = 0.5),
text = element_text(size = 26),
legend.position = "right",
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
panel.background = element_rect(fill = "white")
) +
ggtitle(title_text) +
coord_fixed()
# Save
if (!is.null(output_file)) {
ggsave(output_file, p, width = width, height = height, device = "pdf")
message("Heatmap saved to:", output_file)
}
return(list(
plot = p,
grid_data = grid_data,
cell_count = nrow(location_data)
))
}
# Gene expression and distance correlation analysis based on cell type
analyze_expression_distance_correlation_celltype <- function(gene_list,
celltype_name = NULL,
celltype_threshold = 0.5,
output_file = NULL,
method = "spearman") {
celltype_text <- ifelse(is.null(celltype_name), "all cells", celltype_name)
message("Analyzing gene expression and plaque distance correlation in", celltype_text, "...")
# Read plaque data
if (exists("plaque")) {
plaque_data <- plaque
} else {
stop("Please ensure plaque data is loaded")
}
# Filter cell types
if (!is.null(celltype_name)) {
if (!celltype_name %in% colnames(data_use$celltype_proportion)) {
available_types <- colnames(data_use$celltype_proportion)
stop(paste("Cell type", celltype_name, "does not exist. Available types:", paste(available_types, collapse = ", ")))
}
celltype_prop <- data_use$celltype_proportion[, celltype_name]
target_cells <- which(celltype_prop >= celltype_threshold)
if (length(target_cells) == 0) {
stop(paste("No cells meet the threshold", celltype_threshold, "for", celltype_name))
}
message(paste("Using", length(target_cells), celltype_name, "cells"))
location_data <- data_use$location_use[target_cells, ]
expr_data <- expr_use_scater_norm[, target_cells]
} else {
location_data <- data_use$location_use
expr_data <- expr_use_scater_norm
message("Using all", nrow(location_data), "cells")
}
# Calculate distance from each cell to nearest plaque
calculate_distances <- function(location_data, plaque_data) {
distances <- apply(location_data, 1, function(cell_pos) {
min(apply(plaque_data[,1:2], 1, function(plaque_pos) {
sqrt(sum((cell_pos - plaque_pos)^2))
}))
})
return(distances)
}
distances <- calculate_distances(location_data, plaque_data)
# Check if genes exist
existing_genes <- gene_list[gene_list %in% rownames(expr_data)]
if (length(existing_genes) == 0) {
stop("No valid genes for analysis")
}
if (length(existing_genes) < length(gene_list)) {
missing_genes <- gene_list[!gene_list %in% rownames(expr_data)]
warning("The following genes do not exist:", paste(missing_genes, collapse = ", "))
}
# Create gene expression data frame
gene_expr_data <- data.frame(row.names = 1:length(distances))
for (gene in existing_genes) {
gene_expr_data[[gene]] <- expr_data[gene, ]
}
# Calculate inter-gene correlation matrix
gene_cor_matrix <- cor(gene_expr_data, method = method, use = "complete.obs")
# Calculate gene-distance correlations
distance_correlations <- sapply(existing_genes, function(gene) {
cor(distances, expr_data[gene, ], method = method, use = "complete.obs")
})
# Create gene-distance correlation results data frame
distance_cor_results <- data.frame(
gene = names(distance_correlations),
correlation = as.numeric(distance_correlations),
abs_correlation = abs(as.numeric(distance_correlations))
) %>%
arrange(desc(abs_correlation))
message("=== Gene-Plaque Distance Correlation Results in", celltype_text, "===")
print(distance_cor_results)
# Inter-gene correlation heatmap
p1 <- ggcorrplot(gene_cor_matrix,
method = "circle",
type = "full",
ggtheme = theme_minimal(),
title = paste("Gene-Gene Correlation in", celltype_text, "(", method, ")"),
lab = FALSE,
lab_size = 8) +
theme(
text = element_text( size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
axis.text.x = element_text(angle = 45, hjust = 1, size = 26),
axis.text.y = element_text(size = 26)
)
# Gene-distance correlation dot plot
p2 <- ggplot(distance_cor_results, aes(x = reorder(gene, correlation), y = correlation)) +
geom_col(aes(fill = correlation > 0), alpha = 0.8, width = 0.7) +
geom_point(size = 3, color = "black") +
scale_fill_manual(values = c("TRUE" = "coral", "FALSE" = "skyblue"),
labels = c("Positive", "Negative"), name = "Correlation") +
coord_flip() +
labs(x = "Gene", y = paste("Correlation with Distance (", method, ")"),
title = paste("Gene vs Distance in", celltype_text)) +
theme_minimal() +
theme(
text = element_text( size = 26),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.text.x = element_text(size = 26),
axis.text.y = element_text(size = 26),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26)
) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.7, color = "gray50")
# Scatter plots: Top 4 most correlated genes vs distance
top_genes <- head(distance_cor_results$gene, 4)
scatter_plots <- list()
for (i in 1:length(top_genes)) {
gene <- top_genes[i]
correlation_val <- distance_cor_results$correlation[distance_cor_results$gene == gene]
plot_data <- data.frame(
distance = distances,
expression = expr_data[gene, ]
)
p_scatter <- ggplot(plot_data, aes(x = distance, y = expression)) +
geom_point(alpha = 0.6, size = 0.8, color = "steelblue") +
geom_smooth(method = "lm", color = "red", alpha = 0.8, se = TRUE) +
labs(x = "Distance to Plaque", y = paste(gene, "Expression"),
title = paste0(gene, " vs Distance\nr = ", round(correlation_val, 3))) +
theme_minimal() +
theme(
text = element_text( size = 26),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.text.x = element_text(size = 26),
axis.text.y = element_text(size = 26),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26)
)
scatter_plots[[i]] <- p_scatter
}
p3 <- ggarrange(plotlist = scatter_plots, ncol = 2, nrow = 2)
# Combine all plots
combined_plot <- ggarrange(
ggarrange(p1, p2, ncol = 2, widths = c(1.2, 1)),
p3,
nrow = 2, heights = c(1, 1)
)
# Save
if (!is.null(output_file)) {
ggsave(output_file, combined_plot,
width = 16, height = 12, device = "pdf")
message("Correlation analysis results saved to:", output_file)
}
return(list(
plot = combined_plot,
gene_correlation_matrix = gene_cor_matrix,
gene_distance_correlations = distance_cor_results,
gene_expression_data = gene_expr_data,
cell_count = nrow(location_data),
celltype_info = list(
celltype = celltype_name,
threshold = celltype_threshold,
cell_count = nrow(location_data)
)
))
}
# Gene network correlation analysis based on cell type
analyze_gene_network_celltype <- function(gene_list,
celltype_name = NULL,
celltype_threshold = 0.5,
correlation_threshold = 0.3,
output_file = NULL,
method = "spearman") {
celltype_text <- ifelse(is.null(celltype_name), "all cells", celltype_name)
message("Analyzing gene co-expression network in", celltype_text, "...")
# Filter cell types
if (!is.null(celltype_name)) {
if (!celltype_name %in% colnames(data_use$celltype_proportion)) {
available_types <- colnames(data_use$celltype_proportion)
stop(paste("Cell type", celltype_name, "does not exist. Available types:", paste(available_types, collapse = ", ")))
}
celltype_prop <- data_use$celltype_proportion[, celltype_name]
target_cells <- which(celltype_prop >= celltype_threshold)
if (length(target_cells) == 0) {
stop(paste("No cells meet the threshold", celltype_threshold, "for", celltype_name))
}
message(paste("Using", length(target_cells), celltype_name, "cells"))
expr_data <- expr_use_scater_norm[, target_cells]
} else {
expr_data <- expr_use_scater_norm
message("Using all", ncol(expr_data), "cells")
}
# Check if genes exist
existing_genes <- gene_list[gene_list %in% rownames(expr_data)]
if (length(existing_genes) < 2) {
stop("At least 2 valid genes are required for correlation analysis")
}
# Create gene expression matrix
gene_expr_matrix <- t(expr_data[existing_genes, ])
colnames(gene_expr_matrix) <- existing_genes
# Calculate correlation matrix
cor_matrix <- cor(gene_expr_matrix, method = method, use = "complete.obs")
# Extract strongly correlated gene pairs
strong_correlations <- data.frame()
for(i in 1:(nrow(cor_matrix)-1)) {
for(j in (i+1):ncol(cor_matrix)) {
cor_val <- cor_matrix[i, j]
if(abs(cor_val) >= correlation_threshold) {
strong_correlations <- rbind(strong_correlations, data.frame(
Gene1 = rownames(cor_matrix)[i],
Gene2 = colnames(cor_matrix)[j],
Correlation = cor_val,
Abs_Correlation = abs(cor_val)
))
}
}
}
if(nrow(strong_correlations) > 0) {
strong_correlations <- strong_correlations %>% arrange(desc(Abs_Correlation))
message("=== Strongly correlated gene pairs in", celltype_text, "(|r| >= ", correlation_threshold, ") ===")
print(strong_correlations)
} else {
message("No strongly correlated gene pairs found in", celltype_text, "(threshold:", correlation_threshold, ")")
}
# Correlation heatmap
p1 <- ggcorrplot(cor_matrix,
method = "circle",
type = "upper",
ggtheme = theme_minimal(),
title = paste("Gene Network in", celltype_text, "(", method, ")"),
lab = FALSE,
lab_size = 8,
show.diag = TRUE) +
theme(
text = element_text( size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
axis.text.x = element_text(angle = 45, hjust = 1, size = 26),
axis.text.y = element_text(size = 26)
)
# Correlation distribution histogram
cor_values <- cor_matrix[upper.tri(cor_matrix)]
p2 <- ggplot(data.frame(correlation = cor_values), aes(x = correlation)) +
geom_histogram(bins = 20, fill = "skyblue", color = "black", alpha = 0.7) +
geom_vline(xintercept = c(-correlation_threshold, correlation_threshold),
color = "red", linetype = "dashed", size = 1) +
labs(x = "Correlation Coefficient", y = "Frequency",
title = paste("Correlation Distribution in", celltype_text)) +
theme_minimal() +
theme(
text = element_text( size = 26),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.text.x = element_text(size = 26),
axis.text.y = element_text(size = 26),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26)
)
# Strong correlation gene pairs bar plot
if(nrow(strong_correlations) > 0) {
strong_correlations$Pair <- paste(strong_correlations$Gene1, "-", strong_correlations$Gene2)
p3 <- ggplot(strong_correlations, aes(x = reorder(Pair, Abs_Correlation),
y = Correlation, fill = Correlation > 0)) +
geom_col(alpha = 0.8) +
scale_fill_manual(values = c("TRUE" = "coral", "FALSE" = "lightblue"),
labels = c("Positive", "Negative"), name = "Correlation") +
coord_flip() +
labs(x = "Gene Pairs", y = "Correlation Coefficient",
title = paste0("Strong Correlations in ", celltype_text, " (|r| ≥ ", correlation_threshold, ")")) +
theme_minimal() +
theme(
text = element_text( size = 26),
legend.text = element_text(size = 26),
legend.title = element_text(size = 26),
plot.title = element_text(size = 26, hjust = 0.5),
axis.text.x = element_text(size = 23),
axis.text.y = element_text(size = 23),
axis.title.x = element_text(size = 26),
axis.title.y = element_text(size = 26)
) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5)
} else {
p3 <- ggplot() +
annotate("text", x = 0.5, y = 0.5,
label = paste("No strong correlations found in", celltype_text, "\n(threshold:", correlation_threshold, ")"),
size = 10) +
theme_void() +
labs(title = paste("Strong Correlations in", celltype_text)) +
theme(plot.title = element_text(size = 26, hjust = 0.5))
}
# Combine plots
combined_plot <- ggarrange(
p1,
ggarrange(p2, p3, ncol = 2, widths = c(1, 1)),
nrow = 2, heights = c(1.2, 1)
)
# Save
if (!is.null(output_file)) {
ggsave(output_file, combined_plot,
width = 14, height = 10, device = "pdf")
message("Gene network analysis results saved to:", output_file)
}
return(list(
plot = combined_plot,
correlation_matrix = cor_matrix,
strong_correlations = strong_correlations,
correlation_distribution = cor_values,
celltype_info = list(
celltype = celltype_name,
threshold = celltype_threshold,
cell_count = ncol(gene_expr_matrix)
)
))
}
# Multi-gene heatmap comparison based on cell type
plot_multi_gene_heatmaps_celltype <- function(gene_list,
celltype_name = NULL,
celltype_threshold = 0.5,
grid_size = 60,
smooth_radius = 35,
output_file = NULL,
ncol = 2) {
celltype_text <- ifelse(is.null(celltype_name), "all cells", celltype_name)
message("Creating multi-gene heatmap comparison in", celltype_text, "...")
# Check genes
existing_genes <- gene_list[gene_list %in% rownames(expr_use_scater_norm)]
if (length(existing_genes) == 0) {
stop("No valid genes for analysis")
}
plot_list <- list()
for (gene in existing_genes) {
result <- plot_gene_expression_heatmap_celltype(
gene_name = gene,
celltype_name = celltype_name,
celltype_threshold = celltype_threshold,
grid_size = grid_size,
smooth_radius = smooth_radius
)
plot_list[[gene]] <- result$plot
}
# Combine plots
nrow <- ceiling(length(plot_list) / ncol)
combined_plot <- ggarrange(plotlist = plot_list, ncol = ncol, nrow = nrow)
# Save
if (!is.null(output_file)) {
ggsave(output_file, combined_plot,
width = 6 * ncol, height = 5 * nrow,
device = "pdf")
message("Multi-gene heatmap saved to:", output_file)
}
return(combined_plot)
}
# Comprehensive analysis function based on cell type
comprehensive_plaque_analysis_celltype <- function(gene_list,
celltype_name = NULL,
celltype_threshold = 0.5,
correlation_threshold = 0.3,
output_dir = "comprehensive_analysis") {
celltype_text <- ifelse(is.null(celltype_name), "all_cells", celltype_name)
analysis_dir <- file.path(output_dir, paste0("analysis_", celltype_text))
if (!dir.exists(analysis_dir)) {
dir.create(analysis_dir, recursive = TRUE)
}
message("=== Starting comprehensive plaque analysis for", ifelse(is.null(celltype_name), "all cells", celltype_name), "===")
# Generate heatmaps
message("1. Generating expression heatmaps...")
heatmap_result <- plot_multi_gene_heatmaps_celltype(
gene_list = gene_list,
celltype_name = celltype_name,
celltype_threshold = celltype_threshold,
output_file = file.path(analysis_dir, "expression_heatmaps.pdf")
)
# Correlation analysis
message("2. Performing gene-distance correlation analysis...")
correlation_result <- analyze_expression_distance_correlation_celltype(
gene_list = gene_list,
celltype_name = celltype_name,
celltype_threshold = celltype_threshold,
output_file = file.path(analysis_dir, "gene_distance_correlation.pdf")
)
# Gene network analysis
message("3. Performing gene co-expression network analysis...")
network_result <- analyze_gene_network_celltype(
gene_list = gene_list,
celltype_name = celltype_name,
celltype_threshold = celltype_threshold,
correlation_threshold = correlation_threshold,
output_file = file.path(analysis_dir, "gene_network_analysis.pdf")
)
# Save statistical results
write.csv(correlation_result$gene_distance_correlations,
file.path(analysis_dir, "gene_distance_correlations.csv"),
row.names = FALSE)
write.csv(correlation_result$gene_correlation_matrix,
file.path(analysis_dir, "gene_gene_correlation_matrix.csv"),
row.names = TRUE)
if(nrow(network_result$strong_correlations) > 0) {
write.csv(network_result$strong_correlations,
file.path(analysis_dir, "strong_gene_correlations.csv"),
row.names = FALSE)
}
# Save analysis summary
analysis_summary <- data.frame(
Analysis_Type = c("Cell_Type", "Threshold", "Total_Cells", "Genes_Analyzed", "Strong_Correlations"),
Value = c(
ifelse(is.null(celltype_name), "All_Cells", celltype_name),
celltype_threshold,
correlation_result$cell_count,
length(gene_list),
nrow(network_result$strong_correlations)
)
)
write.csv(analysis_summary,
file.path(analysis_dir, "analysis_summary.csv"),
row.names = FALSE)
message("=== Analysis completed ===")
message("Results saved to:", analysis_dir)
return(list(
heatmaps = heatmap_result,
distance_correlations = correlation_result,
gene_network = network_result,
analysis_info = analysis_summary
))
}
# Call functions
gene_list <- c('Picalm', 'Cfl1', 'Grn', 'Trem2')
cc <- "Microglia"
# Single gene heatmap (optional individual analysis)
heatmap_result <- plot_gene_expression_heatmap_celltype(
gene_name = "Grn",
celltype_name = cc,
celltype_threshold = 0.3,
grid_size = 80,
smooth_radius = 40,
output_file = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Grn_", cc, "_heatmap.pdf")
)
# Gene-distance correlation analysis
correlation_result <- analyze_expression_distance_correlation_celltype(
gene_list = gene_list,
celltype_name = cc,
celltype_threshold = 0.1,
output_file = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/", cc, "_gene_distance_correlations.pdf"),
method = "spearman"
)
# Multi-gene heatmap comparison (specific cell type)
multi_heatmap <- plot_multi_gene_heatmaps_celltype(
gene_list = gene_list,
celltype_name = cc,
celltype_threshold = 0.1,
output_file = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/", cc, "_multi_gene_heatmaps.pdf"),
ncol = 3
)
# Comprehensive analysis
comprehensive_result <- comprehensive_plaque_analysis_celltype(
gene_list = gene_list,
celltype_name = cc,
celltype_threshold = 0.1,
correlation_threshold = 0.3,
output_dir = paste0("/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/", cc, "_comprehensive_analysis")
)
# Display results
print("=== Analysis completed ===")
print(paste("Cell type:", cc))
print(paste("Number of cells used:", correlation_result$cell_count))
print("Gene-distance correlations:")
print(correlation_result$gene_distance_correlations)
corrplot 0.95 loaded
Creating expression heatmap forGrninMicroglia...
Using 748 Microglia cells (threshold: 0.3 )
Calculating grid expression values...
Heatmap saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Grn_Microglia_heatmap.pdf
Analyzing gene expression and plaque distance correlation inMicroglia...
Using 1768 Microglia cells
=== Gene-Plaque Distance Correlation Results inMicroglia===
gene correlation abs_correlation
1 Trem2 -0.2954035 0.2954035
2 Grn -0.2932961 0.2932961
3 Picalm -0.2794015 0.2794015
4 Cfl1 0.1074841 0.1074841
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Correlation analysis results saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_gene_distance_correlations.pdf
Creating multi-gene heatmap comparison inMicroglia...
Creating expression heatmap forPicalminMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forCfl1inMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forGrninMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forTrem2inMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Multi-gene heatmap saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_multi_gene_heatmaps.pdf
=== Starting comprehensive plaque analysis forMicroglia===
1. Generating expression heatmaps...
Creating multi-gene heatmap comparison inMicroglia...
Creating expression heatmap forPicalminMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forCfl1inMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forGrninMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Creating expression heatmap forTrem2inMicroglia...
Using 1768 Microglia cells (threshold: 0.1 )
Calculating grid expression values...
Multi-gene heatmap saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_comprehensive_analysis/analysis_Microglia/expression_heatmaps.pdf
2. Performing gene-distance correlation analysis...
Analyzing gene expression and plaque distance correlation inMicroglia...
Using 1768 Microglia cells
=== Gene-Plaque Distance Correlation Results inMicroglia===
gene correlation abs_correlation
1 Trem2 -0.2954035 0.2954035
2 Grn -0.2932961 0.2932961
3 Picalm -0.2794015 0.2794015
4 Cfl1 0.1074841 0.1074841
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Correlation analysis results saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_comprehensive_analysis/analysis_Microglia/gene_distance_correlation.pdf
3. Performing gene co-expression network analysis...
Analyzing gene co-expression network inMicroglia...
Using 1768 Microglia cells
=== Strongly correlated gene pairs inMicroglia(|r| >= 0.3) ===
Gene1 Gene2 Correlation Abs_Correlation
1 Grn Trem2 0.9997741 0.9997741
2 Picalm Grn 0.9580008 0.9580008
3 Picalm Trem2 0.9540979 0.9540979
4 Picalm Cfl1 -0.4255421 0.4255421
5 Cfl1 Grn -0.3559943 0.3559943
6 Cfl1 Trem2 -0.3544326 0.3544326
Gene network analysis results saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_comprehensive_analysis/analysis_Microglia/gene_network_analysis.pdf
=== Analysis completed ===
Results saved to:/media/data1/ai4bread_lab/wkcui/experiments/Celina/Data/hip_results/Microglia_comprehensive_analysis/analysis_Microglia
[1] "=== Analysis completed ==="
[1] "Cell type: Microglia"
[1] "Number of cells used: 1768"
[1] "Gene-distance correlations:"
gene correlation abs_correlation
1 Trem2 -0.2954035 0.2954035
2 Grn -0.2932961 0.2932961
3 Picalm -0.2794015 0.2794015
4 Cfl1 0.1074841 0.1074841