工具選擇
一、Heatmap 套件比較
| 套件 | 優點 | 缺點 |
|---|---|---|
pheatmap | 預設漂亮、語法簡單 | 客製彈性有限 |
ComplexHeatmap | 極度彈性 | API 較複雜 |
ggplot2 + geom_tile | 與 ggplot 一致 | 不自動聚類 |
heatmaply | 互動式 | 不適合靜態圖 |
選擇法:第一張原型 →
pheatmap。要登 Nature → ComplexHeatmap。要互動探索 → heatmaply。要塞進 Shiny dashboard → plotly + geom_tile。
Picking: first draft → pheatmap. Submit to Nature → ComplexHeatmap. Interactive exploration → heatmaply. Shiny dashboard → plotly + geom_tile.
pheatmap
二、pheatmap:最快速的熱圖
# install.packages('pheatmap')
library(pheatmap)
# 1) 基本:基因 × 樣本 矩陣 / Gene × sample matrix
set.seed(7)
mat <- matrix(rnorm(200), nrow = 20,
dimnames = list(paste0('gene', 1:20),
paste0('S', 1:10)))
pheatmap(mat)
# 2) Z-score 縮放(每基因標準化)── 熱圖必備!
pheatmap(mat, scale = 'row',
color = colorRampPalette(c('navy','white','firebrick'))(50))
# 3) 加樣本/基因註解 / Add column & row annotations
ann_col <- data.frame(condition = rep(c('ctrl','trt'), each = 5),
batch = rep(c('A','B'), 5),
row.names = paste0('S', 1:10))
ann_row <- data.frame(pathway = sample(c('Cell cycle','Apoptosis','Immune'), 20, TRUE),
row.names = paste0('gene', 1:20))
ann_colors <- list(condition = c(ctrl = 'steelblue', trt = 'firebrick'),
batch = c(A = '#fdd', B = '#dfd'))
pheatmap(mat, scale = 'row',
annotation_col = ann_col,
annotation_row = ann_row,
annotation_colors = ann_colors,
show_rownames = FALSE,
cluster_cols = TRUE,
cluster_rows = TRUE,
clustering_distance_rows = 'correlation',
clustering_method = 'average',
cutree_cols = 2,
cutree_rows = 3,
border_color = NA,
fontsize = 9,
filename = 'results/heatmap.pdf', # 直接存 PDF
width = 7, height = 8)
ComplexHeatmap
三、ComplexHeatmap:出版品級終極武器
# BiocManager::install('ComplexHeatmap')
library(ComplexHeatmap); library(circlize)
set.seed(7)
mat <- matrix(rnorm(200), nrow = 20,
dimnames = list(paste0('gene', 1:20),
paste0('S', 1:10)))
mat_z <- t(scale(t(mat))) # row z-score (每基因標準化)
# 1) 顏色函式 / Color function
col_fun <- colorRamp2(c(-2, 0, 2), c('#1f4e8c','white','#c0392b'))
# 2) 樣本註解 (top annotation)
ann_top <- HeatmapAnnotation(
condition = rep(c('ctrl','trt'), each = 5),
batch = rep(c('A','B'), 5),
age = anno_barplot(rnorm(10, 60, 10)),
col = list(condition = c(ctrl = 'steelblue', trt = 'firebrick'),
batch = c(A = '#fdd', B = '#dfd'))
)
# 3) 主圖 / Main heatmap
ht <- Heatmap(mat_z,
name = 'z-score',
col = col_fun,
top_annotation = ann_top,
cluster_rows = TRUE,
cluster_columns = TRUE,
show_row_names = FALSE,
column_split = rep(c('ctrl','trt'), each = 5),
row_km = 3, # k-means 分 row 群
column_title = 'DEGs heatmap',
row_dend_width = unit(15, 'mm'),
heatmap_legend_param = list(title_position = 'topcenter'))
draw(ht)
# 4) 多 heatmap 並列 / Multi-heatmap
# ht1 + ht2 # 並列 (同一 row)
# ht1 %v% ht2 # 上下疊 (同一 column)
# 5) 存圖 / Save
# pdf('results/heatmap.pdf', width = 8, height = 8); draw(ht); dev.off()
# 6) Oncoprint (突變視覺化)
# alterations <- read_tsv('mutations.tsv') # gene × sample 突變類型
# oncoPrint(alterations,
# alter_fun = ...,
# col = c(MUT='black', AMP='red', DEL='blue'))
火山圖
四、EnhancedVolcano:最常用火山圖
# BiocManager::install('EnhancedVolcano')
library(EnhancedVolcano)
# 假設 res 是 DESeq2 results / Assume res is from DESeq2
# res <- results(dds)
# 模擬資料 / Simulated
set.seed(7)
res <- data.frame(
gene_symbol = paste0('G', 1:1000),
log2FoldChange = rnorm(1000, sd = 1.2),
pvalue = runif(1000)
)
res$padj <- p.adjust(res$pvalue, 'BH')
EnhancedVolcano(res,
lab = res$gene_symbol,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 1,
pointSize = 1.5,
labSize = 3.5,
col = c('grey60','steelblue','goldenrod','firebrick'),
legendLabels = c('NS','log2FC','padj','padj & log2FC'),
legendPosition = 'right',
drawConnectors = TRUE,
widthConnectors = 0.4,
title = 'Treated vs Control',
subtitle = 'DESeq2 results',
caption = paste0('Total = ', nrow(res), ' genes'))
# 自製 ggplot 版(更彈性)/ Custom ggplot version
library(ggplot2); library(ggrepel); library(dplyr)
res$sig <- with(res, ifelse(padj < 0.05 & abs(log2FoldChange) > 1,
ifelse(log2FoldChange > 0, 'Up', 'Down'), 'NS'))
top_lab <- res |> filter(sig != 'NS') |> arrange(padj) |> head(15)
ggplot(res, aes(log2FoldChange, -log10(padj), color = sig)) +
geom_point(alpha = 0.7, size = 1.5) +
scale_color_manual(values = c(Up = 'firebrick', Down = 'steelblue', NS = 'grey70')) +
geom_vline(xintercept = c(-1, 1), linetype = 'dashed', color = 'grey50') +
geom_hline(yintercept = -log10(0.05), linetype = 'dashed', color = 'grey50') +
geom_text_repel(data = top_lab, aes(label = gene_symbol), size = 3, max.overlaps = 20) +
labs(x = 'log2 fold change', y = '-log10 adjusted p',
color = NULL, title = 'Volcano plot') +
theme_classic(base_size = 12)
其他常用圖
五、MA plot、PCA、其他常見圖
# MA plot — log mean expression vs log fold-change
# library(DESeq2)
# plotMA(dds, ylim = c(-5, 5)) # base plot
# plotMA(res_shrunk, ylim = c(-5, 5)) # 推薦:用 shrunken LFC
# Custom MA in ggplot
library(ggplot2)
res$mean <- rowMeans(matrix(rnorm(2000, mean=10), ncol=2))
ggplot(res, aes(mean, log2FoldChange, color = padj < 0.05)) +
geom_point(alpha = 0.5, size = 1) +
scale_color_manual(values = c('grey70','firebrick')) +
geom_hline(yintercept = 0) +
scale_x_log10() +
labs(x = 'Mean expression', y = 'log2 fold change') +
theme_classic()
# PCA — DESeq2 內建
# vsd <- vst(dds)
# plotPCA(vsd, intgroup = c('condition','batch'))
# 自製 PCA / Custom PCA
m <- matrix(rnorm(500), nrow = 50) # 50 genes × 10 samples
rownames(m) <- paste0('g', 1:50); colnames(m) <- paste0('S', 1:10)
pca <- prcomp(t(m), scale. = TRUE)
pca_df <- as.data.frame(pca$x[, 1:2])
pca_df$condition <- rep(c('ctrl','trt'), each = 5)
var_pct <- round(100 * (pca$sdev^2) / sum(pca$sdev^2), 1)
ggplot(pca_df, aes(PC1, PC2, color = condition)) +
geom_point(size = 3) +
labs(x = paste0('PC1 (', var_pct[1], '%)'),
y = paste0('PC2 (', var_pct[2], '%)')) +
theme_classic()
# Dotplot (用於 clusterProfiler / 多基因表現比較)
# enrichplot::dotplot(ego, showCategory = 15)
# Boxplot/violin of selected genes
# boxplot(expr['TP53', ] ~ sample_info$condition)
調色盤
六、出版品配色建議
| 情境 | 推薦調色盤 |
|---|---|
| 連續+方向 (logFC、z-score) | RdBu / BrBG / RdYlBu (diverging,0 設白) |
| 連續+單向 (表現量、密度) | viridis / magma / Blues / YlOrRd |
| 離散 ≤ 8 類 (細胞類型) | RColorBrewer::Set2 / Dark2 / Paired |
| 離散 > 8 類 | ggsci 各 journal 配色 / scales::hue_pal |
| 色盲友善 | viridis, Set2, colorblind_pal() |
# 常用配色組合 / Common palette snippets
library(RColorBrewer); library(viridis); library(circlize)
# diverging(heatmap z-score 用)
colorRampPalette(c('#1f4e8c','white','#c0392b'))(50)
colorRampPalette(rev(brewer.pal(11, 'RdBu')))(100)
# ComplexHeatmap 的 colorRamp2(連續 → 顏色)
colorRamp2(c(-2, 0, 2), c('navy','white','firebrick'))
# viridis 適合連續單向
viridis(50)
viridis(50, option = 'magma')
# 離散
brewer.pal(8, 'Set2')
brewer.pal(12, 'Paired')
📝 自我檢測
1. 畫 heatmap 為什麼要先 row z-score?
1. Why row-z-score before heatmap?
2. 想做「20,000 個基因的火山圖、自動標出 top 10 顯著基因避免重疊」最方便的選擇?
2. Easy way to volcano-plot 20,000 genes and label top 10 without overlap?
3. 想呈現 logFC 連續變數(負紅、零白、正藍),最佳調色盤類型?
3. Best palette type for continuous logFC (negative red, 0 white, positive blue)?