Step 12: Heatmap & Volcano — R Bioinformatics Tutorial

工具選擇

一、Heatmap 套件比較

套件	優點	缺點
`pheatmap`	預設漂亮、語法簡單	客製彈性有限
`ComplexHeatmap`	極度彈性	API 較複雜
`ggplot2 + geom_tile`	與 ggplot 一致	不自動聚類
`heatmaply`	互動式	不適合靜態圖

💡

選擇法：第一張原型 → pheatmap。要登 Nature → ComplexHeatmap。要互動探索 → heatmaply。要塞進 Shiny dashboard → plotly + geom_tile。 Picking: first draft → pheatmap. Submit to Nature → ComplexHeatmap. Interactive exploration → heatmaply. Shiny dashboard → plotly + geom_tile.

pheatmap

二、pheatmap：最快速的熱圖

# install.packages('pheatmap')
library(pheatmap)

# 1) 基本：基因 × 樣本 矩陣 / Gene × sample matrix
set.seed(7)
mat <- matrix(rnorm(200), nrow = 20,
              dimnames = list(paste0('gene', 1:20),
                              paste0('S', 1:10)))

pheatmap(mat)

# 2) Z-score 縮放（每基因標準化）── 熱圖必備！
pheatmap(mat, scale = 'row',
         color = colorRampPalette(c('navy','white','firebrick'))(50))

# 3) 加樣本/基因註解 / Add column & row annotations
ann_col <- data.frame(condition = rep(c('ctrl','trt'), each = 5),
                      batch     = rep(c('A','B'), 5),
                      row.names = paste0('S', 1:10))
ann_row <- data.frame(pathway = sample(c('Cell cycle','Apoptosis','Immune'), 20, TRUE),
                      row.names = paste0('gene', 1:20))
ann_colors <- list(condition = c(ctrl = 'steelblue', trt = 'firebrick'),
                   batch     = c(A = '#fdd', B = '#dfd'))

pheatmap(mat, scale = 'row',
         annotation_col   = ann_col,
         annotation_row   = ann_row,
         annotation_colors = ann_colors,
         show_rownames    = FALSE,
         cluster_cols     = TRUE,
         cluster_rows     = TRUE,
         clustering_distance_rows = 'correlation',
         clustering_method = 'average',
         cutree_cols      = 2,
         cutree_rows      = 3,
         border_color     = NA,
         fontsize         = 9,
         filename         = 'results/heatmap.pdf',  # 直接存 PDF
         width = 7, height = 8)

ComplexHeatmap

三、ComplexHeatmap：出版品級終極武器

# BiocManager::install('ComplexHeatmap')
library(ComplexHeatmap); library(circlize)

set.seed(7)
mat <- matrix(rnorm(200), nrow = 20,
              dimnames = list(paste0('gene', 1:20),
                              paste0('S', 1:10)))
mat_z <- t(scale(t(mat)))    # row z-score (每基因標準化)

# 1) 顏色函式 / Color function
col_fun <- colorRamp2(c(-2, 0, 2), c('#1f4e8c','white','#c0392b'))

# 2) 樣本註解 (top annotation)
ann_top <- HeatmapAnnotation(
  condition = rep(c('ctrl','trt'), each = 5),
  batch     = rep(c('A','B'), 5),
  age       = anno_barplot(rnorm(10, 60, 10)),
  col = list(condition = c(ctrl = 'steelblue', trt = 'firebrick'),
             batch     = c(A = '#fdd', B = '#dfd'))
)

# 3) 主圖 / Main heatmap
ht <- Heatmap(mat_z,
              name             = 'z-score',
              col              = col_fun,
              top_annotation   = ann_top,
              cluster_rows     = TRUE,
              cluster_columns  = TRUE,
              show_row_names   = FALSE,
              column_split     = rep(c('ctrl','trt'), each = 5),
              row_km           = 3,                    # k-means 分 row 群
              column_title     = 'DEGs heatmap',
              row_dend_width   = unit(15, 'mm'),
              heatmap_legend_param = list(title_position = 'topcenter'))
draw(ht)

# 4) 多 heatmap 並列 / Multi-heatmap
# ht1 + ht2          # 並列 (同一 row)
# ht1 %v% ht2        # 上下疊 (同一 column)

# 5) 存圖 / Save
# pdf('results/heatmap.pdf', width = 8, height = 8); draw(ht); dev.off()

# 6) Oncoprint (突變視覺化)
# alterations <- read_tsv('mutations.tsv')      # gene × sample 突變類型
# oncoPrint(alterations,
#           alter_fun = ...,
#           col = c(MUT='black', AMP='red', DEL='blue'))

火山圖

四、EnhancedVolcano：最常用火山圖

# BiocManager::install('EnhancedVolcano')
library(EnhancedVolcano)

# 假設 res 是 DESeq2 results / Assume res is from DESeq2
# res <- results(dds)

# 模擬資料 / Simulated
set.seed(7)
res <- data.frame(
  gene_symbol = paste0('G', 1:1000),
  log2FoldChange = rnorm(1000, sd = 1.2),
  pvalue         = runif(1000)
)
res$padj <- p.adjust(res$pvalue, 'BH')

EnhancedVolcano(res,
  lab           = res$gene_symbol,
  x             = 'log2FoldChange',
  y             = 'padj',
  pCutoff       = 0.05,
  FCcutoff      = 1,
  pointSize     = 1.5,
  labSize       = 3.5,
  col           = c('grey60','steelblue','goldenrod','firebrick'),
  legendLabels  = c('NS','log2FC','padj','padj & log2FC'),
  legendPosition = 'right',
  drawConnectors = TRUE,
  widthConnectors = 0.4,
  title         = 'Treated vs Control',
  subtitle      = 'DESeq2 results',
  caption       = paste0('Total = ', nrow(res), ' genes'))

# 自製 ggplot 版（更彈性）/ Custom ggplot version
library(ggplot2); library(ggrepel); library(dplyr)
res$sig <- with(res, ifelse(padj < 0.05 & abs(log2FoldChange) > 1,
                             ifelse(log2FoldChange > 0, 'Up', 'Down'), 'NS'))
top_lab <- res |> filter(sig != 'NS') |> arrange(padj) |> head(15)

ggplot(res, aes(log2FoldChange, -log10(padj), color = sig)) +
  geom_point(alpha = 0.7, size = 1.5) +
  scale_color_manual(values = c(Up = 'firebrick', Down = 'steelblue', NS = 'grey70')) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed', color = 'grey50') +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed', color = 'grey50') +
  geom_text_repel(data = top_lab, aes(label = gene_symbol), size = 3, max.overlaps = 20) +
  labs(x = 'log2 fold change', y = '-log10 adjusted p',
       color = NULL, title = 'Volcano plot') +
  theme_classic(base_size = 12)

其他常用圖

五、MA plot、PCA、其他常見圖

# MA plot — log mean expression vs log fold-change
# library(DESeq2)
# plotMA(dds, ylim = c(-5, 5))         # base plot
# plotMA(res_shrunk, ylim = c(-5, 5))  # 推薦：用 shrunken LFC

# Custom MA in ggplot
library(ggplot2)
res$mean <- rowMeans(matrix(rnorm(2000, mean=10), ncol=2))
ggplot(res, aes(mean, log2FoldChange, color = padj < 0.05)) +
  geom_point(alpha = 0.5, size = 1) +
  scale_color_manual(values = c('grey70','firebrick')) +
  geom_hline(yintercept = 0) +
  scale_x_log10() +
  labs(x = 'Mean expression', y = 'log2 fold change') +
  theme_classic()

# PCA — DESeq2 內建
# vsd <- vst(dds)
# plotPCA(vsd, intgroup = c('condition','batch'))

# 自製 PCA / Custom PCA
m <- matrix(rnorm(500), nrow = 50)   # 50 genes × 10 samples
rownames(m) <- paste0('g', 1:50); colnames(m) <- paste0('S', 1:10)
pca <- prcomp(t(m), scale. = TRUE)
pca_df <- as.data.frame(pca$x[, 1:2])
pca_df$condition <- rep(c('ctrl','trt'), each = 5)
var_pct <- round(100 * (pca$sdev^2) / sum(pca$sdev^2), 1)

ggplot(pca_df, aes(PC1, PC2, color = condition)) +
  geom_point(size = 3) +
  labs(x = paste0('PC1 (', var_pct[1], '%)'),
       y = paste0('PC2 (', var_pct[2], '%)')) +
  theme_classic()

# Dotplot (用於 clusterProfiler / 多基因表現比較)
# enrichplot::dotplot(ego, showCategory = 15)

# Boxplot/violin of selected genes
# boxplot(expr['TP53', ] ~ sample_info$condition)

調色盤

六、出版品配色建議

情境	推薦調色盤
連續+方向 (logFC、z-score)	`RdBu` / `BrBG` / `RdYlBu` (diverging，0 設白)
連續+單向 (表現量、密度)	`viridis` / `magma` / `Blues` / `YlOrRd`
離散 ≤ 8 類 (細胞類型)	`RColorBrewer::Set2` / `Dark2` / `Paired`
離散 > 8 類	`ggsci` 各 journal 配色 / `scales::hue_pal`
色盲友善	`viridis`, `Set2`, `colorblind_pal()`

# 常用配色組合 / Common palette snippets
library(RColorBrewer); library(viridis); library(circlize)

# diverging（heatmap z-score 用）
colorRampPalette(c('#1f4e8c','white','#c0392b'))(50)
colorRampPalette(rev(brewer.pal(11, 'RdBu')))(100)

# ComplexHeatmap 的 colorRamp2（連續 → 顏色）
colorRamp2(c(-2, 0, 2), c('navy','white','firebrick'))

# viridis 適合連續單向
viridis(50)
viridis(50, option = 'magma')

# 離散
brewer.pal(8, 'Set2')
brewer.pal(12, 'Paired')

📝 自我檢測

1. 畫 heatmap 為什麼要先 row z-score？

1. Why row-z-score before heatmap?

A. 不需要，直接畫 raw counts 即可A. Not necessary; raw counts work

B. 為了讓圖比較好看B. Just looks prettier

C. 不同基因表現範圍差很大；z-score 後每基因相對於自己平均，才能一起視覺化C. Genes have wildly different ranges; z-scoring puts each gene on its own scale

D. heatmap 要求整數D. Heatmaps require integers

2. 想做「20,000 個基因的火山圖、自動標出 top 10 顯著基因避免重疊」最方便的選擇？

2. Easy way to volcano-plot 20,000 genes and label top 10 without overlap?

A. 自己寫 base R plot()A. Hand-roll with base plot()

B. ggplot + geom_text()B. ggplot + geom_text()

C. EnhancedVolcano() 或 ggplot + ggrepel::geom_text_repel()C. EnhancedVolcano() or ggplot + ggrepel::geom_text_repel()

D. ExcelD. Excel

3. 想呈現 logFC 連續變數（負紅、零白、正藍），最佳調色盤類型？

3. Best palette type for continuous logFC (negative red, 0 white, positive blue)?

A. Diverging (如 RdBu)A. Diverging (e.g. RdBu)

B. Sequential (如 Blues)B. Sequential (e.g. Blues)

C. Qualitative (如 Set2)C. Qualitative (e.g. Set2)

D. 灰階D. Grayscale