Jankowski RNA-seq Human Skeletal Muscle Specimens

Analysis Goals

  • Find genes that are correlated to patient serum IL-6 level concentration
  • Given KEGG pathway of interest, color code correlated genes on the KEGG pathway figure

Experimental Design

Human skeletal muscle specimens were collected from 6 patients with increasing level of serum IL-6

library(knitr)
IL6.level = c(1.05, 2.51, 1.02, 1.31, 0.77, 0.97)
cat('The 6 patient serum levels: ', IL6.level, '\n')
## The 6 patient serum levels:  1.05 2.51 1.02 1.31 0.77 0.97

Build an trascript annotaiton object from UCSC by using “gene” to define each entry:

library(rtracklayer)
library(GenomicRanges)
library(Rsamtools)
library(GenomicFeatures)
txdb2 = makeTranscriptDbFromUCSC(genome = 'hg19', tablename='knownGene')
saveDb(txdb2, file = 'UCSC_hg19_Transcript.sqlite')
eByg2 = exonsBy(txdb2, by='gene')

Import each of the 6 RNA-seq sample in BAM format and overlap with the transcript annotation to build a table that show number of reads gene hit

bam.file.list = c(
    '18032_ATCACG_sorted.bam',
    '18049_CGATGT_sorted.bam',
    '18108_TTAGGC_sorted.bam',
    '18114_TGACCA_sorted.bam',
    '18117_ACAGTG_sorted.bam',
    '18131_CAGATC_sorted.bam'
)

countDF2 = data.frame(row.names=names(eByg2))
for(i in 1:length(bam.file.list)){
  cat('Read and map ', bam.file.list[i], ' now ...')
  
  ptm = proc.time()
  
  aligns = readGAlignmentsFromBam(bam.file.list[i])
  counts = countOverlaps(eByg2, aligns, ignore.strand = T)
  countDF2 = cbind(countDF2, counts)
  
  time.ellapsed = proc.time() - ptm
  cat('Ellapsed time is ' , time.ellapsed[3]/60, ' minutes\n')
  cat('-----------------------------------------\n')
  
}
colnames(countDF2) = bam.file.list
## Object to store gene-based read count table
countDF.Entrez = countDF2
save(countDF.Entrez, file = 'countDF.Entrez.Rdata')

Let’s take a look at the count table (countDF.Entrez)

load('countDF.Entrez.Rdata')
head(countDF.Entrez)
##           18032_ATCACG_sorted.bam 18049_CGATGT_sorted.bam
## 1                              10                       9
## 10                              1                       0
## 100                            43                      35
## 1000                            6                       5
## 10000                         165                     217
## 100008586                       0                       0
##           18108_TTAGGC_sorted.bam 18114_TGACCA_sorted.bam
## 1                               8                       9
## 10                              2                       2
## 100                            67                      58
## 1000                            9                      10
## 10000                         213                     309
## 100008586                       0                       0
##           18117_ACAGTG_sorted.bam 18131_CAGATC_sorted.bam
## 1                               8                       4
## 10                              0                       0
## 100                            65                      24
## 1000                            8                      10
## 10000                         260                     166
## 100008586                       0                       0

Output the count table

library(annotate)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist, unsplit
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: GenomeInfoDb
## Loading required package: S4Vectors
## Creating a generic function for 'nchar' from package 'base' in package 'S4Vectors'
## Loading required package: IRanges
## Loading required package: XML
library('org.Hs.eg.db')
## Loading required package: DBI
table.Symbol = unlist(lookUp(rownames(countDF.Entrez), 'org.Hs.eg.db', 'SYMBOL'))
table.GeneName = unlist(lookUp(rownames(countDF.Entrez), 'org.Hs.eg.db', 'GENENAME'))

count.table.out = cbind(table.Symbol, table.GeneName, countDF.Entrez)
write.table(count.table.out, file = 'Jankowski.full.count.table.txt', quote = F, sep = '\t', row.names = F)

Compute the correlation of all genes to IL-6 serum level and output into a text file

all.rank.cor = all.rank.pValue.cor = rep(0, dim(countDF.Entrez)[1])

for(i in 1:length(all.rank.cor)){
  tmp.cor = unlist(cor.test(rank(IL6.level), rank(as.numeric(countDF.Entrez[i,]))))
  if(!is.na(tmp.cor[1])){
      all.rank.pValue.cor[i] = tmp.cor[3]
      all.rank.cor[i] = tmp.cor[4]
  }
}

all.rank.cor = as.numeric(all.rank.cor)
names(all.rank.cor) = rownames(countDF.Entrez)

all.rank.pValue.cor = as.numeric(all.rank.pValue.cor)
names(all.rank.pValue.cor) = rownames(countDF.Entrez)

suppressMessages(library(annotate))
suppressMessages(library(org.Hs.eg.db))
all.geneSymbol = lookUp(rownames(countDF.Entrez), 'org.Hs.eg.db', 'SYMBOL')
all.geneName = lookUp(rownames(countDF.Entrez), 'org.Hs.eg.db', 'GENENAME')

cor.output = cbind(all.geneSymbol, all.geneName, all.rank.cor, all.rank.pValue.cor)

write.table(cor.output, file = 'All.cor.value.txt', quote = F, sep = '\t', row.names=F)

Create KEGG pathway figues

suppressMessages(library(pathview))
library('org.Hs.eg.db')
kegg.id = c('04630','04920','04066','04068','00190')
kegg.name = c('JAK-STAT','Adipocytokine','HIF-1','FOXO','Oxidative_Phosphorylation')

for(i in 1:length(kegg.id)){
  pv.out <- pathview(gene.data = all.rank.cor, pathway.id = kegg.id[i], species= "hsa", out.suffix = paste(kegg.name[i], 'native', sep = '-'), kegg.native = T)
  pv.out <- pathview(gene.data = all.rank.cor, pathway.id = kegg.id[i], species = "hsa", out.suffix = paste(kegg.name[i], 'denative', sep = '-'), kegg.native = T, same.layer=F)
  pv.out <- pathview(gene.data = all.rank.cor, pathway.id = kegg.id[i], species = "hsa", out.suffix = paste(kegg.name[i], 'graphviz', sep = '-'), kegg.native = F)
  write.table(pv.out$plot.data.gene, file = paste('hsa',kegg.id[i],'plot.data',kegg.name[i],'txt',sep = '.'), row.names=F, quote=F, sep='\t')
}

Let’s take a look at JAK-STAT example for all 3 plots:

This is the KEGG native plot alt text

This is KEGG plot with gene names layer, therefore not using KEGG originate naming scheme alt text

This is pathway plot in GraphViz format, including relationship keys

Here are some session information:

sessionInfo()
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pathview_1.8.0       KEGGgraph_1.26.0     graph_1.46.0        
##  [4] org.Hs.eg.db_3.1.2   RSQLite_1.0.0        DBI_0.3.1           
##  [7] annotate_1.46.1      XML_3.98-1.3         AnnotationDbi_1.30.1
## [10] GenomeInfoDb_1.4.2   IRanges_2.2.7        S4Vectors_0.6.3     
## [13] Biobase_2.28.0       BiocGenerics_0.14.0  knitr_1.11          
## 
## loaded via a namespace (and not attached):
##  [1] XVector_0.8.0     magrittr_1.5      zlibbioc_1.14.0  
##  [4] xtable_1.7-4      R6_2.1.0          httr_1.0.0       
##  [7] stringr_1.0.0     tools_3.2.1       grid_3.2.1       
## [10] png_0.1-7         htmltools_0.2.6   digest_0.6.8     
## [13] Rgraphviz_2.12.0  formatR_1.2       KEGGREST_1.8.0   
## [16] evaluate_0.7.2    rmarkdown_0.7     stringi_0.5-5    
## [19] Biostrings_2.36.3