-
Set up annotation by converting FlyBase annotation to TxDb object (GenomicFeatures package version 1.36.4) and extract genic sequences using the BSgenome package (version 1.52).
txdb <- makeTxDbFromGFF("dmel-all-r6.17.gtf", format="gtf")
seqlevelsStyle(txdb) <- "UCSC"
seqlevels(txdb) <- gsub("mitochondrion_genome","chrM", seqlevels(txdb))
chromosomes <- c("chrX","chr2L","chr2R","chr3L","chr3R","chr4","chrM","chrY")
ranges_genes <- genes(txdb)
ranges_genes <- keepSeqlevels(ranges_genes, chromosomes, pruning.mode = "coarse")
genic_seq <- BSgenome::getSeq(BSgenome.Dmelanogaster.UCSC.dm6, ranges_genes)
-
Calculate genic nucleotide frequencies using the oligonucleotideFrequency function (Biostrings package version 2.52). Set oligonucleotide width to either 4, 5, or 6, sliding window step to 1 and as.prob to TRUE.
genic_4merfreq <- oligonucleotideFrequency(genic_seq, width = 4, step = 1, as.prob = T)
rownames(genic_4merfreq) <- names(genic_seq)
-
Select genes, which are significantly enriched (adjusted p-value < 0.01 and log2[IP/Input] > 0).
res.sign_enriched <- subset(res, padj < 0.01 & res$log2FoldChange > 0)
select_enriched <- rownames(genic_4merfreq) %in% rownames(res.sign_enriched)
select_notenriched <- !(rownames(genic_4merfreq) %in% rownames(res.sign_enriched))
freqs_enriched <- genic_4merfreq[select_enriched,]
freqs_notenriched <- genic_4merfreq[select_notenriched,]
-
Visualize nucleotide frequencies for genes enriched or not enriched as boxplots. Sort sequences by their median frequency.
freq_order <- order(apply(freqs_enriched, 2, median), decreasing = T)
freqs_enriched <- freqs_enriched[,freq_order]
freqs_notenriched <- freqs_notenriched[,freq_order]
freqs_merged <- c(as.list(data.frame(freqs_enriched)),
as.list(data.frame(freqs_notenriched)))
boxplot(freqs_merged[rep(1:25, each=2)+c(0,ncol(freqs_enriched))],
col =c("darkred","darkgrey"), las=2, outline=F,
ylab = "Frequency")
-
Visualize association between vitRIP enrichment (i.e., log2[IP/Input]) and nucleotide frequencies as scatterplot. Calculate Spearman´s correlation to assess the relationship (Figure 2F).
res_freq <- merge(res, genic_4merfreq, by="row.names")
plot(res_freq$log2FoldChange, res_freq[,"TTTT"],
main = "", xlab = "log2 Fold Change", ylab = "Frequency UUUU",
xlim = c(-8,8), col = rgb(0,0,0,0.1), pch = 19, cex = 0.25)
abline(v=0, col="grey32")
corr <- cor(res_freq$log2FoldChange, res_freq[,"TTTT"], method = "spearman")
title(paste("cor =", round(corr,2)), line = 0.5, cex.main=1)