1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
| filein <-'B1_family' ##Input file
dataIn <- as.matrix(read.table(filein)) ##Read the data in file
dim(dataIn) ##Check if the number of columns is as expected
cols<-dim(dataIn)[2]
colsm1<-cols-1
dataIn2<-(dataIn[which(rowSums(dataIn[,1:cols])>colsm1),]) ##Retain only those rows that do not have low counts in all samples combined
dataIn3<-dataIn2[which(rowSums(sign(dataIn2[,1:cols]))>1),] ##Retain rows that have non-zero counts in at least one column
dataIn<-as.matrix(dataIn3)
dim(dataIn) ##Number of transcripts that remained after clean up
##Quasiseq
upper.quartiles<-apply(dataIn,2,function(x) quantile(x,0.75)) ##Normalization - calculating upper quartile
scalingFactors<-mean(upper.quartiles)/upper.quartiles ##Generate scaling factor
range(scalingFactors)
dataIn.norm<-round((sweep(dataIn,2,scalingFactors,FUN="*"))) ##Generate normalized counts
trt <- as.factor(c(1,1,1,1,1,1,1,1,2,1,1,1,1,1,2,2,2,2,2,2,2,2)) ##Assigning treatment groups to samples
mn <- as.numeric(rep(1,22)) ##Control
design.list<-vector('list',2)
design.list[[1]]<-model.matrix(~trt)
design.list[[2]]<-mn
log.offset<-log(apply(dataIn, 2, quantile,.75)) ##Normalization. calculate log offset
fit2<-QL.fit(round(dataIn), design.list,log.offset=log.offset, Model='NegBin')
outfile-"B1_resvssus"
QLresultsPvaluePlot(fit2,outfile)
##EdgeR-QLFit
group <- factor(c(1,1,1,1,1,1,1,1,2,1,1,1,1,1,2,2,2,2,2,2,2,2))
y <- DGEList(counts=dataIn,group=group)
y <- calcNormFactors(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y,design)
qlf <- glmQLFTest(fit,coef=2)
tablrt <- topTags(qlf, n=Inf)
write.table(tablrt, file = "B1_resvssus_QLfit.txt")
##EdgeR-LRT
fit <- glmFit(y,design)
lrt <- glmLRT(fit,coef=2)
tablrt <- topTags(lrt, n=Inf)
write.table(tablrt, file = "B1_resvssus_LRT.txt")
|