### Two Sample and Paired Comparison Functions # Mark Reimers # Last Change: Nov 6, 2003 # ########### # t.scores # computes t scores for Group 1 MINUS Group 2 ############ t.scores <- function( data, group1, group2, paired=FALSE) { N = dim(data)[1] dx = apply(data[,group1],1,mean) - apply(data[,group2],1,mean) if ( paired) { if (length(group1) != length(group2) ) error("groups must be same size for paired test") diffs = data[,group1] - data[,group2] se = sqrt( apply( diffs, 1, var )) } else se = sqrt( apply( data[,group1], 1, var) / length(group1) + apply( data[,group2], 1,var)/length(group2)) t = dx / se list( t=t, dx=dx) } #################################################### # t.quantile.plot # returns t scores and plots and annotates quantile plot for t-test between two groups # nicer if the names() attribute of the data matrix is set to accession numbers # Arguments: # data matrix of N genes x m experiments - usually log signal levels # group1 vector of indices in 1:m that indicate first group in comparison # group2 " " " " " second group # labels # manual.selection flag for whether to click on points to annotate them # stop by ctrl-C on unix or ESC in Windows # # Author: Mark Reimers Last change Oct 22 2003 ################################################## t.quantile.plot <- function (data, group1, group2, paired=FALSE, labels=NULL, manual.selection=FALSE) { N = dim(data)[1] dx = apply(data[,group1],1,mean) - apply(data[,group2],1,mean) if ( paired) { if (length(group1) != length(group2) ) error("groups must be same size for paired test") diffs = data[,group1] - data[,group2] se = sqrt( apply( diffs, 1, var )) } else se = sqrt( apply( data[,group1], 1, var) / length(group1) + apply( data[,group2], 1,var)/length(group2)) t = dx / se returned = NULL # bookkeeping remove NA's if ( is.null(labels)) labels = names(t) if ( sum( is.na(t)) > 0) { warning(paste(sum(is.na(t)),"genes with 0 denominator in t score")) N = N - sum( is.na(t)) } sorted.t = sort(t[!is.na(t)]) # order t statistics & labels excluding NA's labels = labels[order(t[!is.na(t)])] ### do the plot df = length(group1) + length(group2) - 2 quantile.t = qt((1:N - .5)/N,df) ## construct ideal t-scores plot( quantile.t, sorted.t, pch="o", main="Quantile t-Plot",xlab="Theoretical t-scores", ylab="Experimental t-scores", sub="Blue line indicates random t-scores. Red circles are highly significant") abline(0,1,col="blue",lwd=2) # define layout parameters for annotation t.max = max(sorted.t) ; t.min = min(sorted.t) qt.max = max(quantile.t) ; qt.min = min(quantile.t) ID.scale = (t.max-t.min) / 40 # room for 50 gene names in a plot left.pos = qt.min + .1 * (qt.max-qt.min) # 10% of screen width for names right.pos = qt.max - .1 * (qt.max-qt.min) # 10% of screen width for names # Color the up and down regulated genes spots red, and label them. down.reg = (1:N)[sorted.t < qt(.5/N, df)] # indices of down regulated genes down.num = length(down.reg) if ( length(down.reg) > 0 ) { points(quantile.t[down.reg], sorted.t[down.reg], pch=19, col="red") segments( quantile.t[down.reg],sorted.t[down.reg], right.pos, t.min+ID.scale* (1:down.num) ) text( rep(right.pos,down.num), t.min+ID.scale*(1:down.num), labels[down.reg],pos=4 ) } returned=c(returned,labels[down.reg]) up.reg = (1:N)[sorted.t > qt( 1-.5/N, df)] # " " up regulated genes up.num = length(up.reg) # of identified up-regulated genes if ( length(up.reg) > 0 ) { points(quantile.t[up.reg], sorted.t[up.reg], pch=19, col="red") segments( quantile.t[up.reg],sorted.t[up.reg], left.pos, t.max-ID.scale* (up.num:1) ) text( rep(left.pos,up.num), t.max-ID.scale*(1:up.num), labels[up.reg],pos=2 ) } returned=c(returned,labels[up.reg]) ### Manual Annotation if ( manual.selection ) { # start indefinite loop -- closed off by ctrl-C or ESC (windows) done = F while (! done ) { index = identify( quantile.t, sorted.t, n=1, plot=F) if (length(index)>0) { points( quantile.t[index],sorted.t[index],pch=19,col="red") if ( sorted.t[index] > 0) { up.num = up.num + 1 segments( quantile.t[index],sorted.t[index], left.pos, t.max-ID.scale* up.num ) text( left.pos, t.max-ID.scale* up.num, labels[index],pos=2 ) } else { # t < 0 down.num = down.num + 1 segments( quantile.t[index],sorted.t[index], right.pos, t.min+ID.scale* down.num ) text(right.pos, t.min+ID.scale* down.num, labels[index],pos=4 ) } returned=c(returned,labels[index]) } else { done = T } } } returned }