You are on page 1of 4

################################################################################
# Updated MaptoGene.2 -> MaptoGene.vista
################################################################################
# MaptoGene.2 returns one row per (probe, gene) pair.
# Input data frame columns: "chrID", "ProbeID", "start", "stop"
#
# c1 c2 c3 c4
# 1  2  3  a
# 1  2  3  b
# 1  2  3  c
# 2  4  1  h
# ....
#
# MaptoGene.vista returns one row per probe, with all matching genes
# collapsed into a comma-separated field:
#
# c1 c2 c3 c4
# 1  2  3  a,b,c
# 2  4  1  h

# Coerce a data-frame column to numeric by value. Factors are converted through
# their labels because as.numeric() on a factor returns the integer codes.
.as_numeric_column <- function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  as.numeric(x)
}

# Map probes to the genes whose TSS window contains the probe midpoint.
#
# Args:
#   objectProbe:   data frame with exactly one column each whose name contains
#                  "chrID", "start" and "stop". A single "end" column is
#                  accepted in place of "stop", and a "chr" column in place of
#                  "chrID"; both are renamed.
#   Tss_Txpt_gene: gene table passed to SetLength.2(); the code reads the
#                  columns chrID, strand, Tss, Txpt1, Txpt2, GeneSymbol,
#                  Accession and Description from it.
#   Upstream, Downstream: window sizes around the TSS, forwarded to
#                  SetLength.2(). If either is NULL the gene table is used
#                  unchanged.
#
# Returns: the data frame produced by GetProbeLinkingGene.vista(), one row per
#   probe with the gene annotations collapsed to comma-separated strings.
MaptoGene.vista <- function(objectProbe, Tss_Txpt_gene, Upstream = NULL,
                            Downstream = NULL) {
  if (!is.data.frame(objectProbe)) {
    stop("object must be a data frame")
  }

  count_cols <- function(pattern) {
    length(grep(pattern, colnames(objectProbe), fixed = TRUE))
  }

  # Normalise alternative column names first, then validate once. The original
  # attached the error branch to the chrID check, so a frame that already had
  # chrID and only needed end -> stop was rejected.
  if (count_cols("stop") == 0 && count_cols("end") == 1) {
    colnames(objectProbe) <- sub("end", "stop", colnames(objectProbe),
                                 fixed = TRUE)
  }
  if (count_cols("chrID") == 0) {
    colnames(objectProbe) <- sub("chr", "chrID", colnames(objectProbe),
                                 fixed = TRUE)
  }
  if (count_cols("start") != 1 || count_cols("stop") != 1 ||
        count_cols("chrID") != 1) {
    stop("column names must contain exactly one each of chrID, start and stop")
  }

  start_col <- grep("start", colnames(objectProbe), fixed = TRUE)
  stop_col <- grep("stop", colnames(objectProbe), fixed = TRUE)

  # Vectorised midpoint; apply() over a data frame would first turn every row
  # into formatted character strings.
  objectProbe$middle <- (.as_numeric_column(objectProbe[[start_col]]) +
                           .as_numeric_column(objectProbe[[stop_col]])) / 2

  setTss_Txpt_gene <- SetLength.2(Tss_Txpt_gene, Upstream = Upstream,
                                  Downstream = Downstream)
  GetProbeLinkingGene.vista(objectP = objectProbe,
                            setTss_Txpt = setTss_Txpt_gene)
}

################################################################################
# fast version
################################################################################
# Annotate probes chromosome by chromosome.
#
# Args:
#   objectP:     probe data frame with a chrID column and a numeric "middle"
#                column (added by MaptoGene.vista()).
#   setTss_Txpt: gene table with chrID, TssStart and TssStop columns (see
#                SetLength.2()).
#   chrs:        chromosomes to process, in output order. The default keeps the
#                original effective behaviour (chr1..chr5); the original also
#                built a chr1..chr22/X/Y vector but immediately overwrote it.
#
# Returns: a data frame with nrow(objectP) rows and the columns of objectP plus
#   GeneSymbol, Accession, strand, posToTss, ProbePos and Description. Rows are
#   filled in the order of `chrs`; probes on chromosomes not listed in `chrs`
#   are not copied, so the corresponding trailing rows stay NA.
GetProbeLinkingGene.vista <- function(objectP, setTss_Txpt,
                                      chrs = paste0("chr", 1:5)) {
  annotation_cols <- c("GeneSymbol", "Accession", "strand", "posToTss",
                       "ProbePosition", "Description")

  detain <- data.frame(matrix(ncol = ncol(objectP) + 6, nrow = nrow(objectP)))
  colnames(detain) <- c(colnames(objectP), "GeneSymbol", "Accession", "strand",
                        "posToTss", "ProbePos", "Description")

  next_row <- 1
  for (chr in chrs) {
    cat(chr, " ")
    probes_chr <- objectP[which(objectP$chrID == chr), ]
    if (nrow(probes_chr) == 0) {
      next
    }
    last_row <- next_row + nrow(probes_chr) - 1

    genes_chr <- setTss_Txpt[which(setTss_Txpt$chrID == chr), ]
    target <- setGeneList.vista(probes_chr, genes_chr)

    # Convert factors by type so that row-wise assignment into `detain` stores
    # labels, not integer codes. The original converted the first (ncol - 9)
    # columns by position, which only worked for one fixed column layout.
    is_factor <- vapply(probes_chr, is.factor, logical(1))
    probes_chr[is_factor] <- lapply(probes_chr[is_factor], as.character)

    for (nm in annotation_cols) {
      probes_chr[[nm]] <- target[[nm]]
    }
    probes_chr <- PushToBox(probes_chr)

    detain[next_row:last_row, ] <- probes_chr
    next_row <- last_row + 1
  }
  detain
}

################ set Upstream && Downstream ####################################
# Add the TSS search window to a gene table.
#
# For "+" strand genes the window is [Tss - Upstream, Tss + Downstream]; for
# "-" strand genes it is [Tss - Downstream, Tss + Upstream]. The lower bound is
# clamped at 0.
#
# Args:
#   dat:        gene table with a strand column and a TSS column. A column
#               named exactly "Tss" is preferred; otherwise the single column
#               whose name matches "Tss" is used.
#   Upstream, Downstream: numeric window sizes. If either is NULL, `dat` is
#               returned unchanged.
#
# Returns: "+" rows followed by "-" rows, with TssStart and TssStop columns
#   added. Rows whose strand is neither "+" nor "-" are dropped.
SetLength.2 <- function(dat, Upstream = NULL, Downstream = NULL) {
  tss_col <- grep("Tss", colnames(dat))
  if (length(tss_col) == 0) {
    stop(" no variables named \" Tss\" ")
  }
  if (is.null(Upstream) || is.null(Downstream)) {
    return(dat)
  }
  if (!is.numeric(Upstream) || !is.numeric(Downstream)) {
    stop("Upstream and Downstream must both be numeric")
  }

  # Several columns can match "Tss" (e.g. Tss, TssStart, TssStop when the
  # function is re-applied); resolve to the exact name instead of failing
  # inside a scalar `if`.
  exact <- which(colnames(dat) == "Tss")
  if (length(exact) == 1) {
    tss_col <- exact
  } else if (length(tss_col) > 1) {
    stop("more than one column name matches \"Tss\"")
  }

  plus <- dat[which(dat$strand == "+"), ]
  minus <- dat[which(dat$strand == "-"), ]

  # Vectorised arithmetic also handles an empty strand subset, on which the
  # original apply() call raised an error.
  plus_tss <- .as_numeric_column(plus[[tss_col]])
  plus$TssStart <- pmax(plus_tss - Upstream, 0)
  plus$TssStop <- plus_tss + Downstream

  minus_tss <- .as_numeric_column(minus[[tss_col]])
  minus$TssStart <- pmax(minus_tss - Downstream, 0)
  minus$TssStop <- minus_tss + Upstream

  rbind(plus, minus)
}

################################################################################
# For every probe in `dat`, collect the genes in `GeneInfo` whose
# [TssStart, TssStop] window contains the probe midpoint.
#
# Args:
#   dat:      probe data frame with a "middle" column (single chromosome).
#   GeneInfo: gene table for the same chromosome with TssStart, TssStop, Tss,
#             strand, Txpt1, Txpt2, GeneSymbol, Accession and Description.
#
# Returns: a named list of six lists (GeneSymbol, Accession, strand, posToTss,
#   ProbePosition, Description), each of length nrow(dat). Element i holds one
#   value per matching gene, or the placeholder "NA0" when nothing matches.
#   posToTss is the strand-aware signed distance from the TSS (negative means
#   upstream). ProbePosition is "Promoter" (posToTss < 0), "Intragenic"
#   (0 <= posToTss <= |Txpt1 - Txpt2|) or "Intergenic" (beyond that).
setGeneList.vista <- function(dat, GeneInfo) {
  n_probe <- nrow(dat)
  GeneSymbol <- vector("list", n_probe)
  Accession <- vector("list", n_probe)
  strand <- vector("list", n_probe)
  posToTss <- vector("list", n_probe)
  ProbePosition <- vector("list", n_probe)
  Description <- vector("list", n_probe)

  middle <- .as_numeric_column(dat$middle)

  for (i in seq_len(n_probe)) {
    cat(i, sep = ",")
    ind <- which(GeneInfo$TssStart <= middle[i] & GeneInfo$TssStop >= middle[i])

    if (length(ind) == 0) {
      GeneSymbol[i] <- list("NA0")
      Accession[i] <- list("NA0")
      strand[i] <- list("NA0")
      posToTss[i] <- list("NA0")
      ProbePosition[i] <- list("NA0")
      Description[i] <- list("NA0")
      next
    }

    # Per-probe temporaries use their own names. The original reused `strand`
    # and `posToTss`, overwriting the result accumulators on every match.
    hit_strand <- as.character(GeneInfo$strand[ind])
    hit_tss <- .as_numeric_column(GeneInfo$Tss[ind])
    hit_length <- abs(.as_numeric_column(GeneInfo$Txpt1[ind]) -
                        .as_numeric_column(GeneInfo$Txpt2[ind]))

    # Strands other than "+"/"-" yield NA instead of a stale value left over
    # from the previous gene.
    hit_pos <- rep(NA_real_, length(ind))
    on_plus <- which(hit_strand == "+")
    on_minus <- which(hit_strand == "-")
    hit_pos[on_plus] <- round(middle[i] - hit_tss[on_plus])
    hit_pos[on_minus] <- round(hit_tss[on_minus] - middle[i])

    # Boundaries are inclusive for "Intragenic"; the original left the label
    # unassigned when the distance was exactly 0 or exactly the transcript
    # length.
    hit_mark <- ifelse(
      hit_pos < 0,
      "Promoter",
      ifelse(hit_pos <= hit_length, "Intragenic", "Intergenic")
    )

    GeneSymbol[i] <- list(GeneInfo$GeneSymbol[ind])
    Accession[i] <- list(GeneInfo$Accession[ind])
    strand[i] <- list(hit_strand)
    posToTss[i] <- list(hit_pos)
    ProbePosition[i] <- list(hit_mark)
    Description[i] <- list(GeneInfo$Description[ind])
  }

  list(GeneSymbol = GeneSymbol, Accession = Accession, strand = strand,
       posToTss = posToTss, ProbePosition = ProbePosition,
       Description = Description)
}

################################################################################
# Collapse one list element into a single comma-separated string.
trans <- function(x) {
  paste(unlist(x), collapse = ",")
}

# Replace every list column of `detain` with a character column in which each
# cell is the comma-separated contents of the original list element.
PushToBox <- function(detain) {
  list_cols <- which(vapply(detain, is.list, logical(1)))
  for (i in list_cols) {
    detain[[i]] <- unname(vapply(detain[[i]], trans, character(1)))
  }
  detain
}

################################################################################
# Map rows begin..end of `dat` with MaptoGene.vista().
#
# NOTE: the gene table is read from a variable named `Tss_Txpt_gene` that must
# exist in the calling environment (normally the global workspace). The window
# sizes default to the values that were hard-coded in the original.
SlicingMapping <- function(dat, begin = 1, end = nrow(dat), Upstream = 7250,
                           Downstream = 3250) {
  MaptoGene.vista(dat[begin:end, ], Tss_Txpt_gene, Upstream = Upstream,
                  Downstream = Downstream)
}

################################################################################
#result1.10000<-SlicingMapping(ProbeNameArray, 1, 10000)
#write.table(result1.10000$ProbeLinkingGene, "xxxx.txt", quote=F)
#result<-SlicingMapping(n_dmr_primitive)

You might also like