library(graphics)
qm.read.data<-function(file)
{
	temp.data<-scan(file, what="list", sep="\n", quiet=T)
	xdim<-as.numeric(strsplit(temp.data[[1]], "\t")[[1]][1])
	ydim<-as.numeric(strsplit(temp.data[[1]], "\t")[[1]][2])
	data<-as.list(1:(length(temp.data)-1))
	for(i in 1:length(data))
	{
		data[[i]]<-as.numeric(strsplit(temp.data[[i+1]], "\t")[[1]])	
	}
	print(paste("read in ", xdim, "*", ydim, "=", xdim*ydim, " distributions!", sep=""))
	return(list(xdim=xdim, ydim=ydim, data=data))
}

########### read in data in the format of yingying's data ######
## files: a list of file names to be read in, each contains 400 dimensions
qm.read.data.yingying<-function(files)
{
	data<-vector("list", 400)
	for(i in 1:length(files))
	{
		temp<-scan(files[[i]], what="list", sep="\n", quiet=T, skip=2)
		for(j in 1:400)
		{
			a<-strsplit(temp[[j]], " ")[[1]]
			if(as.numeric(a[3])>0)
			{
				data[[j]]<-c(data[[j]], as.numeric(a[4:(as.numeric(a[3])+3)]))
			}
		}
		print(paste("read in file", i))
	}
	data
}

############## MLE estimate for Fisher Info ########
qm.MLE.sigma2.hat<-function(data)
{
	sigma.MLE.2<-rep(NA, length(data))
	for(i in 1:length(data))
	{
		if(length(data[[i]])>1)
			sigma.MLE.2[i]<- sum( (data[[i]]-mean(data[[i]]))^2 ) / length(data[[i]])
	}
	return(sigma.MLE.2)
}

qm.MLE.FI<-function(data)
{
	len<-sapply(data, length)
	return(sqrt(len)/sqrt(qm.MLE.sigma2.hat(data)))
}

######### Empirical Bayes estimate for Fisher Info ########
###### Empirical Bayes with constant non-informative prior for mu, the sample means #####
qm.EB.FI<-function(data)
{
	for(i in 1:length(data))
	{
		if(is.null(data[[i]]))
			data[[i]]<-NA
	}
	len<-sapply(data, length)
	sqrt(len)/sqrt(qm.EB.sigma2.hat(data))
}

qm.EB.sigma2.hat<-function(data)
{
	hp<-qm.EB.estHyperprior(data)
	v0.hat.1<-hp[1]
	sigma0.hat.2.1<-hp[2]
	mu.EB<- sapply(data, sum)/sapply(data, length)
	sigma.EB.2<- rep(NA, length(data))
	for(i in 1:length(data))
	{
		if(length(data[[i]])>1)
			sigma.EB.2[i]<-( sum((data[[i]]-mu.EB[i])^2) + v0.hat.1*sigma0.hat.2.1) / (v0.hat.1+length(data[[i]])+2)
	}
	return(sigma.EB.2)
}

qm.EB.estHyperprior<-function(data)
{
	# estimate v0, sigma0.2 through moments #
	mu.hat<-sapply(data, mean, na.rm=T)
	sigma.hat.2<-sapply(data, var, na.rm=T)
	sigma.tilde.2<-mean(sigma.hat.2, na.rm=T)

	v0.hat<-4+(2*(length(sigma.hat.2)-1)*(sigma.tilde.2)^2)/(sum((sigma.hat.2-sigma.tilde.2)^2, na.rm=T))
	sigma0.hat.2<-sigma.tilde.2*(v0.hat-2)/v0.hat

	#result<-nlm(qm.EB.loglikeli, c(v0.hat, sigma0.hat.2), data=data)$estimate
	
	#v0.hat<-result[[1]]
	#sigma0.hat.2<-result[[2]]
	##result<-optim(c(v0.hat, sigma0.hat.2), qm.EB.loglikeli, data=data)$estimate
	result<-c(v0.hat, sigma0.hat.2)
}

qm.EB.loglikeli<-function(x, data)
{
	v0<-x[1]
	sigma0.2<-x[2]
	result<-0.5*(v0+1)*log( v0*sigma0.2/(v0+1)) - ( log(gamma((v0+1)/2)) - log(gamma(v0/2)) + 0.5*v0*log(v0) - 0.5*(v0+1)*log(v0+1) + 0.5*v0*log(sigma0.2) )
	return(result)
}


##### only plot the median for each distribution ######
qm.median.plot<-function(data, xdim, xlab=NULL, ylab=NULL, bg.0=0.8)
{
	col.high<-quantile(sapply(data, max, na.rm=T), 0.95)
	col.low<-quantile(sapply(data, min, na.rm=T), 0.05)
	
	col.strength<-rep(NA, length(data))
	for(i in 1:length(data))
	{
		if(length(data[[i]])>0)
		{
			a<-median(data[[i]], na.rm=T)
			if(a>col.high)
				col.strength[i]<-1
			else if(a<col.low) col.strength[i]<-0
			else col.strength[i]<-(a-col.low)/(col.high-col.low)
		}
	}

	ydim<-ceiling(length(data)/xdim)
	x<-rep(NA, length(data))
	y<-rep(NA, length(data))
	for(i in 1:length(data))
	{
		y[i]<-ceiling(i/xdim)
		x[i]<-i-(ceiling(i/xdim)-1)*xdim
	}

	plot.info<-na.omit(cbind(x, y, col.strength))
	par(mar=c(1,1,1,1), bg=rgb(bg.0, bg.0, bg.0))
	plot(c(2.2*1, 2.2*(xdim+3)), c(2.2*1, 2.2*(ceiling(length(data)/xdim)+1)), type="n", axes=F, xlab="", ylab="")
	symbols(2.2+2.2*plot.info[,1], 2.2*(ceiling(length(data)/xdim)-plot.info[,2]+1), rectangles=matrix(2, nrow(plot.info), 2), inches=F, add=T)
	symbols(2.2+2.2*plot.info[,1], 2.2*(ceiling(length(data)/xdim)-plot.info[,2]+1), rectangles=matrix(2, nrow(plot.info), 2), inches=F, add=T, bg=rgb(1-plot.info[,3], 1-plot.info[,3], 1-plot.info[,3]))
	symbols((2.2*(xdim+2)+0.5)*rep(1, 101), 2.2*(ydim)-2.2*(ydim)*0.4*(0:100)/100, rectangles=cbind(rep(2.2, 101), rep(2.2*(ydim)*0.4/100, 101)), inches=F, add=T, bg=rgb((0:100)/100, (0:100)/100, (0:100)/100), fg=rgb((0:100)/100, (0:100)/100, (0:100)/100))	
	text((2.2*(xdim+3)+0.5)*rep(1, 3), 2.2*(ydim)-2.2*(ydim)*0.4*c(0, 50, 100)/100, c(col.high, (col.high+col.low)/2, col.low), cex=0.7)
	
	if(!is.null(ylab))
		text(2.2, 2.2*(20:1), ylab)
	if(!is.null(xlab))
		text(2.2*(2:21), 2.2*21, xlab)
}

############# quantile map plot ###########
qm.ellipse<-function(x, y, xradius, yradius, col, n)
{
	i<-(-n+1):n
	symbols(x+(i-0.5)*xradius/n, rep(y, length(i)), rectangles=cbind(rep(xradius/n, length(i)), 2*yradius*sqrt(1-  ((i-0.5)*xradius/n)^2/xradius^2)), inches=F, add=T, bg=col, fg=col)	
}

### xdim: number of columns; 
qm.plot<-function(data, xdim, FI=NULL, xlab=NULL, ylab=NULL, quan.num=NULL, outline=F, smooth=10, bg.0=0.8, col.palette="HCL")
{
	if(length(data)==1)
	{
		plot(c(0, 2), c(0, 2), type="n", axes=F, xlab="", ylab="")
		if(is.null(quan.num)) {d<-sort(data[[1]])} else {d<-quantile(data[[1]], ((1:quan.num)-0.5)/quan.num)}
		for(k in 1:length(d))
		{
			col.strength<-(d[k]-min(data[[1]]))/(max(data[[1]])-min(data[[1]]))
			qm.ellipse(1, 1, (length(d)-k+1)/length(d), (length(d)-k+1)/length(d), rgb(1-col.strength, 1-col.strength, 1-col.strength), smooth)
			##if(outline==T) symbols(c(1), c(1), circles=c((length(d)-k+1)/length(d)), inches=F, add=T, fg=rgb(0, 0, 0))
		}
	}
	else
	{
	ydim<-ceiling(length(data)/xdim)
	col.high<-max(sapply(data, max))
	col.low<-min(sapply(data, min))
	if(!is.null(FI))
	{FI.high<-quantile(FI, 0.7, na.rm=T)[[1]]
	FI.low<-0}
	par(mar=c(1,1,1,1), bg=rgb(bg.0, bg.0, bg.0))
	plot(c(2.2*0, 2.2*(xdim+2)), c(2.2*1, 2.2*(ydim+1)), type="n", axes=F, xlab="", ylab="")
	for(p in 1:length(data))
	{
		if(length(data[[p]])>1)
		{
			j<- ceiling(p/xdim)
			i<- p-(j-1)*xdim
			if(is.null(FI))	radius<-1
			else	radius<-max(min((FI[p]-FI.low)/(FI.high-FI.low), 1), 0)			
			if(is.null(quan.num)) d<-sort(data[[p]])
			else d<-quantile(data[[p]], ((1:quan.num)-0.5)/quan.num)
			for(k in 1:length(d))
			{	
				col.strength<-(d[k]-col.low)/(col.high-col.low)
				if(col.palette=="RGB") qm.ellipse(2.2*i, 2.2*(ydim-j+1), radius*(length(d)-k+1)/length(d), (length(d)-k+1)/length(d), rgb(1-col.strength, 1-col.strength, 1-col.strength), smooth)
				if(col.palette=="HCL") qm.ellipse(2.2*i, 2.2*(ydim-j+1), radius*(length(d)-k+1)/length(d), (length(d)-k+1)/length(d), hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)), smooth)
			}		
		}
	}
	if(col.palette=="RGB")	symbols((2.2*(xdim+1)+0.5)*rep(1, 101), 2.2*(ydim)-2.2*(ydim)*0.4*(0:100)/100, rectangles=cbind(rep(2.2, 101), rep(2.2*(ydim)*0.4/100, 101)), inches=F, add=T, bg=rgb((0:100)/100, (0:100)/100, (0:100)/100), fg=rgb((0:100)/100, (0:100)/100, (0:100)/100))	
	if(col.palette=="HCL")	
	{
		col.strength<-(100:0)/100
		symbols((2.2*(xdim+1)+0.5)*rep(1, 101), 2.2*(ydim)-2.2*(ydim)*0.4*(0:100)/100, rectangles=cbind(rep(2.2, 101), rep(2.2*(ydim)*0.4/100, 101)), inches=F, add=T, bg=hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)), fg=hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)))	
	}

	text((2.2*(xdim+2)+0.5)*rep(1, 3), 2.2*(ydim)-2.2*(ydim)*0.4*c(0, 50, 100)/100, c(round(col.high, 2), round((col.high+col.low)/2,2), round(col.low,2)), cex=0.7)
	if(!is.null(ylab))
		text(0, 2.2*(20:1), ylab)
	if(!is.null(xlab))
		text(2.2*(1:20), 2.2*21, xlab)
	}
}

### xdim: number of columns; 
qm.plot.hclust<-function(data, xdim, FI=NULL, xlab=NULL, ylab=NULL, quan.num=NULL, outline=F, smooth=10, bg.0=0.8, col.palette="HCL")
{
	hclust.tree.h<-5
	ydim<-length(data)/xdim
	missing.rows<-which(apply(is.na(matrix(FI, nrow=ydim, ncol=xdim, byrow=T)), 1, sum)>=xdim/2)
	missing.cols<-which(apply(is.na(matrix(FI, nrow=ydim, ncol=xdim, byrow=T)), 2, sum)>=ydim/2)
	
	a<-dist.data.row(data, ydim)
	dimnames(a)<-list(ylab, ylab)
	if(length(missing.rows)>0) aa<-a[-missing.rows, -missing.rows] else aa<-a
	row.hclust<-hclust(as.dist(aa))

	a<-dist.data.col(data, xdim)
	dimnames(a)<-list(xlab, xlab)
	if(length(missing.cols)>0) aa<-a[-missing.cols, -missing.cols] else aa<-a
	col.hclust<-hclust(as.dist(aa))
	
	if(length(missing.rows)>0) row.index<-(1:ydim)[-missing.rows] else row.index<-1:ydim
	if(length(missing.cols)>0) col.index<-(1:xdim)[-missing.cols] else col.index<-1:xdim
	new.xdim<-length(col.index)
	new.ydim<-length(row.index)
	new.data<-vector("list", new.xdim*new.ydim)
	new.FI<-rep(NA, new.xdim*new.ydim)
	new.xlab<-rep(NA, new.xdim)
	new.ylab<-rep(NA, new.ydim)

	for(i in 1:new.ydim)
	{
		for(j in 1:new.xdim)
		{
			temp.row.index<-row.index[row.hclust$order[i]]
			temp.col.index<-col.index[col.hclust$order[j]]
			new.data[[(i-1)*new.xdim+j]]<-data[[(temp.row.index-1)*xdim+temp.col.index]]
			new.xlab[j]<-xlab[temp.col.index]
			new.ylab[i]<-ylab[temp.row.index]
			new.FI[(i-1)*new.xdim+j]<-FI[(temp.row.index-1)*xdim+temp.col.index]
		}
	}
	
	
	if(length(new.data)==1)
	{
		plot(c(0, 2), c(0, 2), type="n", axes=F, xlab="", ylab="")
		if(is.null(quan.num)) {d<-sort(new.data[[1]])} else {d<-quantile(new.data[[1]], ((1:quan.num)-0.5)/quan.num)}
		for(k in 1:length(d))
		{
			col.strength<-(d[k]-min(new.data[[1]]))/(max(new.data[[1]])-min(new.data[[1]]))
			qm.ellipse(1, 1, (length(d)-k+1)/length(d), (length(d)-k+1)/length(d), rgb(1-col.strength, 1-col.strength, 1-col.strength), smooth)
			##if(outline==T) symbols(c(1), c(1), circles=c((length(d)-k+1)/length(d)), inches=F, add=T, fg=rgb(0, 0, 0))
		}
	} else
	{
	new.ydim<-ceiling(length(new.data)/new.xdim)
	col.high<-max(sapply(new.data, max))
	col.low<-min(sapply(new.data, min))
	if(!is.null(new.FI))
	{FI.high<-quantile(new.FI, 0.7, na.rm=T)[[1]]
	FI.low<-0}
	par(mar=c(1,1,1,1), bg=rgb(bg.0, bg.0, bg.0))
	plot(c(2.2*0, hclust.tree.h+1.1+2.2*(new.xdim+2)), c(2.2*1, hclust.tree.h+2.2*(new.ydim+1)), type="n", axes=F, xlab="", ylab="")
	for(p in 1:length(new.data))
	{
		if(length(new.data[[p]])>1)
		{
			j<- ceiling(p/new.xdim)
			i<- p-(j-1)*new.xdim
			if(is.null(new.FI)) {radius<-1} else {radius<-max(min((new.FI[p]-FI.low)/(FI.high-FI.low), 1), 0) }
			if(is.null(quan.num)) d<-sort(new.data[[p]]) else d<-quantile(new.data[[p]], ((1:quan.num)-0.5)/quan.num)
			for(k in 1:length(d))
			{	
				col.strength<-(d[k]-col.low)/(col.high-col.low)
				if(col.palette=="RGB") qm.ellipse(hclust.tree.h+1.1+2.2*i, 2.2*(new.ydim-j+1), radius*(length(d)-k+1)/length(d), (length(d)-k+1)/length(d), rgb(1-col.strength, 1-col.strength, 1-col.strength), smooth)
				if(col.palette=="HCL") qm.ellipse(hclust.tree.h+1.1+2.2*i, 2.2*(new.ydim-j+1), radius*(length(d)-k+1)/length(d), (length(d)-k+1)/length(d), hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)), smooth)
			}		
		}
	}
	if(col.palette=="RGB")	symbols(hclust.tree.h+1.1+(2.2*(new.xdim+1)+0.5)*rep(1, 101), 2.2*(new.ydim)-2.2*(new.ydim)*0.4*(0:100)/100, rectangles=cbind(rep(2.2, 101), rep(2.2*(new.ydim)*0.4/100, 101)), inches=F, add=T, bg=rgb((0:100)/100, (0:100)/100, (0:100)/100), fg=rgb((0:100)/100, (0:100)/100, (0:100)/100))	
	if(col.palette=="HCL")	
	{
		col.strength<-(100:0)/100
		symbols(hclust.tree.h+1.1+(2.2*(new.xdim+1)+0.5)*rep(1, 101), 2.2*(new.ydim)-2.2*(new.ydim)*0.4*(0:100)/100, rectangles=cbind(rep(2.2, 101), rep(2.2*(new.ydim)*0.4/100, 101)), inches=F, add=T, bg=hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)), fg=hcl(h=90-col.strength*(90-0), c=30-col.strength*(30-100), l=90-col.strength*(90-50)))	
	}

	text(hclust.tree.h+1.1+(2.2*(new.xdim+2)+0.5)*rep(1, 3), 2.2*(new.ydim)-2.2*(new.ydim)*0.4*c(0, 50, 100)/100, c(col.high, (col.high+col.low)/2, col.low), cex=0.7)
	if(!is.null(new.ylab))
		text(hclust.tree.h+1.1+0, 2.2*(new.ydim:1), new.ylab)
	if(!is.null(new.xlab))
		text(hclust.tree.h+1.1+2.2*(1:new.xdim), 2.2*(new.ydim+1), new.xlab)
	}

	## plot hclust trees
	new.y.position<-rep(NA, length(row.hclust$height))
	for(i in 1:length(row.hclust$height))
	{
		
		if(row.hclust$merge[i,1]<0) {y1<-2.2*(1+new.ydim-which(row.hclust$order==-row.hclust$merge[i,1]))} else {y1<-(new.y.position[row.hclust$merge[i,1]])}

		if(row.hclust$merge[i,2]<0) {y2<-2.2*(1+new.ydim-which(row.hclust$order==-row.hclust$merge[i,2]))} else {y2<-(new.y.position[row.hclust$merge[i,2]])}

		new.y.position[i]<-(y1+y2)/2
		
		if(row.hclust$merge[i,1]<0) {x1<-hclust.tree.h} else {x1<-hclust.tree.h*(1-row.hclust$height[i-1])}
		x2<-hclust.tree.h*(1-row.hclust$height[i])
		lines(c(x1, x2), c(y1, y1))
		if(row.hclust$merge[i,2]<0) {x1<-hclust.tree.h} else {x1<-hclust.tree.h*(1-row.hclust$height[i-1])}
		lines(c(x1, x2), c(y2, y2))		
		lines(c(x2, x2), c(y1, y2))
	}
	new.x.position<-rep(NA, length(col.hclust$height))
	for(i in 1:length(col.hclust$height))
	{
		
		if(col.hclust$merge[i,1]<0) {x1<-hclust.tree.h+1.1+2.2*(which(col.hclust$order==-col.hclust$merge[i,1]))} else {x1<-(new.x.position[col.hclust$merge[i,1]])}

		if(col.hclust$merge[i,2]<0) {x2<-hclust.tree.h+1.1+2.2*(which(col.hclust$order==-col.hclust$merge[i,2]))} else {x2<-(new.x.position[col.hclust$merge[i,2]])}

		new.x.position[i]<-(x1+x2)/2
		
		if(col.hclust$merge[i,1]<0) {y1<-2.2*(new.ydim+1)} else {y1<-2.2*(new.ydim+1)+hclust.tree.h*col.hclust$height[i-1]}
		y2<-2.2*(new.ydim+1)+hclust.tree.h*col.hclust$height[i]
		lines(c(x1, x1), c(y1+1.1, y2+1.1))
		if(col.hclust$merge[i,2]<0) {y1<-2.2*(new.ydim+1)} else {y1<-2.2*(new.ydim+1)+hclust.tree.h*col.hclust$height[i-1]}
		lines(c(x2, x2), c(y1+1.1, y2+1.1))		
		lines(c(x1, x2), c(y2+1.1, y2+1.1))
	}
}

########## hierarchical clustering to reorder rows and columns #######
## xdim: number of columns, ydim: number of rows, length(data)=xdim*ydim
dist.data.row<-function(data, xdim)
{
	ydim<-length(data)/xdim
	dist.row<-matrix(0, ydim, ydim)
	for(i in 2:ydim)
	{
		for(j in 1:(i-1))
		{
			temp.sum<-0
			count<-0
			for(k in 1:xdim)
			{	
				if(length(data[[(i-1)*xdim+k]])>0 & length(data[[(j-1)*xdim+k]])>0)
				{
					##temp.sum<-temp.sum+mean(abs(outer(data[[(i-1)*xdim+k]], data[[(j-1)*xdim+k]], "-")))
					temp.sum<-temp.sum+dist.two.rv(data[[(i-1)*xdim+k]], data[[(j-1)*xdim+k]])
					count<-count+1
				}
			}
			if(count>0)
			{
				dist.row[i,j]<-temp.sum/count
				dist.row[j,i]<-dist.row[i,j]
			}
			else 
			{
				dist.row[i,j]<-NA
				dist.row[j,i]<-NA
			}
		}	
	}
	return(dist.row)
}

## xdim: number of columns, ydim: number of rows, length(data)=xdim*ydim
dist.data.col<-function(data, xdim)
{
	ydim<-length(data)/xdim
	dist.col<-matrix(0, xdim, xdim)
	for(i in 2:xdim)
	{
		for(j in 1:(i-1))
		{
			temp.sum<-0
			count<-0
			for(k in 1:ydim)
			{	
				if(length(data[[(k-1)*xdim+i]])>0 & length(data[[(k-1)*xdim+j]])>0)
				{
					##temp.sum<-temp.sum+mean(abs(outer(data[[(k-1)*xdim+i]], data[[(k-1)*xdim+j]], "-")))
					temp.sum<-temp.sum+dist.two.rv(data[[(k-1)*xdim+i]], data[[(k-1)*xdim+j]])
					count<-count+1
				}
			}
			if(count>0)
			{
				dist.col[i,j]<-temp.sum/count
				dist.col[j,i]<-dist.col[i,j]
			}
			else 
			{
				dist.col[i,j]<-NA
				dist.col[j,i]<-NA
			}
		}	
	}
	return(dist.col)
}

dist.two.rv<-function(xobs, yobs)
{
	x.n<-length(xobs)
	y.n<-length(yobs)
	xobs.sort<-sort(xobs)
	yobs.sort<-sort(yobs)
	breaks<-sort(union((0:x.n)/x.n, (0:y.n)/y.n))
	total.sum<-0
	for(i in 1:(length(breaks)-1))
	{
		xx<-xobs.sort[ceiling(breaks[i+1]*x.n)]
		yy<-yobs.sort[ceiling(breaks[i+1]*y.n)]
		total.sum<-total.sum+(breaks[i+1]-breaks[i])*abs(xx-yy)
	}
	return(total.sum)
}