Contents

1 Introduction

In the first part of this vignette, one of the supplemental figures is generated, which clarifies some concepts regarding the tdr. In particular, the tdr at a fixed p-value is illustrated, the tdr as a function of p-values and also tdr(P_i) as a random variable, which in general will not be a test statistic. It’s only a test statistic when we know pi0 and the alternative density, and in such a case it really provides higher discriminatory power than the p-values.

library("ggplot2")
library("dplyr")
library("tidyr")
library("gridExtra")
library("cowplot")
library("wesanderson")

Helper functions to generate simulated data and also evaluate tdr and other quantities on them.

falt1 <- function(t){
    1-pnorm(qnorm(1-t)-mu1)
  }

  lfdr1 <- function(t){
    pi10/(pi10 + (1-pi10)*falt1_density(t))
  }
  falt1_density <- function(t){
      dnorm(qnorm(1-t)-mu1)/dnorm(qnorm(1-t))
  }

#http://stackoverflow.com/questions/10081479/solving-for-the-inverse-of-a-function-in-r
inverse = function (f, lower = -100, upper = 100) {
   function (y) uniroot((function (x) f(x) - y), lower = lower, upper = upper)[1]$roo
}

sim_pars <- function(eff_size, pi0){
  distrib <- function(t) pi0*t + (1-pi0)*(1-pnorm(qnorm(1-t)-eff_size))
  alt_density <-   function(t) dnorm(qnorm(1-t)-eff_size)/dnorm(qnorm(1-t))
  density <- function(t) pi0 + (1-pi0)*alt_density(t)
  tdr <- function(t)  1- pi0/density(t)
  inverse_f <- function(y) uniroot((function (x) density(x) - y), lower = 0.001, upper = 0.999)[1]$root
  tdr_density <- function(t) pi0*(inverse_f(pi0/(1-t)+0.001) - inverse_f(pi0/(1-t)-0.001))/0.002
  return(list(pi0=pi0,distrib=distrib, alt_density=alt_density,density=density, tdr=tdr, inverse_f=inverse_f,
            tdr_density=tdr_density  ))
}

Helper function to generate plots for different parameter values:

plot_tdr <- function(x,pi0,xthreshold=0.05){
  eff_size <- x
  pars <- sim_pars(x,pi0)
  df <- data.frame(t=seq(0.001,1, length=1000))
  df <- mutate(df, f1 = pars$alt_density(t), f=pars$density(t), tdr = pars$tdr(t))
  plot_pv <- ggplot(df, aes(x=t, y=f)) + 
                     geom_line()  + 
                     ylim(c(0,4)) + 
                     geom_vline(xintercept=xthreshold, col="blue") + 
                     geom_hline(yintercept=pars$pi0,col="red") +
                     #geom_hline(yintercept=pars$density(xthreshold), linetype=2)+
                     ylab("density")+
                     xlab("p-value")+
                     scale_x_continuous(expand = c(0, 0)) 
  
  plot_pv_vs_tdr <- ggplot(df, aes(x=t, y= tdr)) + 
                    geom_line() +
                    ylab("tdr")+
                    xlab("p-value")+
                    scale_x_continuous(expand = c(0, 0)) 
  
  # now also generate empirical histogram plots
  a = sim_pars(eff_size,pi0)
  m=100000
  H <- rbinom(m, 1, 1-pi0)
  X <- rnorm(m) + eff_size*H
  PV <- 1 - pnorm(X)
  TDR <- a$tdr(PV)
  
  sim_df <- data.frame(pvalue=PV, tdr=TDR)
  
  hist_pv <- ggplot(sim_df, aes(x=pvalue))
  grid.arrange(plot_pv, plot_pv_vs_tdr, ncol=1)
  
  pv_hist <- ggplot(sim_df, aes(x=pvalue)) + geom_histogram(aes(y = ..density..),binwidth=0.036, colour = "darkgreen", fill = "white") + xlab("p-value")
  tdr_hist <- ggplot(sim_df, aes(x=tdr)) + geom_histogram(aes(y = ..density..),binwidth=0.036, colour = "darkgreen", fill = "white") + xlab("tdr")
  list(plot_pv=plot_pv,  plot_pv_vs_tdr=plot_pv_vs_tdr, pv_hist=pv_hist, tdr_hist=tdr_hist)
}

Generate all plots:

myplots <- c(plot_tdr(3.5, 0.6), plot_tdr(3.5,0.85), plot_tdr(1.5,0.6))
myplots <- myplots[c(1,5,9) + rep(0:3,each=3)]
myplots <- c(myplots, list(labels = letters[1:12]), list(ncol=3, align="hv"))

Let’s put all plots together:

do.call(plot_grid,myplots)

pdf("pvalue_vs_tdr_explanation.pdf", width=10, height=15)
do.call(plot_grid,myplots)
dev.off()

2 Size investing and tdr

This part of the vignette generates figures which explain the relationship between tdrs and size investing. In particular it illustrates why fdr estimation methods which assume the conditional alternative densities remain the same, cannot have any power (i.e. differences only due to pi0 estimation cannot support a size investing strategy).

Again first generate the data and the individual plots for a case in which both pi0 changes and the alternative distribution:

df <- data.frame(t=seq(0.00001,1, length=1000))
x <- 2.5
pi0 <- 0.8
eff_size <- x
pars <- sim_pars(x,pi0)
df1 <- mutate(df, f1 = pars$alt_density(t), CDF=pars$distrib(t), f=pars$density(t), tdr = pars$tdr(t), test = "1")
  
x <- 1.5
pi0 <- 0.9
eff_size <- x
pars <- sim_pars(x,pi0)
df2 <- mutate(df, f1 = pars$alt_density(t), CDF=pars$distrib(t), f=pars$density(t), tdr = pars$tdr(t), test = "2")

df <- rbind(df1,df2)
  
plot_tdr_inversed1 <- ggplot(df, aes(x=tdr, y= t,color=test)) + 
                    geom_line() +
                    xlab("tdr")+
                    ylab("p-value")+
                    scale_x_continuous(expand = c(0, 0)) +
                    scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',
                                        labels = expression(H , tilde(H)))


plot_density1 <- ggplot(df, aes(x=t, y= f,color=test)) + 
                    geom_line() +
                    xlab("p-value")+
                    ylab("density")+
                    scale_x_continuous(expand = c(0, 0)) +
                    ylim(0,4) +
                    scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',labels = expression(H , tilde(H)))


plot_cdf1 <- ggplot(df, aes(x=t, y= CDF,color=test)) + 
                    geom_line() +
                    xlab("p-value")+
                    ylab("CDF")+
                    scale_x_continuous(expand = c(0, 0)) +
                    scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',labels = expression(H , tilde(H)))

Second we generate an example in which only pi0 differs and the alternative distributions are the same:

df <- data.frame(t=seq(0.00001,1, length=1000))
x <- 2.5
pi0 <- 0.8
eff_size <- x
pars <- sim_pars(x,pi0)
df1 <- mutate(df, f1 = pars$alt_density(t), CDF=pars$distrib(t), f=pars$density(t), tdr = pars$tdr(t), test = "1")
  
x <- 2.5
pi0 <- 0.9
eff_size <- x
pars <- sim_pars(x,pi0)
df2 <- mutate(df, f1 = pars$alt_density(t), CDF=pars$distrib(t), f=pars$density(t), tdr = pars$tdr(t), test = "2")
df <- rbind(df1,df2)
  
plot_tdr_inversed <- ggplot(df, aes(x=tdr, y= t,color=test)) + 
                    geom_line() +
                    xlab("tdr")+
                    ylab("p-value")+
                    scale_x_continuous(expand = c(0, 0)) +
                      scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',
                                          labels = expression(H , tilde(H)))



plot_density <- ggplot(df, aes(x=t, y= f1,color=test)) + 
                    geom_line() +
                    xlab("p-value")+
                    ylab("alt")+
                    scale_x_continuous(expand = c(0, 0)) +
                    ylim(0,4) +
                    scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',
                                        labels = expression(H , tilde(H)))


plot_cdf <- ggplot(df, aes(x=t, y= CDF,color=test)) + 
                    geom_line() +
                    xlab("p-value")+
                    ylab("CDF")+
                    scale_x_continuous(expand = c(0, 0)) +
                      scale_colour_manual(values =wes_palette("Chevalier")[c(1,4)], name='',
                                          labels = expression(H , tilde(H)))
plotgrid <- plot_grid(plot_cdf1, plot_cdf, plot_tdr_inversed1, plot_tdr_inversed, labels = letters[1:4], ncol=2, align="hv")
plotgrid