diff --git a/TwoProportionResamplingTest/TwoProportionResamplingTest.R b/TwoProportionResamplingTest/TwoProportionResamplingTest.R new file mode 100644 index 0000000000000000000000000000000000000000..68e6406dc108b17b8de2bb341f0563dbcf81477f --- /dev/null +++ b/TwoProportionResamplingTest/TwoProportionResamplingTest.R @@ -0,0 +1,424 @@ +# ------------------------------------------------------------------------------ +# File: TwoProportionResamplingTest.R +# Authors: Camille Fairbourn, Scott Manski +# Date: 03/26/2019 +# Desc: This app performs a two proportion test via randomization. The +# resampling test mimics Fisher's Exact Test. +# Published Location: +# Email: fairbour@msu.edu, manskisc@msu.edu +# +# For questions or concerns, please email the authors. This work is licensed +# under a Creative Commons Attribution-ShareAlike 4.0 International License +# (https://creativecommons.org/licenses/by-sa/4.0/). +# ------------------------------------------------------------------------------ + + +# loading packages +library(shiny) +library(ggplot2) +library(dplyr) +library(BHH2) +library(gridExtra) +library(shinyjs) + +enableBookmarking(store = "server") + +# Sources objects, functions, etc, from TwoProportionSource.R +# This file contains the html code for the editable table, +# the decimalcount function, the dotplot_locs function, and +# custom ggplot2 themes. +source("TwoProportionSource.R") + +# defines the presets +Presets <- list() +# Presets`preset name` <- c(Top Left, Bottom Left, Top Right, Bottom Right, +# Column A Name, Column B name, Row A name, Row B name) +Presets$`Duct Tape Therapy` <- c(22, 15, 4, 10, "Wart Gone", "Wart Remains", + "Duct Tape", "Cryotherapy") +Presets$`Gender Discrimination` <- c(21, 14, 3, 10, "Promotion", "No Promotion", + "Male", "Female") +Presets$`Opportunity Cost` <- c(56, 41, 19, 34, "buy DVD", "not buy DVD", + "control", "treatment") +Presets$`Avandia` <- c(2593, 5386, 65000, 154592, "Yes", "No", "Rosiglitazone", + "Pioglitazone") +Presets$`Custom` <- c(0, 0, 0, 0, "Column A", "Column B", "Row 1", "Row 2") + + +ui <- function(request) { + fluidPage(useShinyjs(), + titlePanel("Two Proportion Resampling Test"), + sidebarLayout( + sidebarPanel( + tabsetPanel( + tabPanel("Test", + tags$div(class="header", checked = NA, + tags$p("Enter your data into the table + below, or choose one of the data + presets. Press the Shuffle button + to simulate the results under an + independence null model." + ) + ), + hr(), + selectInput(inputId = "plot", + label = "Plot Type", + choices = c("Dotplot", "Histogram")), + selectInput(inputId = "presets", + label = "Presets", + selected = "Custom", # selects the initial preset + choices = names(Presets)), + html_table, + actionButton(inputId = "Reset", label = "Reset"), + numericInput(inputId = "numsamp", + label = "Shuffle how many times?", + value = 100, min = 1, max = 5000), + tags$div(class="header", checked = NA, + tags$p("Enter a value from 1 to 5000")), + actionButton("Replicate", "Shuffle") + + ), + tabPanel("Information", + tags$div(class="header", checked = NA, + tags$p("Enter the value of your observed difference of proportions in the text + under the graph. Selecting 'greater/less than' will highlight the + samples that are greater/less than your value."), + tags$p("Selecting 'beyond' will highlight the samples that are further away + from 0 than your value."), + tags$p("Press the Reset button whenever you change the values in the table."), + hr(), + hr(), + tags$p("Written by Scott Manski"), + tags$p("This work is licensed under a "), + tags$a(href="http://creativecommons.org/licenses/by-sa/4.0/", "Creative Commons Attribution-ShareAlike 4.0 International License"), + hr(), + bookmarkButton() + ) + ))), + + mainPanel( + plotOutput("RandomPlot"), + checkboxInput("Show.Observed", "Show observed difference", FALSE), + textOutput("Observed.Diff"), + fluidRow( + column(textOutput("count.samples"), width = 3), + column(selectInput("inequality", NULL, c("greater than", "less than", "beyond")), width = 3), + column(textInput("cutoff", NULL), width = 4), + htmlOutput("counts")) + ) + ) +)} + + + +server <- function(input, output, session) { + # initialize values for use in server + values <- reactiveValues() + values$props <- vector() + values$table <- matrix(rep(NA, 9), ncol=3) + + # colors for plots + values$hist.fill.color <- "grey70" # histogram bar fill color + values$hist.outline.color <- "black" # histogram bar outline color + values$dot.fill.color <- "grey70" # dotplot dot fill color + values$cutoff.color <- "#F05133" # color for cutoff values + + # observe any changes in table + observe({ + values$table[1, 1] <- as.numeric(input$TL) # top left position + values$table[1, 2] <- as.numeric(input$TR) # top right position + values$table[2, 1] <- as.numeric(input$BL) # bottom left position + values$table[2, 2] <- as.numeric(input$BR) # bottom right position + values$table[1, 3] <- as.numeric(input$TL) + as.numeric(input$TR) #top sum + values$table[2, 3] <- as.numeric(input$BL) + as.numeric(input$BR) # bottom sum + values$table[3, 1] <- as.numeric(input$TL) + as.numeric(input$BL) # left sum + values$table[3, 2] <- as.numeric(input$TR) + as.numeric(input$BR) # right sum + values$table[3, 3] <- as.numeric(input$TL) + as.numeric(input$TR) + # total sum + as.numeric(input$BL) + as.numeric(input$BR) + + # calculates the limits for the plots based on the standard deviation + # the standard deviation is calculated based on the Hypergeometric distribution + values$x.lim <- 6*sqrt(values$table[3, 1]*values$table[1, 3]/values$table[3, 3]* + values$table[2, 3]/values$table[3, 3]*values$table[3, 2]/(values$table[3,3]-1)/values$table[1, 3]^2 + + values$table[3, 2]*values$table[2, 3]/values$table[3, 3]* + values$table[1, 3]/values$table[3, 3]*values$table[3, 1]/(values$table[3,3]-1)/values$table[2, 3]^2) + }) + + # output for values of table if there is a change + output$TRT <- renderText({ + values$table[1, 3] + }) + output$TRB <- renderText({ + values$table[2, 3] + }) + output$TBL <- renderText({ + values$table[3, 1] + }) + output$TBR <- renderText({ + values$table[3, 2] + }) + output$Total <- renderText({ + values$table[3, 3] + }) + + + # these will update each time the user clicks the Replicate button + observeEvent(input$Replicate || input$Show.Observed, { + values$observed <- values$table[1 ,1]/values$table[1, 3] - + values$table[2 ,1]/values$table[2, 3] + }) + + + # reset the values if "Reset" is pressed + observeEvent(input$Reset, { + values$props <- vector() + enable("Replicate") + }) + + # checks to see if the current table matches a preset, otherwise the preset is "Custom" + observeEvent(c(input$TL, input$TR, input$BL, input$BR, input$C1N, + input$C2N, input$R1N, input$R2N), { + # combine the current table inputs into a vector + current <- c(input$TL, input$BL, input$TR, input$BR, input$C1N, input$C2N, + input$R1N, input$R2N) + # loops through each preset and determines the number of cells that match the current table + preset <- unlist(lapply(names(Presets), function(i) { + sum(Presets[[i]] == current) + })) + + # if all cells match, change the SelectInput value to that preset, + # otherwise change the value to "Custom" + if (sum(preset == 8) > 0) { + updateSelectInput(session, "presets", selected = names(Presets)[which(preset == 8)]) + } else { + updateSelectInput(session, "presets", selected = "Custom") + } + }, ignoreInit = TRUE) + + + # disable or enable the "Shuffle" button + # the "Shuffle" button in enabled when the number of shuffles is less than + # 5,000 and the total number of shuffles is less than 20,000 + observeEvent(input$numsamp, { + if (is.numeric(input$numsamp)){ + if (input$numsamp > 5000){ + disable("Replicate") + } else if (length(values$props) <= 20000){ + enable("Replicate") + } + } + }) + + # update the values when shuffle is pressed + observeEvent(input$Replicate, { + new.vals <- rhyper(input$numsamp, values$table[1, 3], + values$table[2, 3], values$table[3, 1]) + new.vals <- new.vals/values$table[1, 3] - (values$table[3, 1]-new.vals)/ + values$table[2, 3] + values$props <- c(values$props, new.vals) + + if (length(values$props) >= 20000){ + disable("Replicate") + } else { + enable("Replicate") + } + }, ignoreInit = TRUE) + + # when "Beyond" is selected, this function is used to calculate the probability + # for each possible outcome. + two_sided_values <- function() { + m <- values$table[1, 3] + n <- values$table[2, 3] + k <- values$table[3, 1] + support <- c(max(0, k - n):min(k, m)) + x <- dhyper(support, m, n, k) + names(x) <- support + x + } + + # update the counts for the cutoff if there are any changes + update_counts <- eventReactive(c(input$cutoff, input$Replicate, input$Reset, + input$inequality, input$presets), { + if (!is.na(as.numeric(input$cutoff))){ + # the error is used to handle rounded values of input$cutoff + num.decimals <- decimalcount(as.character(input$cutoff)) + error <- ifelse(num.decimals <= 1, 0, 0.1^num.decimals/2) + + # for "greater than", finds the number and proportion of values greater than + # input$cutoff - error. For "less than", finds the number and proportion of + # values less than input$cutoff + error. For beyond, the number and proportion + # of values is calculated by adding up all points such that the probability of + # obtaining that point is less than or equal to that of input$cutoff see + # https://en.wikipedia.org/wiki/Fisher%27s_exact_test, the second to last paragraph + # in the Example section) + if (input$inequality == "greater than"){ + values$prob <- sum(values$props >= as.numeric(input$cutoff)-error)/ + length(values$props) + values$count <- sum(values$props >= as.numeric(input$cutoff)-error) + } else if (input$inequality == "less than") { + values$prob <- sum(values$props <= as.numeric(input$cutoff)+error)/ + length(values$props) + values$count <- sum(as.numeric(values$props) <= + as.numeric(input$cutoff)+error) + } else { + x <- two_sided_values() + cutoff <- x[which(names(x) == values$table[1, 1])] + vals <- as.numeric(names(x)[which(x <= cutoff)]) + vals <- vals/values$table[1, 3] - (values$table[3, 1]-vals)/ + values$table[2, 3] + + values$prob <- length(which(values$props %in% vals))/ + length(values$props) + + values$count <- length(which(values$props %in% vals)) + } + } + }) + + # creates the desired plot + output$RandomPlot <- renderPlot({ + if (length(values$props) != 0 & !is.na(values$table[3, 1])){ # after reset, values$props is empty + DF <- values$table + + if (input$plot == "Dotplot"){ # plot == TRUE is dotplot, FALSE is histogram + # n is the number of columns for the dotplot + # large datasets will have n <- 1 + if (DF[3, 1] > 1000){ + n <- 1 + } else { + n <- 4 + } + # gets the dotplot locations for the dotplot + df <- dotplot_locs(values$props, n, input$cutoff, values$cutoff.color, + values$dot.fill.color, input$inequality) + df <- df[df$x < values$x.lim & df$x > -values$x.lim, ] + + myplot <- ggplot(df) + + geom_point(aes(x ,y, colour = fill.color), + size=min(n, 50/length(values$props)^0.5)) + + scale_colour_manual(name = "fill.color", + values = levels(df$fill.color)) + + scale_y_continuous(limits = c(0, max(n*7.5,max(df$y)))) + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + + labs(x = "Shuffled Difference in Proportions", y = "Count") + + plaintheme + axistheme + } else { + df <- data.frame("x" = values$props[values$props < values$x.lim & + values$props > -values$x.lim]) + + unique.vals <- sort(unique(as.numeric(as.character(df$x)))) + # a histogram is created to determine the bars that need to be colored + myplot <- ggplot(df, aes(x=x)) + geom_histogram(binwidth = max(diff(unique.vals))) + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + names.counts <- ggplot_build(myplot)$data[[1]]$x + # color is determined if input$cutoff is specified + if (!is.na(as.numeric(input$cutoff))){ + num.decimals <- decimalcount(as.character(input$cutoff)) + error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2) + if (input$inequality == "greater than"){ + to.color <- which(names.counts >= as.numeric(input$cutoff)-error) + } else if (input$inequality == "less than"){ + to.color <- which(names.counts <= as.numeric(input$cutoff)+error) + } else{ + to.color <- c(which(names.counts <= -1*abs(as.numeric(input$cutoff))+error), + which(names.counts >= abs(as.numeric(input$cutoff))-error)) + } + } else { + to.color <- NA + } + fill.color <- rep(values$hist.fill.color, length(names.counts)) + fill.color[to.color] <- values$cutoff.color + # the histogram is plotted + myplot <- ggplot(df, aes(x=x)) + + geom_histogram(binwidth = max(diff(unique.vals)), fill = fill.color, + col = values$hist.outline.color) + + labs(x = "Shuffled Difference in Proportions", y = "Count") + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + + plaintheme + axistheme + } + myplot + } + }) + + # calculate the observed difference when checkbox is TRUE + output$Observed.Diff <- renderText({ + if (input$Show.Observed){ + DF <- data() + values$observed = values$table[1, 1]/values$table[1, 3] - values$table[2, 1]/values$table[2, 3] + paste("Observed Difference:", round(values$observed, 6)) + } + }) + + # text for sample counts + output$count.samples <- renderText({ + "Count Samples" + }) + + # output for counts when cutoff is specified + output$counts <- renderText({ + update_counts() + if (!is.null(values$prob)){ + if (is.na(values$prob)){ + " " + } else if (!is.na(as.numeric(input$cutoff))){ + paste("<font color=", values$cutoff.color, "><b>",values$count, "/", + length(values$props), " (", round(values$prob, 4), ")", + "</b></font>", sep = "") + } else if (nchar(input$cutoff)!=0){ + "<font color=\"#FF0000\"><b>Invalid Cutoff!</b></font>" + } else { + " " + } + } + }) + + # changes table if a different preset is selected + observeEvent(input$presets, { + values$props <- vector() + enable("Replicate") + + if (input$presets != "Custom") { + preset_index <- which(names(Presets) == input$presets) + preset <- Presets[[preset_index]] + DF <- data.frame("X1" = as.numeric(preset[1:2]), + "X2" = as.numeric(preset[3:4])) + DF[3, ] <- apply(DF, 2, sum) + DF[, 3] <- apply(DF, 1, sum) + values$table <- DF + values$table.names <- preset[5:8] + + updateTextInput(session, "TL", value = values$table[1,1]) + updateTextInput(session, "TR", value = values$table[1,2]) + updateTextInput(session, "BL", value = values$table[2,1]) + updateTextInput(session, "BR", value = values$table[2,2]) + updateTextInput(session, "C1N", value = values$table.names[1]) + updateTextInput(session, "C2N", value = values$table.names[2]) + updateTextInput(session, "R1N", value = values$table.names[3]) + updateTextInput(session, "R2N", value = values$table.names[4]) + } + }) + + + + ## Bookmarking ## + # to remove bookmarking, remove bookmarkButton() from the ui + + # when the bookmark button is pressed, the current values of props and table are saved + onBookmark(function(state) { + state$values$props <- values$props + state$values$table <- values$table + }) + + # when opening a bookmarked page, props is restored + onRestored(function(state) { + values$props <- state$values$props + }) + + # when opening a bookmarked page, table is restored + onRestore(function(state) { + values$table <- state$values$table + }) + + +} + +shinyApp(ui = ui, server = server, options = list(height = 1080), enableBookmarking = "server") \ No newline at end of file diff --git a/TwoProportionResamplingTest/TwoProportionSource.R b/TwoProportionResamplingTest/TwoProportionSource.R new file mode 100644 index 0000000000000000000000000000000000000000..3d92c5357c90afc3b21b78874c2830afbc9e31a1 --- /dev/null +++ b/TwoProportionResamplingTest/TwoProportionSource.R @@ -0,0 +1,182 @@ +# HTML code for editable table +html_table <- HTML("<script type='text/javascript'> + /*<![CDATA[*/ + function Expand1(){ + if (!R1N.savesize) R1N.savesize=R1N.size; + if (!R2N.savesize) R2N.savesize=R2N.size; + var isChrome = !!window.chrome && (!!window.chrome.webstore || !!window.chrome.runtime); + var isSafari = /constructor/i.test(window.HTMLElement) || (function (p) { return p.toString() === '[object SafariRemoteNotification]'; })(!window['safari'] || (typeof safari !== 'undefined' && safari.pushNotification)); + var isIE = /*@cc_on!@*/false || !!document.documentMode; + offset = 0; + if (isSafari) offset = 3; + R1N.size=Math.max(R1N.savesize,R1N.value.length,R2N.value.length)-offset; + R2N.size=Math.max(R2N.savesize,R1N.value.length,R2N.value.length)-offset; + } + function Expand2(){ + if (!C1N.savesize) C1N.savesize=C1N.size; + if (!TL.savesize) TL.savesize=TL.size; + if (!BL.savesize) BL.savesize=BL.size; + var isChrome = !!window.chrome && (!!window.chrome.webstore || !!window.chrome.runtime); + var isSafari = /constructor/i.test(window.HTMLElement) || (function (p) { return p.toString() === '[object SafariRemoteNotification]'; })(!window['safari'] || (typeof safari !== 'undefined' && safari.pushNotification)); + var isIE = /*@cc_on!@*/false || !!document.documentMode; + offset = 0; + if (isSafari) offset = 3; + C1N.size=Math.max(C1N.savesize,C1N.value.length,TL.value.length,BL.value.length)-offset; + TL.size=Math.max(TL.savesize,C1N.value.length,TL.value.length,BL.value.length)-offset; + BL.size=Math.max(BL.savesize,C1N.value.length,TL.value.length,BL.value.length)-offset; + } + function Expand3(){ + if (!C2N.savesize) C2N.savesize=C2N.size; + if (!TR.savesize) TR.savesize=TR.size; + if (!BR.savesize) BR.savesize=BR.size; + var isChrome = !!window.chrome && (!!window.chrome.webstore || !!window.chrome.runtime); + var isSafari = /constructor/i.test(window.HTMLElement) || (function (p) { return p.toString() === '[object SafariRemoteNotification]'; })(!window['safari'] || (typeof safari !== 'undefined' && safari.pushNotification)); + var isIE = /*@cc_on!@*/false || !!document.documentMode; + offset = 0; + if (isSafari) offset = 3; + C2N.size=Math.max(C2N.savesize,C2N.value.length,TR.value.length,BR.value.length)-offset; + TR.size=Math.max(TR.savesize,C2N.value.length,TR.value.length,BR.value.length)-offset; + BR.size=Math.max(BR.savesize,C2N.value.length,TR.value.length,BR.value.length)-offset; + } + /*]]>*/ + </script><style> + table{ + border-color: #f3f7fb; + display: block; + overflow-x: auto; + } + + th, td { + border: 1px solid black; + border-collapse: collapse; + padding: 6px; + } + input { + border: 0; + width: auto; + padding: 1px 8px; + background-color: #f5f5f5; + font-size: 1em; + } + + + </style> + <table id = 'mytable'> + <tbody> + <tr> + <td></td> + <td><input size='1' id='C1N'type = 'text' onchange='Expand2();' oninput='Expand2();' value='Column A'></td> + <td><input size='1' id='C2N'type = 'text' onchange='Expand3();' oninput='Expand3();' value='Column B'></td> + <td><div style='padding: 1px 8px'>Total</div></td> + </tr> + <tr> + <td><input size='1' id='R1N'type = 'text' onchange='Expand1();' oninput='Expand1();' value='Row 1'></td> + <td><input size='1' id='TL' type='text' onchange='Expand2();' oninput='Expand2();' value='0'></td> + <td><input size='1' id='TR' type = 'text' onchange='Expand3();' oninput='Expand3();' value='0'></td> + <td><div size='1' style='padding: 1px 8px' id='TRT' class='shiny-text-output'></div></td> + </tr> + <tr> + <td><input size='1' id='R2N' type = 'text' onchange='Expand1();' oninput='Expand1();' value='Row 2'></td> + <td><input size='1' id='BL' type = 'text' onchange='Expand2();' oninput='Expand2();' value='0'></td> + <td><input size='1' id='BR' type = 'text' onchange='Expand3();' oninput='Expand3();' value='0'></td> + <td><div size='1' style='padding: 1px 8px' id='TRB' class='shiny-text-output'></div></td> + </tr> + <tr> + <td><div style='padding: 1px 8px'>Total</div></td> + <td><div size='1' style='padding: 1px 8px' id='TBL' class='shiny-text-output'></div></td> + <td><div size='1' style='padding: 1px 8px' id='TBR' class='shiny-text-output'></div></td> + <td><div size='1' style='padding: 1px 8px' id='Total' class='shiny-text-output'></div></td> + </tr> + </tbody> + </table>") + +# determines the number of decimal places of a number +decimalcount <- function(x){ + stopifnot(class(x) == "character") + x<-gsub("(.*)(\\.)|([0]*$)", "", x) + as.numeric(nchar(x)) +} + +# create dotplot locations from data x +dotplot_locs <- function(x, n, cutoff, cutoff.color, dot.fill.color, inequality){ + counts <- table(x) + x.locs <- as.numeric(names(counts)) + + # find minimum difference between points, with an exeption for a single point + if (length(names(counts)) == 1){ + point_dist <- min(diff(c(0, as.numeric(names(counts)))))/(n+2) + } else { + point_dist <- min(diff(as.numeric(names(counts))))/(n+2) + } + + # define the standard x coordinates to be used + x.coord <- sapply(x.locs, function(x) x + ((1:n)-(n+1)/2)*point_dist) + + x.coords <- vector() + y.coords <- vector() + to.color <- vector() + names.counts <- as.numeric(names(counts)) + # loop through each count, defining new x and y coordinates for "dotplot" + for (i in 1:length(counts)){ + if (n == 1){ + x.coords <- c(x.coords, rep(x.coord[i], counts[i]/n)) + } else { + x.coords <- c(x.coords, rep(x.coord[, i], counts[i]/n), + x.coord[0:(counts[i] %% n), i]) + } + + if (counts[i] > n){ + y.coords <- c(y.coords, sort(rep(1:(counts[i]/n), n)), + rep(ceiling(counts[i]/n), counts[i] %% n)) + } else { + y.coords <- c(y.coords, sort(rep(1:(counts[i]/n), counts[i]))) + } + # defines color of dots when cutoff defined + if(!is.na(as.numeric(cutoff))){ + num.decimals <- decimalcount(as.character(cutoff)) + # error term for rounded cutoff values + error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2) + if (inequality == "greater than"){ + if (names.counts[i] >= as.numeric(cutoff)-error){ + to.color <- c(to.color, rep(cutoff.color, counts[i])) + } else { + to.color <- c(to.color, rep(dot.fill.color, counts[i])) + } + } else if (inequality == "less than") { + if (names.counts[i] <= as.numeric(cutoff)+error){ + to.color <- c(to.color, rep(cutoff.color, counts[i])) + } else { + to.color <- c(to.color, rep(dot.fill.color, counts[i])) + } + } else { + if ((names.counts[i] <= -1*abs(as.numeric(cutoff))+error) | + (names.counts[i] >= abs(as.numeric(cutoff))-error)){ + to.color <- c(to.color, rep(cutoff.color, counts[i])) + } else { + to.color <- c(to.color, rep(dot.fill.color, counts[i])) + } + } + } else { + to.color <- c(to.color, rep(dot.fill.color, counts[i])) + } + } + return(data.frame("x" = x.coords, "y" = y.coords*n, + "fill.color" = to.color)) +} + + +# theme for plots +plaintheme <- theme_bw() + + theme(plot.background = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() ) + + theme(axis.line.x = element_line(color="black", size = 1), + axis.line.y = element_line(color="black", size = 1)) + + theme(legend.position = "none", plot.margin = margin(10, 10, 10, 10)) + +# axis theme for plots +axistheme <- theme(plot.title = element_text(hjust = 0.5, color = "black", + face = "bold", size=20)) + + theme(axis.title = element_text(color = "black", size = 16)) + + theme(axis.text.x = element_text(size = 14, color = "black")) + + theme(axis.text.y = element_text(size = 14, color = "black"))