From b18aeb0e8cd58fdba8d14669b5fc18a1135cec3d Mon Sep 17 00:00:00 2001 From: Camille Fairbourn <fairbour@msu.edu> Date: Mon, 25 Mar 2019 19:19:19 -0400 Subject: [PATCH] Current version of 2 prop app --- TwoProportionResamplingTest.R | 433 ++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 TwoProportionResamplingTest.R diff --git a/TwoProportionResamplingTest.R b/TwoProportionResamplingTest.R new file mode 100644 index 0000000..ae8c826 --- /dev/null +++ b/TwoProportionResamplingTest.R @@ -0,0 +1,433 @@ +# ------------------------------------------------------------------------------ +# File: TwoProportionResamplingTest.R +# Authors: Camille Fairbourn, Scott Manski +# Date: 01/21/2019 +# Desc: +# Published Location: +# Email: fairbour@msu.edu, manskisc@msu.edu +# +# For questions or concerns, please email the authors. This work is licensed +# under a Creative Commons Attribution-ShareAlike 4.0 International License +# (https://creativecommons.org/licenses/by-sa/4.0/). +# ------------------------------------------------------------------------------ + + +# loading packages +library(shiny) +library(ggplot2) +library(dplyr) +library(BHH2) +library(gridExtra) +library(shinyjs) + +enableBookmarking(store = "server") + +# Sources objects, functions, etc, from TwoProportionSource.R +# This file contains the html code for the editable table, +# the decimalcount function, and the dotplot_locs function. +source("TwoProportionSource.R") + +# defines the presets +Presets <- list() +# Presets`preset name` <- c(Top Left, Bottom Left, Top Right, Bottom Right, +# Column A Name, Column B name, Row A name, Row B name) +Presets$`Duct Tape Therapy` <- c(22, 15, 4, 10, "Wart Gone", "Wart Remains", + "Duct Tape", "Cryotherapy") +Presets$`Gender Discrimination` <- c(21, 14, 3, 10, "Promotion", "No Promotion", + "Male", "Female") +Presets$`Opportunity Cost` <- c(56, 41, 19, 34, "buy DVD", "not buy DVD", + "control", "treatment") +Presets$`Avandia` <- c(2593, 5386, 65000, 154592, "Yes", "No", "Rosiglitazone", + "Pioglitazone") +Presets$`Custom` <- c("", "", "", "", "Column A", "Column B", "Row 1", "Row 2") + + +ui <- function(request) { + fluidPage(useShinyjs(), + titlePanel("Two Proportion Resampling Test"), + sidebarLayout( + sidebarPanel( + tabsetPanel( + tabPanel("Test", + tags$div(class="header", checked = NA, + tags$p("Enter your data into the table + below, or choose one of the data + presets. Press the Shuffle button + to simulate the results under an + independence null model." + ) + ), + hr(), + selectInput(inputId = "plot", + label = "Plot Type", + choices = c("Dotplot", "Histogram")), + selectInput(inputId = "presets", + label = "Presets", + selected = names(Presets)[1], + choices = names(Presets)), + html_table, + actionButton(inputId = "Reset", label = "Reset"), + numericInput(inputId = "numsamp", + label = "Shuffle how many times?", + value = 100, min = 1, max = 5000), + tags$div(class="header", checked = NA, + tags$p("Enter a value from 1 to 5000")), + actionButton("Replicate", "Shuffle") + + ), + tabPanel("Information", + tags$div(class="header", checked = NA, + tags$p("Enter the value of your observed difference of proportions in the text + under the graph. Selecting 'greater/less than' will highlight the + samples that are greater/less than your value."), + tags$p("Selecting 'beyond' will highlight the samples that are further away + from 0 than your value."), + tags$p("Press the Reset button whenever you change the values in the table."), + hr(), + hr(), + tags$p("Written by Scott Manski"), + tags$p("This work is licensed under a "), + tags$a(href="http://creativecommons.org/licenses/by-sa/4.0/", "Creative Commons Attribution-ShareAlike 4.0 International License"), + hr(), + bookmarkButton() + ) + ))), + + mainPanel( + plotOutput("RandomPlot1"), + checkboxInput("Show.Observed", "Show observed difference", FALSE), + textOutput("Observed.Diff"), + fluidRow( + column(textOutput("count.samples"), width = 3), + column(selectInput("inequality", NULL, c("greater than", "less than", "beyond")), width = 3), + column(textInput("cutoff", NULL), width = 4), + htmlOutput("counts")) + ) + ) +)} + + + +server <- function(input, output, session) { + # initialize values for use in server + values <- reactiveValues() + values$props <- vector() + values$table <- matrix(rep(NA, 9), ncol=3) + + # colors for plots + values$hist.fill.color <- "grey70" # histogram bar fill color + values$hist.outline.color <- "black" # histogram bar outline color + values$dot.fill.color <- "grey70" # dotplot dot fill color + values$cutoff.color <- "#F05133" # color for cutoff values + + # theme for plots + plaintheme <- theme_bw() + + theme(plot.background = element_blank(), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() ) + + theme(axis.line.x = element_line(color="black", size = 1), + axis.line.y = element_line(color="black", size = 1)) + + theme(legend.position = "none", plot.margin = margin(10, 10, 10, 10)) + + # axis theme for plots + axistheme <- theme(plot.title = element_text(hjust = 0.5, color = "black", + face = "bold", size=20)) + + theme(axis.title = element_text(color = "black", size = 16)) + + theme(axis.text.x = element_text(size = 14, color = "black")) + + theme(axis.text.y = element_text(size = 14, color = "black")) + + # observe any changes in table + observe({ + values$table[1, 1] <- as.numeric(input$TL) # top left position + values$table[1, 2] <- as.numeric(input$TR) # top right position + values$table[2, 1] <- as.numeric(input$BL) # bottom left position + values$table[2, 2] <- as.numeric(input$BR) # bottom right position + values$table[1, 3] <- as.numeric(input$TL) + as.numeric(input$TR) #top sum + values$table[2, 3] <- as.numeric(input$BL) + as.numeric(input$BR) # bottom sum + values$table[3, 1] <- as.numeric(input$TL) + as.numeric(input$BL) # left sum + values$table[3, 2] <- as.numeric(input$TR) + as.numeric(input$BR) # right sum + values$table[3, 3] <- as.numeric(input$TL) + as.numeric(input$TR) + # total sum + as.numeric(input$BL) + as.numeric(input$BR) + + # calculates the limits for the plots based on the standard deviation + # the standard deviation is calculated based on the Hypergeometric distribution + values$x.lim <- 6*sqrt(values$table[3, 1]*values$table[1, 3]/values$table[3, 3]* + values$table[2, 3]/values$table[3, 3]*values$table[3, 2]/(values$table[3,3]-1)/values$table[1, 3]^2 + + values$table[3, 2]*values$table[2, 3]/values$table[3, 3]* + values$table[1, 3]/values$table[3, 3]*values$table[3, 1]/(values$table[3,3]-1)/values$table[2, 3]^2) + }) + + # output for values of table if there is a change + output$TRT <- renderText({ + values$table[1, 3] + }) + output$TRB <- renderText({ + values$table[2, 3] + }) + output$TBL <- renderText({ + values$table[3, 1] + }) + output$TBR <- renderText({ + values$table[3, 2] + }) + output$Total <- renderText({ + values$table[3, 3] + }) + + + + + # these will update each time the user clicks the Replicate button + observeEvent(input$Replicate || input$Show.Observed, { + values$observed <- values$table[1 ,1]/values$table[1, 3] - + values$table[2 ,1]/values$table[2, 3] + }) + + + # reset the values if "Reset" is pressed + observeEvent(input$Reset, { + values$props <- vector() + enable("Replicate") + }) + + # checks to see if the current table matches a preset, otherwise the preset is "Custom" + observeEvent(c(input$TL, input$TR, input$BL, input$BR, input$C1N, + input$C2N, input$R1N, input$R2N), { + # combine the current table inputs into a vector + current <- c(input$TL, input$BL, input$TR, input$BR, input$C1N, input$C2N, + input$R1N, input$R2N) + # loops through each preset and determines the number of cells that match the current table + preset <- unlist(lapply(names(Presets), function(i) { + sum(Presets[[i]] == current) + })) + + # if all cells match, change the SelectInput value to that preset, + # otherwise change the value to "Custom" + if (sum(preset == 8) > 0) { + updateSelectInput(session, "presets", selected = names(Presets)[which(preset == 8)]) + } else { + updateSelectInput(session, "presets", selected = "Custom") + } + }, ignoreInit = TRUE) + + + # disable or enable the "Shuffle" button + # the "Shuffle" button in enabled when the number of shuffles is less than + # 5,000 and the total number of shuffles is less than 20,000 + observeEvent(input$numsamp, { + if (is.numeric(input$numsamp)){ + if (input$numsamp > 5000){ + disable("Replicate") + } else if (length(values$props) <= 20000){ + enable("Replicate") + } + } + }) + + # update the values when shuffle is pressed + observeEvent(input$Replicate, { + new.vals <- rhyper(input$numsamp, values$table[1, 3], + values$table[2, 3], values$table[3, 1]) + new.vals <- new.vals/values$table[1, 3] - (values$table[3, 1]-new.vals)/ + values$table[2, 3] + values$props <- c(values$props, new.vals) + + if (length(values$props) >= 20000){ + disable("Replicate") + } else { + enable("Replicate") + } + }, ignoreInit = TRUE) + + # when "Beyond" is selected, this function is used to calculate the probability + # for each possible outcome. + two_sided_values <- function() { + m <- values$table[1, 3] + n <- values$table[2, 3] + k <- values$table[3, 1] + support <- c(max(0, k - n):min(k, m)) + x <- dhyper(support, m, n, k) + names(x) <- support + x + } + + # update the counts for the cutoff if there are any changes + update_counts <- eventReactive(c(input$cutoff, input$Replicate, input$Reset, + input$inequality, input$presets), { + if (!is.na(as.numeric(input$cutoff))){ + # the error is used to handle rounded values of input$cutoff + num.decimals <- decimalcount(as.character(input$cutoff)) + error <- ifelse(num.decimals <= 1, 0, 0.1^num.decimals/2) + + # for "greater than", finds the number and proportion of values greater than + # input$cutoff - error. For "less than", finds the number and proportion of + # values less than input$cutoff + error. For beyond, the number and proportion + # of values is calculated by adding up all points such that the probability of + # obtaining that point is less than or equal to that of input$cutoff see + # https://en.wikipedia.org/wiki/Fisher%27s_exact_test, the second to last paragraph + # in the Example section) + if (input$inequality == "greater than"){ + values$prob <- sum(values$props >= as.numeric(input$cutoff)-error)/ + length(values$props) + values$count <- sum(values$props >= as.numeric(input$cutoff)-error) + } else if (input$inequality == "less than") { + values$prob <- sum(values$props <= as.numeric(input$cutoff)+error)/ + length(values$props) + values$count <- sum(as.numeric(values$props) <= + as.numeric(input$cutoff)+error) + } else { + x <- two_sided_values() + cutoff <- x[which(names(x) == values$table[1, 1])] + vals <- as.numeric(names(x)[which(x <= cutoff)]) + vals <- vals/values$table[1, 3] - (values$table[3, 1]-vals)/ + values$table[2, 3] + + values$prob <- length(which(values$props %in% vals))/ + length(values$props) + + values$count <- length(which(values$props %in% vals)) + } + } + }) + + # create the desifill.color plot + output$RandomPlot1 <- renderPlot({ + #update_vals() + if (length(values$props) != 0 & !is.na(values$table[3, 1])){ # after reset, values$props is empty + DF <- values$table + + if (input$plot == "Dotplot"){ # plot == TRUE is dotplot, FALSE is histogram + # n is the number of columns for the dotplot + # large datasets will have n <- 1 + if (DF[3, 1] > 1000){ + n <- 1 + } else { + n <- 4 + } + df <- dotplot_locs(values$props, n, input$cutoff, values$cutoff.color, + values$dot.fill.color, input$inequality) + df <- df[df$x < values$x.lim & df$x > -values$x.lim, ] + + myplot <- ggplot(df) + + geom_point(aes(x ,y, colour = fill.color), + size=min(n, 50/length(values$props)^0.5)) + + scale_colour_manual(name = "fill.color", + values = levels(df$fill.color)) + + scale_y_continuous(limits = c(0, max(n*7.5,max(df$y)))) + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + + labs(x = "Shuffled Difference in Proportions", y = "Count") + + plaintheme + axistheme + } else { + df <- data.frame("x" = values$props[values$props < values$x.lim & + values$props > -values$x.lim]) + + unique.vals <- sort(unique(as.numeric(as.character(df$x)))) + myplot <- ggplot(df, aes(x=x)) + geom_histogram(binwidth = max(diff(unique.vals))) + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + names.counts <- ggplot_build(myplot)$data[[1]]$x + if (!is.na(as.numeric(input$cutoff))){ + num.decimals <- decimalcount(as.character(input$cutoff)) + error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2) + if (input$inequality == "greater than"){ + to.color <- which(names.counts >= as.numeric(input$cutoff)-error) + } else if (input$inequality == "less than"){ + to.color <- which(names.counts <= as.numeric(input$cutoff)+error) + } else{ + to.color <- c(which(names.counts <= -1*abs(as.numeric(input$cutoff))+error), + which(names.counts >= abs(as.numeric(input$cutoff))-error)) + } + } else { + to.color <- NA + } + fill.color <- rep(values$hist.fill.color, length(names.counts)) + fill.color[to.color] <- values$cutoff.color + myplot <- ggplot(df, aes(x=x)) + + geom_histogram(binwidth = max(diff(unique.vals)), fill = fill.color, + col = values$hist.outline.color) + + labs(x = "Shuffled Difference in Proportions", y = "Count") + + scale_x_continuous(limits = c(-values$x.lim, values$x.lim)) + + plaintheme + axistheme + } + myplot + } + }) + + # calculate the observed difference when checkbox is TRUE + output$Observed.Diff <- renderText({ + if (input$Show.Observed){ + DF <- data() + values$observed = values$table[1, 1]/values$table[1, 3] - values$table[2, 1]/values$table[2, 3] + paste("Observed Difference:", round(values$observed, 6)) + } + }) + + # text for sample counts + output$count.samples <- renderText({ + "Count Samples" + }) + + # counts when cutoff is specified + output$counts <- renderText({ + update_counts() + if (!is.null(values$prob)){ + if (is.na(values$prob)){ + " " + } else if (!is.na(as.numeric(input$cutoff))){ + paste("<font color=", values$cutoff.color, "><b>",values$count, "/", + length(values$props), " (", round(values$prob, 4), ")", + "</b></font>", sep = "") + } else if (nchar(input$cutoff)!=0){ + "<font color=\"#FF0000\"><b>Invalid Cutoff!</b></font>" + } else { + " " + } + } + }) + + # presets for the contingency table + observeEvent(input$presets, { + values$props <- vector() + enable("Replicate") + + if (input$presets != "Custom") { + preset_index <- which(names(Presets) == input$presets) + preset <- Presets[[preset_index]] + DF <- data.frame("X1" = as.numeric(preset[1:2]), + "X2" = as.numeric(preset[3:4])) + DF[3, ] <- apply(DF, 2, sum) + DF[, 3] <- apply(DF, 1, sum) + values$table <- DF + values$table.names <- preset[5:8] + + updateTextInput(session, "TL", value = values$table[1,1]) + updateTextInput(session, "TR", value = values$table[1,2]) + updateTextInput(session, "BL", value = values$table[2,1]) + updateTextInput(session, "BR", value = values$table[2,2]) + updateTextInput(session, "C1N", value = values$table.names[1]) + updateTextInput(session, "C2N", value = values$table.names[2]) + updateTextInput(session, "R1N", value = values$table.names[3]) + updateTextInput(session, "R2N", value = values$table.names[4]) + } + }) + + # when the bookmark button is pressed, the current values of props and table are saved + onBookmark(function(state) { + state$values$props <- values$props + state$values$table <- values$table + }) + + # when opening a bookmarked page, props is restored + onRestored(function(state) { + values$props <- state$values$props + }) + + # when opening a bookmarked page, table is restored + onRestore(function(state) { + values$table <- state$values$table + }) + + + +} + +shinyApp(ui = ui, server = server, options = list(height = 1080), enableBookmarking = "server") \ No newline at end of file -- GitLab