current version (04/22/2019)

8d677d58 · Manski, Scott · 953259ab · 8d677d58 · 8d677d58
Commit 8d677d58 authored 5 years ago by Manski, Scott
--- a/OneProportionResamplingTest/OneProportionResamplingTest.R
+++ b/OneProportionResamplingTest/OneProportionResamplingTest.R
+# ------------------------------------------------------------------------------
+# File: OneProportionTest.R
+# Authors: Camille Fairbourn, Scott Manski
+# Date: 04/22/2019 
+# Desc: 
+# Published Location: 
+# Email: fairbour@msu.edu, manskisc@msu.edu
+#
+# For questions or concerns, please email the authors. This work is licensed 
+# under a Creative Commons Attribution-ShareAlike 4.0 International License 
+# (https://creativecommons.org/licenses/by-sa/4.0/).
+# ------------------------------------------------------------------------------
+
+
+# loading packages
+library(shiny)
+library(ggplot2)
+library(dplyr)
+library(BHH2)
+library(gridExtra)
+library(shinyjs)
+
+# Sources objects, functions, etc, from OneProportionSource.R
+# This file contains the html code for the editable table,
+# the decimalcount function, the dotplot_locs function, and
+# custom ggplot2 themes.
+source("OneProportionSource.R")
+
+# defines the presets
+Presets <- list()
+# Presets`preset name` <- c(Probability of Success, Sample Size)
+Presets$`Coin Flipping` <- c(0.5, 10)
+Presets$`Medical Consultant` <- c(0.048, 62)
+Presets$Custom <- c("", "")
+
+# colors for plots
+hist.fill.color <- "grey70" # histogram bar fill color
+hist.outline.color <- "black" # histogram bar outline color
+dot.fill.color <- "grey70" # dotplot dot fill color
+cutoff.color <- "#F05133" # color for cutoff values
+
+
+
+ui <- fluidPage(useShinyjs(),
+                titlePanel("One Proportion Resampling Test"),
+                sidebarLayout(
+                  sidebarPanel(
+                    tabsetPanel(
+                      tabPanel("Shuffle",
+                               tags$div(class="header", checked = NA,
+                                        tags$p(" ")
+                               ),
+                               hr(),
+                               selectInput("plot", "Plot Type", c("Dotplot", "Histogram")),
+                               selectInput("prop.counts", "Proportion or Counts", c("Proportion",
+                                                                                    "Counts")),
+                               selectInput("presets", "Presets", choices = names(Presets), selected = "Custom"),
+                               numericInput("probability", "Probability of Success", value = 0.5, min = 0, max = 1),
+                               numericInput("sampsize", "Sample Size", value = 10, min = 1),
+                               actionButton("Reset", "Reset"),
+                               numericInput("numsamp", "Number of Samples", value = 100, min = 1),
+                               tags$div(class="header", checked = NA,
+                                        tags$p("Enter a value from 1 to 5000")
+                               ),
+                               actionButton("Replicate", "Draw Samples")
+                               ),
+                      tabPanel("Instructions", 
+                                tags$div(class="header", checked = NA,
+                                        tags$p("Enter the value of the population (or model) proportion
+                                               in the 'Probability of Success' field."),
+                                        tags$p("Choose your sample size and the number of samples you
+                                               wish the app to generate. Then press the 'Draw Samples'
+                                               button."),
+                                        tags$p("Enter the value of your observed proportion in the text
+                                               under the graph. Selecting 'greater/less than' will highlight the
+                                               samples that are greater/less than your value."),
+                                        tags$p("Selecting 'beyond' will highlight the samples that are further away
+                                               from the 'Probability of Success' than your value."),
+                                        tags$p("Press the Reset button whenever you change the Probability of Success
+                                               or the Sample Size."),
+                                        hr(),
+                                        hr(),
+                                        tags$p("Written by Scott Manski"),
+                                        tags$p("This work is licensed under a "),
+                                        tags$a(href="http://creativecommons.org/licenses/by-sa/4.0/",
+                                               "Creative Commons Attribution-ShareAlike 4.0 International License")
+                                        
+                                        
+                      
+                      )
+                      ))),
+                  
+                  mainPanel(              
+                    plotOutput("SamplingDistribution"),
+                    checkboxInput("show.curve", "Overlay Normal Curve", FALSE),
+                    checkboxInput("show.summary", "Show summary statistics", FALSE),
+                    textOutput("summary"),
+                    fluidRow(
+                      column(textOutput("count.samples"), width = 3),
+                      column(selectInput("inequality", NULL, c("greater than", "less than", "beyond")), width = 3),
+                      column(textInput("cutoff", NULL), width = 4),
+                      textOutput("counts"))
+                  )
+                  )
+                )
+
+
+server <- function(input, output, session) {
+  # initialize values for use in server
+  values <- reactiveValues()
+  values$props <- vector()
+
+  # disable or enable the "Shuffle" button
+  # the "Shuffle" button in enabled when the number of shuffles is less than
+  # 5,000 and the total number of shuffles is less than 20,000
+  # sample size and probability must also be valid (n > 0 and 0 <= p <= 1)
+  observeEvent(c(input$Reset, input$numsamp, input$probability, input$sampsize), {
+    if (is.numeric(input$numsamp) & is.numeric(input$probability) & is.numeric(input$sampsize)){
+      if (input$numsamp > 5000 || input$numsamp < 1 || !is.integer(input$numsamp)){
+        disable("Replicate")
+      } else if (input$probability < 0 | input$probability > 1){
+        disable("Replicate")
+      } else if (input$sampsize <= 0) {
+        disable("Replicate")
+      } else if (length(values$props) <= 20000) {
+        enable("Replicate")
+      }
+    } else {
+      disable("Replicate")
+    }
+  })
+
+  # reset the values if "Reset" is pressed or if n or p are changed
+  observeEvent(c(input$Reset, input$presets, input$probability, input$sampsize), {
+    values$props <- vector()
+    if (input$prop.counts == "Proportion"){
+      values$mean <- input$probability
+    } else {
+      values$mean <- input$sampsize*input$probability
+    }
+  })
+  
+  # appropriately change values if Proportions or Counts is selected
+  observeEvent(input$prop.counts, {
+    if (input$prop.counts == "Proportion"){
+      values$props <- values$props/input$sampsize
+      values$mean <- input$probability
+    } else {
+      values$props <- values$props*input$sampsize
+      values$mean <- input$sampsize*input$probability
+    }
+    if (!is.na(as.numeric(input$cutoff))){
+      if (input$prop.counts == "Proportion"){
+        updateTextInput(session, "cutoff", value = as.numeric(input$cutoff)/input$sampsize)
+      } else {
+        updateTextInput(session, "cutoff", value = as.numeric(input$cutoff)*input$sampsize)
+      }
+    }
+  })
+  
+  # update the values when shuffle is pressed
+  update_vals <- eventReactive(input$Replicate, {
+    if (input$prop.counts == "Proportion"){
+      new.vals <- rbinom(input$numsamp, input$sampsize, input$probability)/input$sampsize
+    } else {
+      new.vals <- rbinom(input$numsamp, input$sampsize, input$probability)
+    }
+    values$props <- c(values$props, new.vals)
+    
+    if (length(values$props) >= 20000){
+      disable("Replicate")
+    } else {
+      enable("Replicate")
+    }
+  })
+  
+  # update the counts for the cutoff if there are any changes
+  update_counts <- eventReactive(c(input$cutoff, input$Replicate, input$Reset, input$inequality, input$presets), {
+    if (!is.na(as.numeric(input$cutoff))){
+      # the error is used to handle rounded values of input$cutoff
+      num.decimals <- decimalcount(as.character(input$cutoff))
+      error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2)
+      
+      # for "greater than", finds the number and proportion of values greater than
+      # input$cutoff - error. For "less than", finds the number and proportion of 
+      # values less than input$cutoff + error.  For "beyond", finds the number and
+      # proportion of values less than (mean - diff) + error and values greater than
+      # (mean + diff) - error, where diff is |input$cutoff - mean|
+      if (input$inequality == "greater than"){
+        values$prob <- sum(values$props >= as.numeric(input$cutoff)-error)/length(values$props)
+        values$count <- sum(values$props >= as.numeric(input$cutoff)-error)
+      } else if (input$inequality == "less than") {
+        values$prob <- sum(values$props <= as.numeric(input$cutoff)+error)/length(values$props)
+        values$count <- sum(values$props <= as.numeric(input$cutoff)+error)
+      } else {
+        values$prob <- (sum(values$props <= (values$mean-abs(values$mean-as.numeric(input$cutoff)))+error)+
+                          sum(values$props >= (values$mean+abs(values$mean-as.numeric(input$cutoff)))-error))/length(values$props)
+        values$count <- sum(values$props <= (values$mean-abs(values$mean-as.numeric(input$cutoff)))+error)+
+          sum(values$props >= (values$mean+abs(values$mean-as.numeric(input$cutoff)))-error)
+      }
+    }
+  })
+  
+  # create the desired plot
+  output$SamplingDistribution <- renderPlot({
+    update_vals()
+    if (length(values$props) != 0){ # after reset, values$props is empty
+      if (input$plot == "Dotplot"){ # plot == TRUE is dotplot, FALSE is histogram
+        # n is the number of columns for the dotplot. Large datasets will have n <- 1
+        if (input$sampsize > 1000){
+          n <- 1
+        } else {
+          n <- 4
+        }
+        # gets the dotplot locations for the dotplot
+        df <- dotplot_locs(values$props, n, input$cutoff, cutoff.color,
+                           dot.fill.color, input$inequality, values$mean)
+        
+        # creates dotplot with appropriate fill colors
+        if (!is.na(as.numeric(input$cutoff))){
+          myplot <- ggplot(df)  +
+            geom_point(aes(x ,y, colour = fill.color), size=min(n, 50/length(values$props)^0.5)) +
+            scale_colour_manual(name = cutoff.color, values = levels(df$fill.color)) +
+            theme(legend.position="none")
+        } else {
+          myplot <- ggplot(df)  +
+            geom_point(aes(x ,y, colour = fill.color), size=min(n, 50/length(values$props)^0.5)) + 
+            scale_colour_manual(name = dot.fill.color, values = levels(df$fill.color)) +
+            theme(legend.position="none")
+        }
+        finalplot <- myplot + scale_y_continuous(limits = c(0, max(n*7.5,max(df$y)*1.01)))
+      } else {
+        # creates histogram
+        df <- data.frame("x" = values$props)
+        unique.vals <- sort(unique(as.numeric(as.character(df$x))))
+        if (!is.na(as.numeric(input$cutoff))){
+          num.decimals <- decimalcount(as.character(input$cutoff))
+          error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2)
+          
+          # a histogram is created to determine the bars that need to be colored
+          myplot <- ggplot(df, aes(x=x)) + geom_histogram(binwidth = max(diff(unique.vals)))
+          names.counts <- ggplot_build(myplot)$data[[1]]$x
+          if (input$inequality == "greater than"){
+            to.color <- which(names.counts >= as.numeric(input$cutoff)-error)
+          } else if (input$inequality == "less than"){
+            to.color <- which(names.counts <= as.numeric(input$cutoff)+error)
+          } else{
+            to.color <- c(which(names.counts <= (values$mean-abs(values$mean-as.numeric(input$cutoff)))+error),
+                        which(names.counts >= (values$mean+abs(values$mean-as.numeric(input$cutoff)))-error))
+          }
+          fill.color <- rep(dot.fill.color, length(names.counts))
+          fill.color[to.color] <- cutoff.color
+          myplot <- ggplot(df, aes(x=x)) + geom_histogram(binwidth = max(diff(unique.vals)),
+                                                          fill = fill.color, col = hist.outline.color)
+        } else {
+          myplot <- ggplot(df, aes(x=x)) + geom_histogram(binwidth = max(diff(unique.vals)),
+                                                          fill = hist.fill.color,
+                                                          col = hist.outline.color)
+        }
+        finalplot <- myplot
+      }
+      # make appropriate changes if Proportions or Counts is selected
+      xlabel <- ifelse(input$prop.counts == "Proportion", "Proportion of Successes", "Number of Successes")
+      MEAN <- ifelse(input$prop.counts == "Proportion", input$probability, input$probability*input$sampsize)
+      SD <- ifelse(input$prop.counts == "Proportion", sqrt(input$probability*(1-input$probability)/input$sampsize), 
+                   sqrt(input$probability*(1-input$probability)*input$sampsize))
+      
+      finalplot <- finalplot + plaintheme + axistheme + labs(x = xlabel, y = "Count")
+      
+      # adds normal curve if checkbox is selected
+      if (input$show.curve == TRUE){
+        if (input$plot == "Dotplot"){
+          finalplot <- finalplot + 
+            stat_function(fun = function(x) dnorm(x, mean = MEAN, sd = SD) * max(df$y) / dnorm(MEAN, MEAN, SD))
+        } else {
+          finalplot <- finalplot +
+            stat_function(fun = function(x) dnorm(x, mean = MEAN, sd = SD) * max(diff(unique.vals)) * length(values$props))
+        }
+      }
+      finalplot
+    }
+  })
+  
+  # calculate the summary statistics when checkbox is TRUE
+  # summary statistics are different for Proportions and Counts
+  output$summary <- renderText({
+    if (input$show.summary && length(values$props != 0)){
+      if (input$prop.counts == "Proportion"){
+        mymean <- input$probability
+        mysd <- sqrt(input$probability*(1-input$probability)/input$sampsize)
+      } else {
+        mymean <- input$sampsize*input$probability
+        mysd <- sqrt(input$probability*(1-input$probability)*input$sampsize)
+      }
+      paste("Mean = ", round(mymean, 2), " SD = ", round(mysd, 4), sep = "")
+    }
+  })
+  
+  # text for sample counts
+  output$count.samples <- renderText({
+    "Count Samples"
+  })
+  
+  # counts when cutoff is specified
+  output$counts <- renderText({
+    update_counts()
+    if (!is.null(values$prob)){
+      if (is.na(values$prob)){
+        " "
+      } else if (!is.na(as.numeric(input$cutoff))){
+        paste(values$count, "/", length(values$props), " (", round(values$prob, 4), ")", sep = "")
+      } else if (nchar(input$cutoff)!=0){
+        "Invalid Cutoff!"
+      } else {
+        " "
+      }
+    }
+  })
+  
+  # updates probability and sample size  when preset is chosen
+  observeEvent(input$presets, {
+    preset <- which(names(Presets) == input$presets)
+    updateNumericInput(session, "probability", value = Presets[[preset]][1])
+    updateNumericInput(session, "sampsize", value = Presets[[preset]][2])
+  })
+  
+}
+
+
+shinyApp(ui = ui, server = server, options = list(height = 1080))
+
--- a/OneProportionResamplingTest/OneProportionSource.R
+++ b/OneProportionResamplingTest/OneProportionSource.R
+#theme for plots
+plaintheme <- theme_bw() + 
+  theme(plot.background = element_blank(),
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank() ) +
+  theme(axis.line.x = element_line(color="black", size = 1),
+        axis.line.y = element_line(color="black", size = 1))+
+  theme(legend.position = "none")
+
+#axis theme for plots
+axistheme <- theme(plot.title = element_text(hjust = 0.5, color = "black", face = "bold", size=20)) +
+  theme(axis.title = element_text(color = "black", size = 16)) +
+  theme(axis.text.x = element_text(size = 14, color = "black")) +
+  theme(axis.text.y = element_text(size = 14, color = "black")) 
+
+#determines the number of decimal places of a number
+decimalcount<-function(x){stopifnot(class(x)=="character")
+  x<-gsub("(.*)(\\.)|([0]*$)","",x)
+  as.numeric(nchar(x))
+}
+
+# create dotplot locations from data x
+dotplot_locs <- function(x, n, cutoff, cutoff.color, dot.fill.color, inequality, Mean){
+  counts <- table(x)
+  x.locs <- as.numeric(names(counts))
+  
+  # find minimum difference between points, with an exeption for a single point
+  if (length(names(counts)) == 1){
+    point_dist <- min(diff(c(0, as.numeric(names(counts)))))/(n+2)
+  } else {
+    point_dist <- min(diff(as.numeric(names(counts))))/(n+2)
+  }
+  
+  # define the standard x coordinates to be used
+  x.coord <- sapply(x.locs, function(x) x + ((1:n)-(n+1)/2)*point_dist)
+  
+  x.coords <- vector()
+  y.coords <- vector()
+  to.color <- vector()
+  names.counts <- as.numeric(names(counts))
+  # loop through each count, defining new x and y coordinates for "dotplot"
+  for (i in 1:length(counts)){ 
+    if (n == 1){
+      x.coords <- c(x.coords, rep(x.coord[i], counts[i]/n))
+    } else {
+      x.coords <- c(x.coords, rep(x.coord[, i], counts[i]/n),
+                    x.coord[0:(counts[i] %% n), i])
+    }
+    
+    if (counts[i] > n){
+      y.coords <- c(y.coords, sort(rep(1:(counts[i]/n), n)),
+                    rep(ceiling(counts[i]/n), counts[i] %% n))
+    } else {
+      y.coords <- c(y.coords, sort(rep(1:(counts[i]/n), counts[i])))
+    }
+    # defines color of dots when cutoff defined
+    if(!is.na(as.numeric(cutoff))){
+      num.decimals <- decimalcount(as.character(cutoff))
+      # error term for rounded cutoff values
+      error <- ifelse(num.decimals <= 2, 0, 0.1^num.decimals/2) 
+      if (inequality == "greater than"){
+        if (names.counts[i] >= as.numeric(cutoff)-error){
+          to.color <- c(to.color, rep(cutoff.color, counts[i]))
+        } else {
+          to.color <- c(to.color, rep(dot.fill.color, counts[i]))
+        }
+      } else if (inequality == "less than") {
+        if (names.counts[i] <= as.numeric(cutoff)+error){
+          to.color <- c(to.color, rep(cutoff.color, counts[i]))
+        } else {
+          to.color <- c(to.color, rep(dot.fill.color, counts[i]))
+        }
+      } else {
+        dist <- abs(Mean - (abs(as.numeric(cutoff))-error))
+        if ((names.counts[i] <= Mean - dist) |
+            (names.counts[i] >= Mean + dist)){
+          to.color <- c(to.color, rep(cutoff.color, counts[i]))
+        } else {
+          to.color <- c(to.color, rep(dot.fill.color, counts[i]))
+        }
+      }
+    } else {
+      to.color <- c(to.color, rep(dot.fill.color, counts[i]))
+    }
+  }
+  return(data.frame("x" = x.coords, "y" = y.coords*n,
+                    "fill.color" = to.color))
+}
+