Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
app.R 14.05 KiB
# ------------------------------------------------------------------------------
# File: ConfidenceIntervalApp.R
# Authors: Camille Fairbourn, Scott Manski
# Date: 05/03/2019 
# Desc: This app allows users to visualize the repeated confidence intervals
#       for a proportion. The true proportion, confidence level, sample size,
#       and the number of samples can all be chosen by the user.
# Published Location: 
# Email: fairbour@msu.edu, manskisc@msu.edu
#
# For questions or concerns, please email the authors. This work is licensed 
# under a Creative Commons Attribution-ShareAlike 4.0 International License 
# (https://creativecommons.org/licenses/by-sa/4.0/).
# ------------------------------------------------------------------------------


# loading packages
library(shiny)
library(ggplot2)
library(dplyr)
library(png)
library(grid)
library(shinydashboard)
library(scales)

# Sources objects, functions, etc, from ConfidenceIntervalSource.R
source("www/ConfidenceIntervalSource.R")

# defines colors for plots
ball_colors <- c("#E69F00", "#009E73")
CI_contains_p <- "black"
CI_not_contain_p <- "red"
arrow_color <- "blue"


ui <- dashboardPage(
  dashboardHeader(title = "Confidence Interval", titleWidth = 350),
  dashboardSidebar(width = 350,
    sliderInput("p", "Population Proportion", value = 0.5, min = 0.2, max = 0.8, step = 0.01),
    sliderInput("nsize", "Sample Size", value = 100, min = 50, max = 150, step = 10),
    sliderInput("conf", "Confidence Level", value=95, min=1, max=99, step = 1),
    sliderInput("reps", "How many intervals to draw", value = 50, min = 1, max = 100, step = 1),
    hr(),
    p("The application will draw samples of the size you specify
      and visually represent the confidence interval based on each
      sample. If the confidence interval does not include the true
      population proportion, the interval will be colored red."),
    p("Try different values for both the sample size and the confidence
      level. Notice how the width of the intervals changes for different
      sample sizes and confidence levels. Does a larger sample size give
      narrower or wider intervals?"),
    hr(),
    p("Written by Scott Manski"),
    p("This work is licensed under a "),
    a(href="http://creativecommons.org/licenses/by-sa/4.0/",
      "Creative Commons Attribution-ShareAlike 4.0 International License")
  ),
  dashboardBody(
    tags$head(tags$style(CSS_style)),
    fluidRow(column(verticalLayout(
      actionButton("new_samp", "Draw New Samples"),
      selectInput("visual", "Visualization", choices = c("Gumballs", "Balls", "Cards")),
      plotOutput("pop", width = "400px")), width = 6),
             column(verticalLayout(
               conditionalPanel("output.showSamples",
                                column(selectInput("sample_num", "Sample Number", choices = ""),
                                       uiOutput("sample_prop"), width = 12)),
               plotOutput("sample", width = "400px")), width = 6)),
    plotOutput("interval_plot"),
    h3(textOutput("percentage"))
  )
)


# Define a server for the Shiny app
server <-function(input, output, session) {
  # initialize values for use in server
  values <- reactiveValues()
  values$population <- vector()
  values$color <- vector()
  values$sample_values <- list()
  
  # show selectInput for 'Sample Number' but only when there are samples
  output$showSamples <- reactive({
    updateSelectInput(session, "sample_num", choices = 1:length(values$sample_values))
    length(values$sample_values) > 0
  })
  outputOptions(output, "showSamples", suspendWhenHidden = FALSE)
  
  # function for creating a new population based on input$p
  new_population <- reactive({
    population <- c(rep(1, round(10000*input$p)), rep(0, 10000-round(10000*input$p)))
    population[sample(1:10000, 10000)]
  })
  
  # population visualization
  output$pop <- renderPlot({
    values$population <- new_population()
    if (length(values$population) > 0){
      if (input$visual == "Balls") {
        point_colors <- as.character(values$population)
        x <- runif(length(values$population), 0, 1)
        y <- runif(length(values$population), 0, 1)
        balls <- data.frame(x, y)
        
        # creates rectangle with colored balls inside
        visual <- ggplot() + geom_point(data = balls, aes(x, y, fill = point_colors), colour = "black", pch = 21, size = 5) +
          scale_fill_manual(values = ball_colors) +
          geom_polygon(aes(x = c(-0.01, -0.01, 1.01, 1.01), y = c(-0.01, 1.01, 1.01, -0.01), fill = NA), color = "white", size = 4) +
          geom_polygon(aes(x = c(0, 0, 1, 1), y = c(0, 1, 1, 0), fill = NA), color = "black", size = 2) +
          scale_x_continuous(limits = c(-0.1, 1.01)) + 
          theme(legend.position = "none",
                line = element_blank(),
                text = element_blank(),
                title = element_blank(),
                panel.background = element_blank())

      } else if (input$visual == "Gumballs") {
        # create gumball locations for gumball machine based on the population
        point_colors <- as.character(values$population)
        x <- runif(length(values$population), 0.265, 0.7355)
        y <- unlist(lapply(1:length(values$population), function(i){
          runif(1, 0.725-1*sqrt(0.23525^2-(0.50025-x[i])^2), min(0.87, 0.725+1*sqrt(0.23525^2-(0.50025-x[i])^2)))
        }))
        gumballs <- data.frame(x, y, point_colors, stringsAsFactors = FALSE)
        
        # creates the gumball machine population visual
        visual <- ggplot() + annotation_custom(gumball_machine, xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
          geom_point(data = gumballs, aes(x, y, fill = point_colors), colour = "black", pch = 21, size = 5) +
          scale_fill_manual(values = ball_colors) +
          scale_x_continuous(limits = c(0, 1)) +
          scale_y_continuous(limits = c(0, 1)) +
          theme(legend.position = "none",
                line = element_blank(),
                text = element_blank(),
                title = element_blank(),
                panel.background = element_blank())
          
      } else {
        visual <- ggplot(data = NULL) 
        # adds 100 cards to the visual
        for (i in 1:100) {
          card <- sample(c(0, 1), 1, prob = c(input$p, 1-input$p))
          x <- runif(1, 0, 0.5)
          y <- runif(1, 0, 0.4)
          if (card == 1) {
            visual <- visual + 
              annotation_custom(red_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
          } else {
            visual <- visual + 
              annotation_custom(blue_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
          }
        }
        # creates the card population visual 
        visual <- visual + scale_x_continuous(limits = c(0, 0.6)) +
          scale_y_continuous(limits = c(0, 0.6)) +
          theme(legend.position = "none",
                line = element_blank(),
                text = element_blank(),
                title = element_blank(),
                panel.background = element_blank())
      }
      visual
    }
  })
  
  # creates selected sample visualization
  output$sample <- renderPlot({
    if (length(values$sample_values) > 0) {
      if (input$visual == "Cards") {
        # get the card locations for the sample
        sample_locs <- getCardSampleLocations(values$sample_values[[as.numeric(input$sample_num)]])
        
        # calculate the breaks and labels for y axis for sample visual
        breaks <- seq(0, max(sample_locs$y), by = 1/4)
        if (max(breaks) != max(sample_locs$y)) {
          breaks <- c(breaks, max(breaks)+1/4)
        }
        breaks <- breaks + 0.1
        labels <- seq(0, length(unique(sample_locs$x))/2 * (length(breaks)-1),
                      by = length(unique(sample_locs$x))/2)
        
        
        visual <- ggplot(data = NULL) 
        # add each card to sample visual based on sample_locs
        for (i in 1:nrow(sample_locs)) {
          card <- sample_locs$color[i]
          x <- sample_locs$x[i]
          y <- sample_locs$y[i]
          if (card == 0) {
            visual <- visual + 
              annotation_custom(red_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
          } else {
            visual <- visual + 
              annotation_custom(blue_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
          }
        }
        
        # creates card sample visual
        visual + scale_y_continuous(name = "Counts", limits = c(mean(breaks[1:2]), max(breaks)+1/8), 
                                    breaks = breaks, labels = labels, position = "right") +
          scale_x_continuous(limits = c(0, 1)) +
          theme(legend.position = "none",
                axis.ticks.x = element_blank(), 
                axis.text.x = element_blank(),
                axis.title.x = element_blank(),
                axis.ticks.y = element_line(size = 1),
                axis.title.y = element_text(color = "black", size = 20),
                axis.text.y = element_text(size = 16),
                axis.line.x = element_line(color="black", size = 1),
                axis.line.y = element_line(color="black", size = 1),
                panel.background = element_blank())
      } else {
        # get the point locations for the sample
        sample_locs <- getSampleLocations(values$sample_values[[as.numeric(input$sample_num)]])
        
        # calculate the breaks and labels for y axis for sample visual
        breaks <- seq(0, max(sample_locs$y), by = 1/8)
        if (max(breaks) != max(sample_locs$y)) {
          breaks <- c(breaks, max(breaks)+1/8)
        }
        labels <- seq(0, length(unique(sample_locs$x))/2 * (length(breaks)-1),
                      by = length(unique(sample_locs$x))/2)*4

        # creates ball sample visual
        ggplot() + geom_point(data = sample_locs, aes(x = x, y = y, fill = as.character(color)),
                            colour = "black", pch = 21, size = 5) +
          scale_fill_manual(values = ball_colors) +
          scale_y_continuous(name = "Counts", limits = c(0, max(sample_locs$y)+1/16), 
                             breaks = breaks, labels = labels, position = "right") +
          theme(legend.position = "none",
                axis.ticks.x = element_blank(), 
                axis.text.x = element_blank(),
                axis.title.x = element_blank(),
                axis.ticks.y = element_line(size = 1),
                axis.title.y = element_text(color = "black", size = 20),
                axis.text.y = element_text(size = 16),
                axis.line.x = element_line(color="black", size = 1),
                axis.line.y = element_line(color="black", size = 1),
                panel.background = element_blank())
      }
    }
  })
  
  # take input$reps number of samples of size input$nsize from the population
  # and calculate the sample mean
  observeEvent(input$new_samp, {
    values$samples <- unlist(lapply(1:input$reps, function(x) {
      values$sample_values[[x]] <- sample(values$population, input$nsize)
      mean(values$sample_values[[x]])
    }))
  })

  # if the input$reps is increased, simply calculate more sample means and add them
  # to curent sample means.
  observeEvent(input$reps, {
    if (input$reps > length(values$samples)) {
      values$samples <- c(values$samples, unlist(lapply(1:(input$reps-length(values$samples)), function(x) {
        values$sample_values[[length(values$samples) + x]] <- sample(values$population, input$nsize)
        mean(values$sample_values[[length(values$samples) + x]])
      })))
    }
  })
  
  # if input$p or input$nsize change, reset the samples
  observeEvent(c(input$p, input$nsize), {
    values$samples <- vector()
    values$sample_values <- list()
    values$color <- vector()
  })
  
  # show the confidence intervals for each sample
  output$interval_plot <- renderPlot({
    if (length(values$samples) > 0){
      # calculate the lower and upper bounds for the CI for each sample
      intervals <- lapply(1:input$reps, function(i){
        p <- values$samples[i]
        c(p-qnorm((1+input$conf/100)/2)*sqrt(p*(1-p)/input$nsize), 
          p+qnorm((1+input$conf/100)/2)*sqrt(p*(1-p)/input$nsize))
      })
      intervals <- do.call(c, intervals)
      
      # round off CI bound if it is more than 1 or less than 0
      intervals <- ifelse(abs(intervals - 0.5) >= 0.5, round(intervals), intervals)
      
      # change CI line color depending on if the CI contains the true input$p
      color <- vector()
      for (i in 1:input$reps){
        interval <- intervals[(2*(i-1) + 1):(2*i)]
        values$color[i] <- ifelse(interval[1] <= input$p & interval[2] >= input$p, CI_contains_p, CI_not_contain_p)
        color[(2*(i-1) + 1):(2*i)] <- values$color[i]
      }
      # defines arrow for selected sample
      arrow.x <- ifelse(input$p < 0.5, 1, 0)
      arrow.x <- c(arrow.x, abs(arrow.x - 0.1))
      
      # creates CI plot
      ggplot() + geom_line(aes(y = sort(rep(1:input$reps, 2)),  x = intervals,
                               group = sort(rep(1:input$reps, 2))), color = color) +
        scale_fill_continuous(color) +
        geom_vline(aes(xintercept = input$p)) +
        scale_x_continuous(name = "", limits = c(0, 1)) +
        scale_y_continuous(name = "Intervals", breaks = pretty_breaks(), limits = c(0.5, input$reps+0.5)) +
        plaintheme + axistheme +
        annotate("segment", x = arrow.x[1], xend = arrow.x[2],
                 y = as.numeric(input$sample_num),
                 yend = as.numeric(input$sample_num),
                 colour = arrow_color, size=1, arrow=arrow())
    }
  })
  
  # adds p hat for current selected sample
  output$sample_prop <-  renderUI({
    if (length(values$samples) > 0) {
      withMathJax(paste0("$$\\hat{p} =", values$samples[as.numeric(input$sample_num)], "$$"))
    }
  })
  
  # displays the percentage of CI's that contain the true input$p
  output$percentage <- renderText({
    if (length(values$color) > 0){
      contains <- values$color[1:input$reps]
      contains <- length(contains[contains == "black"])
      paste(contains, " of the ", input$reps, " intervals (", round(contains/input$reps*100, 1),
            "%) contain the true population proportion, p = ", input$p, sep = "")
    }
  })
  
}

shinyApp(ui = ui, server = server)