-
Manski, Scott authoredManski, Scott authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
app.R 14.05 KiB
# ------------------------------------------------------------------------------
# File: ConfidenceIntervalApp.R
# Authors: Camille Fairbourn, Scott Manski
# Date: 05/03/2019
# Desc: This app allows users to visualize the repeated confidence intervals
# for a proportion. The true proportion, confidence level, sample size,
# and the number of samples can all be chosen by the user.
# Published Location:
# Email: fairbour@msu.edu, manskisc@msu.edu
#
# For questions or concerns, please email the authors. This work is licensed
# under a Creative Commons Attribution-ShareAlike 4.0 International License
# (https://creativecommons.org/licenses/by-sa/4.0/).
# ------------------------------------------------------------------------------
# loading packages
library(shiny)
library(ggplot2)
library(dplyr)
library(png)
library(grid)
library(shinydashboard)
library(scales)
# Sources objects, functions, etc, from ConfidenceIntervalSource.R
source("www/ConfidenceIntervalSource.R")
# defines colors for plots
ball_colors <- c("#E69F00", "#009E73")
CI_contains_p <- "black"
CI_not_contain_p <- "red"
arrow_color <- "blue"
ui <- dashboardPage(
dashboardHeader(title = "Confidence Interval", titleWidth = 350),
dashboardSidebar(width = 350,
sliderInput("p", "Population Proportion", value = 0.5, min = 0.2, max = 0.8, step = 0.01),
sliderInput("nsize", "Sample Size", value = 100, min = 50, max = 150, step = 10),
sliderInput("conf", "Confidence Level", value=95, min=1, max=99, step = 1),
sliderInput("reps", "How many intervals to draw", value = 50, min = 1, max = 100, step = 1),
hr(),
p("The application will draw samples of the size you specify
and visually represent the confidence interval based on each
sample. If the confidence interval does not include the true
population proportion, the interval will be colored red."),
p("Try different values for both the sample size and the confidence
level. Notice how the width of the intervals changes for different
sample sizes and confidence levels. Does a larger sample size give
narrower or wider intervals?"),
hr(),
p("Written by Scott Manski"),
p("This work is licensed under a "),
a(href="http://creativecommons.org/licenses/by-sa/4.0/",
"Creative Commons Attribution-ShareAlike 4.0 International License")
),
dashboardBody(
tags$head(tags$style(CSS_style)),
fluidRow(column(verticalLayout(
actionButton("new_samp", "Draw New Samples"),
selectInput("visual", "Visualization", choices = c("Gumballs", "Balls", "Cards")),
plotOutput("pop", width = "400px")), width = 6),
column(verticalLayout(
conditionalPanel("output.showSamples",
column(selectInput("sample_num", "Sample Number", choices = ""),
uiOutput("sample_prop"), width = 12)),
plotOutput("sample", width = "400px")), width = 6)),
plotOutput("interval_plot"),
h3(textOutput("percentage"))
)
)
# Define a server for the Shiny app
server <-function(input, output, session) {
# initialize values for use in server
values <- reactiveValues()
values$population <- vector()
values$color <- vector()
values$sample_values <- list()
# show selectInput for 'Sample Number' but only when there are samples
output$showSamples <- reactive({
updateSelectInput(session, "sample_num", choices = 1:length(values$sample_values))
length(values$sample_values) > 0
})
outputOptions(output, "showSamples", suspendWhenHidden = FALSE)
# function for creating a new population based on input$p
new_population <- reactive({
population <- c(rep(1, round(10000*input$p)), rep(0, 10000-round(10000*input$p)))
population[sample(1:10000, 10000)]
})
# population visualization
output$pop <- renderPlot({
values$population <- new_population()
if (length(values$population) > 0){
if (input$visual == "Balls") {
point_colors <- as.character(values$population)
x <- runif(length(values$population), 0, 1)
y <- runif(length(values$population), 0, 1)
balls <- data.frame(x, y)
# creates rectangle with colored balls inside
visual <- ggplot() + geom_point(data = balls, aes(x, y, fill = point_colors), colour = "black", pch = 21, size = 5) +
scale_fill_manual(values = ball_colors) +
geom_polygon(aes(x = c(-0.01, -0.01, 1.01, 1.01), y = c(-0.01, 1.01, 1.01, -0.01), fill = NA), color = "white", size = 4) +
geom_polygon(aes(x = c(0, 0, 1, 1), y = c(0, 1, 1, 0), fill = NA), color = "black", size = 2) +
scale_x_continuous(limits = c(-0.1, 1.01)) +
theme(legend.position = "none",
line = element_blank(),
text = element_blank(),
title = element_blank(),
panel.background = element_blank())
} else if (input$visual == "Gumballs") {
# create gumball locations for gumball machine based on the population
point_colors <- as.character(values$population)
x <- runif(length(values$population), 0.265, 0.7355)
y <- unlist(lapply(1:length(values$population), function(i){
runif(1, 0.725-1*sqrt(0.23525^2-(0.50025-x[i])^2), min(0.87, 0.725+1*sqrt(0.23525^2-(0.50025-x[i])^2)))
}))
gumballs <- data.frame(x, y, point_colors, stringsAsFactors = FALSE)
# creates the gumball machine population visual
visual <- ggplot() + annotation_custom(gumball_machine, xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
geom_point(data = gumballs, aes(x, y, fill = point_colors), colour = "black", pch = 21, size = 5) +
scale_fill_manual(values = ball_colors) +
scale_x_continuous(limits = c(0, 1)) +
scale_y_continuous(limits = c(0, 1)) +
theme(legend.position = "none",
line = element_blank(),
text = element_blank(),
title = element_blank(),
panel.background = element_blank())
} else {
visual <- ggplot(data = NULL)
# adds 100 cards to the visual
for (i in 1:100) {
card <- sample(c(0, 1), 1, prob = c(input$p, 1-input$p))
x <- runif(1, 0, 0.5)
y <- runif(1, 0, 0.4)
if (card == 1) {
visual <- visual +
annotation_custom(red_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
} else {
visual <- visual +
annotation_custom(blue_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
}
}
# creates the card population visual
visual <- visual + scale_x_continuous(limits = c(0, 0.6)) +
scale_y_continuous(limits = c(0, 0.6)) +
theme(legend.position = "none",
line = element_blank(),
text = element_blank(),
title = element_blank(),
panel.background = element_blank())
}
visual
}
})
# creates selected sample visualization
output$sample <- renderPlot({
if (length(values$sample_values) > 0) {
if (input$visual == "Cards") {
# get the card locations for the sample
sample_locs <- getCardSampleLocations(values$sample_values[[as.numeric(input$sample_num)]])
# calculate the breaks and labels for y axis for sample visual
breaks <- seq(0, max(sample_locs$y), by = 1/4)
if (max(breaks) != max(sample_locs$y)) {
breaks <- c(breaks, max(breaks)+1/4)
}
breaks <- breaks + 0.1
labels <- seq(0, length(unique(sample_locs$x))/2 * (length(breaks)-1),
by = length(unique(sample_locs$x))/2)
visual <- ggplot(data = NULL)
# add each card to sample visual based on sample_locs
for (i in 1:nrow(sample_locs)) {
card <- sample_locs$color[i]
x <- sample_locs$x[i]
y <- sample_locs$y[i]
if (card == 0) {
visual <- visual +
annotation_custom(red_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
} else {
visual <- visual +
annotation_custom(blue_back, xmin = x, xmax = x + 0.1, ymin = y, ymax = y + 0.2)
}
}
# creates card sample visual
visual + scale_y_continuous(name = "Counts", limits = c(mean(breaks[1:2]), max(breaks)+1/8),
breaks = breaks, labels = labels, position = "right") +
scale_x_continuous(limits = c(0, 1)) +
theme(legend.position = "none",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.y = element_line(size = 1),
axis.title.y = element_text(color = "black", size = 20),
axis.text.y = element_text(size = 16),
axis.line.x = element_line(color="black", size = 1),
axis.line.y = element_line(color="black", size = 1),
panel.background = element_blank())
} else {
# get the point locations for the sample
sample_locs <- getSampleLocations(values$sample_values[[as.numeric(input$sample_num)]])
# calculate the breaks and labels for y axis for sample visual
breaks <- seq(0, max(sample_locs$y), by = 1/8)
if (max(breaks) != max(sample_locs$y)) {
breaks <- c(breaks, max(breaks)+1/8)
}
labels <- seq(0, length(unique(sample_locs$x))/2 * (length(breaks)-1),
by = length(unique(sample_locs$x))/2)*4
# creates ball sample visual
ggplot() + geom_point(data = sample_locs, aes(x = x, y = y, fill = as.character(color)),
colour = "black", pch = 21, size = 5) +
scale_fill_manual(values = ball_colors) +
scale_y_continuous(name = "Counts", limits = c(0, max(sample_locs$y)+1/16),
breaks = breaks, labels = labels, position = "right") +
theme(legend.position = "none",
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.y = element_line(size = 1),
axis.title.y = element_text(color = "black", size = 20),
axis.text.y = element_text(size = 16),
axis.line.x = element_line(color="black", size = 1),
axis.line.y = element_line(color="black", size = 1),
panel.background = element_blank())
}
}
})
# take input$reps number of samples of size input$nsize from the population
# and calculate the sample mean
observeEvent(input$new_samp, {
values$samples <- unlist(lapply(1:input$reps, function(x) {
values$sample_values[[x]] <- sample(values$population, input$nsize)
mean(values$sample_values[[x]])
}))
})
# if the input$reps is increased, simply calculate more sample means and add them
# to curent sample means.
observeEvent(input$reps, {
if (input$reps > length(values$samples)) {
values$samples <- c(values$samples, unlist(lapply(1:(input$reps-length(values$samples)), function(x) {
values$sample_values[[length(values$samples) + x]] <- sample(values$population, input$nsize)
mean(values$sample_values[[length(values$samples) + x]])
})))
}
})
# if input$p or input$nsize change, reset the samples
observeEvent(c(input$p, input$nsize), {
values$samples <- vector()
values$sample_values <- list()
values$color <- vector()
})
# show the confidence intervals for each sample
output$interval_plot <- renderPlot({
if (length(values$samples) > 0){
# calculate the lower and upper bounds for the CI for each sample
intervals <- lapply(1:input$reps, function(i){
p <- values$samples[i]
c(p-qnorm((1+input$conf/100)/2)*sqrt(p*(1-p)/input$nsize),
p+qnorm((1+input$conf/100)/2)*sqrt(p*(1-p)/input$nsize))
})
intervals <- do.call(c, intervals)
# round off CI bound if it is more than 1 or less than 0
intervals <- ifelse(abs(intervals - 0.5) >= 0.5, round(intervals), intervals)
# change CI line color depending on if the CI contains the true input$p
color <- vector()
for (i in 1:input$reps){
interval <- intervals[(2*(i-1) + 1):(2*i)]
values$color[i] <- ifelse(interval[1] <= input$p & interval[2] >= input$p, CI_contains_p, CI_not_contain_p)
color[(2*(i-1) + 1):(2*i)] <- values$color[i]
}
# defines arrow for selected sample
arrow.x <- ifelse(input$p < 0.5, 1, 0)
arrow.x <- c(arrow.x, abs(arrow.x - 0.1))
# creates CI plot
ggplot() + geom_line(aes(y = sort(rep(1:input$reps, 2)), x = intervals,
group = sort(rep(1:input$reps, 2))), color = color) +
scale_fill_continuous(color) +
geom_vline(aes(xintercept = input$p)) +
scale_x_continuous(name = "", limits = c(0, 1)) +
scale_y_continuous(name = "Intervals", breaks = pretty_breaks(), limits = c(0.5, input$reps+0.5)) +
plaintheme + axistheme +
annotate("segment", x = arrow.x[1], xend = arrow.x[2],
y = as.numeric(input$sample_num),
yend = as.numeric(input$sample_num),
colour = arrow_color, size=1, arrow=arrow())
}
})
# adds p hat for current selected sample
output$sample_prop <- renderUI({
if (length(values$samples) > 0) {
withMathJax(paste0("$$\\hat{p} =", values$samples[as.numeric(input$sample_num)], "$$"))
}
})
# displays the percentage of CI's that contain the true input$p
output$percentage <- renderText({
if (length(values$color) > 0){
contains <- values$color[1:input$reps]
contains <- length(contains[contains == "black"])
paste(contains, " of the ", input$reps, " intervals (", round(contains/input$reps*100, 1),
"%) contain the true population proportion, p = ", input$p, sep = "")
}
})
}
shinyApp(ui = ui, server = server)