JoeToolFunctions.R 14.2 KB
Newer Older
Jalbert, Joe's avatar
Jalbert, Joe committed
1
2
3
4
#' Standard Error
#'
#' @param data The vector or dataset over which the standard error is calculated
#'
5
#' @return The calculated Standard Error
Jalbert, Joe's avatar
Jalbert, Joe committed
6
7
8
9
10
11
12
13
14
15
#' @importFrom stats sd
#' @export
#'
#' @examples
#' x=c(10, 20, 30, 40, 50)
#' se(x)
se=function(data){
  return(sd(data, na.rm=T)/sqrt(sum(!is.na(data))))
}

16
17
18
19
20
21
22
23
24
#' Simplifying Reports
#'
#' @param input 
#'
#' @return Simplified interacton reports
#' @import tidyverse
#' @export
#'
#' @examples
25
simplifyAppend <- function(input) {
Jalbert, Joe's avatar
Jalbert, Joe committed
26
27
  # x=match("Signif",colnames(input))+1
  # y=as.numeric(ncol(input))
28
29
  df=input%>%
    filter(!is.nan(Diff))%>%
Jalbert, Joe's avatar
Jalbert, Joe committed
30
31
    mutate(across(colnames(input)[match("Signif",colnames(input))+1]:colnames(input)[length(colnames(input))],
                  ~cut(.,breaks=c( 0,.001,.01,.05, .1,.999,1), labels=c("***" ,"**","*",".","NS","1" ))))%>%
Jalbert, Joe's avatar
Jalbert, Joe committed
32
    select(1:Signif,where(~sum((.%in%c("***" ,"**","*",".")))>0))
33
34
}

Jalbert, Joe's avatar
Jalbert, Joe committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#' Flexible Workbook Creation Function
#'
#' @param df The dataframe to be saved
#' @param filename Name of the file, including the xlsx extension
#' @param sheetBy Option to create sheets based on the values in a particular column of the dataframe
#' @param keepNames The function automatically renames the column names with \code{\link[stringr:case]{stringr::str_to_sentence()}}. Setting this to TRUE will leave your column names unaltered
#'
#' @return Creates a Excel workbook from the supplied dataframe
#' @import tidyverse
#' @import xlsx
#' @import lazyeval
#' @export
#'
#' @examples
#' library(datasets)
#' library(tidyverse)
#' library(xlsx)
#' data(iris)
#' wbsave(iris, "Iris report.xlsx")
#' wbsave(iris, "Iris report - by Species.xlsx", sheetBy="Species", keepNames=TRUE)
Jalbert, Joe's avatar
Jalbert, Joe committed
55
wbsave=function(df,filename,sheetBy=NULL,keepNames=TRUE){
Jalbert, Joe's avatar
Jalbert, Joe committed
56
57
58
  if(keepNames==FALSE) {names(df)=str_to_sentence(names(df))}
  if(!is.null(sheetBy)) {sheetBy=str_to_sentence(sheetBy)}
  wb=createWorkbook()
Jalbert, Joe's avatar
Jalbert, Joe committed
59
  df=as.data.frame(df)
Jalbert, Joe's avatar
Jalbert, Joe committed
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
  TABLE_COLNAMES_STYLE <- CellStyle(wb)+ Font(wb,  heightInPoints=11, color="#44546A", isBold=TRUE)+
    Border(color = "#8EA9DB", position = "BOTTOM", pen="BORDER_THICK")
  sheetAll=createSheet(wb,sheetName = "All")
  wbdata=NULL
  wbAll=NULL
  if(!is.null(sheetBy)) {l=levels(as.factor(df[[sheetBy]]))}
  if(!is.null(sheetBy)) {for (i in l){
    filter_criteria <- interp(~y == x, .values=list(y = as.name(sheetBy), x = i))
    wbdata=df%>%
      filter_(filter_criteria)
    wbAll[[i]]=wbdata
    sheet=createSheet(wb,sheetName = i)
    addDataFrame(wbdata,sheet, colnamesStyle = TABLE_COLNAMES_STYLE)
    autoSizeColumn(sheet, colIndex = 1:ncol(df)+1)
  }}
  addDataFrame(df,sheetAll, colnamesStyle = TABLE_COLNAMES_STYLE)
  autoSizeColumn(sheetAll, colIndex = 1:ncol(df)+1)
  saveWorkbook(wb, filename)
}

#' Creates a Report with t-tests for a Vector of Outcomes
#'
#' @param df Dataframe to use for the report
#' @param Measures A vector of string names for columns to take means for. Order will be used to make the report.
#' @param Factor A binary factor which will be used for comparisons.  Must be 1 (Treatment) or 0 (No_Treatment)
#' @param paired Options for running t-test.  "Yes" forces paired, t-tests, "No" assumes no pairing, and "Try" will try a paired t-test, and follow up with a unpaired t-test if it fails to run.
#'
#' @return
#' @import tidyverse
#' @export
#'
#'
#' @examples

94
report=function(df, Measures, Factor, paired=c("Try","Yes", "No")){
Jalbert, Joe's avatar
Jalbert, Joe committed
95
96
97
98
99
  output=df%>%
    group_by(.data[[Factor]])%>%
    summarise_at(vars(Measures), mean,na.rm=T)%>%
    mutate(Levels =ifelse(.data[[Factor]]==1, "Treatment", "No_Treatment"))%>%
    select(-.data[[Factor]])%>%
Jalbert, Joe's avatar
Jalbert, Joe committed
100
    pivot_longer(cols=Measures, names_to = "Measure")%>%
Jalbert, Joe's avatar
Jalbert, Joe committed
101
102
103
104
105
106
107
    pivot_wider(names_from = "Levels", values_from = "value")%>%
    mutate(Diff=Treatment-No_Treatment)%>%
    mutate(Measure=factor(.data$Measure,levels=Measures))%>%
    arrange(Measure)


  reportpvaluecount=0
Jalbert, Joe's avatar
Jalbert, Joe committed
108
  p=NULL
Jalbert, Joe's avatar
Jalbert, Joe committed
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  for(i in Measures){
    reportpvaluecount=reportpvaluecount+1
    form=paste0(i, "~",Factor)
    if(paired=="Yes"){p[reportpvaluecount]=as.numeric(try(t.test(formula=as.formula(form), data=df, paired=T)$p.value))}
    else if (paired=="No"){p[reportpvaluecount]=as.numeric(try(t.test(formula=as.formula(form), data=df)$p.value))}
    else {p[reportpvaluecount]=tryCatch(as.numeric(t.test(formula=as.formula(form), data=df, paired=T)$p.value), error=function(err)
      p[reportpvaluecount]=as.numeric(try(t.test(formula=as.formula(form), data=df)$p.value)), finally = NA)}

  }

  output$p_value=p

  output=output%>%
    mutate(Signif=ifelse(p_value<.001, "***",ifelse(p_value<.01, "**",
                                                    ifelse(p_value<.05, "*", ifelse(p_value<.1, ".", "")))))
}
Jalbert, Joe's avatar
Jalbert, Joe committed
125
126
127
128
129


#' Appends Interactions to a \code{\link[JoeTools:report]{report()}} Output Dataframe
#'
#' @param report The report dataframe that is being appended
130
131
#' @param df The original dataframe used for the report
#' @param Measures A vector of string names for the dependent variable. Order must be the same as the repor
Jalbert, Joe's avatar
Jalbert, Joe committed
132
133
134
135
136
137
138
139
#' @param Factor A binary factor (as a string) which will be used for comparisons.  Must be 1 (Treatment) or 0 (No_Treatment)
#' @param Interaction A factor column name (as a string) that will be used as an interacting independant variable.
#'
#' @return
#' @import tidyverse
#' @export
#'
#' @examples
Jalbert, Joe's avatar
Jalbert, Joe committed
140

141
appendInteraction=function(report,df, Measures,Factor,Interaction, Simplify=T){
Jalbert, Joe's avatar
Jalbert, Joe committed
142
  p=NULL
143
144
  anova=NULL
  aov_factors=as.vector(Interaction)
Jalbert, Joe's avatar
Jalbert, Joe committed
145
  for(i in Measures){
146
147
148
149
150
151
152
153
154
155
156
157
158
159
    form=paste0(i, "~",Factor,"*",paste(aov_factors, collapse="*"))
    anova[[i]]=NULL
    if(!is.na(tryCatch(summary(aov(as.formula(form),data=df)), error=function(err) NA))){
      
    
    anova[[i]]=summary(aov(as.formula(form),data=df))[[1]]%>%
      rownames_to_column("Interactions")%>%
      filter(grepl(paste0(Factor,":"), Interactions))%>%
      mutate(Interactions=str_remove(Interactions,paste0(Factor,":")))%>%
      mutate(Interactions=str_remove_all(Interactions, " "))%>%
      select(Interactions,pvalue=`Pr(>F)` )%>%
      pivot_wider(names_from = Interactions, values_from = "pvalue")%>%
      mutate(Measure=i)
    } 
Jalbert, Joe's avatar
Jalbert, Joe committed
160
  }
161
  anova=bind_rows(anova)
Jalbert, Joe's avatar
Jalbert, Joe committed
162
  if(nrow(anova)>0&(!is.null(anova))){
163
164
165
    output=report%>%
    left_join(anova, by="Measure")
    if(Simplify==T){
166
      output=simplifyAppend(output)
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
    }
  } else  {
    warning("Error in ANOVA, No comparison added.")
    output=report
  }
}

#' Plotting more than one plot together
#'
#' @param ... Name of each plot
#' @param plotlist A vector with plot names
#' @param file 
#' @param cols How many columns of plots you want in your chart
#' @param layout A matrix specifying the layout. If present, 'cols' is ignored.
#'
#' @return
#' @import grid
#' @export
#'
#' @examples
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
Jalbert, Joe's avatar
Jalbert, Joe committed
194
  
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
}

#' Force Flextable to Page
#'
#' @param ft Flextable object
#' @param pgwidth Customizable page width setting
#'
#' @return
#' @import flextable
#' @export
#'
#' @examples
FitFlextableToPage <- function(ft, pgwidth = 6){
  
  ft_out <- ft %>% autofit()
  
  ft_out <- width(ft_out, width = dim(ft_out)$widths*pgwidth /(flextable_dim(ft_out)$widths))
  return(ft_out)
}



#' Produce the Pairwise Comparisons found Significant in a Report
#'
#' @param Report Report object output from Report/appendInteraction
#' @param Data The original data used to make Report
#'
#' @return
#' @import tidyverse
#' @export
#'
#' @examples
pairwise <- function(Report, Data) {
  Ind=colnames(Report)[match("Diff",colnames(Report))-1]
  w=match("Measure",colnames(Report))+0
  x=match("Signif", colnames(Report))+1
  y=ncol(Report)+0
  z=w+1
  CompPrime=Report%>%
    select(1:w,x:y)%>%
    pivot_longer(z:ncol(.),names_to = "Factor", values_to = "p_value" )%>%
    filter(p_value%in%c(".","*","**","***"))%>%
    mutate(Row=row_number())
  comps=NULL
  for (i in 1:nrow(CompPrime)) {
    a=CompPrime%>%
      filter(Row==i)%>%
      select(-p_value, -Row)%>%
      pivot_longer(1:Factor,names_to = "Settings", values_to = "value")
    b=as.character(a$value)
    names(b)=a$Settings
    # c=Pairwise(data, "Treatment",b["Measure"],b["Factor"], b["Program"],b["Cohort"])
    c=Data%>%
      filter(if(b["Cohort"]!="All"){Cohort==b["Cohort"]} else {T})%>%
      filter(!is.na(!!b["Measure"]))%>%
      mutate_("Dependant"=b["Measure"], "Independant"=Ind, "Levels"=b["Factor"])%>%
      group_by(Independant, Levels)%>%
      summarize(Mean=mean(Dependant, na.rm=T),Count=n(), Measure=b["Measure"], Dep_Values=list(Dependant))%>%
      filter(Count>4)%>%
      select(-Count)%>%
      ungroup()%>%
      mutate(Independant=ifelse(Independant==1,"Treatment","No_Treatment"))%>%
      pivot_wider(names_from = Independant, values_from = c("Mean", "Dep_Values"))%>%
      rename(No_Treatment=Mean_No_Treatment, Treatment=Mean_Treatment)%>%
      filter(!is.na(No_Treatment)&!is.na(Treatment))%>%
      mutate(Diff=Treatment-No_Treatment)%>%
      group_by(Levels)%>%
      mutate(p_value=tryCatch({t.test(unlist(Dep_Values_No_Treatment), unlist(Dep_Values_Treatment))$p.value},
                              error=function(e) {NA}))%>%
      select(-Dep_Values_No_Treatment,-Dep_Values_Treatment)%>%
      mutate(Signif=ifelse(p_value<.001, "***",ifelse(p_value<.01, "**",
                                                      ifelse(p_value<.05, "*", ifelse(p_value<.1, ".", "")))))%>%
      mutate(Factor=b["Factor"], Cohort=b["Cohort"])
    comps[[i]]=c
  }
  comps=bind_rows(comps)%>%
    relocate(Cohort, Factor)%>%
    group_by(Cohort, Factor, Measure)%>%
    filter(min(p_value)<.05)%>%
    filter(max(row_number())>1)%>%
    ungroup()%>%
    mutate(Levels=str_replace_all(Levels, ":", " "),
           Factor=str_replace_all(Factor, ":"," X "))
  names(comps)[names(comps)=="Treatment"] <- Ind
  names(comps)[names(comps)=="No_Treatment"] <- "Matched_Cohort"
  comps
}


#' Creates a document including the Report and Pairwise comparisons produced above
#'
#' @param Report The report output by report/appendInteraction
#' @param Pairwise The output of the pairwise function created from Report
#' @param measureNames A named vector with cleaned names for Measures
#' @param factorNames A named vector with cleaned names for Factors
#' @param percentOutcomes A vector of Outcomes that should be percentages
#'
#' @return
#' @import tidyverse
#' @import officer
#' @import flextable
#' @export
#'
#' @examples
document <- function(Report, Pairwise, measureNames, factorNames, percentOutcomes) {
  x=match("Measure", colnames(Report))+1
  y=x+2
  z=x+4
  Report=Report%>%
    rename_with(~str_replace_all(.,"_"," "))
  Main=Report%>%
    filter(Cohort=="All")%>%
    mutate(across(x:y,~ifelse(Measure%in%percentOutcomes,percent(., accuracy=.1), round(.,3))))%>%
    mutate(Diff=ifelse(Diff>0, paste0("+",Diff), Diff))%>%
    mutate(Measure=fct_recode(Measure, !!!measureNames))%>%
    select(1:z, -`p value`, -Cohort, -Program)%>%
    flextable()%>%
    theme_vanilla()%>%
    bold(j=~Diff)%>%
    color(j=~Diff, color="Black")%>%
    color(~str_detect(Diff,"\\+"), ~Diff,color="#7C997C" )%>%
    color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"\\+"), ~Diff, color="#0db14b")%>%
    color(~str_detect(Diff,"-"), ~Diff,color="#93736C")%>%
    color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"-"), ~Diff, color="Red")%>%
    void(~Signif, part='all')%>%
    FitFlextableToPage()
  
  Cohorts=Report%>%
    filter(Cohort!="All")%>%
    mutate(across(x:y,~ifelse(Measure%in%percentOutcomes,percent(., accuracy=.1), round(.,3))))%>%
    mutate(Diff=ifelse(Diff==0,Diff,ifelse(Diff>0, paste0("+",Diff),Diff)))%>%
    mutate(Measure=fct_recode(Measure, !!!measureNames))%>%
    select(1:z, -`p value`, -Program)%>%
    flextable()%>%
    merge_v(j=~Cohort)%>%
    theme_vanilla()%>%
    bold(j=~Diff)%>%
    color(j=~Diff, color="Black")%>%
    color(~str_detect(Diff,"\\+"), ~Diff,color="#7C997C" )%>%
    color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"\\+"), ~Diff, color="#0db14b")%>%
    color(~str_detect(Diff,"-"), ~Diff,color="#93736C")%>%
    color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"-"), ~Diff, color="Red")%>%
    void(~Signif, part='all')%>%
    FitFlextableToPage()
  
  doc=read_docx()%>%
    body_add_flextable(Main)%>%
    body_add_par("Break")%>%
    body_add_flextable(Cohorts)
  
  for (k in levels(as.factor(Pairwise$Factor))) {
    Pairwise_Report=NULL
    x=match("Measure", colnames(Pairwise))+1
    y=x+2
    z=x+4
    Pairwise_Report=Pairwise%>%
      rename_with(~str_replace_all(.,"_"," "))%>%
      mutate(across(x:y,~ifelse(Measure%in%percentOutcomes,percent(., accuracy=.1), round(.,3))))%>%
      mutate(Diff=ifelse(Diff>0, paste0("+",Diff), Diff))%>%
      mutate(Measure=fct_recode(Measure, !!!measureNames))%>%
      filter(Factor==k)%>%
      select(-Factor, -`p value`)%>%
      rename(!!k:=Levels)%>%
      arrange(Cohort, Measure)%>%
      relocate(Cohort, Measure)%>%
      flextable()%>%
      merge_v(j=~Cohort)%>%
      merge_v(j=~Measure)%>%
      theme_vanilla()%>%
      bold(j=~Diff)%>%
      color(j=~Diff, color="Black")%>%
      color(~str_detect(Diff,"\\+"), ~Diff,color="#7C997C" )%>%
      color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"\\+"), ~Diff, color="#0db14b")%>%
      color(~str_detect(Diff,"-"), ~Diff,color="#93736C")%>%
      color(~Signif%in%c(".","*","**", "***")&str_detect(Diff,"-"), ~Diff, color="Red")%>%
      void(~Signif, part='all')%>%
      FitFlextableToPage()
    
    doc=body_add_par(doc,"Break")%>%
      body_add_flextable(Pairwise_Report)
  }
  doc
}