diff --git a/radiant.data/R/transform.R b/radiant.data/R/transform.R
index 4a84ee40ec8fd6541fc9e0960e12feae8de9c255..fb3b77a5174bf5e6b7d8dfb109151815c9b03a68 100644
--- a/radiant.data/R/transform.R
+++ b/radiant.data/R/transform.R
@@ -1,764 +1,764 @@
-#' Center
-#' @param x Input variable
-#' @param na.rm If TRUE missing values are removed before calculation
-#' @return If x is a numeric variable return x - mean(x)
-#' @export
-center <- function(x, na.rm = TRUE) {
- if (is.numeric(x)) {
- x - mean(x, na.rm = na.rm)
- } else {
- x
- }
-}
-
-#' Standardize
-#' @param x Input variable
-#' @param na.rm If TRUE missing values are removed before calculation
-#' @return If x is a numeric variable return (x - mean(x)) / sd(x)
-#' @export
-standardize <- function(x, na.rm = TRUE) {
- if (is.numeric(x)) {
- x_sd <- sd(x, na.rm = na.rm)
- x <- x - mean(x, na.rm = na.rm)
- if (isTRUE(x_sd > 0)) {
- x / x_sd
- } else {
- x
- }
- } else {
- x
- }
-}
-
-#' Calculate square of a variable
-#' @param x Input variable
-#' @return x^2
-#' @export
-square <- function(x) x^2
-
-#' Calculate inverse of a variable
-#' @param x Input variable
-#' @return 1/x
-#' @export
-inverse <- function(x) {
- if (is.numeric(x)) 1 / x else x
-}
-
-#' Normalize a variable x by a variable y
-#' @param x Input variable
-#' @param y Normalizing variable
-#' @return x/y
-#' @export
-normalize <- function(x, y) {
- if (is.numeric(x) && is.numeric(y)) x / y else x
-}
-
-#' Convert input in month-day-year format to date
-#' @details Use as.character if x is a factor
-#' @param x Input variable
-#' @return Date variable of class Date
-#' @examples
-#' as_mdy("2-1-2014")
-#' \dontrun{
-#' as_mdy("2-1-2014") %>% month(label = TRUE)
-#' as_mdy("2-1-2014") %>% week()
-#' as_mdy("2-1-2014") %>% wday(label = TRUE)
-#' }
-#' @export
-as_mdy <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(mdy(x)) %>% as.Date()
-}
-
-#' Convert input in day-month-year format to date
-#' @param x Input variable
-#' @return Date variable of class Date
-#' @examples
-#' as_dmy("1-2-2014")
-#'
-#' @export
-as_dmy <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(dmy(x)) %>% as.Date()
-}
-
-#' Convert input in year-month-day format to date
-#' @param x Input variable
-#' @return Date variable of class Date
-#' @examples
-#' as_ymd("2013-1-1")
-#'
-#' @export
-as_ymd <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(ymd(x)) %>% as.Date()
-}
-
-# http://www.noamross.net/blog/2014/2/10/using-times-and-dates-in-r---presentation-code.html
-#' Convert input in year-month-day-hour-minute-second format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_ymd_hms("2014-1-1 12:15:01")
-#' \dontrun{
-#' as_ymd_hms("2014-1-1 12:15:01") %>% as.Date()
-#' as_ymd_hms("2014-1-1 12:15:01") %>% month()
-#' as_ymd_hms("2014-1-1 12:15:01") %>% hour()
-#' }
-#' @export
-as_ymd_hms <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(ymd_hms(x))
-}
-
-#' Convert input in year-month-day-hour-minute format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_ymd_hm("2014-1-1 12:15")
-#' @export
-as_ymd_hm <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(parse_date_time(x, "%Y%m%d %H%M"))
-}
-
-#' Convert input in month-day-year-hour-minute-second format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_mdy_hms("1-1-2014 12:15:01")
-#' @export
-as_mdy_hms <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(parse_date_time(x, "%m%d%Y %H%M%S"))
-}
-
-#' Convert input in month-day-year-hour-minute format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_mdy_hm("1-1-2014 12:15")
-#' @export
-as_mdy_hm <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(parse_date_time(x, "%m%d%Y %H%M"))
-}
-
-#' Convert input in day-month-year-hour-minute-second format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_mdy_hms("1-1-2014 12:15:01")
-#' @export
-as_dmy_hms <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(parse_date_time(x, "%d%m%Y %H%M%S"))
-}
-
-#' Convert input in day-month-year-hour-minute format to date-time
-#' @param x Input variable
-#' @return Date-time variable of class Date
-#' @examples
-#' as_mdy_hm("1-1-2014 12:15")
-#' @export
-as_dmy_hm <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(parse_date_time(x, "%d%m%Y %H%M"))
-}
-
-#' Convert input in hour-minute-second format to time
-#' @param x Input variable
-#' @return Time variable of class Period
-#' @examples
-#' as_hms("12:45:00")
-#' \dontrun{
-#' as_hms("12:45:00") %>% hour()
-#' as_hms("12:45:00") %>% second()
-#' }
-#' @export
-as_hms <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(hms(x))
-}
-
-#' Convert input in hour-minute format to time
-#' @param x Input variable
-#' @return Time variable of class Period
-#' @examples
-#' as_hm("12:45")
-#' \dontrun{
-#' as_hm("12:45") %>% minute()
-#' }
-#' @export
-as_hm <- function(x) {
- if (is.factor(x)) x <- as.character(x)
- sshhr(hm(x))
-}
-
-#' Convert variable to integer avoiding potential issues with factors
-#' @param x Input variable
-#' @return Integer
-#' @examples
-#' as_integer(rnorm(10))
-#' as_integer(letters)
-#' as_integer(as.factor(5:10))
-#' as.integer(as.factor(5:10))
-#' as_integer(c("a", "b"))
-#' as_integer(c("0", "1"))
-#' as_integer(as.factor(c("0", "1")))
-#'
-#' @export
-as_integer <- function(x) {
- if (is.factor(x)) {
- int <- sshhr(levels(x) %>% .[x] %>% as.integer())
- if (length(na.omit(int)) == 0) as.integer(x) else int
- } else if (is.character(x)) {
- int <- sshhr(as.integer(x))
- if (length(na.omit(int)) == 0) as_integer(as.factor(x)) else int
- } else {
- as.integer(x)
- }
-}
-
-#' Convert variable to numeric avoiding potential issues with factors
-#' @param x Input variable
-#' @return Numeric
-#' @examples
-#' as_numeric(rnorm(10))
-#' as_numeric(letters)
-#' as_numeric(as.factor(5:10))
-#' as.numeric(as.factor(5:10))
-#' as_numeric(c("a", "b"))
-#' as_numeric(c("3", "4"))
-#' as_numeric(as.factor(c("3", "4")))
-#'
-#' @export
-as_numeric <- function(x) {
- if (is.factor(x)) {
- num <- sshhr(levels(x) %>% .[x] %>% as.numeric())
- if (length(na.omit(num)) == 0) as.numeric(x) else num
- } else if (is.character(x)) {
- num <- sshhr(as.numeric(x))
- if (length(na.omit(num)) == 0) as_numeric(as.factor(x)) else num
- } else {
- as.numeric(x)
- }
-}
-
-#' Wrapper for factor with ordered = FALSE
-#' @param x Input vector
-#' @param ordered Order factor levels (TRUE, FALSE)
-#' @export
-as_factor <- function(x, ordered = FALSE) factor(x, ordered = ordered)
-
-#' Wrapper for as.character
-#' @param x Input vector
-#' @export
-as_character <- function(x) as.character(x)
-
-#' Wrapper for lubridate's as.duration function. Result converted to numeric
-#' @param x Time difference
-#' @export
-as_duration <- function(x) as.numeric(lubridate::as.duration(x))
-
-#' Distance in kilometers or miles between two locations based on lat-long
-#' Function based on \url{http://www.movable-type.co.uk/scripts/latlong.html}. Uses the haversine formula
-#' @param long1 Longitude of location 1
-#' @param lat1 Latitude of location 1
-#' @param long2 Longitude of location 2
-#' @param lat2 Latitude of location 2
-#' @param unit Measure kilometers ("km", default) or miles ("miles")
-#' @param R Radius of the earth
-#' @return Distance between two points
-#' @examples
-#' as_distance(32.8245525, -117.0951632, 40.7033127, -73.979681, unit = "km")
-#' as_distance(32.8245525, -117.0951632, 40.7033127, -73.979681, unit = "miles")
-#'
-#' @export
-as_distance <- function(lat1, long1, lat2, long2,
- unit = "km", R = c("km" = 6371, "miles" = 3959)[[unit]]) {
- rad <- pi / 180
- d1 <- lat1 * rad
- d2 <- lat2 * rad
- dlat <- (lat2 - lat1) * rad
- dlong <- (long2 - long1) * rad
- a <- sin(dlat / 2)^2 + cos(d1) * cos(d2) * sin(dlong / 2)^2
- c <- 2 * atan2(sqrt(a), sqrt(1 - a))
- R * c
-}
-
-#' Generate a variable used to selected a training sample
-#' @param n Number (or fraction) of observations to label as training
-#' @param nr Number of rows in the dataset
-#' @param blocks A vector to use for blocking or a data.frame from which to construct a blocking vector
-#' @param seed Random seed
-#'
-#' @return 0/1 variables for filtering
-#'
-#' @importFrom randomizr complete_ra block_ra
-#'
-#' @examples
-#' make_train(.5, 10)
-#' make_train(.5, 10) %>% table()
-#' make_train(100, 1000) %>% table()
-#' make_train(.15, blocks = mtcars$vs) %>% table() / nrow(mtcars)
-#' make_train(.10, blocks = iris$Species) %>% table() / nrow(iris)
-#' make_train(.5, blocks = iris[, c("Petal.Width", "Species")]) %>% table()
-#'
-#' @export
-make_train <- function(n = .7, nr = NULL, blocks = NULL, seed = 1234) {
- seed <- gsub("[^0-9]", "", seed)
- if (!is.empty(seed)) set.seed(seed)
-
- if (is.empty(nr) && is.empty(blocks)) {
- stop("Please provided the number of rows in the data (nr) or a vector with blocking information (blocks)")
- } else if (is.data.frame(blocks)) {
- blocks <- do.call(paste, c(blocks, sep = "-"))
- nr <- length(blocks)
- } else if (is.vector(blocks)) {
- nr <- length(blocks)
- }
-
- if (n > 1) n <- n / nr
-
- if (length(blocks) > 0) {
- randomizr::block_ra(blocks, prob = n)
- } else {
- randomizr::complete_ra(N = nr, prob = n)
- }
-}
-
-#' Add transformed variables to a data frame with the option to include a custom variable name extension
-#'
-#' @details Wrapper for dplyr::mutate_at that allows custom variable name extensions
-#'
-#' @param .tbl Data frame to add transformed variables to
-#' @param .funs Function(s) to apply (e.g., log)
-#' @param ... Variables to transform
-#' @param .ext Extension to add for each variable
-#' @param .vars A list of columns generated by dplyr::vars(), or a character vector of column names, or a numeric vector of column positions.
-#'
-#' @examples
-#' mutate_ext(mtcars, .funs = log, mpg, cyl, .ext = "_ln")
-#' mutate_ext(mtcars, .funs = log, .ext = "_ln")
-#' mutate_ext(mtcars, .funs = log)
-#' mutate_ext(mtcars, .funs = log, .ext = "_ln", .vars = vars(mpg, cyl))
-#'
-#' @export
-mutate_ext <- function(.tbl, .funs, ..., .ext = "", .vars = c()) {
- if (length(.vars) == 0) {
- ## from https://stackoverflow.com/a/35317870/1974918
- .vars <- sapply(substitute(list(...))[-1], deparse)
- if (length(.vars) == 0) {
- .vars <- colnames(.tbl)
- }
- }
-
- if (is.empty(.ext)) {
- dplyr::mutate_at(.tbl, .vars = .vars, .funs = .funs) %>%
- set_rownames(rownames(.tbl))
- } else {
- new <- gsub("^~", "", .vars) %>% paste0(., .ext)
- .tbl[, new] <- transmute_at(.tbl, .vars = .vars, .funs = .funs) %>%
- set_colnames(new)
- .tbl
- }
-}
-
-#' Split a numeric variable into a number of bins and return a vector of bin numbers
-#'
-#' @param x Numeric variable
-#' @param n number of bins to create
-#' @param rev Reverse the order of the bin numbers
-#' @param type An integer between 1 and 9 to select one of the quantile algorithms described in the help for the stats::quantile function
-#'
-#' @seealso See \link[stats]{quantile} for a description of the different algorithm types
-#'
-#' @examples
-#' xtile(1:10, 5)
-#' xtile(1:10, 5, rev = TRUE)
-#' xtile(c(rep(1, 6), 7:10), 5)
-#'
-#' @export
-xtile <- function(x, n = 5, rev = FALSE, type = 7) {
- if (!is.numeric(x)) {
- stop(paste0("The variable to bin must be of type {numeric} but is of type {", class(x)[1], "}"), call. = FALSE)
- } else if (n < 1) {
- stop(paste0("The number of bins must be > 1 but is ", n), call. = FALSE)
- } else if (length(x) < n) {
- stop(paste("The number of bins to create is larger than\nthe number of data points. Perhaps you grouped the data before\ncalling the xtile function and the number of observations per\ngroup is too small"), call. = FALSE)
- } else if (type < 1 || type > 9) {
- stop(paste("The value for type is", type, "but must be between 1 and 9"), call. = FALSE)
- }
-
- breaks <- quantile(x, prob = seq(0, 1, length = n + 1), na.rm = TRUE, type = type)
- if (length(breaks) < 2) stop(paste("Insufficient variation in x to construct", n, "breaks"), call. = FALSE)
- bins <- .bincode(x, breaks, include.lowest = TRUE)
-
- if (rev) as.integer((n + 1) - bins) else bins
-}
-
-#' Show all rows with duplicated values (not just the first or last)
-#'
-#' @details If an entire row is duplicated use "duplicated" to show only one of the duplicated rows. When using a subset of variables to establish uniqueness it may be of interest to show all rows that have (some) duplicate elements
-#'
-#' @param .tbl Data frame to add transformed variables to
-#' @param ... Variables used to evaluate row uniqueness
-#'
-#' @examples
-#' bind_rows(mtcars, mtcars[c(1, 5, 7), ]) %>%
-#' show_duplicated(mpg, cyl)
-#' bind_rows(mtcars, mtcars[c(1, 5, 7), ]) %>%
-#' show_duplicated()
-#'
-#' @export
-show_duplicated <- function(.tbl, ...) {
- .vars <- sapply(substitute(list(...))[-1], deparse)
- if (length(.vars) == 0 || length(unique(.vars)) == ncol(.tbl)) {
- filter(.tbl, duplicated(.tbl))
- } else {
- .tbl %>%
- group_by_at(.vars = .vars) %>%
- filter(n() > 1) %>%
- mutate(nr_dup = 1:n()) %>%
- arrange_at(.vars = .vars) %>%
- ungroup()
- }
-}
-
-#' Weighted standard deviation
-#'
-#' @details Calculate weighted standard deviation
-#'
-#' @param x Numeric vector
-#' @param wt Numeric vector of weights
-#' @param na.rm Remove missing values (default is TRUE)
-#'
-#' @export
-weighted.sd <- function(x, wt, na.rm = TRUE) {
- if (na.rm) {
- ind <- is.na(x) | is.na(wt)
- x <- x[!ind]
- wt <- wt[!ind]
- }
- wt <- wt / sum(wt)
- wm <- weighted.mean(x, wt)
- sqrt(sum(wt * (x - wm)^2))
-}
-
-#' Create data.frame summary
-#'
-#' @details Used in Radiant's Data > Transform tab
-#'
-#' @param dataset Data.frame
-#' @param dc Class for each variable
-#' @param dec Number of decimals to show
-#'
-#' @export
-get_summary <- function(dataset, dc = get_class(dataset), dec = 3) {
- isFct <- "factor" == dc
- isNum <- dc %in% c("numeric", "integer", "Duration")
- isDate <- "date" == dc
- isChar <- "character" == dc
- isLogic <- "logical" == dc
- isPeriod <- "period" == dc
- isTs <- "ts" == dc
-
- if (sum(isNum) > 0) {
- cn <- names(dc)[isNum]
-
- cat("Summarize numeric variables:\n")
- select(dataset, which(isNum)) %>%
- gather("variable", "values", !!cn, factor_key = TRUE) %>%
- group_by_at(.vars = "variable") %>%
- summarise_all(
- list(
- n_obs = n_obs,
- n_missing = n_missing,
- n_distinct = n_distinct,
- mean = mean,
- median = median,
- min = min,
- max = max,
- p25 = p25,
- p75 = p75,
- sd = sd,
- se = se
- ),
- na.rm = TRUE
- ) %>%
- data.frame(check.names = FALSE, stringsAsFactors = FALSE) %>%
- format_df(dec = dec, mark = ",") %>%
- set_colnames(c("", colnames(.)[-1])) %>%
- print(row.names = FALSE)
- cat("\n")
- }
-
- if (sum(isTs) > 0) {
- cn <- names(dc)[isTs]
-
- cat("Summarize time-series variables:\n")
- lapply(
- select(dataset, which(isTs)),
- function(x) {
- as.data.frame(x) %>%
- summarise_all(
- list(
- n_obs = n_obs,
- n_missing = n_missing,
- n_distinct = n_distinct,
- mean = mean,
- median = median,
- min = min,
- max = max,
- start = ~ attr(., "tsp")[1] %>% round(dec),
- end = ~ attr(., "tsp")[2] %>% round(dec),
- frequency = ~ attr(., "tsp")[3] %>% as.integer()
- ),
- na.rm = TRUE
- )
- }
- ) %>%
- bind_rows() %>%
- data.frame(check.names = FALSE, stringsAsFactors = FALSE) %>%
- data.frame(.vars = cn, .) %>%
- format_df(dec = 3, mark = ",") %>%
- set_colnames(c("", colnames(.)[-1])) %>%
- print(row.names = FALSE)
- cat("\n")
- }
-
- if (sum(isFct) > 0) {
- cat("Summarize factors:\n")
- select(dataset, which(isFct)) %>%
- summary(maxsum = 20) %>%
- print()
- cat("\n")
- }
-
- if (sum(isDate) > 0) {
- cat("Earliest dates:\n")
- select(dataset, which(isDate)) %>%
- summarise_all(min) %>%
- as.data.frame(stringsAsFactors = FALSE) %>%
- print(row.names = FALSE)
- cat("\nFinal dates:\n")
- select(dataset, which(isDate)) %>%
- summarise_all(max) %>%
- as.data.frame(stringsAsFactors = FALSE) %>%
- print(row.names = FALSE)
-
- cat("\n")
- }
-
- if (sum(isPeriod) > 0) {
- max_time <- function(x) sort(x) %>% tail(1)
- min_time <- function(x) sort(x) %>% head(1)
-
- cat("Earliest time:\n")
- select(dataset, which(isPeriod)) %>%
- summarise_all(min_time) %>%
- as.data.frame(stringsAsFactors = FALSE) %>%
- print(row.names = FALSE)
- cat("\nFinal time:\n")
- select(dataset, which(isPeriod)) %>%
- summarise_all(max_time) %>%
- as.data.frame(stringsAsFactors = FALSE) %>%
- print(row.names = FALSE)
- cat("\n")
- }
-
- if (sum(isChar) > 0) {
- ## finding unique elements can be slow for large files
- if (nrow(dataset) < 10^5) {
- cat("Summarize character variables (< 20 unique values shown):\n")
- select(dataset, which(isChar)) %>%
- lapply(unique) %>%
- (function(x) {
- for (i in names(x)) {
- cat(i, paste0("(n_distinct ", length(x[[i]]), "): "), x[[i]][1:min(20, length(x[[i]]))], "\n")
- }
- })
- } else {
- cat("Summarize character variables (< 20 values shown):\n")
- select(dataset, which(isChar)) %>%
- (function(x) {
- for (i in names(x)) {
- cat(i, ":", x[[i]][1:min(20, length(x[[i]]))], "\n")
- }
- })
- }
- cat("\n")
- }
- if (sum(isLogic) > 0) {
- cat("Summarize logical variables:\n")
- select(dataset, which(isLogic)) %>%
- summarise_all(list(x = ~ sum(., na.rm = TRUE), y = ~ mean(., na.rm = TRUE), z = ~ n_missing(.))) %>%
- round(dec) %>%
- matrix(ncol = 3) %>%
- as.data.frame(stringsAsFactors = FALSE) %>%
- set_colnames(c("# TRUE", "% TRUE", "n_missing")) %>%
- set_rownames(names(dataset)[isLogic]) %>%
- format(big.mark = ",", scientific = FALSE) %>%
- print()
- cat("\n")
- }
-}
-
-#' Create data.frame from a table
-#'
-#' @param dataset Data.frame
-#' @param freq Column name with frequency information
-#'
-#' @examples
-#' data.frame(price = c("$200", "$300"), sale = c(10, 2)) %>% table2data()
-#'
-#' @export
-table2data <- function(dataset, freq = tail(colnames(dataset), 1)) {
- if (!is.numeric(dataset[[freq]])) stop("The frequency variable must be numeric", call = FALSE)
- blowup <- function(i) {
- if (!is.na(dataset[[freq]][i])) dataset[rep(i, each = dataset[[freq]][i]), ]
- }
-
- lapply(seq_len(nrow(dataset)), blowup) %>%
- bind_rows() %>%
- select_at(.vars = base::setdiff(colnames(dataset), freq)) %>%
- mutate_all(as.factor)
-}
-
-#' Generate list of levels and unique values
-#'
-#' @param dataset A data.frame
-#' @param ... Unquoted variable names to evaluate
-#'
-#' @examples
-#' data.frame(a = c(rep("a", 5), rep("b", 5)), b = c(rep(1, 5), 6:10)) %>% level_list()
-#' level_list(mtcars, mpg, cyl)
-#'
-#' @export
-level_list <- function(dataset, ...) {
- fl <- function(x) {
- if ("factor" %in% class(x)) {
- levels(x)
- } else {
- unique(x)
- }
- }
- .vars <- sapply(substitute(list(...))[-1], deparse)
- if (length(.vars) > 0) {
- lapply(select_at(dataset, .vars = .vars), fl)
- } else {
- lapply(dataset, fl)
- }
-}
-
-#' Add ordered argument to lubridate::month
-#' @param x Input date vector
-#' @param label Month as label (TRUE, FALSE)
-#' @param abbr Abbreviate label (TRUE, FALSE)
-#' @param ordered Order factor (TRUE, FALSE)
-#'
-#' @importFrom lubridate month
-#'
-#' @seealso See the \code{\link[lubridate]{month}} function in the lubridate package for additional details
-#'
-#' @export
-month <- function(x, label = FALSE, abbr = TRUE, ordered = FALSE) {
- x <- lubridate::month(x, label = label, abbr = abbr)
- if (!ordered && label) {
- factor(x, ordered = FALSE)
- } else {
- x
- }
-}
-
-#' Add ordered argument to lubridate::wday
-#' @param x Input date vector
-#' @param label Weekday as label (TRUE, FALSE)
-#' @param abbr Abbreviate label (TRUE, FALSE)
-#' @param ordered Order factor (TRUE, FALSE)
-#'
-#' @importFrom lubridate wday
-#'
-#' @seealso See the \code{\link[lubridate:day]{lubridate::wday()}} function in the lubridate package for additional details
-#'
-#' @export
-wday <- function(x, label = FALSE, abbr = TRUE, ordered = FALSE) {
- x <- lubridate::wday(x, label = label, abbr = abbr)
- if (!ordered && label) {
- factor(x, ordered = FALSE)
- } else {
- x
- }
-}
-
-#' Remove/reorder levels
-#' @details Keep only a specific set of levels in a factor. By removing levels the base for comparison in, e.g., regression analysis, becomes the first level. To relabel the base use, for example, repl = 'other'
-#' @param x Character or Factor
-#' @param levs Set of levels to use
-#' @param repl String (or NA) used to replace missing levels
-#'
-#' @examples
-#' refactor(diamonds$cut, c("Premium", "Ideal")) %>% head()
-#' refactor(diamonds$cut, c("Premium", "Ideal"), "Other") %>% head()
-#'
-#' @export
-refactor <- function(x, levs = levels(x), repl = NA) {
- if (is.factor(x)) {
- lv <- levels(x)
- } else {
- lv <- unique(x)
- if (length(levs) == 0) levs <- lv
- }
-
- if (length(levs) > 0 && length(lv) > length(levs)) {
- if (!is.empty(repl)) levs <- unique(c(repl, levs))
- x <- as_character(x) %>% ifelse(. %in% base::setdiff(lv, levs), repl, .)
- }
-
- factor(x, levels = levs)
-}
-
-#' Convert a string of numbers into a vector
-#'
-#' @param x A string of numbers that may include fractions
-#'
-#' @importFrom MASS fractions
-#'
-#' @examples
-#' make_vec("1 2 4")
-#' make_vec("1/2 2/3 4/5")
-#' make_vec(0.1)
-#' @export
-make_vec <- function(x) {
- if (is.empty(x)) {
- return(NULL)
- } else if (!is.character(x)) {
- return(x)
- }
-
- any_frac <- FALSE
- check_frac <- function(x) {
- if (length(x) == 2) {
- any_frac <<- TRUE
- as.numeric(x[1]) / as.numeric(x[2])
- } else {
- as.numeric(x)
- }
- }
- x <- strsplit(x, "(\\s*,\\s*|\\s*;\\s*|\\s+)") %>%
- unlist() %>%
- strsplit("\\s*/\\s*") %>%
- sapply(check_frac)
-
- if (any_frac) {
- MASS::fractions(x)
- } else {
- x
- }
-}
-
-###############################
-## function below not exported
-###############################
+#' Center
+#' @param x Input variable
+#' @param na.rm If TRUE missing values are removed before calculation
+#' @return If x is a numeric variable return x - mean(x)
+#' @export
+center <- function(x, na.rm = TRUE) {
+ if (is.numeric(x)) {
+ x - mean(x, na.rm = na.rm)
+ } else {
+ x
+ }
+}
+
+#' Standardize
+#' @param x Input variable
+#' @param na.rm If TRUE missing values are removed before calculation
+#' @return If x is a numeric variable return (x - mean(x)) / sd(x)
+#' @export
+standardize <- function(x, na.rm = TRUE) {
+ if (is.numeric(x)) {
+ x_sd <- sd(x, na.rm = na.rm)
+ x <- x - mean(x, na.rm = na.rm)
+ if (isTRUE(x_sd > 0)) {
+ x / x_sd
+ } else {
+ x
+ }
+ } else {
+ x
+ }
+}
+
+#' Calculate square of a variable
+#' @param x Input variable
+#' @return x^2
+#' @export
+square <- function(x) x^2
+
+#' Calculate inverse of a variable
+#' @param x Input variable
+#' @return 1/x
+#' @export
+inverse <- function(x) {
+ if (is.numeric(x)) 1 / x else x
+}
+
+#' Normalize a variable x by a variable y
+#' @param x Input variable
+#' @param y Normalizing variable
+#' @return x/y
+#' @export
+normalize <- function(x, y) {
+ if (is.numeric(x) && is.numeric(y)) x / y else x
+}
+
+#' Convert input in month-day-year format to date
+#' @details Use as.character if x is a factor
+#' @param x Input variable
+#' @return Date variable of class Date
+#' @examples
+#' as_mdy("2-1-2014")
+#' \dontrun{
+#' as_mdy("2-1-2014") %>% month(label = TRUE)
+#' as_mdy("2-1-2014") %>% week()
+#' as_mdy("2-1-2014") %>% wday(label = TRUE)
+#' }
+#' @export
+as_mdy <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(mdy(x)) %>% as.Date()
+}
+
+#' Convert input in day-month-year format to date
+#' @param x Input variable
+#' @return Date variable of class Date
+#' @examples
+#' as_dmy("1-2-2014")
+#'
+#' @export
+as_dmy <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(dmy(x)) %>% as.Date()
+}
+
+#' Convert input in year-month-day format to date
+#' @param x Input variable
+#' @return Date variable of class Date
+#' @examples
+#' as_ymd("2013-1-1")
+#'
+#' @export
+as_ymd <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(ymd(x)) %>% as.Date()
+}
+
+# http://www.noamross.net/blog/2014/2/10/using-times-and-dates-in-r---presentation-code.html
+#' Convert input in year-month-day-hour-minute-second format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_ymd_hms("2014-1-1 12:15:01")
+#' \dontrun{
+#' as_ymd_hms("2014-1-1 12:15:01") %>% as.Date()
+#' as_ymd_hms("2014-1-1 12:15:01") %>% month()
+#' as_ymd_hms("2014-1-1 12:15:01") %>% hour()
+#' }
+#' @export
+as_ymd_hms <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(ymd_hms(x))
+}
+
+#' Convert input in year-month-day-hour-minute format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_ymd_hm("2014-1-1 12:15")
+#' @export
+as_ymd_hm <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(parse_date_time(x, "%Y%m%d %H%M"))
+}
+
+#' Convert input in month-day-year-hour-minute-second format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_mdy_hms("1-1-2014 12:15:01")
+#' @export
+as_mdy_hms <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(parse_date_time(x, "%m%d%Y %H%M%S"))
+}
+
+#' Convert input in month-day-year-hour-minute format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_mdy_hm("1-1-2014 12:15")
+#' @export
+as_mdy_hm <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(parse_date_time(x, "%m%d%Y %H%M"))
+}
+
+#' Convert input in day-month-year-hour-minute-second format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_mdy_hms("1-1-2014 12:15:01")
+#' @export
+as_dmy_hms <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(parse_date_time(x, "%d%m%Y %H%M%S"))
+}
+
+#' Convert input in day-month-year-hour-minute format to date-time
+#' @param x Input variable
+#' @return Date-time variable of class Date
+#' @examples
+#' as_mdy_hm("1-1-2014 12:15")
+#' @export
+as_dmy_hm <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(parse_date_time(x, "%d%m%Y %H%M"))
+}
+
+#' Convert input in hour-minute-second format to time
+#' @param x Input variable
+#' @return Time variable of class Period
+#' @examples
+#' as_hms("12:45:00")
+#' \dontrun{
+#' as_hms("12:45:00") %>% hour()
+#' as_hms("12:45:00") %>% second()
+#' }
+#' @export
+as_hms <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(hms(x))
+}
+
+#' Convert input in hour-minute format to time
+#' @param x Input variable
+#' @return Time variable of class Period
+#' @examples
+#' as_hm("12:45")
+#' \dontrun{
+#' as_hm("12:45") %>% minute()
+#' }
+#' @export
+as_hm <- function(x) {
+ if (is.factor(x)) x <- as.character(x)
+ sshhr(hm(x))
+}
+
+#' Convert variable to integer avoiding potential issues with factors
+#' @param x Input variable
+#' @return Integer
+#' @examples
+#' as_integer(rnorm(10))
+#' as_integer(letters)
+#' as_integer(as.factor(5:10))
+#' as.integer(as.factor(5:10))
+#' as_integer(c("a", "b"))
+#' as_integer(c("0", "1"))
+#' as_integer(as.factor(c("0", "1")))
+#'
+#' @export
+as_integer <- function(x) {
+ if (is.factor(x)) {
+ int <- sshhr(levels(x) %>% .[x] %>% as.integer())
+ if (length(na.omit(int)) == 0) as.integer(x) else int
+ } else if (is.character(x)) {
+ int <- sshhr(as.integer(x))
+ if (length(na.omit(int)) == 0) as_integer(as.factor(x)) else int
+ } else {
+ as.integer(x)
+ }
+}
+
+#' Convert variable to numeric avoiding potential issues with factors
+#' @param x Input variable
+#' @return Numeric
+#' @examples
+#' as_numeric(rnorm(10))
+#' as_numeric(letters)
+#' as_numeric(as.factor(5:10))
+#' as.numeric(as.factor(5:10))
+#' as_numeric(c("a", "b"))
+#' as_numeric(c("3", "4"))
+#' as_numeric(as.factor(c("3", "4")))
+#'
+#' @export
+as_numeric <- function(x) {
+ if (is.factor(x)) {
+ num <- sshhr(levels(x) %>% .[x] %>% as.numeric())
+ if (length(na.omit(num)) == 0) as.numeric(x) else num
+ } else if (is.character(x)) {
+ num <- sshhr(as.numeric(x))
+ if (length(na.omit(num)) == 0) as_numeric(as.factor(x)) else num
+ } else {
+ as.numeric(x)
+ }
+}
+
+#' Wrapper for factor with ordered = FALSE
+#' @param x Input vector
+#' @param ordered Order factor levels (TRUE, FALSE)
+#' @export
+as_factor <- function(x, ordered = FALSE) factor(x, ordered = ordered)
+
+#' Wrapper for as.character
+#' @param x Input vector
+#' @export
+as_character <- function(x) as.character(x)
+
+#' Wrapper for lubridate's as.duration function. Result converted to numeric
+#' @param x Time difference
+#' @export
+as_duration <- function(x) as.numeric(lubridate::as.duration(x))
+
+#' Distance in kilometers or miles between two locations based on lat-long
+#' Function based on \url{http://www.movable-type.co.uk/scripts/latlong.html}. Uses the haversine formula
+#' @param long1 Longitude of location 1
+#' @param lat1 Latitude of location 1
+#' @param long2 Longitude of location 2
+#' @param lat2 Latitude of location 2
+#' @param unit Measure kilometers ("km", default) or miles ("miles")
+#' @param R Radius of the earth
+#' @return Distance between two points
+#' @examples
+#' as_distance(32.8245525, -117.0951632, 40.7033127, -73.979681, unit = "km")
+#' as_distance(32.8245525, -117.0951632, 40.7033127, -73.979681, unit = "miles")
+#'
+#' @export
+as_distance <- function(lat1, long1, lat2, long2,
+ unit = "km", R = c("km" = 6371, "miles" = 3959)[[unit]]) {
+ rad <- pi / 180
+ d1 <- lat1 * rad
+ d2 <- lat2 * rad
+ dlat <- (lat2 - lat1) * rad
+ dlong <- (long2 - long1) * rad
+ a <- sin(dlat / 2)^2 + cos(d1) * cos(d2) * sin(dlong / 2)^2
+ c <- 2 * atan2(sqrt(a), sqrt(1 - a))
+ R * c
+}
+
+#' Generate a variable used to selected a training sample
+#' @param n Number (or fraction) of observations to label as training
+#' @param nr Number of rows in the dataset
+#' @param blocks A vector to use for blocking or a data.frame from which to construct a blocking vector
+#' @param seed Random seed
+#'
+#' @return 0/1 variables for filtering
+#'
+#' @importFrom randomizr complete_ra block_ra
+#'
+#' @examples
+#' make_train(.5, 10)
+#' make_train(.5, 10) %>% table()
+#' make_train(100, 1000) %>% table()
+#' make_train(.15, blocks = mtcars$vs) %>% table() / nrow(mtcars)
+#' make_train(.10, blocks = iris$Species) %>% table() / nrow(iris)
+#' make_train(.5, blocks = iris[, c("Petal.Width", "Species")]) %>% table()
+#'
+#' @export
+make_train <- function(n = .7, nr = NULL, blocks = NULL, seed = 1234) {
+ seed <- gsub("[^0-9]", "", seed)
+ if (!is.empty(seed)) set.seed(seed)
+
+ if (is.empty(nr) && is.empty(blocks)) {
+ stop("Please provided the number of rows in the data (nr) or a vector with blocking information (blocks)")
+ } else if (is.data.frame(blocks)) {
+ blocks <- do.call(paste, c(blocks, sep = "-"))
+ nr <- length(blocks)
+ } else if (is.vector(blocks)) {
+ nr <- length(blocks)
+ }
+
+ if (n > 1) n <- n / nr
+
+ if (length(blocks) > 0) {
+ randomizr::block_ra(blocks, prob = n)
+ } else {
+ randomizr::complete_ra(N = nr, prob = n)
+ }
+}
+
+#' Add transformed variables to a data frame with the option to include a custom variable name extension
+#'
+#' @details Wrapper for dplyr::mutate_at that allows custom variable name extensions
+#'
+#' @param .tbl Data frame to add transformed variables to
+#' @param .funs Function(s) to apply (e.g., log)
+#' @param ... Variables to transform
+#' @param .ext Extension to add for each variable
+#' @param .vars A list of columns generated by dplyr::vars(), or a character vector of column names, or a numeric vector of column positions.
+#'
+#' @examples
+#' mutate_ext(mtcars, .funs = log, mpg, cyl, .ext = "_ln")
+#' mutate_ext(mtcars, .funs = log, .ext = "_ln")
+#' mutate_ext(mtcars, .funs = log)
+#' mutate_ext(mtcars, .funs = log, .ext = "_ln", .vars = vars(mpg, cyl))
+#'
+#' @export
+mutate_ext <- function(.tbl, .funs, ..., .ext = "", .vars = c()) {
+ if (length(.vars) == 0) {
+ ## from https://stackoverflow.com/a/35317870/1974918
+ .vars <- sapply(substitute(list(...))[-1], deparse)
+ if (length(.vars) == 0) {
+ .vars <- colnames(.tbl)
+ }
+ }
+
+ if (is.empty(.ext)) {
+ dplyr::mutate_at(.tbl, .vars = .vars, .funs = .funs) %>%
+ set_rownames(rownames(.tbl))
+ } else {
+ new <- gsub("^~", "", .vars) %>% paste0(., .ext)
+ .tbl[, new] <- transmute_at(.tbl, .vars = .vars, .funs = .funs) %>%
+ set_colnames(new)
+ .tbl
+ }
+}
+
+#' Split a numeric variable into a number of bins and return a vector of bin numbers
+#'
+#' @param x Numeric variable
+#' @param n number of bins to create
+#' @param rev Reverse the order of the bin numbers
+#' @param type An integer between 1 and 9 to select one of the quantile algorithms described in the help for the stats::quantile function
+#'
+#' @seealso See \link[stats]{quantile} for a description of the different algorithm types
+#'
+#' @examples
+#' xtile(1:10, 5)
+#' xtile(1:10, 5, rev = TRUE)
+#' xtile(c(rep(1, 6), 7:10), 5)
+#'
+#' @export
+xtile <- function(x, n = 5, rev = FALSE, type = 7) {
+ if (!is.numeric(x)) {
+ stop(paste0("The variable to bin must be of type {numeric} but is of type {", class(x)[1], "}"), call. = FALSE)
+ } else if (n < 1) {
+ stop(paste0("The number of bins must be > 1 but is ", n), call. = FALSE)
+ } else if (length(x) < n) {
+ stop(paste("The number of bins to create is larger than\nthe number of data points. Perhaps you grouped the data before\ncalling the xtile function and the number of observations per\ngroup is too small"), call. = FALSE)
+ } else if (type < 1 || type > 9) {
+ stop(paste("The value for type is", type, "but must be between 1 and 9"), call. = FALSE)
+ }
+
+ breaks <- quantile(x, prob = seq(0, 1, length = n + 1), na.rm = TRUE, type = type)
+ if (length(breaks) < 2) stop(paste("Insufficient variation in x to construct", n, "breaks"), call. = FALSE)
+ bins <- .bincode(x, breaks, include.lowest = TRUE)
+
+ if (rev) as.integer((n + 1) - bins) else bins
+}
+
+#' Show all rows with duplicated values (not just the first or last)
+#'
+#' @details If an entire row is duplicated use "duplicated" to show only one of the duplicated rows. When using a subset of variables to establish uniqueness it may be of interest to show all rows that have (some) duplicate elements
+#'
+#' @param .tbl Data frame to add transformed variables to
+#' @param ... Variables used to evaluate row uniqueness
+#'
+#' @examples
+#' bind_rows(mtcars, mtcars[c(1, 5, 7), ]) %>%
+#' show_duplicated(mpg, cyl)
+#' bind_rows(mtcars, mtcars[c(1, 5, 7), ]) %>%
+#' show_duplicated()
+#'
+#' @export
+show_duplicated <- function(.tbl, ...) {
+ .vars <- sapply(substitute(list(...))[-1], deparse)
+ if (length(.vars) == 0 || length(unique(.vars)) == ncol(.tbl)) {
+ filter(.tbl, duplicated(.tbl))
+ } else {
+ .tbl %>%
+ group_by_at(.vars = .vars) %>%
+ filter(n() > 1) %>%
+ mutate(nr_dup = 1:n()) %>%
+ arrange_at(.vars = .vars) %>%
+ ungroup()
+ }
+}
+
+#' Weighted standard deviation
+#'
+#' @details Calculate weighted standard deviation
+#'
+#' @param x Numeric vector
+#' @param wt Numeric vector of weights
+#' @param na.rm Remove missing values (default is TRUE)
+#'
+#' @export
+weighted.sd <- function(x, wt, na.rm = TRUE) {
+ if (na.rm) {
+ ind <- is.na(x) | is.na(wt)
+ x <- x[!ind]
+ wt <- wt[!ind]
+ }
+ wt <- wt / sum(wt)
+ wm <- weighted.mean(x, wt)
+ sqrt(sum(wt * (x - wm)^2))
+}
+
+#' Create data.frame summary
+#'
+#' @details Used in Radiant's Data > Transform tab
+#'
+#' @param dataset Data.frame
+#' @param dc Class for each variable
+#' @param dec Number of decimals to show
+#'
+#' @export
+get_summary <- function(dataset, dc = get_class(dataset), dec = 3) {
+ isFct <- "factor" == dc
+ isNum <- dc %in% c("numeric", "integer", "Duration")
+ isDate <- "date" == dc
+ isChar <- "character" == dc
+ isLogic <- "logical" == dc
+ isPeriod <- "period" == dc
+ isTs <- "ts" == dc
+
+ if (sum(isNum) > 0) {
+ cn <- names(dc)[isNum]
+
+ cat("Summarize numeric variables:\n")
+ select(dataset, which(isNum)) %>%
+ gather("variable", "values", !!cn, factor_key = TRUE) %>%
+ group_by_at(.vars = "variable") %>%
+ summarise_all(
+ list(
+ n_obs = n_obs,
+ n_missing = n_missing,
+ n_distinct = n_distinct,
+ mean = mean,
+ median = median,
+ min = min,
+ max = max,
+ p25 = p25,
+ p75 = p75,
+ sd = sd,
+ se = se
+ ),
+ na.rm = TRUE
+ ) %>%
+ data.frame(check.names = FALSE, stringsAsFactors = FALSE) %>%
+ format_df(dec = dec, mark = ",") %>%
+ set_colnames(c("", colnames(.)[-1])) %>%
+ print(row.names = FALSE)
+ cat("\n")
+ }
+
+ if (sum(isTs) > 0) {
+ cn <- names(dc)[isTs]
+
+ cat("Summarize time-series variables:\n")
+ lapply(
+ select(dataset, which(isTs)),
+ function(x) {
+ as.data.frame(x) %>%
+ summarise_all(
+ list(
+ n_obs = n_obs,
+ n_missing = n_missing,
+ n_distinct = n_distinct,
+ mean = mean,
+ median = median,
+ min = min,
+ max = max,
+ start = ~ attr(., "tsp")[1] %>% round(dec),
+ end = ~ attr(., "tsp")[2] %>% round(dec),
+ frequency = ~ attr(., "tsp")[3] %>% as.integer()
+ ),
+ na.rm = TRUE
+ )
+ }
+ ) %>%
+ bind_rows() %>%
+ data.frame(check.names = FALSE, stringsAsFactors = FALSE) %>%
+ data.frame(.vars = cn, .) %>%
+ format_df(dec = 3, mark = ",") %>%
+ set_colnames(c("", colnames(.)[-1])) %>%
+ print(row.names = FALSE)
+ cat("\n")
+ }
+
+ if (sum(isFct) > 0) {
+ cat("Summarize factors:\n")
+ select(dataset, which(isFct)) %>%
+ summary(maxsum = 20) %>%
+ print()
+ cat("\n")
+ }
+
+ if (sum(isDate) > 0) {
+ cat("Earliest dates:\n")
+ select(dataset, which(isDate)) %>%
+ summarise_all(min) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ print(row.names = FALSE)
+ cat("\nFinal dates:\n")
+ select(dataset, which(isDate)) %>%
+ summarise_all(max) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ print(row.names = FALSE)
+
+ cat("\n")
+ }
+
+ if (sum(isPeriod) > 0) {
+ max_time <- function(x) sort(x) %>% tail(1)
+ min_time <- function(x) sort(x) %>% head(1)
+
+ cat("Earliest time:\n")
+ select(dataset, which(isPeriod)) %>%
+ summarise_all(min_time) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ print(row.names = FALSE)
+ cat("\nFinal time:\n")
+ select(dataset, which(isPeriod)) %>%
+ summarise_all(max_time) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ print(row.names = FALSE)
+ cat("\n")
+ }
+
+ if (sum(isChar) > 0) {
+ ## finding unique elements can be slow for large files
+ if (nrow(dataset) < 10^5) {
+ cat("Summarize character variables (< 20 unique values shown):\n")
+ select(dataset, which(isChar)) %>%
+ lapply(unique) %>%
+ (function(x) {
+ for (i in names(x)) {
+ cat(i, paste0("(n_distinct ", length(x[[i]]), "): "), x[[i]][1:min(20, length(x[[i]]))], "\n")
+ }
+ })
+ } else {
+ cat("Summarize character variables (< 20 values shown):\n")
+ select(dataset, which(isChar)) %>%
+ (function(x) {
+ for (i in names(x)) {
+ cat(i, ":", x[[i]][1:min(20, length(x[[i]]))], "\n")
+ }
+ })
+ }
+ cat("\n")
+ }
+ if (sum(isLogic) > 0) {
+ cat("Summarize logical variables:\n")
+ select(dataset, which(isLogic)) %>%
+ summarise_all(list(x = ~ sum(., na.rm = TRUE), y = ~ mean(., na.rm = TRUE), z = ~ n_missing(.))) %>%
+ round(dec) %>%
+ matrix(ncol = 3) %>%
+ as.data.frame(stringsAsFactors = FALSE) %>%
+ set_colnames(c("# TRUE", "% TRUE", "n_missing")) %>%
+ set_rownames(names(dataset)[isLogic]) %>%
+ format(big.mark = ",", scientific = FALSE) %>%
+ print()
+ cat("\n")
+ }
+}
+
+#' Create data.frame from a table
+#'
+#' @param dataset Data.frame
+#' @param freq Column name with frequency information
+#'
+#' @examples
+#' data.frame(price = c("$200", "$300"), sale = c(10, 2)) %>% table2data()
+#'
+#' @export
+table2data <- function(dataset, freq = tail(colnames(dataset), 1)) {
+ if (!is.numeric(dataset[[freq]])) stop("The frequency variable must be numeric", call = FALSE)
+ blowup <- function(i) {
+ if (!is.na(dataset[[freq]][i])) dataset[rep(i, each = dataset[[freq]][i]), ]
+ }
+
+ lapply(seq_len(nrow(dataset)), blowup) %>%
+ bind_rows() %>%
+ select_at(.vars = base::setdiff(colnames(dataset), freq)) %>%
+ mutate_all(as.factor)
+}
+
+#' Generate list of levels and unique values
+#'
+#' @param dataset A data.frame
+#' @param ... Unquoted variable names to evaluate
+#'
+#' @examples
+#' data.frame(a = c(rep("a", 5), rep("b", 5)), b = c(rep(1, 5), 6:10)) %>% level_list()
+#' level_list(mtcars, mpg, cyl)
+#'
+#' @export
+level_list <- function(dataset, ...) {
+ fl <- function(x) {
+ if ("factor" %in% class(x)) {
+ levels(x)
+ } else {
+ unique(x)
+ }
+ }
+ .vars <- sapply(substitute(list(...))[-1], deparse)
+ if (length(.vars) > 0) {
+ lapply(select_at(dataset, .vars = .vars), fl)
+ } else {
+ lapply(dataset, fl)
+ }
+}
+
+#' Add ordered argument to lubridate::month
+#' @param x Input date vector
+#' @param label Month as label (TRUE, FALSE)
+#' @param abbr Abbreviate label (TRUE, FALSE)
+#' @param ordered Order factor (TRUE, FALSE)
+#'
+#' @importFrom lubridate month
+#'
+#' @seealso See the \code{\link[lubridate]{month}} function in the lubridate package for additional details
+#'
+#' @export
+month <- function(x, label = FALSE, abbr = TRUE, ordered = FALSE) {
+ x <- lubridate::month(x, label = label, abbr = abbr)
+ if (!ordered && label) {
+ factor(x, ordered = FALSE)
+ } else {
+ x
+ }
+}
+
+#' Add ordered argument to lubridate::wday
+#' @param x Input date vector
+#' @param label Weekday as label (TRUE, FALSE)
+#' @param abbr Abbreviate label (TRUE, FALSE)
+#' @param ordered Order factor (TRUE, FALSE)
+#'
+#' @importFrom lubridate wday
+#'
+#' @seealso See the \code{\link[lubridate:day]{lubridate::wday()}} function in the lubridate package for additional details
+#'
+#' @export
+wday <- function(x, label = FALSE, abbr = TRUE, ordered = FALSE) {
+ x <- lubridate::wday(x, label = label, abbr = abbr)
+ if (!ordered && label) {
+ factor(x, ordered = FALSE)
+ } else {
+ x
+ }
+}
+
+#' Remove/reorder levels
+#' @details Keep only a specific set of levels in a factor. By removing levels the base for comparison in, e.g., regression analysis, becomes the first level. To relabel the base use, for example, repl = 'other'
+#' @param x Character or Factor
+#' @param levs Set of levels to use
+#' @param repl String (or NA) used to replace missing levels
+#'
+#' @examples
+#' refactor(diamonds$cut, c("Premium", "Ideal")) %>% head()
+#' refactor(diamonds$cut, c("Premium", "Ideal"), "Other") %>% head()
+#'
+#' @export
+refactor <- function(x, levs = levels(x), repl = NA) {
+ if (is.factor(x)) {
+ lv <- levels(x)
+ } else {
+ lv <- unique(x)
+ if (length(levs) == 0) levs <- lv
+ }
+
+ if (length(levs) > 0 && length(lv) > length(levs)) {
+ if (!is.empty(repl)) levs <- unique(c(repl, levs))
+ x <- as_character(x) %>% ifelse(. %in% base::setdiff(lv, levs), repl, .)
+ }
+
+ factor(x, levels = levs)
+}
+
+#' Convert a string of numbers into a vector
+#'
+#' @param x A string of numbers that may include fractions
+#'
+#' @importFrom MASS fractions
+#'
+#' @examples
+#' make_vec("1 2 4")
+#' make_vec("1/2 2/3 4/5")
+#' make_vec(0.1)
+#' @export
+make_vec <- function(x) {
+ if (is.empty(x)) {
+ return(NULL)
+ } else if (!is.character(x)) {
+ return(x)
+ }
+
+ any_frac <- FALSE
+ check_frac <- function(x) {
+ if (length(x) == 2) {
+ any_frac <<- TRUE
+ as.numeric(x[1]) / as.numeric(x[2])
+ } else {
+ as.numeric(x)
+ }
+ }
+ x <- strsplit(x, "(\\s*,\\s*|\\s*;\\s*|\\s+)") %>%
+ unlist() %>%
+ strsplit("\\s*/\\s*") %>%
+ sapply(check_frac)
+
+ if (any_frac) {
+ MASS::fractions(x)
+ } else {
+ x
+ }
+}
+
+###############################
+## function below not exported
+###############################
.recode. <- function(x, cmd) car::Recode(x, cmd)
\ No newline at end of file
diff --git a/radiant.data/inst/app/server.R b/radiant.data/inst/app/server.R
index a904171808d51d5932614f7e7834c7ddfec63a8e..0eb59fb230f0216e4ad9dd64aebe463cd90b9329 100644
--- a/radiant.data/inst/app/server.R
+++ b/radiant.data/inst/app/server.R
@@ -1,20 +1,99 @@
shinyServer(function(input, output, session) {
enc <- getOption("radiant.encoding", "UTF-8")
-
+
## source shared functions
source("init.R", encoding = enc, local = TRUE)
source("radiant.R", encoding = enc, local = TRUE)
-
+
## packages to use for example data
options(radiant.example.data = "radiant.data")
-
+
## source data & analysis tools
for (file in list.files(c("tools/app", "tools/data"), pattern = "\\.(r|R)$", full.names = TRUE)) {
source(file, encoding = enc, local = TRUE)
}
-
+
+ # ============================================================
+ # [新增功能] 企业级集成:Token鉴权 + 环境变量 + 自动加载
+ # ============================================================
+ observe({
+ # 1. 解析 URL 参数
+ query <- parseQueryString(session$clientData$url_search)
+
+ dataset_id <- query[['datasetId']]
+ token <- query[['token']]
+
+ # 2. 仅当 ID 和 Token 均存在时执行
+ if (!is.null(dataset_id) && !is.null(token)) {
+
+ # 定义 Radiant 内部使用的数据集名称 (例如: data_10086)
+ # 如果 URL 传了 name 参数就用 name,否则用 id 拼接
+ ds_name <- if (!is.null(query[['name']])) query[['name']] else paste0("data_", dataset_id)
+
+ # 3. 检查数据是否已存在 (防止重复加载)
+ if (is.null(r_data[[ds_name]])) {
+
+ withProgress(message = '正在从业务系统同步数据...', value = 0.2, {
+
+ # 4. 获取环境变量中的 API 基地址
+ api_base <- Sys.getenv("HOST_API_BASE","http://127.0.0.1:11999")
+
+ # 5. 拼接完整 API 路径
+ target_url <- paste0(api_base, "/disease-data/data/export/apply/apply/case?applyId=", dataset_id)
+
+ # 6. 创建临时文件 (明确 .xlsx 后缀)
+ tmp_file <- tempfile(fileext = ".xlsx")
+
+ tryCatch({
+ incProgress(0.3, detail = "正在鉴权并下载...")
+
+ # 7. 发起带 Token 的 HTTP 请求
+ response <- httr::POST(
+ url = target_url,
+ # 添加 Bearer Token (或根据你的接口要求修改 Header)
+ httr::add_headers(Authorization = paste("Bearer", token)),
+ # 将结果写入磁盘
+ httr::write_disk(tmp_file, overwrite = TRUE)
+ )
+
+ # 检查 HTTP 状态码
+ if (httr::status_code(response) != 200) {
+ stop(paste("下载失败,请手动导入,HTTP状态码:", httr::status_code(response)))
+ }
+
+ incProgress(0.7, detail = "解析并导入 Radiant...")
+
+ # 8. 复用 Radiant 核心加载函数 (manage_ui.R 中定义)
+ # 这会自动完成读取、转因子、生成R代码、注册到下拉框等所有动作
+ load_user_data(
+ fname = paste0(ds_name, ".xlsx"), # 虚拟文件名
+ uFile = tmp_file, # 实际文件路径
+ ext = "xlsx",
+ xlsx_sheet = 1,
+ xlsx_header = TRUE,
+ man_str_as_factor = TRUE
+ )
+
+ # 9. 界面联动:选中数据并跳转到视图
+ updateSelectInput(session, "dataset", selected = ds_name)
+ updateTabsetPanel(session, "nav_radiant", selected = "Data")
+ updateTabsetPanel(session, "tabs_data", selected = "View") # 或者 "Visualize"
+
+ showNotification(paste("数据集", ds_name, "加载成功!"), type = "message")
+
+ }, error = function(e) {
+ showNotification(paste("数据同步失败,请手动导入数据:", e$message), type = "error", duration = 10)
+ #调试打印 print(e)
+ })
+ })
+ }
+ }
+ })
+ # ============================================================
+
+
# dataviewer_proxy <- DT::dataTableProxy("dataviewer", session)
-
+
# observe(session$setCurrentTheme(
# if (isTRUE(input$dark_mode)) {
# bslib::bs_theme(version = 4, bg = "black", fg = "white")
@@ -22,7 +101,7 @@ shinyServer(function(input, output, session) {
# bslib::bs_theme(version = 4)
# }
# ))
-
+
## save state on refresh or browser close
saveStateOnRefresh(session)
})
diff --git a/radiant.data/inst/app/tools/data/manage_ui.R b/radiant.data/inst/app/tools/data/manage_ui.R
index 4bf94985e971a0a75c49ea1e74a5c83053221eb7..a3097f896f96f431e869819b968bff87bf2cf03e 100644
--- a/radiant.data/inst/app/tools/data/manage_ui.R
+++ b/radiant.data/inst/app/tools/data/manage_ui.R
@@ -50,8 +50,7 @@ output$ui_fileUpload <- renderUI({
)
} else if (input$dataType == "xlsx") {
tagList(
- make_uploadfile(accept = c(".xlsx", ".xls")),
- make_description_uploadfile(accept = c(".md", ".txt"))
+ make_uploadfile(accept = c(".xlsx", ".xls"))
)
} else if (input$dataType == "url_rds") {
with(tags, table(
@@ -229,11 +228,6 @@ output$ui_Manage <- renderUI({
),
conditionalPanel(
"input.dataType == 'xlsx'",
- numericInput(
- "xlsx_sheet",
- label = i18n$t("Sheet index (1-based):"),
- value = 1, min = 1, step = 1
- ),
checkboxInput(
"xlsx_header",
label = i18n$t("First row as header"),
@@ -514,19 +508,42 @@ observeEvent(input$uploadfile, {
withProgress(message = "Loading ...", value = 1, {
for (i in 1:nrow(inFile)) {
- # 区分文件类型,传递对应参数
if (input$dataType == "xlsx") {
- # 调用load_user_data,传递xlsx专属参数
- load_user_data(
- fname = as.character(inFile[i, "name"]),
- uFile = as.character(inFile[i, "datapath"]),
- ext = "xlsx", # 明确指定ext为xlsx
- xlsx_sheet = input$xlsx_sheet, # 从UI获取工作表索引
- xlsx_header = input$xlsx_header, # 从UI获取表头设置
- man_str_as_factor = TRUE # xlsx也支持“字符串转因子”
- )
- } else if (input$dataType %in% c("csv", "url_csv")) {
- # 原有CSV参数传递
+ uFile <- as.character(inFile[i, "datapath"])
+ # 获取xlsx文件中所有sheet名称
+ sheets <- try(readxl::excel_sheets(uFile), silent = TRUE)
+ if (inherits(sheets, "try-error")) {
+ showNotification(i18n$t("Failed to read the sheet list from the xlsx file. Please check if the file is corrupted or properly formatted "), type = "error")
+ next # 跳过当前文件
+ }
+
+ # 循环读取每个sheet,先带后缀加载,再重命名
+ for (sheet in sheets) {
+ # 1. 临时文件名(带.xlsx后缀,满足函数校验)
+ temp_fname <- paste0(sheet, ".xlsx")
+ # 2. 加载数据
+ load_user_data(
+ fname = temp_fname, # 临时文件名:sheet名.xlsx
+ uFile = uFile,
+ ext = "xlsx",
+ xlsx_sheet = sheet, # 按sheet名称读取
+ xlsx_header = input$xlsx_header,
+ man_str_as_factor = TRUE
+ )
+
+ # 3. 重命名数据集:从“sheet名.xlsx”改为“sheet名”
+ # 检查是否已存在同名数据集(避免覆盖)
+ if (!is.null(r_data[[sheet]])) {
+ sheet_new <- paste0(sheet, "_", length(grep(paste0("^", sheet), names(r_data))) + 1)
+ sheet <- sheet_new
+ }
+ # 执行重命名
+ r_data[[sheet]] <- r_data[[temp_fname]]
+ # 删除临时数据集
+ rm(list = temp_fname, envir = r_data)
+ }
+ }
+ else if (input$dataType %in% c("csv", "url_csv")) {
load_user_data(
fname = as.character(inFile[i, "name"]),
uFile = as.character(inFile[i, "datapath"]),
diff --git a/radiant.data/inst/app/tools/data/transform_ui.R b/radiant.data/inst/app/tools/data/transform_ui.R
index 033819c71027d5f355ab11161de3a6bf3c3099d3..9ee35969348c41e6ec89cd3c441f6ed360c92524 100644
--- a/radiant.data/inst/app/tools/data/transform_ui.R
+++ b/radiant.data/inst/app/tools/data/transform_ui.R
@@ -1,1463 +1,1407 @@
-## UI-elements for transform
-output$ui_tr_vars <- renderUI({
- vars <- varnames()
- req(available(vars))
- selectInput(
- "tr_vars", i18n$t("Select variable(s):"),
- choices = vars,
- multiple = TRUE,
- size = min(8, length(vars)),
- selectize = FALSE
- )
-})
-
-output$ui_tr_replace <- renderUI({
- validate(
- need(available(input$tr_vars), i18n$t("Select one or more variables to replace"))
- )
- vars <- varnames()
- selectInput(
- "tr_replace", i18n$t("Select replacement variables:"),
- choices = vars,
- multiple = TRUE, size = min(2, length(vars)), selectize = FALSE
- )
-})
-
-output$ui_tr_normalizer <- renderUI({
- isNum <- .get_class() %in% c("numeric", "integer", "ts")
- vars <- varnames()[isNum]
- if (length(vars) == 0) {
- return()
- }
- selectInput(
- "tr_normalizer", i18n$t("Normalizing variable:"),
- choices = setNames(
- c("none", vars),
- c(i18n$t("None"), vars)
- ),
- selected = "none"
- )
-})
-
-output$ui_tr_tab2dat <- renderUI({
- isNum <- .get_class() %in% c("numeric", "integer", "ts")
- vars <- varnames()[isNum]
- selectInput(
- "tr_tab2dat", i18n$t("Frequency variable:"),
- choices = setNames(
- c("none", vars),
- c(i18n$t("None"), vars)
- ),
- selected = "none"
- )
-})
-
-output$ui_tr_gather <- renderUI({
- tagList(
- tags$table(
- tags$td(returnTextInput("tr_gather_key", i18n$t("Key name:"), value = "key")),
- tags$td(returnTextInput("tr_gather_value", i18n$t("Value name:"), value = "value"))
- )
- )
-})
-
-output$ui_tr_spread <- renderUI({
- req(input$tr_change_type)
- vars <- c("None" = "none", varnames())
- tagList(
- selectizeInput(
- "tr_spread_key", i18n$t("Key(s):"),
- choices = vars[-1],
- selected = NULL, multiple = TRUE,
- options = list(placeholder = i18n$t("None"), plugins = list("remove_button", "drag_drop"))
- ),
- selectInput("tr_spread_value", i18n$t("Value:"), choices = vars, selected = "none", multiple = FALSE),
- numericInput("tr_spread_fill", i18n$t("Fill:"), value = NA)
- )
-})
-
-output$ui_tr_reorg_vars <- renderUI({
- req(input$tr_change_type)
- vars <- varnames()
- validate(
- need(length(vars) < 101, i18n$t("Interactive re-ordering is only supported up to 100 variables. See ?dplyr::select for information on how to re-order variables in R"))
- )
- selectizeInput(
- "tr_reorg_vars", i18n$t("Reorder/remove variables:"),
- choices = vars,
- selected = vars, multiple = TRUE,
- options = list(placeholder = i18n$t("Select variable(s)"), plugins = list("remove_button", "drag_drop"))
- )
-})
-
-output$ui_tr_reorg_levs <- renderUI({
- req(input$tr_change_type)
- validate(
- need(available(input$tr_vars), i18n$t("Select a single variable of type factor or character"))
- )
- fctCol <- input$tr_vars[1]
- fct <- .get_data_transform()[[fctCol]]
- levs <- if (is.factor(fct)) levels(fct) else levels(as_factor(fct))
- validate(
- need(length(levs) < 101, i18n$t("Interactive re-ordering is only supported up to 100 levels. See ?radiant.data::refactor for information on how to re-order levels in R"))
- )
- tagList(
- selectizeInput(
- "tr_reorg_levs", i18n$t("Reorder/remove levels:"),
- choices = levs,
- selected = levs, multiple = TRUE,
- options = list(placeholder = i18n$t("Select level(s)"), plugins = list("remove_button", "drag_drop"))
- ),
- textInput(
- "tr_rorepl", i18n$t("Replacement level name:"),
- placeholder = i18n$t("Provide name for missing levels"),
- value = NA
- )
- )
-})
-
-transform_auto_complete <- reactive({
- req(input$dataset)
- comps <- list(r_info[["datasetlist"]][input$dataset], as.vector(varnames()))
- names(comps) <- c("{datasets}", paste0("{", input$dataset, "}"))
- comps
-})
-
-output$ui_tr_log <- renderUI({
- tagList(
- HTML(paste0("
")),
- shinyAce::aceEditor(
- "tr_log",
- mode = "r",
- theme = getOption("radiant.ace_theme", default = "tomorrow"),
- wordWrap = TRUE,
- debounce = 0,
- value = state_init("tr_log", "") %>% fix_smart(),
- vimKeyBinding = getOption("radiant.ace_vim.keys", default = FALSE),
- tabSize = getOption("radiant.ace_tabSize", 2),
- useSoftTabs = getOption("radiant.ace_useSoftTabs", TRUE),
- showInvisibles = getOption("radiant.ace_showInvisibles", FALSE),
- autoScrollEditorIntoView = TRUE,
- autoComplete = getOption("radiant.ace_autoComplete", "enable"),
- autoCompleters = c("static", "rlang"),
- autoCompleteList = isolate(transform_auto_complete()),
- minLines = 5,
- maxLines = 15
- )
- )
-})
-
-transform_annotater <- shinyAce::aceAnnotate("tr_log")
-transform_tooltip <- shinyAce::aceTooltip("tr_log")
-transform_ac <- shinyAce::aceAutocomplete("tr_log")
-
-observe({
- shinyAce::updateAceEditor(
- session, "tr_log",
- autoCompleters = c("static", "rlang"),
- autoCompleteList = transform_auto_complete()
- )
-})
-
-
-ext_options <- list(
- "none" = "", "log" = "_ln", "exp" = "_exp",
- "square" = "_sq", "sqrt" = "_sqrt", "center" = "_ct",
- "standardize" = "_st", "inverse" = "_inv"
-)
-
-output$ui_tr_ext <- renderUI({
- trfun <- input$tr_transfunction
- if (is.empty(trfun)) trfun <- "none"
- returnTextInput(
- "tr_ext", i18n$t("Variable name extension:"),
- value = ext_options[[trfun]]
- )
-})
-
-output$ui_tr_ext_nz <- renderUI({
- if (is.empty(input$tr_normalizer, "none")) {
- return()
- }
- returnTextInput(
- "tr_ext_nz", i18n$t("Variable name extension:"),
- value = paste0("_", input$tr_normalizer)
- )
-})
-
-output$ui_tr_rcname <- renderUI({
- if (is.empty(input$tr_vars)) {
- return()
- }
- returnTextInput(
- "tr_rcname", i18n$t("Recoded variable name:"),
- value = paste0(input$tr_vars[1], "_rc")
- )
-})
-
-output$ui_tr_ext_bin <- renderUI({
- if (is.empty(input$tr_vars)) {
- return()
- }
- returnTextInput(
- "tr_ext_bin", i18n$t("Variable name extension:"),
- value = "_dec"
- )
-})
-
-output$ui_tr_roname <- renderUI({
- if (is.empty(input$tr_vars)) {
- return()
- }
- returnTextInput(
- "tr_roname", i18n$t("Variable name:"),
- value = input$tr_vars[1]
- )
-})
-
-output$ui_tr_typename <- renderUI({
- if (is.empty(input$tr_vars)) {
- return()
- }
- returnTextInput(
- "tr_typename", i18n$t("Variable name extension:"),
- value = "",
- placeholder = i18n$t("Add extension to variable name")
- )
-})
-
-output$ui_tr_rename <- renderUI({
- validate(
- need(available(input$tr_vars), i18n$t("Select one or more variables to rename"))
- )
- if (length(input$tr_vars) < 2) {
- mess <- i18n$t("Type a new name for the selected variable and press return")
- } else {
- mess <- i18n$t("Type new names for the selected variables, separated by a , and press return")
- }
- returnTextAreaInput(
- "tr_rename", i18n$t("Rename variable(s):"),
- value = "",
- rows = 3,
- placeholder = mess
- )
-})
-
-output$ui_tr_dataset <- renderUI({
- tr_dataset <- input$dataset
- if (input$tr_change_type == "show_dup") {
- tr_dataset <- paste0(tr_dataset, "_dup")
- } else if (input$tr_change_type == "holdout") {
- tr_dataset <- paste0(tr_dataset, "_holdout")
- } else if (input$tr_change_type == "tab2dat") {
- tr_dataset <- paste0(tr_dataset, "_dat")
- } else if (input$tr_change_type == "gather") {
- tr_dataset <- paste0(tr_dataset, "_gathered")
- } else if (input$tr_change_type == "spread") {
- tr_dataset <- paste0(tr_dataset, "_spread")
- } else if (input$tr_change_type == "expand") {
- tr_dataset <- paste0(tr_dataset, "_expand")
- }
- tags$table(
- tags$td(textInput("tr_name", i18n$t("Store changes in:"), tr_dataset)),
- tags$td(actionButton("tr_store", i18n$t("Store"), icon = icon("plus", verify_fa = FALSE), class = "btn-success"), class = "top")
- )
-})
-
-trans_options <- setNames(
- c("none", "log", "exp", "square", "sqrt", "center", "standardize", "inverse"),
- c(
- i18n$t("None"),
- i18n$t("Ln (natural log)"),
- i18n$t("Exp"),
- i18n$t("Square"),
- i18n$t("Square‑root"),
- i18n$t("Center"),
- i18n$t("Standardize"),
- i18n$t("Inverse")
- )
-)
-
-type_options <- setNames(
- c(
- "none", "as_factor", "as_numeric", "as_integer", "as_character", "ts",
- "as_mdy", "as_dmy", "as_ymd",
- "as_mdy_hms", "as_mdy_hm", "as_dmy_hms", "as_dmy_hm",
- "as_ymd_hms", "as_ymd_hm"
- ),
- c(
- i18n$t("None"),
- i18n$t("As factor"),
- i18n$t("As numeric"),
- i18n$t("As integer"),
- i18n$t("As character"),
- i18n$t("As time series"),
- i18n$t("As date (mdy)"),
- i18n$t("As date (dmy)"),
- i18n$t("As date (ymd)"),
- i18n$t("As date/time (mdy_hms)"),
- i18n$t("As date/time (mdy_hm)"),
- i18n$t("As date/time (dmy_hms)"),
- i18n$t("As date/time (dmy_hm)"),
- i18n$t("As date/time (ymd_hms)"),
- i18n$t("As date/time (ymd_hm)")
- )
-)
-
-trans_types <- list(
- ` ` = i18n$t("None (summarize)"),
-
- `Change variable(s)` = setNames(
- c("Bin", "Change type", "Normalize", "Recode", "Remove/reorder levels", "Rename", "Replace", "Transform"),
- c(i18n$t("Bin"), i18n$t("Change type"), i18n$t("Normalize"), i18n$t("Recode"),
- i18n$t("Remove/reorder levels"), i18n$t("Rename"), i18n$t("Replace"), i18n$t("Transform"))
- ),
-
- `Create new variable(s)` = setNames(
- c("Clipboard", "Create"),
- c(i18n$t("Clipboard"), i18n$t("Create"))
- ),
-
- `Clean data` = setNames(
- c("Remove missing values", "Remove/reorder variables", "Remove duplicates", "Show duplicates"),
- c(i18n$t("Remove missing values"), i18n$t("Remove/reorder variables"), i18n$t("Remove duplicates"), i18n$t("Show duplicates"))
- ),
-
- `Expand data` = setNames(
- c("Expand grid", "Table‑to‑data"),
- c(i18n$t("Expand grid"), i18n$t("Table‑to‑data"))
- ),
-
- `Split data` = setNames(
- c("Holdout sample", "Training variable"),
- c(i18n$t("Holdout sample"), i18n$t("Training variable"))
- ),
-
- `Tidy data` = setNames(
- c("Gather columns", "Spread column"),
- c(i18n$t("Gather columns"), i18n$t("Spread column"))
- )
-)
-
-
-
-output$ui_Transform <- renderUI({
- ## Inspired by Ian Fellow's transform ui in JGR/Deducer
- tagList(
- wellPanel(
- checkboxInput("tr_hide", i18n$t("Hide summaries"), state_init("tr_hide", FALSE)),
- uiOutput("ui_tr_vars"),
- selectizeInput("tr_change_type", i18n$t("Transformation type:"), trans_types, selected = "none"),
- conditionalPanel(
- condition = "input.tr_change_type == 'type'",
- selectInput("tr_typefunction", i18n$t("Change variable type:"), type_options, selected = "none"),
- conditionalPanel(
- condition = "input.tr_typefunction == 'ts'",
- tags$table(
- tags$td(numericInput("tr_ts_start_year", label = i18n$t("Start year:"), min = 1, value = NA)),
- tags$td(numericInput("tr_ts_start_period", label = i18n$t("Start period:"), min = 1, value = 1))
- ),
- tags$table(
- tags$td(numericInput("tr_ts_end_year", label = i18n$t("End year:"), value = NA)),
- tags$td(numericInput("tr_ts_end_period", label = i18n$t("End period:"), value = NA))
- ),
- numericInput("tr_ts_frequency", label = i18n$t("Frequency:"), min = 1, value = 52)
- )
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'transform'",
- selectInput("tr_transfunction", i18n$t("Apply function:"), trans_options)
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'normalize'",
- uiOutput("ui_tr_normalizer")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'tab2dat'",
- uiOutput("ui_tr_tab2dat")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'gather'",
- uiOutput("ui_tr_gather")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'spread'",
- uiOutput("ui_tr_spread")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'create'",
- returnTextAreaInput(
- "tr_create", i18n$t("Create:"),
- rows = 3,
- placeholder = i18n$t("Type a formula to create a new variable (e.g., x = y - z) and press return")
- )
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'bin'",
- numericInput("tr_bin_n", label = i18n$t("Nr bins:"), min = 2, value = 10),
- checkboxInput("tr_bin_rev", i18n$t("Reverse order"), value = FALSE),
- uiOutput("ui_tr_ext_bin")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'training'",
- tags$table(
- tags$td(numericInput("tr_training_n", label = i18n$t("Size:"), min = 0, value = .7)),
- tags$td(textInput("tr_training", i18n$t("Variable name:"), "training"))
- ),
- numericInput("tr_training_seed", label = i18n$t("Seed:"), value = 1234)
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'holdout'",
- checkboxInput("tr_holdout_rev", i18n$t("Reverse filter and slice"), value = TRUE)
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'clip'",
- textAreaInput(
- "tr_paste", i18n$t("Paste from spreadsheet:"),
- rows = 3,
- value = "",
- resize = "vertical",
- placeholder = i18n$t("Copy-and-paste data with a header row from a spreadsheet"),
- )
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'recode'",
- returnTextAreaInput(
- "tr_recode", i18n$t("Recode:"),
- value = "",
- rows = 3,
- placeholder = i18n$t("Select a variable, specify how it should be recoded (e.g., lo:20 = 0; else = 1), and press return")
- )
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'rename'",
- uiOutput("ui_tr_rename")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'replace'",
- uiOutput("ui_tr_replace")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'reorg_vars'",
- uiOutput("ui_tr_reorg_vars")
- ),
- conditionalPanel(
- condition = "input.tr_change_type == 'reorg_levs'",
- uiOutput("ui_tr_reorg_levs")
- ),
- conditionalPanel(
- "input.tr_change_type == 'transform'",
- uiOutput("ui_tr_ext")
- ),
- conditionalPanel(
- "input.tr_change_type == 'recode'",
- uiOutput("ui_tr_rcname")
- ),
- conditionalPanel(
- "input.tr_change_type == 'normalize'",
- uiOutput("ui_tr_ext_nz")
- ),
- conditionalPanel(
- "input.tr_change_type == 'reorg_levs'",
- uiOutput("ui_tr_roname")
- ),
- conditionalPanel(
- "input.tr_change_type == 'type'",
- uiOutput("ui_tr_typename")
- )
- ),
- conditionalPanel(
- "input.tr_change_type != 'none'",
- wellPanel(uiOutput("ui_tr_dataset"))
- ),
- help_and_report(
- modal_title = i18n$t("Transform"),
- fun_name = "transform",
- help_file = inclMD(file.path(getOption("radiant.path.data"), "app/tools/help/transform.md")),
- lic = "by-sa"
- )
- )
-})
-
-## ensure no variables are selected 'by accident' when creating a new variable
-observeEvent(input$tr_change_type, {
- if (input$tr_change_type == "create") {
- updateSelectInput(session = session, inputId = "tr_vars", label = i18n$t("Group by:"), selected = character(0))
- } else if (input$tr_change_type == "training") {
- updateSelectInput(session = session, inputId = "tr_vars", label = i18n$t("Block by:"), selected = character(0))
- } else if (input$tr_change_type == "spread") {
- updateSelectInput(session = session, inputId = "tr_vars", selected = character(0))
- } else {
- updateSelectInput(session = session, inputId = "tr_vars", label = i18n$t("Select variables:"))
- }
-})
-
-fix_ext <- function(ext) {
- gsub("(^\\s+|\\s+$)", "", ext) %>%
- gsub("\\s+", "_", .) %>%
- gsub("[[:punct:]]", "_", .) %>%
- gsub("\\.{2,}", ".", .) %>%
- gsub("_{2,}", "_", .)
-}
-
-.change_type <- function(dataset, fun, tr_ts, vars = "", .ext = "",
- store_dat = "", store = TRUE) {
- .ext <- fix_ext(.ext)
-
- if (!is.empty(tr_ts)) {
- tr_ts <- lapply(tr_ts, function(x) x[!is.na(x)]) %>%
- (function(x) x[sapply(x, length) > 0])
- }
-
- if (!store || !is.character(dataset)) {
- fun <- get(fun)
- if (is.empty(.ext)) {
- do.call(mutate_at, c(list(.tbl = dataset, .vars = vars), .funs = fun, tr_ts))
- } else {
- do.call(mutate_at, c(list(.tbl = dataset, .vars = vars), .funs = fun, tr_ts)) %>%
- set_colnames(paste0(vars, .ext))
- }
- } else {
- if (store_dat == "") store_dat <- dataset
- if (is.empty(tr_ts)) {
- tr_ts <- ""
- } else {
- tr_ts <- deparse(tr_ts, control = getOption("dctrl"), width.cutoff = 500L) %>%
- sub("list\\(", ", ", .) %>%
- sub("\\)$", "", .)
- }
-
- if (is.empty(.ext)) {
- paste0(i18n$t("## change variable type\n"), store_dat, " <- mutate_at(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, tr_ts, ")\n")
- } else {
- paste0(i18n$t("## change variable type\n"), store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, tr_ts, ", .ext = \"", .ext, "\")\n")
- }
- }
-}
-
-.transform <- function(dataset, fun, vars = "", .ext = "",
- store_dat = "", store = TRUE) {
- .ext <- fix_ext(.ext)
-
- if (!store && !is.character(dataset)) {
- fun <- get(fun)
- if (is.empty(.ext)) {
- result <- try(mutate_at(dataset, .vars = vars, .funs = fun), silent = TRUE)
- } else {
- result <- try(mutate_at(dataset, .vars = vars, .funs = fun) %>% set_colnames(paste0(vars, .ext)), silent = TRUE)
- }
- if (inherits(result, "try-error")) {
- paste0(
- "\n", i18n$t("The transformation type you selected generated an error."), "\n\n",
- i18n$t("The error message was:"), "\n\n",
- attr(result, "condition")$message, "\n\n",
- i18n$t("Please change the selection of variables or the transformation type and try again.")
- )
- } else {
- result
- }
- } else {
- if (store_dat == "") store_dat <- dataset
- if (is.empty(.ext)) {
- paste0(i18n$t("## transform variable\n"), store_dat, " <- mutate_at(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, ")\n")
- } else {
- paste0(i18n$t("## transform variable\n"), store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, ", .ext = \"", .ext, "\")\n")
- }
- }
-}
-
-.create <- function(dataset, cmd, byvar = "",
- store_dat = "", store = TRUE) {
- ## replacing problem symbols (e.g., em dash, and curly quotes)
- cmd <- fix_smart(cmd)
-
- if (!store || !is.character(dataset)) {
- if (is.empty(cmd)) {
- return(dataset)
- }
-
- cmd <- gsub("\"", "\'", cmd) %>%
- gsub("<-", "=", .)
- vars <- strsplit(cmd, ";\\s*")[[1]] %>%
- strsplit("=") %>%
- sapply("[", 1) %>%
- gsub("\\s+", "", .)
-
- ## in case the create command tries to over-write the group-by variable ...
- if (any(byvar %in% vars)) {
- byvar <- base::setdiff(byvar, vars)
- updateSelectInput(session = session, inputId = "tr_vars", selected = character(0))
- }
-
- ## useful if functions created in Report > R and Report > Rmd are
- ## called in Data > Transform > Create
- ## add environment to do.call call instead?
- ## https://stackoverflow.com/questions/26028488/do-call-specify-environment-inside-function
- attach(r_data)
- on.exit(detach(r_data))
-
- if (is.empty(byvar)) {
- ## using within and do.call because it provides better err messages
- nvar <- try(do.call(within, list(dataset, parse(text = cmd))), silent = TRUE)
- } else {
- dots <- rlang::parse_exprs(cmd) %>%
- set_names(vars)
-
- nvar <- try(
- group_by_at(dataset, .vars = byvar) %>%
- mutate(!!!dots),
- silent = TRUE
- )
- vars <- c(byvar, vars) ## to avoid the 'added group_by variable' message
- }
- if (inherits(nvar, "try-error")) {
- paste0(
- "\n", i18n$t("The create command was not valid."), "\n",
- i18n$t("The command entered was:"), "\n\n",
- cmd, "\n\n",
- i18n$t("The error message was:"), "\n\n",
- attr(nvar, "condition")$message, "\n\n",
- i18n$t("Please try again. Examples are shown in the help file")
- )
- } else {
- select_at(nvar, .vars = vars) %>%
- ungroup()
- }
- } else {
- if (store_dat == "") store_dat <- dataset
- cmd <- gsub(";", ", ", cmd) %>%
- gsub("<-", "=", .) %>%
- gsub("\\s{2,}", " ", .)
-
- if (is.empty(byvar)) {
- paste0(i18n$t("## create new variable(s)\n"), store_dat, " <- mutate(", dataset, ", ", cmd, ")\n")
- } else {
- paste0(i18n$t("## create new variable(s)\n"), store_dat, " <- group_by(", dataset, ", ", paste0(byvar, collapse = ", "), ") %>%\n mutate(", cmd, ") %>%\n ungroup()\n")
- }
- }
-}
-
-.recode <- function(dataset, var, cmd, rcname = "",
- store_dat = "", store = TRUE) {
- cmd <- cmd %>%
- gsub("\\n", "", .) %>%
- gsub("\"", "\'", .)
- if (is.empty(rcname)) rcname <- paste0(var, "_rc")
-
- if (!store || !is.character(dataset)) {
- if (cmd == "") {
- return(dataset)
- }
- nvar <- try(car::Recode(dataset[[var]], cmd), silent = TRUE)
- if (inherits(nvar, "try-error")) {
- paste0(
- i18n$t("The recode command was not valid."), "\n",
- i18n$t("The error message was:"), "\n",
- attr(nvar, "condition")$message, "\n",
- i18n$t("Please try again. Examples are shown in the help file (click the ? icon).")
- )
- } else {
- as.data.frame(nvar, stringsAsFactors = FALSE) %>% setNames(rcname)
- }
- } else {
- if (store_dat == "") store_dat <- dataset
- paste0(i18n$t("## recode variable\n"), store_dat, " <- mutate(", dataset, ", ", rcname, " = car::Recode(", var, ", \"", cmd, "\"))\n")
- }
-}
-
-.rename <- function(dataset, var, rnm, store_dat = "", store = TRUE) {
- rnm <- gsub(";", ",", rnm)
- if (gsub("\\s+", "", rnm) != "") {
- rnm <- unlist(strsplit(rnm, ",")) %>%
- .[1:min(length(.), length(var))] %>%
- gsub("^\\s+|\\s+$", "", .)
- }
- rnm <- fix_names(rnm)
-
- if (!store || !is.character(dataset)) {
- if (all(rnm == "")) {
- return(dataset)
- }
- names(dataset)[seq_len(length(rnm))] <- rnm
- dataset
- } else {
- if (store_dat == "") store_dat <- dataset
- name_check <- fix_names(var) != var
- if (any(name_check)) var[name_check] <- paste0("`", var[name_check], "`")
- paste0(i18n$t("## rename variable(s)\n"), store_dat, " <- dplyr::rename(", dataset, ", ", paste(rnm, var, sep = " = ", collapse = ", "), ")\n")
- }
-}
-
-.replace <- function(dataset, var, rpl, store_dat = "", store = TRUE) {
- if (!all(fix_names(var) == var) || !all(fix_names(rpl) == rpl)) {
- return(i18n$t("\nSome of the variables names used are not valid. Please use 'Rename' to ensure\nvariable names do not have any spaces or symbols and start with a letter"))
- }
-
- if (!store || !is.character(dataset)) {
- select_at(dataset, .vars = rpl) %>% set_colnames(var)
- } else {
- if (store_dat == "") store_dat <- dataset
- paste0(i18n$t("## replace variable(s)\n"), store_dat, " <- mutate(", dataset, ", ", paste(var, rpl, sep = " = ", collapse = ", "), ") %>% select(", paste0("-", rpl, collapse = ", "), ")\n")
- }
-}
-
-.normalize <- function(dataset, vars, nzvar, .ext = paste0("_", nzvar),
- store_dat = "", store = TRUE) {
- .ext <- fix_ext(.ext)
-
- if (!store && !is.character(dataset)) {
- nz <- select_at(dataset, .vars = nzvar)
- dataset <- select_at(dataset, .vars = vars)
- dc <- get_class(dataset)
-
- isnum <- "numeric" == dc | "integer" == dc
- if (sum(isnum) == 0) {
- return(i18n$t("Please select only integer or numeric variables to normalize"))
- }
- vars <- vars[isnum]
- select_at(dataset, .vars = vars) %>%
- (function(x) x / nz[[1]]) %>%
- set_colnames(paste0(vars, .ext))
- } else {
- if (store_dat == "") store_dat <- dataset
- paste0(i18n$t("## normalize variables\n"), store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ normalize(., ", nzvar, "), .ext = \"", .ext, "\")\n")
- }
-}
-
-.tab2dat <- function(dataset, freq, vars = "",
- store_dat = "", store = TRUE) {
- if (!store && !is.character(dataset)) {
- if (is.empty(vars)) vars <- base::setdiff(colnames(dataset), freq)
- select_at(dataset, .vars = unique(c(vars, freq))) %>%
- table2data(freq)
- } else {
- if (store_dat == "") store_dat <- dataset
- if (is.empty(vars)) vars <- base::setdiff(colnames(r_data[[dataset]]), freq)
- vars <- unique(c(vars, freq))
- paste0(i18n$t("## Create data from a table\n"), store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ") %>%\n table2data(\"", freq, "\")\n")
- }
-}
-
-.gather <- function(dataset, vars, key, value,
- store_dat = "", store = TRUE) {
- key <- fix_names(key)
- value <- fix_names(value)
-
- if (!store && !is.character(dataset)) {
- gather(dataset, !!key, !!value, !!vars, factor_key = TRUE)
- } else {
- if (store_dat == "") store_dat <- dataset
- paste0(i18n$t("## Gather columns\n"), store_dat, " <- gather(", dataset, ", ", key, ", ", value, ", ", paste0(vars, collapse = ", "), ", factor_key = TRUE)\n")
- }
-}
-
-.spread <- function(dataset, key, value, fill = NA,
- vars = "", store_dat = "", store = TRUE) {
- if (!store && !is.character(dataset)) {
- if (!vars[1] == "") dataset <- select_at(dataset, .vars = vars)
- cn <- colnames(dataset)
- if (!all(key %in% cn) || !value %in% cn) {
- return(i18n$t("Key or value variable is not in the dataset"))
- }
- nr <- distinct_at(dataset, .vars = base::setdiff(cn, value), .keep_all = TRUE) %>%
- nrow()
- if (nr < nrow(dataset)) {
- return(i18n$t("Rows are not unique. Select additional variables"))
- }
- if (length(key) > 1) {
- dataset <- unite_(dataset, paste(key, collapse = "_"), key)
- key <- paste(key, collapse = "_")
- }
- spread(dataset, !!key, !!value, fill = fill)
- } else {
- if (store_dat == "") store_dat <- dataset
- cmd <- ""
- if (!is.empty(vars)) {
- cmd <- paste0(i18n$t("## Select columns\n"), store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
- dataset <- store_dat
- }
- if (length(key) > 1) {
- cmd <- paste0(cmd, i18n$t("## Unite columns\n"), store_dat, " <- unite(", dataset, ", ", paste(key, collapse = "_"), ", ", paste0(key, collapse = ", "), ")\n")
- key <- paste(key, collapse = "_")
- dataset <- store_dat
- }
- if (!is.na(fill)) {
- paste0(cmd, i18n$t("## Spread columns\n"), store_dat, " <- spread(", dataset, ", ", key, ", ", value, ", fill = ", fill, ")\n")
- } else {
- paste0(cmd, i18n$t("## Spread columns\n"), store_dat, " <- spread(", dataset, ", ", key, ", ", value, ")\n")
- }
- }
-}
-
-.expand <- function(dataset, vars = "", store_dat = "", store = TRUE) {
- if (!store || !is.character(dataset)) {
- if (all(vars == "")) {
- paste0(i18n$t("Select variables to expand"))
- } else {
- expand.grid(level_list(select_at(dataset, .vars = vars)))
- }
- } else {
- paste0(i18n$t("## expanding data\n"), store_dat, " <- expand.grid(level_list(", dataset, ", ", paste0(vars, collapse = ", "), "))\n")
- }
-}
-
-.bin <- function(dataset, vars = "", bins = 10, rev = FALSE,
- .ext = "_dec", store_dat = "", store = TRUE) {
- .ext <- fix_ext(.ext)
-
- if (!store && !is.character(dataset)) {
- if (is.na(bins) || !is.integer(bins)) {
- return(i18n$t("Please specify the (integer) number of bins to use"))
- }
- if (!all(sapply(dataset[, vars, drop = FALSE], is.numeric))) {
- return(i18n$t("Binning can only be applied to numeric variables"))
- }
- select_at(dataset, .vars = vars) %>%
- mutate_all(~ xtile(., bins, rev = rev)) %>%
- set_colnames(paste0(vars, .ext))
- } else {
- if (store_dat == "") store_dat <- dataset
- if (rev) {
- paste0(i18n$t("## bin variables\n"), store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ xtile(., ", bins, ", rev = TRUE), .ext = \"", .ext, "\")\n")
- } else {
- paste0(i18n$t("## bin variables\n"), store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ xtile(., ", bins, "), .ext = \"", .ext, "\")\n")
- }
- }
-}
-
-.training <- function(dataset, vars = "", n = .7, nr = 100,
- name = "training", seed = 1234,
- store_dat = "", store = TRUE) {
- if (is.empty(name)) {
- name <- "training"
- } else {
- name <- fix_names(name)
- }
- if (!store && !is.character(dataset)) {
- n <- n %>%
- (function(x) ifelse(x < 0 || is.na(x) || x > nr, 0.7, x))
- if (is.empty(vars)) {
- blocks <- NULL
- } else {
- blocks <- dataset[, vars]
- }
-
- make_train(n, nr, blocks = blocks, seed = seed) %>%
- data.frame(stringsAsFactors = FALSE) %>%
- setNames(name)
- } else {
- if (store_dat == "") store_dat <- dataset
- if (is.empty(vars)) {
- paste0(i18n$t("## created variable to select training sample\n"), store_dat, " <- mutate(", dataset, ", ", name, " = make_train(", n, ", n(), seed = ", seed, "))\n")
- } else {
- paste0(i18n$t("## created variable to select training sample\n"), store_dat, " <- mutate(", dataset, ", ", name, " = make_train(", n, ", blocks = select(", dataset, ", ", paste0(vars, collapse = ", "), "), seed = ", seed, "))\n")
- }
- }
-}
-
-## Make a training variable that selects randomly by ID
-# http://rpackages.ianhowson.com/cran/dplyr/man/group_indices.html
-# http://rpackages.ianhowson.com/cran/dplyr/man/sample.html
-
-.reorg_levs <- function(dataset, fct, levs, repl = NA, name = fct,
- store_dat = "", store = TRUE) {
- if (is.empty(name)) name <- fct
- if (!store || !is.character(dataset)) {
- data.frame(refactor(dataset[[fct]], levs = levs, repl = repl), stringsAsFactors = FALSE) %>%
- setNames(name)
- } else {
- if (store_dat == "") store_dat <- dataset
- repl <- if (is.na(repl)) "" else paste0(", repl = \"", repl, "\"")
- paste0(i18n$t("## change factor levels\n"), store_dat, " <- mutate(", dataset, ", ", name, " = refactor(", fct, ", levs = c(\"", paste0(levs, collapse = "\",\""), "\")", repl, "))\n")
- }
-}
-
-.reorg_vars <- function(dataset, vars = "", store_dat = "", store = TRUE) {
- if (!store || !is.character(dataset)) {
- get_data(dataset, vars, filt = "", na.rm = FALSE, envir = r_data)
- } else {
- if (store_dat == "") store_dat <- dataset
- paste0(i18n$t("## reorder/remove variables\n"), store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
- }
-}
-
-.remove_na <- function(dataset, vars = "", store_dat = "",
- nr_col = 0, store = TRUE) {
- if (!store || !is.character(dataset)) {
- if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
- dataset %>% filter(complete.cases(.))
- } else {
- ind <- select_at(dataset, .vars = vars) %>% complete.cases()
- filter(dataset, ind)
- }
- } else {
- if (store_dat == "") store_dat <- dataset
- if (all(vars == "") || length(unique(vars)) == nr_col) vars <- "."
- paste0(i18n$t("## remove missing values\n"), store_dat, " <- ", dataset, " %>% filter(complete.cases(", paste0(vars, collapse = ", "), "))\n")
- }
-}
-
-.remove_dup <- function(dataset, vars = "", store_dat = "",
- nr_col = 0, store = TRUE) {
- if (!store || !is.character(dataset)) {
- if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
- dat <- distinct(dataset)
- } else {
- dat <- distinct_at(dataset, .vars = vars, .keep_all = TRUE)
- }
-
- if (nrow(dat) == nrow(dataset)) {
- paste0(i18n$t("No duplicates found (n_distinct = "), nrow(dat), ")")
- } else {
- dat
- }
- } else {
- if (all(vars == "") || length(unique(vars)) == nr_col) {
- paste0(i18n$t("## remove duplicate rows\n"), store_dat, " <- distinct(", dataset, ")\n")
- } else {
- paste0(i18n$t("## remove rows with duplicate values\n"), store_dat, " <- distinct(", dataset, ", ", paste0(vars, collapse = ", "), ", .keep_all = TRUE)\n")
- }
- }
-}
-
-.show_dup <- function(dataset, vars = "", store_dat = "",
- nr_col = 0, store = TRUE) {
- if (!store || !is.character(dataset)) {
- if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
- dat <- filter(dataset, duplicated(dataset))
- } else {
- dat <- dataset %>%
- group_by_at(.vars = vars) %>%
- filter(n() > 1)
-
- if (nrow(dat) > 0) {
- dat <- mutate(dat, nr_dup = 1:n()) %>%
- arrange_at(.vars = vars) %>%
- ungroup()
- }
- }
-
- if (nrow(dat) == 0) {
- ## "No duplicates found"
- paste0(i18n$t("No duplicates found (n_distinct = "), nrow(dataset), ")")
- } else {
- dat
- }
- } else {
- if (all(vars == "") || length(unique(vars)) == nr_col) {
- paste0(i18n$t("## show duplicate rows\n"), store_dat, " <- ", dataset, " %>% filter(duplicated(.))\n")
- } else {
- paste0(i18n$t("## show rows with duplicate values\n"), store_dat, " <- show_duplicated(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
- }
- }
-}
-
-.holdout <- function(dataset, vars = "", filt = "", arr = "", rows = NULL, rev = FALSE,
- store_dat = "", store = TRUE) {
- if (is.empty(filt) && is.empty(rows)) {
- return(paste0(i18n$t("No filter or slice found (n = "), nrow(dataset), ")"))
- }
-
- if (!store || !is.character(dataset)) {
- get_data(dataset, vars = vars, filt = filt, arr = arr, rows = rows, na.rm = FALSE, rev = rev, envir = r_data)
- } else {
- cmd <- glue("{i18n$t('## create holdout sample')}\n{store_dat} <- get_data(\n {dataset}") # ", vars = {vars}, filt = {filt}, arr = {arr}, rows = {rows}, rev = {rev})\n")
-
- if (!all(vars == "")) {
- cmd <- glue('{cmd},\n vars = c("{paste0(vars, collapse = ", ")}")', .trim = FALSE)
- }
- if (!is.empty(filt)) {
- filt <- gsub("\"", "'", filt)
- cmd <- glue('{cmd},\n filt = "{filt}"', .trim = FALSE)
- }
- if (!is.empty(arr)) {
- cmd <- glue('{cmd},\n arr = "{arr}"', .trim = FALSE)
- }
- if (!is.empty(rows)) {
- cmd <- glue('{cmd},\n rows = "{rows}"', .trim = FALSE)
- }
- glue("{cmd},\n rev = {rev}\n)", .trim = FALSE)
- }
-}
-
-inp_vars <- function(inp, rval = "") {
- if (is.empty(input[[inp]]) || !available(input[[inp]])) rval else input[[inp]]
-}
-
-transform_main <- reactive({
- req(input$tr_change_type)
- if (not_available(input$tr_vars)) {
- if (input$tr_change_type == "none" && length(input$tr_vars) == 0) {
- return(i18n$t("Select a transformation type or select variables to summarize"))
- } else if (input$tr_change_type == "none" && length(input$tr_vars) > 0) {
- return(i18n$t("Select a transformation type or select variables to summarize"))
- } else if (input$tr_change_type == "type") {
- return(i18n$t("Select one or more variables to change their type"))
- } else if (input$tr_change_type == "transform") {
- return(i18n$t("Select one or more variables to apply a transformation"))
- } else if (input$tr_change_type == "rename") {
- return(i18n$t("Select one or more variables to rename"))
- } else if (input$tr_change_type == "replace") {
- return(i18n$t("Select one or more variables to replace"))
- } else if (input$tr_change_type == "recode") {
- return(i18n$t("Select a variable to recode"))
- } else if (input$tr_change_type == "bin") {
- return(i18n$t("Select one or more variables to bin"))
- } else if (input$tr_change_type == "reorg_levs") {
- return(i18n$t("Select a single variable of type factor to change the ordering and/or number of levels"))
- } else if (input$tr_change_type == "normalize") {
- return(i18n$t("Select one or more variables to normalize"))
- } else if (input$tr_change_type == "remove_na") {
- return(i18n$t("Select one or more variables to see the effects of removing missing values"))
- } else if (input$tr_change_type %in% c("remove_dup", "show_dup")) {
- return(i18n$t("Select one or more variables to see the effects of removing duplicates"))
- } else if (input$tr_change_type == "gather") {
- return(i18n$t("Select one or more variables to gather"))
- } else if (input$tr_change_type == "expand") {
- return(i18n$t("Select one or more variables to expand"))
- }
- }
-
- ## get the active dataset, filter not applied when called from transform tab
- dat <- .get_data_transform()
-
- ## what data to pass on ...
- if (input$tr_change_type %in% c("", "none")) {
- return(select_at(dat, .vars = input$tr_vars))
- }
-
- ## reorganize variables
- if (input$tr_change_type == "reorg_vars") {
- return(.reorg_vars(dat, inp_vars("tr_reorg_vars"), store = FALSE))
- }
-
- ## create training variable
- if (input$tr_change_type == "training") {
- return(.training(dat, n = input$tr_training_n, nr = nrow(dat), name = input$tr_training, vars = inp_vars("tr_vars"), seed = input$tr_training_seed, store = FALSE))
- }
-
- if (input$tr_change_type == "create") {
- if (input$tr_create == "") {
- return(i18n$t("Specify an equation to create a new variable and press 'return'. **\n** See the help file for examples"))
- } else {
- return(.create(dat, input$tr_create, byvar = inp_vars("tr_vars"), store = FALSE))
- }
- }
-
- if (input$tr_change_type == "tab2dat") {
- if (is.null(input$tr_tab2dat) || input$tr_tab2dat == "none") {
- return(i18n$t("Select a frequency variable"))
- } else if (!is.empty(input$tr_vars) && all(input$tr_vars == input$tr_tab2dat)) {
- return(i18n$t("Select at least one variable that is not the frequency variable"))
- } else {
- req(available(input$tr_tab2dat))
- return(.tab2dat(dat, input$tr_tab2dat, vars = inp_vars("tr_vars"), store = FALSE))
- }
- }
-
- if (input$tr_change_type == "clip") {
- if (input$tr_paste == "") {
- return(i18n$t("Copy-and-paste data with a header row from a spreadsheet"))
- } else {
- cpdat <- try(read.table(header = TRUE, comment.char = "", fill = TRUE, sep = "\t", as.is = TRUE, text = input$tr_paste), silent = TRUE)
- if (inherits(cpdat, "try-error")) {
- return(i18n$t("The pasted data was not well formatted. Please make sure the number of rows **\n** in the data in Radiant and in the spreadsheet are the same and try again."))
- } else if (nrow(cpdat) != nrow(dat)) {
- return(i18n$t("The pasted data does not have the correct number of rows. Please make sure **\n** the number of rows in the data in Radiant and in the spreadsheet are the **\n** same and try again."))
- } else {
- return(as.data.frame(cpdat, check.names = FALSE, stringsAsFactors = FALSE) %>% to_fct())
- }
- }
- }
-
- ## filter data for holdout
- if (input$tr_change_type == "holdout") {
- if (!input$show_filter) {
- return(i18n$t("\nNo filter, arrange, or slice set. Click the 'Filter' checkbox and enter a\nfilter, arrange, and/or a slice of rows to keep as the main data. The holdout\nwill have have all rows not selected by the filter, arrange, and slice"))
- }
- return(.holdout(dat, inp_vars("tr_vars"), filt = input$data_filter, arr = input$data_arrange, rows = input$data_rows, rev = input$tr_holdout_rev, store = FALSE))
- }
-
- ## spread a variable
- if (input$tr_change_type == "spread") {
- if (is.empty(input$tr_spread_key, "none") ||
- is.empty(input$tr_spread_value, "none")) {
- return(i18n$t("Select a Key and Value pair to spread"))
- }
- return(.spread(dat, key = input$tr_spread_key, value = input$tr_spread_value, fill = input$tr_spread_fill, vars = inp_vars("tr_vars"), store = FALSE))
- }
-
- ## only use the functions below if variables have been selected
- if (!is.empty(input$tr_vars)) {
- if (not_available(input$tr_vars)) {
- return()
- }
-
- ## remove missing values
- if (input$tr_change_type == "remove_na") {
- return(.remove_na(dat, inp_vars("tr_vars"), store = FALSE))
- }
-
- ## bin variables
- if (input$tr_change_type == "bin") {
- return(.bin(dat, inp_vars("tr_vars"), bins = input$tr_bin_n, rev = input$tr_bin_rev, .ext = input$tr_ext_bin, store = FALSE))
- }
-
- ## gather variables
- if (input$tr_change_type == "gather") {
- if (is.empty(input$tr_gather_key) || is.empty(input$tr_gather_value)) {
- return(i18n$t("Provide a name for the Key and Value variables"))
- }
- return(.gather(dat, inp_vars("tr_vars"), key = input$tr_gather_key, value = input$tr_gather_value, store = FALSE))
- }
-
- ## remove duplicates
- if (input$tr_change_type == "remove_dup") {
- return(.remove_dup(dat, inp_vars("tr_vars"), store = FALSE))
- }
-
- ## expand grid
- if (input$tr_change_type == "expand") {
- return(.expand(dat, inp_vars("tr_vars"), store = FALSE))
- }
-
- ## show duplicates
- if (input$tr_change_type == "show_dup") {
- return(.show_dup(dat, inp_vars("tr_vars"), store = FALSE))
- }
-
- if (input$tr_change_type == "normalize") {
- if (is.empty(input$tr_normalizer, "none")) {
- return(i18n$t("Select a normalizing variable"))
- } else {
- return(.normalize(dat, inp_vars("tr_vars"), input$tr_normalizer, .ext = input$tr_ext_nz, store = FALSE))
- }
- }
-
- if (input$tr_change_type == "replace") {
- vars <- input$tr_vars
- rpl <- input$tr_replace
- if (available(rpl)) {
- if (length(vars) != length(rpl)) {
- return(i18n$t(
- "The number of replacement variables ({rpl_len}) is not equal to the number of variables to replace ({vars_len})",
- list(rpl_len = length(rpl), vars_len = length(vars))
- ))
- }
- return(.replace(dat, vars, rpl, store = FALSE))
- } else {
- return(i18n$t("Select one or more variable replacements"))
- }
- }
-
- ## selecting the columns to show
- dat <- select_at(dat, .vars = input$tr_vars)
- vars <- colnames(dat)
-
- ## change in type is always done in-place
- if (input$tr_change_type == "type") {
- if (input$tr_typefunction == "none") {
- return(i18n$t("Select a transformation type for the selected variables"))
- } else {
- if (input$tr_typefunction == "ts") {
- tr_ts <- list(
- start = c(input$tr_ts_start_year, input$tr_ts_start_period),
- end = c(input$tr_ts_end_year, input$tr_ts_end_period),
- frequency = input$tr_ts_frequency
- )
- } else {
- tr_ts <- NULL
- }
- return(.change_type(dat, input$tr_typefunction, tr_ts, inp_vars("tr_vars"), input$tr_typename, store = FALSE))
- }
- }
-
- ## change in type is always done in-place
- if (input$tr_change_type == "transform") {
- if (input$tr_transfunction == "none") {
- return(i18n$t("Select a function to apply to the selected variable(s)"))
- } else {
- return(.transform(dat, input$tr_transfunction, inp_vars("tr_vars"), input$tr_ext, store = FALSE))
- }
- }
-
- if (input$tr_change_type == "reorg_levs") {
- fct <- input$tr_vars[1]
- if (length(unique(dat[[fct]])) > 100) {
- return(i18n$t("Interactive re-ordering is only supported up to 100 levels. See\n?radiant.data::refactor for information on how to re-order levels in R"))
- } else {
- return(.reorg_levs(dat, fct, input$tr_reorg_levs, input$tr_rorepl, input$tr_roname, store = FALSE))
- }
- }
-
- if (input$tr_change_type == "recode") {
- if (is.empty(input$tr_recode)) {
- return(i18n$t("Specify a recode statement, assign a name to the recoded variable, and press 'return'. **\n** See the help file for examples"))
- } else {
- return(.recode(dat, inp_vars("tr_vars")[1], input$tr_recode, input$tr_rcname, store = FALSE))
- }
- }
-
- if (input$tr_change_type == "rename") {
- if (is.empty(input$tr_rename)) {
- return(i18n$t("Specify new names for the selected variables (separated by a ',') and press 'return'"))
- } else {
- if (any(input$tr_rename %in% varnames())) {
- return(i18n$t("One or more of the new variables names already exists in the data. **\n** Change the specified names or use the Replace function"))
- } else {
- return(.rename(dat, inp_vars("tr_vars"), input$tr_rename, store = FALSE))
- }
- }
- }
- }
-
- return(invisible())
-})
-
-output$transform_data <- reactive({
- dataset <- transform_main()
- if (is.null(dataset) || is.character(dataset) || nrow(dataset) == 0 || ncol(dataset) == 0) {
- tr_snippet()
- } else {
- show_data_snippet(dataset)
- }
-})
-
-tr_snippet <- reactive({
- show_data_snippet(.get_data_transform())
-})
-
-output$transform_summary <- renderPrint({
- req(!isTRUE(input$tr_hide))
-
- withProgress(message = i18n$t("Generating summary statistics"), value = 1, {
- dataset <- transform_main()
- })
-
- ## with isolate on the summary wouldn't update when the dataset was changed
- if (is.null(dataset)) {
- return(invisible())
- }
- if (is.character(dataset)) {
- cat("**", dataset, "\n**\n\n")
- } else {
- if (min(dim(dataset)) == 0) {
- cat("**", i18n$t("The selected operation resulted in an empty data frame and cannot be executed"), "**\n\n")
- } else {
- if (input$tr_change_type %in% c("", "none")) {
- cat("**", i18n$t("Select a transformation type or select variables to summarize"), "**\n\n")
- } else {
- cat("**", i18n$t("Press the 'Store' button to add your changes to the data"), "**\n\n")
- if (!is.empty(input$tr_vars) && input$tr_change_type == "create") {
- cat("**", i18n$t("Results are grouped by"), paste(input$tr_vars, collapse = ", "), "**\n\n")
- } else if (!is.empty(input$tr_vars) && input$tr_change_type == "training") {
- cat("**", i18n$t("Results are blocked by"), paste(input$tr_vars, collapse = ", "), "**\n\n")
- }
- }
-
- if (input$tr_change_type == "reorg_vars") {
- cat("**", i18n$t("Drag-and-drop to change ordering. Click the x to remove a variable"), "**")
- } else {
- cat(paste0(capture.output(get_summary(dataset)), collapse = "\n"))
- }
- }
- }
-})
-
-observeEvent(input$tr_store, {
- withProgress(message = "Storing transformations", value = 1, {
- dat <- transform_main()
- })
-
- if (is.null(dat)) {
- return()
- } else if (is.character(dat)) {
- return()
- } else if (min(dim(dat)) == 0) {
- return()
- }
-
- ## saving to a new dataset if specified
- df_name <- fix_names(input$tr_name)
- if (input$tr_name != df_name) {
- updateTextInput(session, inputId = "tr_name", value = df_name)
- }
- ncmd <- ""
- if (is.null(r_data[[df_name]])) {
- r_data[[df_name]] <- .get_data_transform()
- r_info[[paste0(df_name, "_descr")]] <- r_info[[paste0(input$dataset, "_descr")]]
- if (!bindingIsActive(as.symbol(df_name), env = r_data)) {
- shiny::makeReactiveBinding(df_name, env = r_data)
- }
- r_info[["datasetlist"]] %<>% c(df_name, .) %>% unique()
-
- ## adding command to ensure new data is in the datasetlist
- if (df_name == input$dataset) {
- ncmd <- paste0("\n", i18n$t("## register the new dataset"), "\nregister(\"", df_name, "\")")
- } else {
- ncmd <- paste0("\n", i18n$t("## register the new dataset"), "\nregister(\"", df_name, "\", \"", input$dataset, "\")")
- }
- } else if (!df_name %in% r_info[["datasetlist"]]) {
- r_info[["datasetlist"]] %<>% c(df_name, .) %>% unique()
-
- ## adding command to ensure new data is in the datasetlist
- if (df_name == input$dataset) {
- ncmd <- paste0("\n", i18n$t("## register the new dataset"), "\nregister(\"", df_name, "\")")
- } else {
- ncmd <- paste0("\n", i18n$t("## register the new dataset"), "\nregister(\"", df_name, "\", \"", input$dataset, "\")")
- }
- }
-
- if (input$tr_change_type == "remove_na") {
- cmd <- .remove_na(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "remove_dup") {
- cmd <- .remove_dup(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "show_dup") {
- cmd <- .show_dup(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "holdout") {
- cmd <- .holdout(input$dataset, vars = input$tr_vars, filt = input$data_filter, arr = input$data_arrange, rows = input$data_rows, rev = input$tr_holdout_rev, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "tab2dat") {
- cmd <- .tab2dat(input$dataset, input$tr_tab2dat, vars = input$tr_vars, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "gather") {
- cmd <- .gather(input$dataset, vars = input$tr_vars, key = input$tr_gather_key, value = input$tr_gather_value, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "spread") {
- cmd <- .spread(input$dataset, key = input$tr_spread_key, value = input$tr_spread_value, fill = input$tr_spread_fill, vars = input$tr_vars, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "expand") {
- cmd <- .expand(input$dataset, vars = input$tr_vars, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "reorg_vars") {
- cmd <- .reorg_vars(input$dataset, vars = input$tr_reorg_vars, df_name)
- r_data[[df_name]] <- dat
- } else if (input$tr_change_type == "type") {
- if (input$tr_typefunction == "ts") {
- tr_ts <- list(
- start = c(input$tr_ts_start_year, input$tr_ts_start_period),
- end = c(input$tr_ts_end_year, input$tr_ts_end_period),
- frequency = input$tr_ts_frequency
- )
- } else {
- tr_ts <- NULL
- }
- cmd <- .change_type(input$dataset, fun = input$tr_typefunction, tr_ts, vars = input$tr_vars, .ext = input$tr_typename, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "transform") {
- cmd <- .transform(input$dataset, fun = input$tr_transfunction, vars = input$tr_vars, .ext = input$tr_ext, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "training") {
- cmd <- .training(input$dataset, n = input$tr_training_n, nr = nrow(dat), name = input$tr_training, vars = input$tr_vars, seed = input$tr_training_seed, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "normalize") {
- cmd <- .normalize(input$dataset, vars = input$tr_vars, nzvar = input$tr_normalizer, .ext = input$tr_ext_nz, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "bin") {
- cmd <- .bin(input$dataset, vars = input$tr_vars, bins = input$tr_bin_n, rev = input$tr_bin_rev, .ext = input$tr_ext_bin, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "reorg_levs") {
- cmd <- .reorg_levs(input$dataset, input$tr_vars[1], input$tr_reorg_levs, input$tr_rorepl, input$tr_roname, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "recode") {
- cmd <- .recode(input$dataset, input$tr_vars[1], input$tr_recode, input$tr_rcname, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "rename") {
- cmd <- .rename(input$dataset, input$tr_vars, input$tr_rename, df_name)
- r_data[[df_name]] %<>% dplyr::rename(!!!setNames(input$tr_vars, colnames(dat)))
- } else if (input$tr_change_type == "create") {
- cmd <- .create(input$dataset, cmd = input$tr_create, byvar = input$tr_vars, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- } else if (input$tr_change_type == "replace") {
- cmd <- .replace(input$dataset, input$tr_vars, input$tr_replace, df_name)
- r_data[[df_name]][, colnames(dat)] <- dat
- r_data[[df_name]][, input$tr_replace] <- list(NULL)
- } else if (input$tr_change_type == "clip") {
- cmd <- paste0(
- i18n$t("## using the clipboard for data transformation may seem convenient"),
- "\n",
- i18n$t("## but it is not 'reproducible' - no command generated"),
- "\n"
- )
- r_data[[df_name]][, colnames(dat)] <- dat
- }
-
- ## uncomment if you want to revert to resetting the transform UI after Store
- # updateTextAreaInput(session, "tr_log", value = paste0(input$tr_log, paste0(cmd, ncmd), "\n"))
-
- ## update the command log
- shinyAce::updateAceEditor(session, "tr_log", value = paste0(input$tr_log, paste0(cmd, ncmd), "\n"))
-
- ## reset input values once the changes have been applied
- # updateSelectInput(session = session, inputId = "tr_change_type", selected = "none")
-
- ## jumps straight to the new dataset
- # updateSelectInput(session = session, inputId = "dataset", selected = df_name)
- if (input$dataset != df_name) {
- showModal(
- modalDialog(
- title = i18n$t("Data Stored"),
- span(
- i18n$t(
- paste0(
- "Dataset '", df_name, "' was successfully added to ",
- "the datasets dropdown. Add code to Report > Rmd or ",
- "Report > R to (re)create the results by clicking the ",
- "report icon on the bottom left of your screen."
- )
- )
- ),
- footer = modalButton(i18n$t("OK")),
- size = "m",
- easyClose = TRUE
- )
- )
- }
-})
-
-observeEvent(input$tr_change_type, {
- ## reset all values when tr_change_type is changed
- updateTextInput(session = session, inputId = "tr_create", value = "")
- updateTextInput(session = session, inputId = "tr_recode", value = "")
- updateTextInput(session = session, inputId = "tr_rename", value = "")
- updateTextInput(session = session, inputId = "tr_paste", value = "")
- updateTextInput(session = session, inputId = "tr_gather_key", value = "")
- updateTextInput(session = session, inputId = "tr_gather_value", value = "")
- updateTextInput(session = session, inputId = "tr_spread_key", value = "")
- updateTextInput(session = session, inputId = "tr_spread_value", value = "")
- updateSelectInput(session = session, inputId = "tr_typefunction", selected = "none")
- updateSelectInput(session = session, inputId = "tr_transfunction", selected = "none")
- updateSelectInput(session = session, inputId = "tr_replace", selected = "None")
- updateSelectInput(session = session, inputId = "tr_normalizer", selected = "none")
- updateSelectInput(session = session, inputId = "tr_tab2dat", selected = "none")
-})
-
-transform_report <- function() {
- cmd <- NULL
- if (!is.empty(input$tr_log)) {
- cmd <- gsub("\n{2,}", "\n", input$tr_log) %>%
- sub("^\n", "", .) %>%
- sub("\n$", "", .)
-
- shinyAce::updateAceEditor(session, "tr_log", value = "")
- }
- update_report(cmd = cmd, outputs = NULL, figs = FALSE)
-}
-
-observeEvent(input$transform_report, {
- r_info[["latest_screenshot"]] <- NULL
- transform_report()
-})
-
-observeEvent(input$transform_screenshot, {
- r_info[["latest_screenshot"]] <- NULL
- radiant_screenshot_modal("modal_transform_screenshot")
-})
-
-observeEvent(input$modal_transform_screenshot, {
- transform_report()
- removeModal()
-})
+## UI-elements for transform
+output$ui_tr_vars <- renderUI({
+ vars <- varnames()
+ req(available(vars))
+ selectInput(
+ "tr_vars", i18n$t("Select variable(s):"),
+ choices = vars,
+ multiple = TRUE,
+ size = min(8, length(vars)),
+ selectize = FALSE
+ )
+})
+
+output$ui_tr_replace <- renderUI({
+ validate(
+ need(available(input$tr_vars), "Select one or more variables to replace")
+ )
+ vars <- varnames()
+ selectInput(
+ "tr_replace", i18n$t("Select replacement variables:"),
+ choices = vars,
+ multiple = TRUE, size = min(2, length(vars)), selectize = FALSE
+ )
+})
+
+output$ui_tr_normalizer <- renderUI({
+ isNum <- .get_class() %in% c("numeric", "integer", "ts")
+ vars <- varnames()[isNum]
+ if (length(vars) == 0) {
+ return()
+ }
+ selectInput(
+ "tr_normalizer", i18n$t("Normalizing variable:"),
+ c("None" = "none", vars),
+ selected = "none"
+ )
+})
+
+output$ui_tr_tab2dat <- renderUI({
+ isNum <- .get_class() %in% c("numeric", "integer", "ts")
+ vars <- varnames()[isNum]
+ selectInput(
+ "tr_tab2dat", i18n$t("Frequency variable:"),
+ c("None" = "none", vars),
+ selected = "none"
+ )
+})
+
+output$ui_tr_gather <- renderUI({
+ tagList(
+ tags$table(
+ tags$td(returnTextInput("tr_gather_key", i18n$t("Key name:"), value = "key")),
+ tags$td(returnTextInput("tr_gather_value", i18n$t("Value name:"), value = "value"))
+ )
+ )
+})
+
+output$ui_tr_spread <- renderUI({
+ req(input$tr_change_type)
+ vars <- c("None" = "none", varnames())
+ tagList(
+ selectizeInput(
+ "tr_spread_key", i18n$t("Key(s):"),
+ choices = vars[-1],
+ selected = NULL, multiple = TRUE,
+ options = list(placeholder = "None", plugins = list("remove_button", "drag_drop"))
+ ),
+ selectInput("tr_spread_value", i18n$t("Value:"), choices = vars, selected = "none", multiple = FALSE),
+ numericInput("tr_spread_fill", i18n$t("Fill:"), value = NA)
+ )
+})
+
+output$ui_tr_reorg_vars <- renderUI({
+ req(input$tr_change_type)
+ vars <- varnames()
+ validate(
+ need(length(vars) < 101, "Interactive re-ordering is only supported up to 100 variables. See ?dplyr::select for information on how to re-order variables in R")
+ )
+ selectizeInput(
+ "tr_reorg_vars", i18n$t("Reorder/remove variables:"),
+ choices = vars,
+ selected = vars, multiple = TRUE,
+ options = list(placeholder = "Select variable(s)", plugins = list("remove_button", "drag_drop"))
+ )
+})
+
+output$ui_tr_reorg_levs <- renderUI({
+ req(input$tr_change_type)
+ validate(
+ need(available(input$tr_vars), i18n$t("Select a single variable of type factor or character"))
+ )
+ fctCol <- input$tr_vars[1]
+ fct <- .get_data_transform()[[fctCol]]
+ levs <- if (is.factor(fct)) levels(fct) else levels(as_factor(fct))
+ validate(
+ need(length(levs) < 101, i18n$t("Interactive re-ordering is only supported up to 100 levels. See ?radiant.data::refactor for information on how to re-order levels in R"))
+ )
+ tagList(
+ selectizeInput(
+ "tr_reorg_levs", i18n$t("Reorder/remove levels:"),
+ choices = levs,
+ selected = levs, multiple = TRUE,
+ options = list(placeholder = "Select level(s)", plugins = list("remove_button", "drag_drop"))
+ ),
+ textInput(
+ "tr_rorepl", i18n$t("Replacement level name:"),
+ placeholder = i18n$t("Provide name for missing levels"),
+ value = NA
+ )
+ )
+})
+
+transform_auto_complete <- reactive({
+ req(input$dataset)
+ comps <- list(r_info[["datasetlist"]][input$dataset], as.vector(varnames()))
+ names(comps) <- c("{datasets}", paste0("{", input$dataset, "}"))
+ comps
+})
+
+output$ui_tr_log <- renderUI({
+ tagList(
+ HTML("
"),
+ shinyAce::aceEditor(
+ "tr_log",
+ mode = "r",
+ theme = getOption("radiant.ace_theme", default = "tomorrow"),
+ wordWrap = TRUE,
+ debounce = 0,
+ value = state_init("tr_log", "") %>% fix_smart(),
+ vimKeyBinding = getOption("radiant.ace_vim.keys", default = FALSE),
+ tabSize = getOption("radiant.ace_tabSize", 2),
+ useSoftTabs = getOption("radiant.ace_useSoftTabs", TRUE),
+ showInvisibles = getOption("radiant.ace_showInvisibles", FALSE),
+ autoScrollEditorIntoView = TRUE,
+ autoComplete = getOption("radiant.ace_autoComplete", "enable"),
+ autoCompleters = c("static", "rlang"),
+ autoCompleteList = isolate(transform_auto_complete()),
+ minLines = 5,
+ maxLines = 15
+ )
+ )
+})
+
+transform_annotater <- shinyAce::aceAnnotate("tr_log")
+transform_tooltip <- shinyAce::aceTooltip("tr_log")
+transform_ac <- shinyAce::aceAutocomplete("tr_log")
+
+observe({
+ shinyAce::updateAceEditor(
+ session, "tr_log",
+ autoCompleters = c("static", "rlang"),
+ autoCompleteList = transform_auto_complete()
+ )
+})
+
+
+ext_options <- list(
+ "none" = "", "log" = "_ln", "exp" = "_exp",
+ "square" = "_sq", "sqrt" = "_sqrt", "center" = "_ct",
+ "standardize" = "_st", "inverse" = "_inv"
+)
+
+output$ui_tr_ext <- renderUI({
+ trfun <- input$tr_transfunction
+ if (is.empty(trfun)) trfun <- "none"
+ returnTextInput(
+ "tr_ext", i18n$t("Variable name extension:"),
+ value = ext_options[[trfun]]
+ )
+})
+
+output$ui_tr_ext_nz <- renderUI({
+ if (is.empty(input$tr_normalizer, "none")) {
+ return()
+ }
+ returnTextInput(
+ "tr_ext_nz", i18n$t("Variable name extension:"),
+ value = paste0("_", input$tr_normalizer)
+ )
+})
+
+output$ui_tr_rcname <- renderUI({
+ if (is.empty(input$tr_vars)) {
+ return()
+ }
+ returnTextInput(
+ "tr_rcname", i18n$t("Recoded variable name:"),
+ value = paste0(input$tr_vars[1], "_rc")
+ )
+})
+
+output$ui_tr_ext_bin <- renderUI({
+ if (is.empty(input$tr_vars)) {
+ return()
+ }
+ returnTextInput(
+ "tr_ext_bin", i18n$t("Variable name extension:"),
+ value = "_dec"
+ )
+})
+
+output$ui_tr_roname <- renderUI({
+ if (is.empty(input$tr_vars)) {
+ return()
+ }
+ returnTextInput(
+ "tr_roname", i18n$t("Variable name:"),
+ value = input$tr_vars[1]
+ )
+})
+
+output$ui_tr_typename <- renderUI({
+ if (is.empty(input$tr_vars)) {
+ return()
+ }
+ returnTextInput(
+ "tr_typename", i18n$t("Variable name extension:"),
+ value = "",
+ placeholder = i18n$t("Add extension to variable name")
+ )
+})
+
+output$ui_tr_rename <- renderUI({
+ validate(
+ need(available(input$tr_vars), i18n$t("Select one or more variables to rename"))
+ )
+ if (length(input$tr_vars) < 2) {
+ mess <- i18n$t("Type a new name for the selected variable and press return")
+ } else {
+ mess <- i18n$t("Type new names for the selected variables, separated by a , and press return")
+ }
+ returnTextAreaInput(
+ "tr_rename", i18n$t("Rename variable(s):"),
+ value = "",
+ rows = 3,
+ placeholder = mess
+ )
+})
+
+output$ui_tr_dataset <- renderUI({
+ tr_dataset <- input$dataset
+ if (input$tr_change_type == "show_dup") {
+ tr_dataset <- paste0(tr_dataset, "_dup")
+ } else if (input$tr_change_type == "holdout") {
+ tr_dataset <- paste0(tr_dataset, "_holdout")
+ } else if (input$tr_change_type == "tab2dat") {
+ tr_dataset <- paste0(tr_dataset, "_dat")
+ } else if (input$tr_change_type == "gather") {
+ tr_dataset <- paste0(tr_dataset, "_gathered")
+ } else if (input$tr_change_type == "spread") {
+ tr_dataset <- paste0(tr_dataset, "_spread")
+ } else if (input$tr_change_type == "expand") {
+ tr_dataset <- paste0(tr_dataset, "_expand")
+ }
+ tags$table(
+ tags$td(textInput("tr_name", i18n$t("Store changes in:"), tr_dataset)),
+ tags$td(actionButton("tr_store", i18n$t("Store"), icon = icon("plus", verify_fa = FALSE), class = "btn-success"), class = "top")
+ )
+})
+
+trans_options <- list(
+ "None" = "none", "Ln (natural log)" = "log", "Exp" = "exp",
+ "Square" = "square", "Square-root" = "sqrt",
+ "Center" = "center", "Standardize" = "standardize", "Inverse" = "inverse"
+)
+
+type_options <- list(
+ "None" = "none", "As factor" = "as_factor",
+ "As numeric" = "as_numeric", "As integer" = "as_integer",
+ "As character" = "as_character", "As time series" = "ts",
+ "As date (mdy)" = "as_mdy", "As date (dmy)" = "as_dmy",
+ "As date (ymd)" = "as_ymd",
+ "As date/time (mdy_hms)" = "as_mdy_hms",
+ "As date/time (mdy_hm)" = "as_mdy_hm",
+ "As date/time (dmy_hms)" = "as_dmy_hms",
+ "As date/time (dmy_hm)" = "as_dmy_hm",
+ "As date/time (ymd_hms)" = "as_ymd_hms",
+ "As date/time (ymd_hm)" = "as_ymd_hm"
+)
+
+trans_types <- list(
+ ` ` = c("无(汇总)" = "none"),
+ `修改变量` = c(
+ "分箱" = "bin",
+ "更改类型" = "type",
+ "标准化" = "normalize",
+ "重编码" = "recode",
+ "重新排序/移除变量" = "reorg_levs",
+ "重命名" = "rename",
+ "替换" = "replace",
+ "转换" = "transform"
+ ),
+ `创建新变量` = c(
+ "剪贴板" = "clip",
+ "创建" = "create"
+ ),
+ `清洗数据` = c(
+ "移除缺失值" = "remove_na",
+ "重新排序或移除变量" = "reorg_vars",
+ "移除重复值" = "remove_dup",
+ "显示重复值" = "show_dup"
+ ),
+ `扩展数据` = c(
+ "扩展网格" = "expand",
+ "表格转数据" = "tab2dat"
+ ),
+ `拆分数据` = c(
+ "留存样本" = "holdout",
+ "训练变量" = "training"
+ ),
+ `整洁数据` = c(
+ "汇集列" = "gather",
+ "扩展列" = "spread"
+ )
+)
+
+output$ui_Transform <- renderUI({
+ ## Inspired by Ian Fellow's transform ui in JGR/Deducer
+ tagList(
+ wellPanel(
+ checkboxInput("tr_hide", i18n$t("Hide summaries"), state_init("tr_hide", FALSE)),
+ uiOutput("ui_tr_vars"),
+ selectizeInput("tr_change_type", i18n$t("Transformation type:"), trans_types, selected = "none"),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'type'",
+ selectInput("tr_typefunction", i18n$t("Change variable type:"), type_options, selected = "none"),
+ conditionalPanel(
+ condition = "input.tr_typefunction == 'ts'",
+ tags$table(
+ tags$td(numericInput("tr_ts_start_year", label = "Start year:", min = 1, value = NA)),
+ tags$td(numericInput("tr_ts_start_period", label = "Start period:", min = 1, value = 1))
+ ),
+ tags$table(
+ tags$td(numericInput("tr_ts_end_year", label = "End year:", value = NA)),
+ tags$td(numericInput("tr_ts_end_period", label = "End period:", value = NA))
+ ),
+ numericInput("tr_ts_frequency", label = "Frequency:", min = 1, value = 52)
+ )
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'transform'",
+ selectInput("tr_transfunction", i18n$t("Apply function:"), trans_options)
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'normalize'",
+ uiOutput("ui_tr_normalizer")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'tab2dat'",
+ uiOutput("ui_tr_tab2dat")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'gather'",
+ uiOutput("ui_tr_gather")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'spread'",
+ uiOutput("ui_tr_spread")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'create'",
+ returnTextAreaInput(
+ "tr_create", i18n$t("Create:"),
+ rows = 3,
+ placeholder = i18n$t("Type a formula to create a new variable (e.g., x = y - z) and press return")
+ )
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'bin'",
+ numericInput("tr_bin_n", label = i18n$t("Nr bins:"), min = 2, value = 10),
+ checkboxInput("tr_bin_rev", i18n$t("Reverse order"), value = FALSE),
+ uiOutput("ui_tr_ext_bin")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'training'",
+ tags$table(
+ tags$td(numericInput("tr_training_n", label = i18n$t("Size:"), min = 0, value = .7)),
+ tags$td(textInput("tr_training", i18n$t("Variable name:"), "training"))
+ ),
+ numericInput("tr_training_seed", label = i18n$t("Seed:"), value = 1234)
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'holdout'",
+ checkboxInput("tr_holdout_rev", i18n$t("Reverse filter and slice"), value = TRUE)
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'clip'",
+ textAreaInput(
+ "tr_paste", i18n$t("Paste from spreadsheet:"),
+ rows = 3,
+ value = "",
+ resize = "vertical",
+ placeholder = i18n$t("Copy-and-paste data with a header row from a spreadsheet"),
+ )
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'recode'",
+ returnTextAreaInput(
+ "tr_recode", i18n$t("Recode:"),
+ value = "",
+ rows = 3,
+ placeholder = i18n$t("Select a variable, specify how it should be recoded (e.g., lo:20 = 0; else = 1), and press return")
+ )
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'rename'",
+ uiOutput("ui_tr_rename")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'replace'",
+ uiOutput("ui_tr_replace")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'reorg_vars'",
+ uiOutput("ui_tr_reorg_vars")
+ ),
+ conditionalPanel(
+ condition = "input.tr_change_type == 'reorg_levs'",
+ uiOutput("ui_tr_reorg_levs")
+ ),
+ conditionalPanel(
+ "input.tr_change_type == 'transform'",
+ uiOutput("ui_tr_ext")
+ ),
+ conditionalPanel(
+ "input.tr_change_type == 'recode'",
+ uiOutput("ui_tr_rcname")
+ ),
+ conditionalPanel(
+ "input.tr_change_type == 'normalize'",
+ uiOutput("ui_tr_ext_nz")
+ ),
+ conditionalPanel(
+ "input.tr_change_type == 'reorg_levs'",
+ uiOutput("ui_tr_roname")
+ ),
+ conditionalPanel(
+ "input.tr_change_type == 'type'",
+ uiOutput("ui_tr_typename")
+ )
+ ),
+ conditionalPanel(
+ "input.tr_change_type != 'none'",
+ wellPanel(uiOutput("ui_tr_dataset"))
+ ),
+ help_and_report(
+ modal_title = "Transform",
+ fun_name = "transform",
+ help_file = inclMD(file.path(getOption("radiant.path.data"), "app/tools/help/transform.md")),
+ lic = "by-sa"
+ )
+ )
+})
+
+## ensure no variables are selected 'by accident' when creating a new variable
+observeEvent(input$tr_change_type, {
+ if (input$tr_change_type == "create") {
+ updateSelectInput(session = session, inputId = "tr_vars", label = "Group by:", selected = character(0))
+ } else if (input$tr_change_type == "training") {
+ updateSelectInput(session = session, inputId = "tr_vars", label = "Block by:", selected = character(0))
+ } else if (input$tr_change_type == "spread") {
+ updateSelectInput(session = session, inputId = "tr_vars", selected = character(0))
+ } else {
+ updateSelectInput(session = session, inputId = "tr_vars", label = i18n$t("Select variables:"))
+ }
+})
+
+fix_ext <- function(ext) {
+ gsub("(^\\s+|\\s+$)", "", ext) %>%
+ gsub("\\s+", "_", .) %>%
+ gsub("[[:punct:]]", "_", .) %>%
+ gsub("\\.{2,}", ".", .) %>%
+ gsub("_{2,}", "_", .)
+}
+
+.change_type <- function(dataset, fun, tr_ts, vars = "", .ext = "",
+ store_dat = "", store = TRUE) {
+ .ext <- fix_ext(.ext)
+
+ if (!is.empty(tr_ts)) {
+ tr_ts <- lapply(tr_ts, function(x) x[!is.na(x)]) %>%
+ (function(x) x[sapply(x, length) > 0])
+ }
+
+ if (!store || !is.character(dataset)) {
+ fun <- get(fun)
+ if (is.empty(.ext)) {
+ do.call(mutate_at, c(list(.tbl = dataset, .vars = vars), .funs = fun, tr_ts))
+ } else {
+ do.call(mutate_at, c(list(.tbl = dataset, .vars = vars), .funs = fun, tr_ts)) %>%
+ set_colnames(paste0(vars, .ext))
+ }
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (is.empty(tr_ts)) {
+ tr_ts <- ""
+ } else {
+ tr_ts <- deparse(tr_ts, control = getOption("dctrl"), width.cutoff = 500L) %>%
+ sub("list\\(", ", ", .) %>%
+ sub("\\)$", "", .)
+ }
+
+ if (is.empty(.ext)) {
+ paste0("## change variable type\n", store_dat, " <- mutate_at(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, tr_ts, ")\n")
+ } else {
+ paste0("## change variable type\n", store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, tr_ts, ", .ext = \"", .ext, "\")\n")
+ }
+ }
+}
+
+.transform <- function(dataset, fun, vars = "", .ext = "",
+ store_dat = "", store = TRUE) {
+ .ext <- fix_ext(.ext)
+
+ if (!store && !is.character(dataset)) {
+ fun <- get(fun)
+ if (is.empty(.ext)) {
+ result <- try(mutate_at(dataset, .vars = vars, .funs = fun), silent = TRUE)
+ } else {
+ result <- try(mutate_at(dataset, .vars = vars, .funs = fun) %>% set_colnames(paste0(vars, .ext)), silent = TRUE)
+ }
+ if (inherits(result, "try-error")) {
+ paste0("\nThe transformation type you selected generated an error.\n\nThe error message was:\n\n", attr(result, "condition")$message, "\n\nPlease change the selection of variables or the transformation type and try again.")
+ } else {
+ result
+ }
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (is.empty(.ext)) {
+ paste0("## transform variable\n", store_dat, " <- mutate_at(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, ")\n")
+ } else {
+ paste0("## transform variable\n", store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ", fun, ", .ext = \"", .ext, "\")\n")
+ }
+ }
+}
+
+.create <- function(dataset, cmd, byvar = "",
+ store_dat = "", store = TRUE) {
+ ## replacing problem symbols (e.g., em dash, and curly quotes)
+ cmd <- fix_smart(cmd)
+
+ if (!store || !is.character(dataset)) {
+ if (is.empty(cmd)) {
+ return(dataset)
+ }
+
+ cmd <- gsub("\"", "\'", cmd) %>%
+ gsub("<-", "=", .)
+ vars <- strsplit(cmd, ";\\s*")[[1]] %>%
+ strsplit("=") %>%
+ sapply("[", 1) %>%
+ gsub("\\s+", "", .)
+
+ ## in case the create command tries to over-write the group-by variable ...
+ if (any(byvar %in% vars)) {
+ byvar <- base::setdiff(byvar, vars)
+ updateSelectInput(session = session, inputId = "tr_vars", selected = character(0))
+ }
+
+ ## useful if functions created in Report > R and Report > Rmd are
+ ## called in Data > Transform > Create
+ ## add environment to do.call call instead?
+ ## https://stackoverflow.com/questions/26028488/do-call-specify-environment-inside-function
+ attach(r_data)
+ on.exit(detach(r_data))
+
+ if (is.empty(byvar)) {
+ ## using within and do.call because it provides better err messages
+ nvar <- try(do.call(within, list(dataset, parse(text = cmd))), silent = TRUE)
+ } else {
+ dots <- rlang::parse_exprs(cmd) %>%
+ set_names(vars)
+
+ nvar <- try(
+ group_by_at(dataset, .vars = byvar) %>%
+ mutate(!!!dots),
+ silent = TRUE
+ )
+ vars <- c(byvar, vars) ## to avoid the 'added group_by variable' message
+ }
+ if (inherits(nvar, "try-error")) {
+ paste0("\nThe create command was not valid. The command entered was:\n\n", cmd, "\n\nThe error message was:\n\n", attr(nvar, "condition")$message, "\n\nPlease try again. Examples are shown in the help file")
+ } else {
+ select_at(nvar, .vars = vars) %>%
+ ungroup()
+ }
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ cmd <- gsub(";", ", ", cmd) %>%
+ gsub("<-", "=", .) %>%
+ gsub("\\s{2,}", " ", .)
+
+ if (is.empty(byvar)) {
+ paste0("## create new variable(s)\n", store_dat, " <- mutate(", dataset, ", ", cmd, ")\n")
+ } else {
+ paste0("## create new variable(s)\n", store_dat, " <- group_by(", dataset, ", ", paste0(byvar, collapse = ", "), ") %>%\n mutate(", cmd, ") %>%\n ungroup()\n")
+ }
+ }
+}
+
+.recode <- function(dataset, var, cmd, rcname = "",
+ store_dat = "", store = TRUE) {
+ cmd <- cmd %>%
+ gsub("\\n", "", .) %>%
+ gsub("\"", "\'", .)
+ if (is.empty(rcname)) rcname <- paste0(var, "_rc")
+
+ if (!store || !is.character(dataset)) {
+ if (cmd == "") {
+ return(dataset)
+ }
+ nvar <- try(car::Recode(dataset[[var]], cmd), silent = TRUE)
+ if (inherits(nvar, "try-error")) {
+ paste0("The recode command was not valid. The error message was:\n", attr(nvar, "condition")$message, "\nPlease try again. Examples are shown in the help file (click the ? icon).")
+ } else {
+ as.data.frame(nvar, stringsAsFactors = FALSE) %>% setNames(rcname)
+ }
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ paste0("## recode variable\n", store_dat, " <- mutate(", dataset, ", ", rcname, " = car::Recode(", var, ", \"", cmd, "\"))\n")
+ }
+}
+
+.rename <- function(dataset, var, rnm, store_dat = "", store = TRUE) {
+ rnm <- gsub(";", ",", rnm)
+ if (gsub("\\s+", "", rnm) != "") {
+ rnm <- unlist(strsplit(rnm, ",")) %>%
+ .[1:min(length(.), length(var))] %>%
+ gsub("^\\s+|\\s+$", "", .)
+ }
+ rnm <- fix_names(rnm)
+
+ if (!store || !is.character(dataset)) {
+ if (all(rnm == "")) {
+ return(dataset)
+ }
+ names(dataset)[seq_len(length(rnm))] <- rnm
+ dataset
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ name_check <- fix_names(var) != var
+ if (any(name_check)) var[name_check] <- paste0("`", var[name_check], "`")
+ paste0("## rename variable(s)\n", store_dat, " <- dplyr::rename(", dataset, ", ", paste(rnm, var, sep = " = ", collapse = ", "), ")\n")
+ }
+}
+
+.replace <- function(dataset, var, rpl, store_dat = "", store = TRUE) {
+ if (!all(fix_names(var) == var) || !all(fix_names(rpl) == rpl)) {
+ return("\nSome of the variables names used are not valid. Please use 'Rename' to ensure\nvariable names do not have any spaces or symbols and start with a letter")
+ }
+
+ if (!store || !is.character(dataset)) {
+ select_at(dataset, .vars = rpl) %>% set_colnames(var)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ paste0("## replace variable(s)\n", store_dat, " <- mutate(", dataset, ", ", paste(var, rpl, sep = " = ", collapse = ", "), ") %>% select(", paste0("-", rpl, collapse = ", "), ")\n")
+ }
+}
+
+.normalize <- function(dataset, vars, nzvar, .ext = paste0("_", nzvar),
+ store_dat = "", store = TRUE) {
+ .ext <- fix_ext(.ext)
+
+ if (!store && !is.character(dataset)) {
+ nz <- select_at(dataset, .vars = nzvar)
+ dataset <- select_at(dataset, .vars = vars)
+ dc <- get_class(dataset)
+
+ isnum <- "numeric" == dc | "integer" == dc
+ if (sum(isnum) == 0) {
+ return("Please select only integer or numeric variables to normalize")
+ }
+ vars <- vars[isnum]
+ select_at(dataset, .vars = vars) %>%
+ (function(x) x / nz[[1]]) %>%
+ set_colnames(paste0(vars, .ext))
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ paste0("## normalize variables\n", store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ normalize(., ", nzvar, "), .ext = \"", .ext, "\")\n")
+ }
+}
+
+.tab2dat <- function(dataset, freq, vars = "",
+ store_dat = "", store = TRUE) {
+ if (!store && !is.character(dataset)) {
+ if (is.empty(vars)) vars <- base::setdiff(colnames(dataset), freq)
+ select_at(dataset, .vars = unique(c(vars, freq))) %>%
+ table2data(freq)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (is.empty(vars)) vars <- base::setdiff(colnames(r_data[[dataset]]), freq)
+ vars <- unique(c(vars, freq))
+ paste0("## Create data from a table\n", store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ") %>%\n table2data(\"", freq, "\")\n")
+ }
+}
+
+.gather <- function(dataset, vars, key, value,
+ store_dat = "", store = TRUE) {
+ key <- fix_names(key)
+ value <- fix_names(value)
+
+ if (!store && !is.character(dataset)) {
+ gather(dataset, !!key, !!value, !!vars, factor_key = TRUE)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ paste0("## Gather columns\n", store_dat, " <- gather(", dataset, ", ", key, ", ", value, ", ", paste0(vars, collapse = ", "), ", factor_key = TRUE)\n")
+ }
+}
+
+.spread <- function(dataset, key, value, fill = NA,
+ vars = "", store_dat = "", store = TRUE) {
+ if (!store && !is.character(dataset)) {
+ if (!vars[1] == "") dataset <- select_at(dataset, .vars = vars)
+ cn <- colnames(dataset)
+ if (!all(key %in% cn) || !value %in% cn) {
+ return("Key or value variable is not in the dataset")
+ }
+ nr <- distinct_at(dataset, .vars = base::setdiff(cn, value), .keep_all = TRUE) %>%
+ nrow()
+ if (nr < nrow(dataset)) {
+ return("Rows are not unique. Select additional variables")
+ }
+ if (length(key) > 1) {
+ dataset <- unite_(dataset, paste(key, collapse = "_"), key)
+ key <- paste(key, collapse = "_")
+ }
+ spread(dataset, !!key, !!value, fill = fill)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ cmd <- ""
+ if (!is.empty(vars)) {
+ cmd <- paste0("## Select columns\n", store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
+ dataset <- store_dat
+ }
+ if (length(key) > 1) {
+ cmd <- paste0(cmd, "## Unite columns\n", store_dat, " <- unite(", dataset, ", ", paste(key, collapse = "_"), ", ", paste0(key, collapse = ", "), ")\n")
+ key <- paste(key, collapse = "_")
+ dataset <- store_dat
+ }
+ if (!is.na(fill)) {
+ paste0(cmd, "## Spread columns\n", store_dat, " <- spread(", dataset, ", ", key, ", ", value, ", fill = ", fill, ")\n")
+ } else {
+ paste0(cmd, "## Spread columns\n", store_dat, " <- spread(", dataset, ", ", key, ", ", value, ")\n")
+ }
+ }
+}
+
+.expand <- function(dataset, vars = "", store_dat = "", store = TRUE) {
+ if (!store || !is.character(dataset)) {
+ if (all(vars == "")) {
+ paste0("Select variables to expand")
+ } else {
+ expand.grid(level_list(select_at(dataset, .vars = vars)))
+ }
+ } else {
+ paste0("## expanding data\n", store_dat, " <- expand.grid(level_list(", dataset, ", ", paste0(vars, collapse = ", "), "))\n")
+ }
+}
+
+.bin <- function(dataset, vars = "", bins = 10, rev = FALSE,
+ .ext = "_dec", store_dat = "", store = TRUE) {
+ .ext <- fix_ext(.ext)
+
+ if (!store && !is.character(dataset)) {
+ if (is.na(bins) || !is.integer(bins)) {
+ return("Please specify the (integer) number of bins to use")
+ }
+ if (!all(sapply(dataset[, vars, drop = FALSE], is.numeric))) {
+ return("Binning can only be applied to numeric variables")
+ }
+ select_at(dataset, .vars = vars) %>%
+ mutate_all(~ xtile(., bins, rev = rev)) %>%
+ set_colnames(paste0(vars, .ext))
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (rev) {
+ paste0("## bin variables\n", store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ xtile(., ", bins, ", rev = TRUE), .ext = \"", .ext, "\")\n")
+ } else {
+ paste0("## bin variables\n", store_dat, " <- mutate_ext(", dataset, ", .vars = vars(", paste0(vars, collapse = ", "), "), .funs = ~ xtile(., ", bins, "), .ext = \"", .ext, "\")\n")
+ }
+ }
+}
+
+.training <- function(dataset, vars = "", n = .7, nr = 100,
+ name = "training", seed = 1234,
+ store_dat = "", store = TRUE) {
+ if (is.empty(name)) {
+ name <- "training"
+ } else {
+ name <- fix_names(name)
+ }
+ if (!store && !is.character(dataset)) {
+ n <- n %>%
+ (function(x) ifelse(x < 0 || is.na(x) || x > nr, 0.7, x))
+ if (is.empty(vars)) {
+ blocks <- NULL
+ } else {
+ blocks <- dataset[, vars]
+ }
+
+ make_train(n, nr, blocks = blocks, seed = seed) %>%
+ data.frame(stringsAsFactors = FALSE) %>%
+ setNames(name)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (is.empty(vars)) {
+ paste0("## created variable to select training sample\n", store_dat, " <- mutate(", dataset, ", ", name, " = make_train(", n, ", n(), seed = ", seed, "))\n")
+ } else {
+ paste0("## created variable to select training sample\n", store_dat, " <- mutate(", dataset, ", ", name, " = make_train(", n, ", blocks = select(", dataset, ", ", paste0(vars, collapse = ", "), "), seed = ", seed, "))\n")
+ }
+ }
+}
+
+## Make a training variable that selects randomly by ID
+# http://rpackages.ianhowson.com/cran/dplyr/man/group_indices.html
+# http://rpackages.ianhowson.com/cran/dplyr/man/sample.html
+
+.reorg_levs <- function(dataset, fct, levs, repl = NA, name = fct,
+ store_dat = "", store = TRUE) {
+ if (is.empty(name)) name <- fct
+ if (!store || !is.character(dataset)) {
+ data.frame(refactor(dataset[[fct]], levs = levs, repl = repl), stringsAsFactors = FALSE) %>%
+ setNames(name)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ repl <- if (is.na(repl)) "" else paste0(", repl = \"", repl, "\"")
+ paste0("## change factor levels\n", store_dat, " <- mutate(", dataset, ", ", name, " = refactor(", fct, ", levs = c(\"", paste0(levs, collapse = "\",\""), "\")", repl, "))\n")
+ }
+}
+
+.reorg_vars <- function(dataset, vars = "", store_dat = "", store = TRUE) {
+ if (!store || !is.character(dataset)) {
+ get_data(dataset, vars, filt = "", na.rm = FALSE, envir = r_data)
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ paste0("## reorder/remove variables\n", store_dat, " <- select(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
+ }
+}
+
+.remove_na <- function(dataset, vars = "", store_dat = "",
+ nr_col = 0, store = TRUE) {
+ if (!store || !is.character(dataset)) {
+ if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
+ dataset %>% filter(complete.cases(.))
+ } else {
+ ind <- select_at(dataset, .vars = vars) %>% complete.cases()
+ filter(dataset, ind)
+ }
+ } else {
+ if (store_dat == "") store_dat <- dataset
+ if (all(vars == "") || length(unique(vars)) == nr_col) vars <- "."
+ paste0("## remove missing values\n", store_dat, " <- ", dataset, " %>% filter(complete.cases(", paste0(vars, collapse = ", "), "))\n")
+ }
+}
+
+.remove_dup <- function(dataset, vars = "", store_dat = "",
+ nr_col = 0, store = TRUE) {
+ if (!store || !is.character(dataset)) {
+ if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
+ dat <- distinct(dataset)
+ } else {
+ dat <- distinct_at(dataset, .vars = vars, .keep_all = TRUE)
+ }
+
+ if (nrow(dat) == nrow(dataset)) {
+ paste0("No duplicates found (n_distinct = ", nrow(dat), ")")
+ } else {
+ dat
+ }
+ } else {
+ if (all(vars == "") || length(unique(vars)) == nr_col) {
+ paste0("## remove duplicate rows\n", store_dat, " <- distinct(", dataset, ")\n")
+ } else {
+ paste0("## remove rows with duplicate values\n", store_dat, " <- distinct(", dataset, ", ", paste0(vars, collapse = ", "), ", .keep_all = TRUE)\n")
+ }
+ }
+}
+
+.show_dup <- function(dataset, vars = "", store_dat = "",
+ nr_col = 0, store = TRUE) {
+ if (!store || !is.character(dataset)) {
+ if (all(vars == "") || length(unique(vars)) == ncol(dataset)) {
+ dat <- filter(dataset, duplicated(dataset))
+ } else {
+ dat <- dataset %>%
+ group_by_at(.vars = vars) %>%
+ filter(n() > 1)
+
+ if (nrow(dat) > 0) {
+ dat <- mutate(dat, nr_dup = 1:n()) %>%
+ arrange_at(.vars = vars) %>%
+ ungroup()
+ }
+ }
+
+ if (nrow(dat) == 0) {
+ ## "No duplicates found"
+ paste0("No duplicates found (n_distinct = ", nrow(dataset), ")")
+ } else {
+ dat
+ }
+ } else {
+ if (all(vars == "") || length(unique(vars)) == nr_col) {
+ paste0("## show duplicate rows\n", store_dat, " <- ", dataset, " %>% filter(duplicated(.))\n")
+ } else {
+ paste0("## show rows with duplicate values\n", store_dat, " <- show_duplicated(", dataset, ", ", paste0(vars, collapse = ", "), ")\n")
+ }
+ }
+}
+
+.holdout <- function(dataset, vars = "", filt = "", arr = "", rows = NULL, rev = FALSE,
+ store_dat = "", store = TRUE) {
+ if (is.empty(filt) && is.empty(rows)) {
+ return(paste0("No filter or slice found (n = ", nrow(dataset), ")"))
+ }
+
+ if (!store || !is.character(dataset)) {
+ get_data(dataset, vars = vars, filt = filt, arr = arr, rows = rows, na.rm = FALSE, rev = rev, envir = r_data)
+ } else {
+ cmd <- glue("## create holdout sample\n{store_dat} <- get_data(\n {dataset}") # ", vars = {vars}, filt = {filt}, arr = {arr}, rows = {rows}, rev = {rev})\n")
+
+ if (!all(vars == "")) {
+ cmd <- glue('{cmd},\n vars = c("{paste0(vars, collapse = ", ")}")', .trim = FALSE)
+ }
+ if (!is.empty(filt)) {
+ filt <- gsub("\"", "'", filt)
+ cmd <- glue('{cmd},\n filt = "{filt}"', .trim = FALSE)
+ }
+ if (!is.empty(arr)) {
+ cmd <- glue('{cmd},\n arr = "{arr}"', .trim = FALSE)
+ }
+ if (!is.empty(rows)) {
+ cmd <- glue('{cmd},\n rows = "{rows}"', .trim = FALSE)
+ }
+ glue("{cmd},\n rev = {rev}\n)", .trim = FALSE)
+ }
+}
+
+inp_vars <- function(inp, rval = "") {
+ if (is.empty(input[[inp]]) || !available(input[[inp]])) rval else input[[inp]]
+}
+
+transform_main <- reactive({
+ req(input$tr_change_type)
+ if (not_available(input$tr_vars)) {
+ if (input$tr_change_type == "none" && length(input$tr_vars) == 0) {
+ return("Select a transformation type or select variables to summarize")
+ } else if (input$tr_change_type == "none" && length(input$tr_vars) > 0) {
+ return("Select a transformation type or select variables to summarize")
+ } else if (input$tr_change_type == "type") {
+ return("Select one or more variables to change their type")
+ } else if (input$tr_change_type == "transform") {
+ return("Select one or more variables to apply a transformation")
+ } else if (input$tr_change_type == "rename") {
+ return("Select one or more variables to rename")
+ } else if (input$tr_change_type == "replace") {
+ return("Select one or more variables to replace")
+ } else if (input$tr_change_type == "recode") {
+ return("Select a variable to recode")
+ } else if (input$tr_change_type == "bin") {
+ return("Select one or more variables to bin")
+ } else if (input$tr_change_type == "reorg_levs") {
+ return("Select a single variable of type factor to change the ordering and/or number of levels")
+ } else if (input$tr_change_type == "normalize") {
+ return("Select one or more variables to normalize")
+ } else if (input$tr_change_type == "remove_na") {
+ return("Select one or more variables to see the effects of removing missing values")
+ } else if (input$tr_change_type %in% c("remove_dup", "show_dup")) {
+ return("Select one or more variables to see the effects of removing duplicates")
+ } else if (input$tr_change_type == "gather") {
+ return("Select one or more variables to gather")
+ } else if (input$tr_change_type == "expand") {
+ return("Select one or more variables to expand")
+ }
+ }
+
+ ## get the active dataset, filter not applied when called from transform tab
+ dat <- .get_data_transform()
+
+ ## what data to pass on ...
+ if (input$tr_change_type %in% c("", "none")) {
+ return(select_at(dat, .vars = input$tr_vars))
+ }
+
+ ## reorganize variables
+ if (input$tr_change_type == "reorg_vars") {
+ return(.reorg_vars(dat, inp_vars("tr_reorg_vars"), store = FALSE))
+ }
+
+ ## create training variable
+ if (input$tr_change_type == "training") {
+ return(.training(dat, n = input$tr_training_n, nr = nrow(dat), name = input$tr_training, vars = inp_vars("tr_vars"), seed = input$tr_training_seed, store = FALSE))
+ }
+
+ if (input$tr_change_type == "create") {
+ if (input$tr_create == "") {
+ return("Specify an equation to create a new variable and press 'return'. **\n** See the help file for examples")
+ } else {
+ return(.create(dat, input$tr_create, byvar = inp_vars("tr_vars"), store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "tab2dat") {
+ if (is.null(input$tr_tab2dat) || input$tr_tab2dat == "none") {
+ return("Select a frequency variable")
+ } else if (!is.empty(input$tr_vars) && all(input$tr_vars == input$tr_tab2dat)) {
+ return("Select at least one variable that is not the frequency variable")
+ } else {
+ req(available(input$tr_tab2dat))
+ return(.tab2dat(dat, input$tr_tab2dat, vars = inp_vars("tr_vars"), store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "clip") {
+ if (input$tr_paste == "") {
+ return("Copy-and-paste data with a header row from a spreadsheet")
+ } else {
+ cpdat <- try(read.table(header = TRUE, comment.char = "", fill = TRUE, sep = "\t", as.is = TRUE, text = input$tr_paste), silent = TRUE)
+ if (inherits(cpdat, "try-error")) {
+ return("The pasted data was not well formatted. Please make sure the number of rows **\n** in the data in Radiant and in the spreadsheet are the same and try again.")
+ } else if (nrow(cpdat) != nrow(dat)) {
+ return("The pasted data does not have the correct number of rows. Please make sure **\n** the number of rows in the data in Radiant and in the spreadsheet are the **\n** same and try again.")
+ } else {
+ return(as.data.frame(cpdat, check.names = FALSE, stringsAsFactors = FALSE) %>% to_fct())
+ }
+ }
+ }
+
+ ## filter data for holdout
+ if (input$tr_change_type == "holdout") {
+ if (!input$show_filter) {
+ return("\nNo filter, arrange, or slice set. Click the 'Filter' checkbox and enter a\nfilter, arrange, and/or a slice of rows to keep as the main data. The holdout\nwill have have all rows not selected by the filter, arrange, and slice")
+ }
+ return(.holdout(dat, inp_vars("tr_vars"), filt = input$data_filter, arr = input$data_arrange, rows = input$data_rows, rev = input$tr_holdout_rev, store = FALSE))
+ }
+
+ ## spread a variable
+ if (input$tr_change_type == "spread") {
+ if (is.empty(input$tr_spread_key, "none") ||
+ is.empty(input$tr_spread_value, "none")) {
+ return("Select a Key and Value pair to spread")
+ }
+ return(.spread(dat, key = input$tr_spread_key, value = input$tr_spread_value, fill = input$tr_spread_fill, vars = inp_vars("tr_vars"), store = FALSE))
+ }
+
+ ## only use the functions below if variables have been selected
+ if (!is.empty(input$tr_vars)) {
+ if (not_available(input$tr_vars)) {
+ return()
+ }
+
+ ## remove missing values
+ if (input$tr_change_type == "remove_na") {
+ return(.remove_na(dat, inp_vars("tr_vars"), store = FALSE))
+ }
+
+ ## bin variables
+ if (input$tr_change_type == "bin") {
+ return(.bin(dat, inp_vars("tr_vars"), bins = input$tr_bin_n, rev = input$tr_bin_rev, .ext = input$tr_ext_bin, store = FALSE))
+ }
+
+ ## gather variables
+ if (input$tr_change_type == "gather") {
+ if (is.empty(input$tr_gather_key) || is.empty(input$tr_gather_value)) {
+ return("Provide a name for the Key and Value variables")
+ }
+ return(.gather(dat, inp_vars("tr_vars"), key = input$tr_gather_key, value = input$tr_gather_value, store = FALSE))
+ }
+
+ ## remove duplicates
+ if (input$tr_change_type == "remove_dup") {
+ return(.remove_dup(dat, inp_vars("tr_vars"), store = FALSE))
+ }
+
+ ## expand grid
+ if (input$tr_change_type == "expand") {
+ return(.expand(dat, inp_vars("tr_vars"), store = FALSE))
+ }
+
+ ## show duplicates
+ if (input$tr_change_type == "show_dup") {
+ return(.show_dup(dat, inp_vars("tr_vars"), store = FALSE))
+ }
+
+ if (input$tr_change_type == "normalize") {
+ if (is.empty(input$tr_normalizer, "none")) {
+ return("Select a normalizing variable")
+ } else {
+ return(.normalize(dat, inp_vars("tr_vars"), input$tr_normalizer, .ext = input$tr_ext_nz, store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "replace") {
+ vars <- input$tr_vars
+ rpl <- input$tr_replace
+ if (available(rpl)) {
+ if (length(vars) != length(rpl)) {
+ return(paste0("The number of replacement variables (", length(rpl), ") is not equal to the number of variables to replace (", length(vars), ")"))
+ }
+ return(.replace(dat, vars, rpl, store = FALSE))
+ } else {
+ return("Select one or more variable replacements")
+ }
+ }
+
+ ## selecting the columns to show
+ dat <- select_at(dat, .vars = input$tr_vars)
+ vars <- colnames(dat)
+
+ ## change in type is always done in-place
+ if (input$tr_change_type == "type") {
+ if (input$tr_typefunction == "none") {
+ return("Select a transformation type for the selected variables")
+ } else {
+ if (input$tr_typefunction == "ts") {
+ tr_ts <- list(
+ start = c(input$tr_ts_start_year, input$tr_ts_start_period),
+ end = c(input$tr_ts_end_year, input$tr_ts_end_period),
+ frequency = input$tr_ts_frequency
+ )
+ } else {
+ tr_ts <- NULL
+ }
+ return(.change_type(dat, input$tr_typefunction, tr_ts, inp_vars("tr_vars"), input$tr_typename, store = FALSE))
+ }
+ }
+
+ ## change in type is always done in-place
+ if (input$tr_change_type == "transform") {
+ if (input$tr_transfunction == "none") {
+ return("Select a function to apply to the selected variable(s)")
+ } else {
+ return(.transform(dat, input$tr_transfunction, inp_vars("tr_vars"), input$tr_ext, store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "reorg_levs") {
+ fct <- input$tr_vars[1]
+ if (length(unique(dat[[fct]])) > 100) {
+ return("Interactive re-ordering is only supported up to 100 levels. See\n?radiant.data::refactor for information on how to re-order levels in R")
+ } else {
+ return(.reorg_levs(dat, fct, input$tr_reorg_levs, input$tr_rorepl, input$tr_roname, store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "recode") {
+ if (is.empty(input$tr_recode)) {
+ return("Specify a recode statement, assign a name to the recoded variable, and press 'return'. **\n** See the help file for examples")
+ } else {
+ return(.recode(dat, inp_vars("tr_vars")[1], input$tr_recode, input$tr_rcname, store = FALSE))
+ }
+ }
+
+ if (input$tr_change_type == "rename") {
+ if (is.empty(input$tr_rename)) {
+ return("Specify new names for the selected variables (separated by a ',') and press 'return'")
+ } else {
+ if (any(input$tr_rename %in% varnames())) {
+ return("One or more of the new variables names already exists in the data. **\n** Change the specified names or use the Replace function")
+ } else {
+ return(.rename(dat, inp_vars("tr_vars"), input$tr_rename, store = FALSE))
+ }
+ }
+ }
+ }
+
+ return(invisible())
+})
+
+output$transform_data <- reactive({
+ dataset <- transform_main()
+ if (is.null(dataset) || is.character(dataset) || nrow(dataset) == 0 || ncol(dataset) == 0) {
+ tr_snippet()
+ } else {
+ show_data_snippet(dataset)
+ }
+})
+
+tr_snippet <- reactive({
+ show_data_snippet(.get_data_transform())
+})
+
+output$transform_summary <- renderPrint({
+ req(!isTRUE(input$tr_hide))
+
+ withProgress(message = "Generating summary statistics", value = 1, {
+ dataset <- transform_main()
+ })
+
+ ## with isolate on the summary wouldn't update when the dataset was changed
+ if (is.null(dataset)) {
+ return(invisible())
+ }
+ if (is.character(dataset)) {
+ cat("**", dataset, "\n**\n\n")
+ } else {
+ if (min(dim(dataset)) == 0) {
+ cat("** The selected operation resulted in an empty data frame and cannot be executed **\n\n")
+ } else {
+ if (input$tr_change_type %in% c("", "none")) {
+ cat("** Select a transformation type or select variables to summarize **\n\n")
+ } else {
+ cat("** Press the 'Store' button to add your changes to the data **\n\n")
+ if (!is.empty(input$tr_vars) && input$tr_change_type == "create") {
+ cat("** Results are grouped by", paste(input$tr_vars, collapse = ", "), "**\n\n")
+ } else if (!is.empty(input$tr_vars) && input$tr_change_type == "training") {
+ cat("** Results are blocked by", paste(input$tr_vars, collapse = ", "), "**\n\n")
+ }
+ }
+
+ if (input$tr_change_type == "reorg_vars") {
+ cat("** Drag-and-drop to change ordering. Click the x to remove a variable **")
+ } else {
+ cat(paste0(capture.output(get_summary(dataset)), collapse = "\n"))
+ }
+ }
+ }
+})
+
+observeEvent(input$tr_store, {
+ withProgress(message = "Storing transformations", value = 1, {
+ dat <- transform_main()
+ })
+
+ if (is.null(dat)) {
+ return()
+ } else if (is.character(dat)) {
+ return()
+ } else if (min(dim(dat)) == 0) {
+ return()
+ }
+
+ ## saving to a new dataset if specified
+ df_name <- fix_names(input$tr_name)
+ if (input$tr_name != df_name) {
+ updateTextInput(session, inputId = "tr_name", value = df_name)
+ }
+ ncmd <- ""
+ if (is.null(r_data[[df_name]])) {
+ r_data[[df_name]] <- .get_data_transform()
+ r_info[[paste0(df_name, "_descr")]] <- r_info[[paste0(input$dataset, "_descr")]]
+ if (!bindingIsActive(as.symbol(df_name), env = r_data)) {
+ shiny::makeReactiveBinding(df_name, env = r_data)
+ }
+ r_info[["datasetlist"]] %<>% c(df_name, .) %>% unique()
+
+ ## adding command to ensure new data is in the datasetlist
+ if (df_name == input$dataset) {
+ ncmd <- paste0("\n## register the new dataset\nregister(\"", df_name, "\")")
+ } else {
+ ncmd <- paste0("\n## register the new dataset\nregister(\"", df_name, "\", \"", input$dataset, "\")")
+ }
+ } else if (!df_name %in% r_info[["datasetlist"]]) {
+ r_info[["datasetlist"]] %<>% c(df_name, .) %>% unique()
+
+ ## adding command to ensure new data is in the datasetlist
+ if (df_name == input$dataset) {
+ ncmd <- paste0("\n## register the new dataset\nregister(\"", df_name, "\")")
+ } else {
+ ncmd <- paste0("\n## register the new dataset\nregister(\"", df_name, "\", \"", input$dataset, "\")")
+ }
+ }
+
+ if (input$tr_change_type == "remove_na") {
+ cmd <- .remove_na(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "remove_dup") {
+ cmd <- .remove_dup(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "show_dup") {
+ cmd <- .show_dup(input$dataset, vars = input$tr_vars, df_name, nr_col = ncol(dat))
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "holdout") {
+ cmd <- .holdout(input$dataset, vars = input$tr_vars, filt = input$data_filter, arr = input$data_arrange, rows = input$data_rows, rev = input$tr_holdout_rev, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "tab2dat") {
+ cmd <- .tab2dat(input$dataset, input$tr_tab2dat, vars = input$tr_vars, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "gather") {
+ cmd <- .gather(input$dataset, vars = input$tr_vars, key = input$tr_gather_key, value = input$tr_gather_value, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "spread") {
+ cmd <- .spread(input$dataset, key = input$tr_spread_key, value = input$tr_spread_value, fill = input$tr_spread_fill, vars = input$tr_vars, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "expand") {
+ cmd <- .expand(input$dataset, vars = input$tr_vars, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "reorg_vars") {
+ cmd <- .reorg_vars(input$dataset, vars = input$tr_reorg_vars, df_name)
+ r_data[[df_name]] <- dat
+ } else if (input$tr_change_type == "type") {
+ if (input$tr_typefunction == "ts") {
+ tr_ts <- list(
+ start = c(input$tr_ts_start_year, input$tr_ts_start_period),
+ end = c(input$tr_ts_end_year, input$tr_ts_end_period),
+ frequency = input$tr_ts_frequency
+ )
+ } else {
+ tr_ts <- NULL
+ }
+ cmd <- .change_type(input$dataset, fun = input$tr_typefunction, tr_ts, vars = input$tr_vars, .ext = input$tr_typename, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "transform") {
+ cmd <- .transform(input$dataset, fun = input$tr_transfunction, vars = input$tr_vars, .ext = input$tr_ext, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "training") {
+ cmd <- .training(input$dataset, n = input$tr_training_n, nr = nrow(dat), name = input$tr_training, vars = input$tr_vars, seed = input$tr_training_seed, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "normalize") {
+ cmd <- .normalize(input$dataset, vars = input$tr_vars, nzvar = input$tr_normalizer, .ext = input$tr_ext_nz, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "bin") {
+ cmd <- .bin(input$dataset, vars = input$tr_vars, bins = input$tr_bin_n, rev = input$tr_bin_rev, .ext = input$tr_ext_bin, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "reorg_levs") {
+ cmd <- .reorg_levs(input$dataset, input$tr_vars[1], input$tr_reorg_levs, input$tr_rorepl, input$tr_roname, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "recode") {
+ cmd <- .recode(input$dataset, input$tr_vars[1], input$tr_recode, input$tr_rcname, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "rename") {
+ cmd <- .rename(input$dataset, input$tr_vars, input$tr_rename, df_name)
+ r_data[[df_name]] %<>% dplyr::rename(!!!setNames(input$tr_vars, colnames(dat)))
+ } else if (input$tr_change_type == "create") {
+ cmd <- .create(input$dataset, cmd = input$tr_create, byvar = input$tr_vars, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ } else if (input$tr_change_type == "replace") {
+ cmd <- .replace(input$dataset, input$tr_vars, input$tr_replace, df_name)
+ r_data[[df_name]][, colnames(dat)] <- dat
+ r_data[[df_name]][, input$tr_replace] <- list(NULL)
+ } else if (input$tr_change_type == "clip") {
+ cmd <- paste0("## using the clipboard for data transformation may seem convenient]\n## but it is not 'reproducible' - no command generated\n")
+ r_data[[df_name]][, colnames(dat)] <- dat
+ }
+
+ ## uncomment if you want to revert to resetting the transform UI after Store
+ # updateTextAreaInput(session, "tr_log", value = paste0(input$tr_log, paste0(cmd, ncmd), "\n"))
+
+ ## update the command log
+ shinyAce::updateAceEditor(session, "tr_log", value = paste0(input$tr_log, paste0(cmd, ncmd), "\n"))
+
+ ## reset input values once the changes have been applied
+ # updateSelectInput(session = session, inputId = "tr_change_type", selected = "none")
+
+ ## jumps straight to the new dataset
+ # updateSelectInput(session = session, inputId = "dataset", selected = df_name)
+ if (input$dataset != df_name) {
+ showModal(
+ modalDialog(
+ title = "Data Stored",
+ span(
+ paste0("Dataset '", df_name, "' was successfully added to
+ the datasets dropdown. Add code to Report > Rmd or
+ Report > R to (re)create the results by clicking the
+ report icon on the bottom left of your screen.")
+ ),
+ footer = modalButton("OK"),
+ size = "m",
+ easyClose = TRUE
+ )
+ )
+ }
+})
+
+observeEvent(input$tr_change_type, {
+ ## reset all values when tr_change_type is changed
+ updateTextInput(session = session, inputId = "tr_create", value = "")
+ updateTextInput(session = session, inputId = "tr_recode", value = "")
+ updateTextInput(session = session, inputId = "tr_rename", value = "")
+ updateTextInput(session = session, inputId = "tr_paste", value = "")
+ updateTextInput(session = session, inputId = "tr_gather_key", value = "")
+ updateTextInput(session = session, inputId = "tr_gather_value", value = "")
+ updateTextInput(session = session, inputId = "tr_spread_key", value = "")
+ updateTextInput(session = session, inputId = "tr_spread_value", value = "")
+ updateSelectInput(session = session, inputId = "tr_typefunction", selected = "none")
+ updateSelectInput(session = session, inputId = "tr_transfunction", selected = "none")
+ updateSelectInput(session = session, inputId = "tr_replace", selected = "None")
+ updateSelectInput(session = session, inputId = "tr_normalizer", selected = "none")
+ updateSelectInput(session = session, inputId = "tr_tab2dat", selected = "none")
+})
+
+transform_report <- function() {
+ cmd <- NULL
+ if (!is.empty(input$tr_log)) {
+ cmd <- gsub("\n{2,}", "\n", input$tr_log) %>%
+ sub("^\n", "", .) %>%
+ sub("\n$", "", .)
+
+ shinyAce::updateAceEditor(session, "tr_log", value = "")
+ }
+ update_report(cmd = cmd, outputs = NULL, figs = FALSE)
+}
+
+observeEvent(input$transform_report, {
+ r_info[["latest_screenshot"]] <- NULL
+ transform_report()
+})
+
+observeEvent(input$transform_screenshot, {
+ r_info[["latest_screenshot"]] <- NULL
+ radiant_screenshot_modal("modal_transform_screenshot")
+})
+
+observeEvent(input$modal_transform_screenshot, {
+ transform_report()
+ removeModal()
+})
\ No newline at end of file