RDocumentation: sdf_expand_grid

Note that this notebook was automatically generated from an RDocumentation page. Whether the code runs without errors depends on the package and on the example code itself, so you may need to edit it to make it work.

# Make sure sparklyr is installed, then attach it.
# requireNamespace() only checks that the package is available (it does not
# attach it), so the single library() call below is what attaches sparklyr
# and stops with an error if the installation did not succeed.
if (!requireNamespace("sparklyr", quietly = TRUE)) {
  install.packages("sparklyr")
}
library("sparklyr")

# Open a Spark connection against a local master.
sc <- spark_connect(master = "local")

# Expand the three inputs into a Spark DataFrame via sdf_expand_grid().
grid_sdf <- sdf_expand_grid(
  sc,
  seq(5),
  rnorm(10),
  letters
)

# Display the Spark DataFrame.
grid_sdf

# For comparison: the same kinds of inputs concatenated into one local R
# vector (rnorm() is called again here, so the random draws differ).
c(
  seq(5),
  rnorm(10),
  letters
)

# Attach both packages up front: the dplyr verbs used below (select(),
# distinct(), summarise(), collect()) should not depend on dplyr being
# attached only part-way through the script.
library("tibble")
library("dplyr")

# Small local example: two ids, each observed on two non-consecutive days.
data <- tibble(
  id = c(1, 1, 2, 2),
  dates = c("2020-01-02", "2020-01-04", "2020-01-01", "2020-01-03"),
  values = c(1, 2, 3, 4)
)

# Copy the local tibble into Spark through the open connection `sc`.
data_spark <- copy_to(sc, data)

data_spark

# Distinct entries of the `values` column.
data_spark %>%
  select(values) %>%
  distinct()

# First date and the number of days between the first and last date,
# collected back into a local tibble.
# `dates` holds "YYYY-MM-DD" strings, so min()/max() on the text agree with
# chronological order. na.rm = TRUE is spelled out because SQL aggregates
# always drop missing values and dbplyr warns when it is left implicit.
days_info <-
  data_spark %>%
  summarise(
    first_date = min(dates, na.rm = TRUE),
    total_days = datediff(
      max(dates, na.rm = TRUE),
      min(dates, na.rm = TRUE)
    )
  ) %>%
  collect()
days_info

# One row per distinct id, tagged with a constant key column. Joining two
# tables on a key that is TRUE everywhere pairs every row of one with every
# row of the other.
ids_with_key <-
  data_spark %>%
  distinct(id) %>%
  mutate(join_by = TRUE)

# Build every (date, id) pair: generate the offsets 0..total_days, turn each
# offset into a date counted from the first observed date, then join against
# the ids on the constant key.
dates_id_combinations <-
  sdf_seq(
    sc,
    from = 0,
    to = days_info$total_days,
    repartition = 1
  ) %>%
  transmute(
    # `!!` inlines the locally collected first date into the Spark query;
    # `id` here is the sequence column coming out of sdf_seq().
    dates = date_add(!!days_info$first_date, id),
    join_by = TRUE
  ) %>%
  # Name the key explicitly instead of relying on automatic detection of
  # shared column names (which also emits a "Joining, by = ..." message).
  full_join(ids_with_key, by = "join_by")
# The helper column `join_by` is still present in the result; the step below
# would drop it but is intentionally left disabled, as in the original.
# %>%
#   select(dates, id)

# Inspect the right-hand side of the join on its own.
ids_with_key

head(dates_id_combinations)

1:5