Unnest nested lists into a flat data.frame.
Usage
unnest(
x,
spec = NULL,
dedupe = FALSE,
stack_atomic = NULL,
process_atomic = NULL,
process_unnamed_lists = NULL,
cross_join = TRUE
)
Arguments
- x
a nested list to unnest
- spec
spec to use for unnesting. See spec().
- dedupe
Whether to dedupe repeated elements. If TRUE, a node that is visited for a second time and is not explicitly declared in the spec is skipped. This is particularly useful with grouped specs.
- stack_atomic
Whether atomic leaf vectors should be stacked or not. If NULL, the default, data.frame vectors are stacked, all others are spread.
- process_atomic
Process spec for atomic leaf vectors. Either NULL for no processing (the default), "as_is" to return the entire element in a list column, "paste" to paste elements together into a character column.
- process_unnamed_lists
How to process unnamed lists. Can be one of "as_is" - return a list column, "exclude" - drop these elements unless they are explicitly included in the spec, "paste" - return a character column, "stack" - automatically stack. If NULL (the default), do nothing - process them normally according to the specs.
- cross_join
Specifies how the results from sibling nodes are joined (cbind-ed) together. With cross_join = FALSE, the shorter data.frames (those with fewer rows) are recycled to the maximum number of rows across all joined components. With cross_join = TRUE, the results are cross joined (producing all combinations of rows across all components). cross_join = TRUE is the default because it incurs no data loss and is more conducive to early detection of errors caused by incorrect specs.
Value
A data.frame, data.table or a tibble, as specified by the option unnest.return.type. Defaults to data.frame.
Examples
x <- list(a = list(b = list(x = 1, y = 1:2, z = 10),
c = list(x = 2, y = 100:102)))
xxx <- list(x, x, x)
## spreading
unnest(x, s("a"))
#> a.b.x a.b.y a.b.y.2 a.b.z a.c.x a.c.y a.c.y.2 a.c.y.3
#> 1 1 1 2 10 2 100 101 102
unnest(x, s("a"), stack_atomic = TRUE)
#> a.b.x a.b.y a.b.z a.c.x a.c.y
#> 1 1 1 10 2 100
#> 2 1 2 10 2 101
#> 3 1 1 10 2 102
#> 4 1 2 10 2 100
#> 5 1 1 10 2 101
#> 6 1 2 10 2 102
unnest(x, s("a/b"), stack_atomic = TRUE)
#> a.b.x a.b.y a.b.z
#> 1 1 1 10
#> 2 1 2 10
unnest(x, s("a/c"), stack_atomic = TRUE)
#> a.c.x a.c.y
#> 1 2 100
#> 2 2 101
#> 3 2 102
unnest(x, s("a"), stack_atomic = TRUE, cross_join = TRUE)
#> a.b.x a.b.y a.b.z a.c.x a.c.y
#> 1 1 1 10 2 100
#> 2 1 2 10 2 101
#> 3 1 1 10 2 102
#> 4 1 2 10 2 100
#> 5 1 1 10 2 101
#> 6 1 2 10 2 102
unnest(x, s("a//x"))
#> a.b.x a.c.x
#> 1 1 2
unnest(x, s("a//x,z"))
#> a.b.x a.b.z a.c.x
#> 1 1 10 2
unnest(x, s("a/2/x,y"))
#> a.c.x a.c.y a.c.y.2 a.c.y.3
#> 1 2 100 101 102
## stacking
unnest(x, s("a/", stack = TRUE))
#> a.x a.y a.y.2 a.y.3 a.z
#> 1 1 1 2 NA 10
#> 2 2 100 101 102 NA
unnest(x, s("a/", stack = TRUE, as = "A"))
#> A.x A.y A.y.2 A.y.3 A.z
#> 1 1 1 2 NA 10
#> 2 2 100 101 102 NA
unnest(x, s("a/", stack = TRUE, as = "A"), stack_atomic = TRUE)
#> A.x A.y A.z
#> 1 1 1 10
#> 2 1 2 10
#> 3 2 100 NA
#> 4 2 101 NA
#> 5 2 102 NA
unnest(x, s("a/", stack = "id"), stack_atomic = TRUE)
#> a.id a.x a.y a.z
#> 1 b 1 1 10
#> 2 b 1 2 10
#> 3 c 2 100 NA
#> 4 c 2 101 NA
#> 5 c 2 102 NA
unnest(x, s("a/", stack = "id", as = ""), stack_atomic = TRUE)
#> id x y z
#> 1 b 1 1 10
#> 2 b 1 2 10
#> 3 c 2 100 NA
#> 4 c 2 101 NA
#> 5 c 2 102 NA
unnest(xxx, s(stack = "id"))
#> a.b.x a.b.y a.b.y.2 a.b.z a.c.x a.c.y a.c.y.2 a.c.y.3 id
#> 1 1 1 2 10 2 100 101 102 1
#> 2 1 1 2 10 2 100 101 102 2
#> 3 1 1 2 10 2 100 101 102 3
unnest(xxx, s(stack = "id"), stack_atomic = TRUE)
#> a.b.x a.b.y a.b.z a.c.x a.c.y id
#> 1 1 1 10 2 100 1
#> 2 1 2 10 2 101 1
#> 3 1 1 10 2 102 1
#> 4 1 2 10 2 100 1
#> 5 1 1 10 2 101 1
#> 6 1 2 10 2 102 1
#> 7 1 1 10 2 100 2
#> 8 1 2 10 2 101 2
#> 9 1 1 10 2 102 2
#> 10 1 2 10 2 100 2
#> 11 1 1 10 2 101 2
#> 12 1 2 10 2 102 2
#> 13 1 1 10 2 100 3
#> 14 1 2 10 2 101 3
#> 15 1 1 10 2 102 3
#> 16 1 2 10 2 100 3
#> 17 1 1 10 2 101 3
#> 18 1 2 10 2 102 3
unnest(xxx, s(stack = "id", s("a/b/y/", stack = TRUE)))
#> a.b.y id
#> 1 1 1
#> 2 2 1
#> 3 1 2
#> 4 2 2
#> 5 1 3
#> 6 2 3
## exclusion
unnest(x, s("a/b/", exclude = "x"))
#> a.b.y a.b.y.2 a.b.z
#> 1 1 2 10
## dedupe
unnest(x, s("a", s("b/y"), s("b")), stack_atomic = TRUE)
#> a.b.x a.b.y a.b.y a.b.z
#> 1 1 1 1 10
#> 2 1 2 2 10
#> 3 1 1 1 10
#> 4 1 2 2 10
unnest(x, s("a", s("b/y"), s("b")), dedupe = TRUE, stack_atomic = TRUE)
#> a.b.x a.b.y a.b.z
#> 1 1 1 10
#> 2 1 2 10
## grouping
unnest(xxx, stack_atomic = TRUE,
s(stack = TRUE,
groups = list(first = s("a/b/x,y"),
second = s("a/b"))))
#> $first
#> a.b.x a.b.y
#> 1 1 1
#> 2 1 2
#> 3 1 1
#> 4 1 2
#> 5 1 1
#> 6 1 2
#>
#> $second
#> a.b.x a.b.y a.b.z
#> 1 1 1 10
#> 2 1 2 10
#> 3 1 1 10
#> 4 1 2 10
#> 5 1 1 10
#> 6 1 2 10
#>
unnest(xxx, stack_atomic = TRUE, dedupe = TRUE,
s(stack = TRUE,
groups = list(first = s("a/b/x,y"),
second = s("a/b"))))
#> $first
#> a.b.x a.b.y
#> 1 1 1
#> 2 1 2
#> 3 1 1
#> 4 1 2
#> 5 1 1
#> 6 1 2
#>
#> $second
#> a.b.z
#> 1 10
#> 2 10
#> 3 10
#>
## processing as_is
str(unnest(xxx, s(stack = "id",
s("a/b/y", process = "as_is"),
s("a/c", process = "as_is"))))
#> 'data.frame': 3 obs. of 3 variables:
#> $ a.b.y:List of 3
#> ..$ : int 1 2
#> ..$ : int 1 2
#> ..$ : int 1 2
#> $ a.c :List of 3
#> ..$ :List of 2
#> .. ..$ x: num 2
#> .. ..$ y: int 100 101 102
#> ..$ :List of 2
#> .. ..$ x: num 2
#> .. ..$ y: int 100 101 102
#> ..$ :List of 2
#> .. ..$ x: num 2
#> .. ..$ y: int 100 101 102
#> $ id : int 1 2 3
str(unnest(xxx, s(stack = "id", s("a/b/", process = "as_is"))))
#> 'data.frame': 3 obs. of 4 variables:
#> $ a.b.x:List of 3
#> ..$ : num 1
#> ..$ : num 1
#> ..$ : num 1
#> $ a.b.y:List of 3
#> ..$ : int 1 2
#> ..$ : int 1 2
#> ..$ : int 1 2
#> $ a.b.z:List of 3
#> ..$ : num 10
#> ..$ : num 10
#> ..$ : num 10
#> $ id : int 1 2 3
str(unnest(xxx, s(stack = "id", s("a/b", process = "as_is"))))
#> 'data.frame': 3 obs. of 2 variables:
#> $ a.b:List of 3
#> ..$ :List of 3
#> .. ..$ x: num 1
#> .. ..$ y: int 1 2
#> .. ..$ z: num 10
#> ..$ :List of 3
#> .. ..$ x: num 1
#> .. ..$ y: int 1 2
#> .. ..$ z: num 10
#> ..$ :List of 3
#> .. ..$ x: num 1
#> .. ..$ y: int 1 2
#> .. ..$ z: num 10
#> $ id : int 1 2 3
## processing paste
str(unnest(x, s("a/b/y", process = "paste")))
#> 'data.frame': 1 obs. of 1 variable:
#> $ a.b.y: chr "1,2"
str(unnest(xxx, s(stack = TRUE, s("a/b/", process = "paste"))))
#> 'data.frame': 3 obs. of 3 variables:
#> $ a.b.x: chr "1" "1" "1"
#> $ a.b.y: chr "1,2" "1,2" "1,2"
#> $ a.b.z: chr "10" "10" "10"
str(unnest(xxx, s(stack = TRUE, s("a/b", process = "paste"))))
#> 'data.frame': 3 obs. of 1 variable:
#> $ a.b: chr "1,1:2,10" "1,1:2,10" "1,1:2,10"
## default
unnest(x, s("a/b/c/", s("b", default = 100)))
#> data frame with 0 columns and 0 rows
unnest(x, s("a/b/c/", stack = "ix", s("b", default = 100)))
#> data frame with 0 columns and 0 rows