3.3.1 Select and Order Data

OREdplyr functions for selecting and ordering data in columns and rows of an ore.frame object.

Table 3-2 Selecting and Ordering Columns and Rows

Function Description

arrange

arrange_

Orders rows by the specified columns.

desc

Sorts an ore.number, ore.factor, or ore.character object in descending order.

distinct

distinct_

Selects unique rows from an input ore.frame object over the specified columns.

filter

filter_

Filters rows by matching the specified condition.

mutate

mutate_

Adds new columns.

rename

rename_

Renames the specified columns and keeps all columns.

select

select_

Selects only the specified columns.

slice

slice_

Selects rows by position; ignores the grouping of the input ordered ore.frame object.

transmute

transmute_

Adds new columns and drops the existing columns.

3.3.1.1 Examples of Selecting Columns

Examples of the select and rename functions of the OREdplyr package.

Example 3-65 Selecting Columns

The following examples select columns from the IRIS ore.frame object that is created by using the ore.push function on the iris data.frame object.

IRIS <- ore.push(iris)
# Select the specified column
names(select(IRIS, Petal.Length))
names(select(IRIS, petal_length = Petal.Length))

# Drop the specified column
names(select(IRIS, -Petal.Length))

# rename() keeps all variables
names(rename(IRIS, petal_length = Petal.Length))

Listing for This Example

R> IRIS <- ore.push(iris)
R> # Select the specified column
R> names(select(IRIS, Petal.Length))
[1] "Petal.Length"
R> names(select(IRIS, petal_length = Petal.Length))
[1] "petal_length"
R>
R> # Drop the specified column
R> names(select(IRIS, -Petal.Length))
[1] "Sepal.Length" "Sepal.Width"  "Petal.Width"  "Species" 
R>
R> # rename() keeps all variables
R> names(rename(IRIS, petal_length = Petal.Length))
[1] "Sepal.Length" "Sepal.Width"  "petal_length" "Petal.Width"  "Species"

3.3.1.2 Examples of Programming with select_

Examples of the select_ function of the OREdplyr package.

Example 3-66 Programming with select_

This example uses the select_ function to select columns from the IRIS ore.frame object that is created by using the ore.push function on the iris data.frame object.

IRIS <- ore.push(iris)
# Use ~, double quote, or quote function to specify the column to select
head(select_(IRIS, ~Petal.Length))
head(select_(IRIS, "Petal.Length"))
head(select_(IRIS, quote(-Petal.Length), quote(-Petal.Width)))
head(select_(IRIS, .dots = list(quote(-Petal.Length), quote(-Petal.Width))))

Listing for This Example

R> IRIS <- ore.push(iris)
R> # Use ~, double quote, or quote function to specify the column to select
R> head(select_(IRIS, ~Petal.Length))
  Petal.Length
1          1.4
2          1.4
3          1.3
4          1.5
5          1.4
6          1.7
R> head(select_(IRIS, "Petal.Length"))
  Petal.Length
1          1.4
2          1.4
3          1.3
4          1.5
5          1.4
6          1.7
R> head(select_(IRIS, quote(-Petal.Length), quote(-Petal.Width)))
  Sepal.Length Sepal.Width Species
1          5.1         3.5  setosa
2          4.9         3.0  setosa
3          4.7         3.2  setosa
4          4.6         3.1  setosa
5          5.0         3.6  setosa
6          5.4         3.9  setosa
R> head(select_(IRIS, .dots = list(quote(-Petal.Length), quote(-Petal.Width))))
  Sepal.Length Sepal.Width Species
1          5.1         3.5  setosa
2          4.9         3.0  setosa
3          4.7         3.2  setosa
4          4.6         3.1  setosa
5          5.0         3.6  setosa
6          5.4         3.9  setosa

3.3.1.3 Examples of Selecting Distinct Columns

Examples of the distinct and arrange functions of the OREdplyr package.

Example 3-67 Selecting Distinct Columns

df <- data.frame(
  x = sample(10, 100, rep = TRUE),
  y = sample(10, 100, rep = TRUE)
)
DF <- ore.push(df)
nrow(DF)
nrow(distinct(DF))
arrange(distinct(DF, x), x)
arrange(distinct(DF, y), y)

# Use distinct on computed variables
arrange(distinct(DF, diff = abs(x - y)), diff)

Listing for This Example

R> df <- data.frame(
+   x = sample(10, 100, rep = TRUE),
+   y = sample(10, 100, rep = TRUE)
+ )
R> DF <- ore.push(df)
R> nrow(DF)
[1] 100
R> nrow(distinct(DF))
[1] 66
R> arrange(distinct(DF, x), x)
    x
1   1
2   2
3   3
4   4
5   5
6   6
7   7
8   8
9   9
10 10
R> arrange(distinct(DF, y), y)
    y
1   1
2   2
3   3
4   4
5   5
6   6
7   7
8   8
9   9
R> 
R> # Use distinct on computed variables
R> arrange(distinct(DF, diff = abs(x - y)), diff)
   diff
1     0
2     1
3     2
4     3
5     4
6     5
7     6
8     7
9     8
10    9

3.3.1.4 Examples of Selecting Rows by Position

Examples of the slice and filter functions of the OREdplyr package.

Example 3-68 Selecting Rows by Position

MTCARS <- ore.push(mtcars)
# Display the names of the rows in MTCARS
rownames(MTCARS)
# Select the first row
slice(MTCARS, 1L)

# Arrange the rows by horsepower, then select the first row by position
MTCARS <- arrange(MTCARS, hp)
slice(MTCARS, 1L)

by_cyl <- group_by(MTCARS, cyl)
# Grouping is ignored by slice.
slice(by_cyl, 1:2)
# Use filter and row_number to obtain slices per group.
filter(by_cyl, row_number(hp) < 3L) 

Listing for This Example

R> MTCARS <- ore.push(mtcars)
R> # Display the names of the rows in MTCARS
R> rownames(MTCARS)
 [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"          "Hornet 4 Drive"      "Hornet Sportabout"  
 [6] "Valiant"             "Duster 360"          "Merc 240D"           "Merc 230"            "Merc 280"           
[11] "Merc 280C"           "Merc 450SE"          "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
[16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"            "Honda Civic"         "Toyota Corolla"     
[21] "Toyota Corona"       "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"          "Pontiac Firebird"   
[26] "Fiat X1-9"           "Porsche 914-2"       "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
[31] "Maserati Bora"       "Volvo 142E"  
R> # Select the first row
R> slice(MTCARS, 1L)
          mpg cyl disp  hp drat   wt  qsec vs am gear carb
Mazda RX4  21   6  160 110  3.9 2.62 16.46  0  1    4    4
R>
R> # Arrange the rows by horsepower, then select the first row by position
R> MTCARS <- arrange(MTCARS, hp)
R> slice(MTCARS, 1L)
   mpg cyl disp hp drat    wt  qsec vs am gear carb
1 30.4   4 75.7 52 4.93 1.615 18.52  1  1    4    2
R>
R> by_cyl <- group_by(MTCARS, cyl)
R> # Grouping is ignored by slice
R> slice(by_cyl, 1:2)
   mpg cyl  disp hp drat    wt  qsec vs am gear carb
1 30.4   4  75.7 52 4.93 1.615 18.52  1  1    4    2
2 24.4   4 146.7 62 3.69 3.190 20.00  1  0    4    2
Warning message:
In slice_.ore.frame(.data, .dots = .ore.dplyr.exprall(..., env = parent.frame())) :
  grouping is ignored
R> # Use filter and row_number to obtain slices per group
R> filter(by_cyl, row_number(hp) < 3L)
   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
2 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
3 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
5 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
6 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2

3.3.1.5 Examples of Arranging Columns

Examples of the arrange and desc functions of the OREdplyr package.

Example 3-69 Arranging Columns

This example arranges the rows of the ore.frame object MTCARS, which is created by using the ore.push function on the mtcars data.frame object, by the values in the specified columns. The second arrange() invocation calls the desc() function to arrange the values in descending order.

MTCARS <- ore.push(mtcars)
head(arrange(MTCARS, cyl, disp))
head(arrange(MTCARS, desc(disp)))

Listing for This Example

R> MTCARS <- ore.push(mtcars)
R> head(arrange(MTCARS, cyl, disp))
   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
2 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
3 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
4 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
5 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
6 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
R> head(arrange(MTCARS, desc(disp)))
   mpg cyl disp  hp drat    wt  qsec vs am gear carb
1 10.4   8  472 205 2.93 5.250 17.98  0  0    3    4
2 10.4   8  460 215 3.00 5.424 17.82  0  0    3    4
3 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
4 19.2   8  400 175 3.08 3.845 17.05  0  0    3    2
5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
6 14.3   8  360 245 3.21 3.570 15.84  0  0    3    4

3.3.1.6 Examples of Filtering Columns

Examples of the filter function of the OREdplyr package.

Example 3-70 Filtering Columns

This example filters rows from the MTCARS ore.frame object that is created by using the ore.push function on the mtcars data.frame object.

MTCARS <- ore.push(mtcars)
head(filter(MTCARS, cyl == 8))
# Using multiple criteria
head(filter(MTCARS, cyl < 6 & vs == 1))

# Using multiple arguments is the equivalent to using &
head(filter(MTCARS, cyl < 6, vs == 1))

Listing for This Example

R> MTCARS <- ore.push(mtcars)
R> head(filter(MTCARS, cyl == 8))
   mpg cyl  disp  hp drat   wt  qsec vs am gear carb
1 18.7   8 360.0 175 3.15 3.44 17.02  0  0    3    2
2 14.3   8 360.0 245 3.21 3.57 15.84  0  0    3    4
3 16.4   8 275.8 180 3.07 4.07 17.40  0  0    3    3
4 17.3   8 275.8 180 3.07 3.73 17.60  0  0    3    3
5 15.2   8 275.8 180 3.07 3.78 18.00  0  0    3    3
6 10.4   8 472.0 205 2.93 5.25 17.98  0  0    3    4
R> head(filter(MTCARS, cyl < 6 & vs == 1))
   mpg cyl  disp hp drat    wt  qsec vs am gear carb
1 22.8   4 108.0 93 3.85 2.320 18.61  1  1    4    1
2 24.4   4 146.7 62 3.69 3.190 20.00  1  0    4    2
3 22.8   4 140.8 95 3.92 3.150 22.90  1  0    4    2
4 32.4   4  78.7 66 4.08 2.200 19.47  1  1    4    1
5 30.4   4  75.7 52 4.93 1.615 18.52  1  1    4    2
6 33.9   4  71.1 65 4.22 1.835 19.90  1  1    4    1
R>
R> # Using multiple arguments is the equivalent to using &
R> head(filter(MTCARS, cyl < 6, vs == 1))
   mpg cyl  disp hp drat    wt  qsec vs am gear carb
1 22.8   4 108.0 93 3.85 2.320 18.61  1  1    4    1
2 24.4   4 146.7 62 3.69 3.190 20.00  1  0    4    2
3 22.8   4 140.8 95 3.92 3.150 22.90  1  0    4    2
4 32.4   4  78.7 66 4.08 2.200 19.47  1  1    4    1
5 30.4   4  75.7 52 4.93 1.615 18.52  1  1    4    2
6 33.9   4  71.1 65 4.22 1.835 19.90  1  1    4    1

3.3.1.7 Examples of Mutating Columns

Examples of the mutate and transmute functions of the OREdplyr package.

Example 3-71 Mutating Columns

This example uses the MTCARS ore.frame object that is created by using the ore.push function on the mtcars data.frame object.

The mutate function adds the extra column displ_l with the value derived from that of column disp. The transmute function adds the new column and drops the existing columns. Setting a column to NULL removes the column.

MTCARS <- ore.push(mtcars)
head(mutate(MTCARS, displ_l = disp / 61.0237))
head(transmute(MTCARS, displ_l = disp / 61.0237))
head(mutate(MTCARS, cyl = NULL))
head(mutate(MTCARS, cyl = NULL, hp = NULL, displ_l = disp / 61.0237))

Listing for This Example

R> MTCARS <- ore.push(mtcars)
R> head(mutate(MTCARS, displ_l = disp / 61.0237))
   mpg cyl disp  hp drat    wt  qsec vs am gear carb  displ_l
1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4 2.621932
2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4 2.621932
3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1 1.769804
4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1 4.227866
5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2 5.899347
6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1 3.687092
R> head(transmute(MTCARS, displ_l = disp / 61.0237))
   displ_l
1 2.621932
2 2.621932
3 1.769804
4 4.227866
5 5.899347
6 3.687092
R> head(mutate(MTCARS, cyl = NULL))
   mpg disp  hp drat    wt  qsec vs am gear carb
1 21.0  160 110 3.90 2.620 16.46  0  1    4    4
2 21.0  160 110 3.90 2.875 17.02  0  1    4    4
3 22.8  108  93 3.85 2.320 18.61  1  1    4    1
4 21.4  258 110 3.08 3.215 19.44  1  0    3    1
5 18.7  360 175 3.15 3.440 17.02  0  0    3    2
6 18.1  225 105 2.76 3.460 20.22  1  0    3    1
R> head(mutate(MTCARS, cyl = NULL, hp = NULL, displ_l = disp / 61.0237))
   mpg disp drat    wt  qsec vs am gear carb  displ_l
1 21.0  160 3.90 2.620 16.46  0  1    4    4 2.621932
2 21.0  160 3.90 2.875 17.02  0  1    4    4 2.621932
3 22.8  108 3.85 2.320 18.61  1  1    4    1 1.769804
4 21.4  258 3.08 3.215 19.44  1  0    3    1 4.227866
5 18.7  360 3.15 3.440 17.02  0  0    3    2 5.899347
6 18.1  225 2.76 3.460 20.22  1  0    3    1 3.687092