This image shows the "Load and Select HDFS Data" paragraph with the following code: 

%spark
// Load the CSV files matching hdfs:///user/demo/*.csv into one DataFrame.
// Reader options: the first line of each file supplies the column names
// ("header") and column types are inferred from the data ("inferSchema").
// Rows that contain a null or NaN value in any column are then dropped.
val df1 = {
  // Helpers are scoped to this block so only df1 is added to the session.
  val readerOptions = Map(
    "header" -> "true",
    "inferSchema" -> "true"
  )
  val rawRows = spark.read
    .format("csv")
    .options(readerOptions)
    .load("hdfs:///user/demo/*.csv")
  rawRows.na.drop()
}

// Project df1 down to six named columns. The names are listed once in a
// local sequence and handed to select as (first name, remaining names).
val df2 = {
  // Non-empty literal list, so head/tail are safe here.
  val keptColumns = Seq(
    "trip_distance",
    "passenger_count",
    "Trip_duration_minutes",
    "tip_percentage",
    "total_amount",
    "Trip_ave_speed"
  )
  df1.select(keptColumns.head, keptColumns.tail: _*)
}