This image shows the "Load and Select HDFS Data" paragraph, which contains the %spark interpreter directive followed by two statements. The first statement is: val df1 = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load("hdfs:///user/demo/*.csv").na.drop() The second statement is: val df2 = df1.select("trip_distance", "passenger_count", "Trip_duration_minutes", "tip_percentage", "total_amount", "Trip_ave_speed")