# summary functions, e.g. describe df.points.describe()
# It's type-aware: its output changes based on the data type of the input
# mean df.points.mean()
# to see a list of unique values: unique df["taster_name"].unique()
# to see a list of unique values and how often they occur in the dataset df["taster_name"].value_counts()
# Maps # suppose that we wanted to re-center the scores the wines received around 0 (i.e. subtract the mean): review_points_mean = df.points.mean(); df.points.map(lambda p: p - review_points_mean)
# `map` processes a Series one value at a time # `apply` processes a DataFrame one row at a time when called with `axis="columns"` (the default axis passes each column instead) # Note that `map()` and `apply()` return new, transformed Series and DataFrames # They don't modify the original data they're called on.
# All of the standard Python operators (`>, <, ==`, and so on) work in this manner review_points_mean = df.points.mean() df.points - review_points_mean
# change index names and/or column names # e.g. change the points column to score df.rename(columns={"points": "score"})
# change the index df.rename(index={0: "firstEntry", 1: "secondEntry"})
# Both the row index and the column index can have their own `name` attribute # use `rename_axis()` to change these names df.rename_axis("wines", axis="rows").rename_axis("fields", axis="columns")
# combine data from different places: concat, join, merge