Usefulness of .pipe()¶

Niwako Sugimura, Ph.D., Shad Sharma, MASc., Houda Aynaou, MA

My name is Niwako, and I worked with Shad and Houda on this Datathon. Today, we’re going to talk about the .pipe() method, which we used to help keep our code tidy.

Challenges in working on an ML project¶

  • Notebooks get messy with lots of data exploration and preprocessing
  • Hard to keep track of the transformations applied to a dataframe
  • The same set of changes must be applied to the train, test, and unlabeled sets

Definition¶

DataFrame.pipe(func, *args, **kwargs)
  • Takes a function whose first argument is the dataframe
  • The function transforms the dataframe in some way and returns the result
  • .pipe() returns whatever the function returns, so calls can be chained
In [ ]:
def func(df, args):
    # `transform` is a placeholder for any operation that returns a dataframe
    transformed_df = transform(df, args)
    return transformed_df

Usage¶

In [ ]:
transformed_df = (
    df.pipe(func, args)
    .pipe(some_other_func, other_args)
    .pipe(yet_another_func, yet_more_args)
)
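A minimal runnable sketch of this chaining, with toy functions (`add_total`, `scale`, and the column names are illustrative, not from our notebook):

```python
import pandas as pd


def add_total(df):
    # Add a derived column without mutating the input
    df = df.copy()
    df["total"] = df["a"] + df["b"]
    return df


def scale(df, factor):
    # Multiply the numeric columns by a constant factor
    df = df.copy()
    df[["a", "b", "total"]] = df[["a", "b", "total"]] * factor
    return df


df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
result = (
    df.pipe(add_total)
    .pipe(scale, factor=10)
)
```

Each .pipe() call passes the dataframe from the previous step into the next function, so the chain reads top to bottom, and the original df is left untouched.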

Practical Example¶

In [ ]:
FEATURES = ["column 1", "column 2", ...]


def drop_non_features(df):
    return df[FEATURES]

Stateful functions for .pipe()¶

In [ ]:
from sklearn.preprocessing import Normalizer


class NormalizeNumerical:
    def __call__(self, df, is_train):
        if is_train:
            # Fit the scaler only on the training set
            self.scaler = Normalizer()
            self.scaler.fit(df[NUMERICAL_COLUMNS])

        df = df.copy()
        df[NUMERICAL_COLUMNS] = self.scaler.transform(df[NUMERICAL_COLUMNS])
        return df


normalize_numerical = NormalizeNumerical()
In [ ]:
from sklearn.preprocessing import StandardScaler


class StandardizeNumerical:
    def __call__(self, df, is_train):
        if is_train:
            # Fit the scaler only on the training set
            self.scaler = StandardScaler()
            self.scaler.fit(df[NUMERICAL_COLUMNS])

        df = df.copy()
        df[NUMERICAL_COLUMNS] = self.scaler.transform(df[NUMERICAL_COLUMNS])
        return df


standardize_numerical = StandardizeNumerical()
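The stateful pattern can be sketched without sklearn. Here `SimpleStandardize`, `NUMERICAL_COLUMNS`, and the data are illustrative stand-ins: the instance stores its statistics during the training pass and reuses them afterwards:

```python
import pandas as pd

NUMERICAL_COLUMNS = ["x"]  # illustrative


class SimpleStandardize:
    """Hand-rolled stand-in for StandardScaler to show the stateful pattern."""

    def __call__(self, df, is_train):
        if is_train:
            # Remember the training statistics on the instance
            self.mean = df[NUMERICAL_COLUMNS].mean()
            self.std = df[NUMERICAL_COLUMNS].std(ddof=0)
        df = df.copy()
        df[NUMERICAL_COLUMNS] = (df[NUMERICAL_COLUMNS] - self.mean) / self.std
        return df


simple_standardize = SimpleStandardize()

train = pd.DataFrame({"x": [0.0, 10.0]})
test = pd.DataFrame({"x": [5.0]})

train_out = train.pipe(simple_standardize, is_train=True)
test_out = test.pipe(simple_standardize, is_train=False)  # reuses train stats
```

Because the same instance is piped for both sets, the test set is standardized with the training mean and standard deviation, which is exactly what the is_train flag is for.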

Fitting, Testing, and Predicting¶

In [ ]:
def preprocess(df, is_train):
    return (
        df.pipe(drop_non_features)
        # .pipe(normalize_numerical, is_train=is_train)
        .pipe(standardize_numerical, is_train=is_train)
    )

Fit¶

In [ ]:
model = Model()  # using whatever model we want
model.fit(X_train.pipe(preprocess, is_train=True), y_train)

Test¶

In [ ]:
model.score(X_test.pipe(preprocess, is_train=False), y_test)

Predict¶

In [ ]:
y_unlabeled = model.predict(X_unlabeled.pipe(preprocess, is_train=False))
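The fit-and-predict pattern above can be sketched end to end with a toy model. `MeanModel`, `FEATURES`, and the data here are illustrative stand-ins, and this `preprocess` just drops non-feature columns:

```python
import pandas as pd

FEATURES = ["a"]  # illustrative


def preprocess(df, is_train):
    # Stand-in for the real pipeline: just select the feature columns
    return df[FEATURES]


class MeanModel:
    """Toy stand-in for a real estimator: always predicts the training mean."""

    def fit(self, X, y):
        self.mean = y.mean()
        return self

    def predict(self, X):
        return pd.Series([self.mean] * len(X))


X_train = pd.DataFrame({"a": [1, 2], "noise": [9, 9]})
y_train = pd.Series([10.0, 20.0])
X_unlabeled = pd.DataFrame({"a": [3], "noise": [9]})

model = MeanModel()
model.fit(X_train.pipe(preprocess, is_train=True), y_train)
y_unlabeled = model.predict(X_unlabeled.pipe(preprocess, is_train=False))
```

The call shape matches the slides: preprocessing is piped onto each set right at the point where the model consumes it.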

Example from our notebook for the Datathon¶

In [ ]:
def preprocess(df, is_train):
    return (
        df.pipe(drop_non_features)
        # .pipe(normalize_numerical, is_train=is_train)
        .pipe(standardize_numerical, is_train=is_train)
        # .pipe(replace_missing_numerical, fill=-999)
        .pipe(impute_missing_numerical, is_train=is_train)
        .pipe(onehot_encode_categorical, is_train=is_train)
    )

Benefits of using .pipe()¶

  • Keep the original dataframe intact
  • Easily select and switch around preprocessing steps
  • Try different classifiers more easily

How is this different from just using functions?¶

The main difference is that, without .pipe(), you either need a temporary variable:

In [ ]:
def preprocess(df, is_train):
    tdf = df.copy()
    tdf = drop_non_features(tdf)
    # tdf = normalize_numerical(tdf, is_train=is_train)
    tdf = standardize_numerical(tdf, is_train=is_train)
    # tdf = replace_missing_numerical(tdf, fill=-999)
    tdf = impute_missing_numerical(tdf, is_train=is_train)
    tdf = onehot_encode_categorical(tdf, is_train=is_train)
    return tdf

Or you end up with unwieldy nested function calls:

In [ ]:
def preprocess(df, is_train):
    return onehot_encode_categorical(
        impute_missing_numerical(
            standardize_numerical(drop_non_features(df), is_train=is_train),
            is_train=is_train,
        ),
        is_train=is_train,
    )
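All three styles compute the same result; a quick check with toy functions (`add_one` and `double` are illustrative):

```python
import pandas as pd


def add_one(df):
    df = df.copy()
    df["x"] = df["x"] + 1
    return df


def double(df):
    df = df.copy()
    df["x"] = df["x"] * 2
    return df


df = pd.DataFrame({"x": [1, 2]})

# 1. Temporary variable
tdf = df.copy()
tdf = add_one(tdf)
tdf = double(tdf)

# 2. Nested calls, read inside-out
nested = double(add_one(df))

# 3. .pipe() chain, read top-to-bottom
piped = df.pipe(add_one).pipe(double)
```

The .pipe() version avoids both the repeated temporary assignments and the inside-out reading order of the nested calls.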