In [None]:
import datetime
import os
from pathlib import Path

import pandas as pd
from alibi_detect.cd import TabularDrift
from alibi_detect.saving import save_detector
from joblib import load

### Load model 

In [None]:
# Project root = parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
proj_path = Path.cwd().parent.absolute()

# Load the fitted model artifact stored under <project>/models.
# NOTE: joblib deserializes via pickle, so only load files you trust.
model_path = proj_path.joinpath('models', 'clf-model.joblib')
model = load(model_path)

### Load train and test data

In [None]:
# Both feature sets are stored as pickles in the same folder.
processed_dir = proj_path / 'data' / 'processed'
X_train = pd.read_pickle(processed_dir / 'X_train.pkl')
X_test = pd.read_pickle(processed_dir / 'X_test.pkl')

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Column names of the training frame, in order; reused below to select the
# same features from other data sets.
feat_cols = X_train.columns.to_list()
feat_cols

### Load data from new geography (Germany) that the model has never seen

In [None]:
# Read the Germany extract and show its (rows, columns) size.
germany_csv = proj_path.joinpath('data', 'more_data', 'Churn_Modelling_Germany.csv')
df_germany = pd.read_csv(germany_csv)
df_germany.shape

In [None]:
# Keep only the columns present in the training features, in the same order.
X_germany = df_germany.loc[:, feat_cols]

### Train drift detection model

In [None]:
# The drift detector must process data exactly as it was processed during training,
# so reuse the preprocessing part of the fitted sklearn pipeline:
# slicing with [:-1] keeps every step except the last one
# (presumably the final estimator — verify against the training code).
preprocessor = model[:-1]

In [None]:
# Map the positional index of every categorical column (name prefixed with 'cat__')
# to None; with None the detector infers the category values from the reference data.
# NOTE(review): the indices are positions in feat_cols, while the detector works on the
# output of preprocess_fn — confirm that both column layouts line up.
categorical_idx = [pos for pos, name in enumerate(feat_cols) if name.startswith('cat__')]
categories_per_feature = dict.fromkeys(categorical_idx)

# The training set is the reference data; p_val=0.05 is the significance
# threshold handed to the detector.
cd = TabularDrift(
    X_train,
    p_val=0.05,
    preprocess_fn=preprocessor.transform,
    categories_per_feature=categories_per_feature,
)

### Will there be drift if data was unseen by the model (test data), but it comes from the same geographies (France and Spain)?

In [None]:
# Run the drift detector on the held-out test set.
preds = cd.predict(X_test)
labels = ['No!', 'Yes!']
# The is_drift flag is used directly as an index into labels.
drift_flag = preds['data']['is_drift']
print(f'Drift? {labels[drift_flag]}')

In [None]:
# Show the full result returned by cd.predict for the test set.
preds

In [None]:
# p-values reported by the detector for the test set.
preds['data']['p_val']

### Will there be drift if data comes from a different geography (Germany) than what the model was trained on?

In [None]:
# Run the drift detector on the Germany data.
preds = cd.predict(X_germany)
labels = ['No!', 'Yes!']
# The is_drift flag is used directly as an index into labels.
drift_flag = preds['data']['is_drift']
print(f'Drift? {labels[drift_flag]}')

In [None]:
# Show the full result returned by cd.predict for the Germany data.
preds

In [None]:
# Keep the p-values of the most recent prediction for the table built below.
p_val = preds['data']['p_val']
p_val

In [None]:
# Timestamp of this drift check (naive local time, as returned by datetime.now()).
now = datetime.datetime.now()

# The p-values are labelled with feat_cols below, which is only meaningful when
# there is exactly one p-value per feature column. Fail early with a readable
# message if the preprocessing step changed the number of columns.
assert len(p_val) == len(feat_cols), (
    f'got {len(p_val)} p-values for {len(feat_cols)} feature columns'
)

# Single-row frame: the timestamp followed by one p-value per feature column.
df_p_val = pd.DataFrame([[now, *p_val.tolist()]], columns=['time', *feat_cols])
df_p_val

### Save drift detector

In [None]:
# Persist the fitted drift detector next to the model artifact.
detector_path = proj_path.joinpath('models', 'drift_detector')
save_detector(cd, detector_path)