import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import pandas as pd
# Feature column: five separated clusters of evenly spaced x values
# (60 points in total), given as (start, stop, number of points).
df = pd.DataFrame(
    {
        "x": np.concatenate(
            [
                np.linspace(start, stop, count)
                for start, stop, count in (
                    (0, 5, 10),
                    (10, 15, 10),
                    (30, 39, 20),
                    (40, 49, 10),
                    (50, 70, 10),
                )
            ]
        )
    }
)

# Targets: one uniform-noise plateau per x cluster, drawn in cluster order
# as (low, high, number of points).
y1, y2, y3, y4, y5 = (
    np.random.uniform(low, high, size)
    for low, high, size in (
        (10, 15, 10),
        (20, 30, 10),
        (0, 5, 20),
        (30, 40, 10),
        (13, 17, 10),
    )
)
y = np.concatenate((y1, y2, y3, y4, y5))
# Scatter the raw (x, y) points on a fresh Figure object.
fig = plt.Figure()
scatter_ax = fig.gca()
sns.scatterplot(data=df, x="x", y=y, ax=scatter_ax)
# Bare expression: in a notebook this is what displays the figure.
fig
# Fit three regression trees of increasing depth (1, 3 and 8) on the same
# data; `fit` returns the estimator itself, so construction and fitting can
# be chained.
tree = DecisionTreeRegressor(max_depth=1).fit(df, y)
tree2 = DecisionTreeRegressor(max_depth=3).fit(df, y)
tree3 = DecisionTreeRegressor(max_depth=8).fit(df, y)
fig, axes = plt.subplots(1, 3, figsize=(14, 5))

# Evaluate the trees on a dense grid so that the jumps of the piecewise
# constant predictions are drawn (almost) vertically rather than as long
# slanted segments joining distant training points.
x = np.linspace(df.x.min(), df.x.max(), 1000)
x_column = x[:, np.newaxis]

# One panel per model: (fitted tree, line colour, legend label).
panels = (
    (tree, 'r', "tree with depth=1"),
    (tree2, 'b', "tree with depth=3"),
    (tree3, 'g', "tree with depth=8"),
)
for ax, (model, colour, legend_label) in zip(axes, panels):
    # Training points first, then the model's prediction curve on top.
    sns.scatterplot(x="x", y=y, data=df, ax=ax)
    sns.lineplot(
        x=x,
        y=model.predict(x_column),
        color=colour,
        ax=ax,
        label=legend_label,
    ).set_ylabel("y_true")
# [notebook cell output] Text(0, 0.5, 'y_true')
# Rebuild the feature column: five separated clusters of evenly spaced
# x values (60 points in total), given as (start, stop, number of points).
df = pd.DataFrame(
    {
        "x": np.concatenate(
            [
                np.linspace(start, stop, count)
                for start, stop, count in (
                    (0, 5, 10),
                    (10, 15, 10),
                    (30, 39, 20),
                    (40, 49, 10),
                    (50, 70, 10),
                )
            ]
        )
    }
)

# Targets: one uniform-noise plateau per x cluster, drawn in cluster order
# as (low, high, number of points).
y1, y2, y3, y4, y5 = (
    np.random.uniform(low, high, size)
    for low, high, size in (
        (10, 12, 10),
        (20, 25, 10),
        (0, 5, 20),
        (30, 32, 10),
        (13, 17, 10),
    )
)
y = np.concatenate((y1, y2, y3, y4, y5))
# Scatter the regenerated (x, y) points on a fresh Figure object.
fig = plt.Figure()
scatter_ax = fig.gca()
sns.scatterplot(data=df, x="x", y=y, ax=scatter_ax)
# Bare expression: in a notebook this is what displays the figure.
fig
# Fit a stump (depth 1) and a depth-2 tree on the same data; `fit` returns
# the estimator itself, so construction and fitting can be chained.
tree = DecisionTreeRegressor(max_depth=1).fit(df, y)
tree2 = DecisionTreeRegressor(max_depth=2).fit(df, y)
# [notebook cell output] DecisionTreeRegressor(max_depth=2)
# Draw the depth-1 tree's predictions on a fresh Figure object.
fig = plt.Figure()
# Evaluate on a dense grid so the jumps of the piecewise-constant prediction
# are drawn (almost) vertically instead of as long slanted segments.
x = np.linspace(df.x.min(), df.x.max(), 1000)
# `x` is passed by keyword: seaborn deprecated positional data arguments
# (FutureWarning) and rejects them from version 0.12 on.
sns.lineplot(x=x, y=tree.predict(x[:, np.newaxis]),
             color='r', ax=fig.gca(),
             label="tree with depth=1").set_ylabel("y_true")
# Bare expression: in a notebook this is what displays the figure.
fig
# [notebook cell output] /Users/lucbertin/.pyenv/versions/3.8.6/envs/base/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Overlay the depth-2 tree's predictions on the same figure.
# `x` is passed by keyword: seaborn deprecated positional data arguments
# (FutureWarning) and rejects them from version 0.12 on.
sns.lineplot(x=x, y=tree2.predict(x[:, np.newaxis]),
             color='g', ax=fig.gca(),
             label="tree with depth=2").set_ylabel("y_true")
# Bare expression: in a notebook this is what displays the figure.
fig
# [notebook cell output] /Users/lucbertin/.pyenv/versions/3.8.6/envs/base/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Start a new Figure object holding only the raw (x, y) scatter.
fig = plt.Figure()
scatter_ax = fig.gca()
sns.scatterplot(data=df, x="x", y=y, ax=scatter_ax)
# Bare expression: in a notebook this is what displays the figure.
fig
def create_tree_graph(model, df):
    """Render a fitted scikit-learn decision tree as a PNG image.

    Args:
        model: Fitted tree estimator, passed straight to
            ``sklearn.tree.export_graphviz``.
        df: DataFrame whose column labels name the tree's features in the
            drawing. Only ``df.columns`` is read.

    Returns:
        The PNG data returned by ``pydotplus``'s ``graph.create_png()``.
    """
    # Imported lazily so the plotting dependency is only needed when a tree
    # is actually rendered.
    # NOTE(review): pydotplus presumably also needs the Graphviz binaries
    # installed -- confirm for the target environment.
    import pydotplus
    from sklearn.tree import export_graphviz

    # With out_file=None, export_graphviz returns the DOT source as a string,
    # so the `six.StringIO` buffer (and the `six` dependency) is not needed.
    dot_source = export_graphviz(
        model,
        out_file=None,
        filled=True,
        rounded=True,
        special_characters=True,
        # A plain list rather than a pandas Index, which some scikit-learn
        # releases reject during parameter validation.
        feature_names=list(df.columns),
    )
    graph = pydotplus.graph_from_dot_data(dot_source)
    return graph.create_png()
# Refit a single stump (depth 1) on the current data.
tree = DecisionTreeRegressor(max_depth=1).fit(df, y)
# Evaluate on a dense grid so the jump of the piecewise-constant prediction
# is drawn (almost) vertically instead of as a long slanted segment.
x = np.linspace(df.x.min(), df.x.max(), 1000)
stump_curve = tree.predict(x[:, np.newaxis])
# Overlay the stump's prediction on whatever `fig` currently refers to.
sns.lineplot(x=x, y=stump_curve, color='r', ax=fig.gca())
# Bare expression: in a notebook this is what displays the figure.
fig
# [stray notebook cell] dd
# [notebook shell command -- not valid Python in a script; run it in a notebook cell or a terminal] !pip install imageio
# [notebook cell output] Collecting imageio Downloading imageio-2.9.0-py3-none-any.whl (3.3 MB) |████████████████████████████████| 3.3 MB 2.9 MB/s eta 0:00:01 Requirement already satisfied: numpy in /Users/lucbertin/.pyenv/versions/3.8.6/envs/base/lib/python3.8/site-packages (from imageio) (1.19.4) Requirement already satisfied: pillow in /Users/lucbertin/.pyenv/versions/3.8.6/envs/base/lib/python3.8/site-packages (from imageio) (8.0.1) Installing collected packages: imageio Successfully installed imageio-2.9.0
from matplotlib import gridspec
import imageio
def create_gradient_descent_demo(df, y, lr=0.3, iterations=10):
    """Show gradient boosting with depth-1 trees, one figure per iteration.

    Predictions start at the mean of ``y``. Each iteration fits a stump on
    the current residuals and adds ``lr`` times its output to the running
    predictions. The figure for an iteration has three panels:

    * top-left: the data, the previous predictions and the updated ones;
    * bottom-left: the residuals and the stump fitted on them;
    * right: the stump rendered by ``create_tree_graph``.

    Args:
        df: DataFrame with a numeric column ``x``; only that column is used.
        y: Target values, one per row of ``df`` (any 1-D array-like).
        lr: Learning rate applied to every stump's prediction.
        iterations: Number of boosting rounds, i.e. figures, to produce.

    Returns:
        None. Figures are shown with ``plt.show()`` and then closed.
    """
    # Dense grid so the jumps of the piecewise-constant predictions are drawn
    # (almost) vertically instead of as slanted segments between far points.
    x = np.linspace(df.x.min(), df.x.max(), 1000)
    grid = x[:, np.newaxis]

    xi = df[["x"]].copy()
    # Fit and predict on plain arrays throughout, so the grid predictions do
    # not mix DataFrame-fitted estimators with unnamed array inputs.
    features = xi.to_numpy()
    # Accept lists / Series as well as ndarrays, flattened to one dimension.
    yi = np.asarray(y, dtype=float).reshape(-1)

    # Initialise predictions (on the data and on the grid) with the average.
    baseline = np.mean(yi)
    predf = np.full(len(yi), baseline)
    predf_x = np.full(len(x), baseline)
    # Residuals the first stump will be fitted on.
    ei = yi - predf

    for i in range(iterations):
        # Fit a stump (max_depth = 1) on the current residuals.
        tree = DecisionTreeRegressor(max_depth=1).fit(features, ei)
        stump_on_data = tree.predict(features)
        stump_on_grid = tree.predict(grid)  # computed once, used twice below

        # Updated predictions, damped by the learning rate.
        pred_new = predf + lr * stump_on_data
        pred_new_x = predf_x + lr * stump_on_grid

        # A bare figure: plt.subplots() would add an extra full-size Axes
        # underneath the GridSpec panels.
        fig = plt.figure(figsize=(20, 8))
        gs = gridspec.GridSpec(2, 2, figure=fig)

        # Top-left: data with previous and updated predictions.
        ax_fit = fig.add_subplot(gs[0, 0])
        ax_fit.set_title("Iteration " + str(i))
        ax_fit.scatter(features[:, 0], yi)
        ax_fit.plot(x, predf_x, c='b', label="Previous predictions")
        ax_fit.plot(x, pred_new_x, c='r',
                    label='Overall predictions (learning rate)')
        # The original second plt.legend() hit the residual panel again,
        # leaving this panel's labels undisplayed.
        ax_fit.legend()

        # Bottom-left: residuals the stump was fitted on, and the stump.
        ax_res = fig.add_subplot(gs[1, 0])
        ax_res.scatter(df.x, ei, c='g')
        ax_res.plot(x, stump_on_grid, c='g',
                    label='Single tree predictions on residuals')
        ax_res.legend()

        # Right: the stump itself. Feature names come from `xi`, the frame
        # the tree was actually fitted on, not from every column of `df`.
        ax_tree = fig.add_subplot(gs[:, 1])
        ax_tree.imshow(imageio.imread(create_tree_graph(tree, xi)))
        ax_tree.xaxis.set_visible(False)  # hide the x axis
        ax_tree.yaxis.set_visible(False)  # hide the y axis

        plt.show()
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)

        # Carry the updated state into the next round.
        ei = yi - pred_new
        predf = pred_new
        predf_x = pred_new_x
# Run the five-iteration demo twice: with a damped learning rate (0.3) and
# with no damping (1).
for learning_rate in (0.3, 1):
    create_gradient_descent_demo(df, y, lr=learning_rate, iterations=5)