Skip to content

Commit 56fbb46

Browse files
authored
avoid using @pipeline macro in tutorials and end-to end examples (#180)
* avoid using deprecated @pipeline macro * avoid deprecated @pipeline macro * avoid deprecated @pipline syntax in Ex-Wine * remove deprecated @pipeline macro from ISL tutorials
1 parent 8211a69 commit 56fbb46

File tree

14 files changed

+115
-79
lines changed

14 files changed

+115
-79
lines changed

_literate/A-composing-models/Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
121121

122122
[[deps.Distributions]]
123123
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
124-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
124+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
125125
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
126-
version = "0.25.37"
126+
version = "0.25.38"
127127

128128
[[deps.DocStringExtensions]]
129129
deps = ["LibGit2"]

_literate/A-composing-models/tutorial.jl

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,27 +29,36 @@ scitype(X.age)
2929

3030
# A typical workflow for such data is to one-hot-encode the categorical data and then apply some regression model on the data.
3131
# Let's say that we want to apply the following steps:
32-
# 1. standardize the target variable (`:height`)
33-
# 1. one hot encode the categorical data
34-
# 1. train a KNN regression model
32+
# 1. One hot encode the categorical features in `X`
33+
# 1. Standardize the target variable (`:height`)
34+
# 1. Train a KNN regression model on the one-hot-encoded data and the standardized target.
3535

36-
# The `@pipeline` macro helps you define such a simple (non-branching) pipeline of steps to be applied in order:
36+
# The `Pipeline` constructor helps you define such a simple (non-branching) pipeline of steps to be applied in order:
3737

38-
pipe = @pipeline(
39-
X -> coerce(X, :age=>Continuous),
40-
OneHotEncoder(),
41-
KNNRegressor(K=3),
42-
target = UnivariateStandardizer());
38+
pipe = Pipeline(
39+
coercer = X -> coerce(X, :age=>Continuous),
40+
one_hot_encoder = OneHotEncoder(),
41+
transformed_target_model = TransformedTargetModel(
42+
model = KNNRegressor(K=3);
43+
target=UnivariateStandardizer()
44+
)
45+
)
4346

4447
# Note the coercion of the `:age` variable to Continuous since `KNNRegressor` expects `Continuous` input.
45-
# Note also the `target` keyword where you can specify a transformation of the target variable.
48+
# Note also the `TransformedTargetModel` which allows one to learn a transformation (in this case Standardization) of the
49+
# target variable to be passed to the `KNNRegressor`.
4650

4751
# Hyperparameters of this pipeline can be accessed (and set) using dot syntax:
4852

49-
pipe.knn_regressor.K = 2
53+
pipe.transformed_target_model.model.K = 2
5054
pipe.one_hot_encoder.drop_last = true;
5155

5256
# Evaluation for a pipe can be done with the `evaluate` method; implicitly it will construct machines that will contain the fitted parameters etc:
5357

54-
evaluate(pipe, X, height, resampling=Holdout(),
55-
measure=rms) |> pprint
58+
evaluate(
59+
pipe,
60+
X,
61+
height,
62+
resampling=Holdout(),
63+
measure=rms
64+
) |> pprint

_literate/EX-horse/Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
157157

158158
[[deps.Distributions]]
159159
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
160-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
160+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
161161
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
162-
version = "0.25.37"
162+
version = "0.25.38"
163163

164164
[[deps.DocStringExtensions]]
165165
deps = ["LibGit2"]

_literate/EX-horse/tutorial.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,17 @@ ytrain = y[train];
147147

148148
# And let's define a pipeline corresponding to the operations above
149149

150-
SimplePipe = @pipeline(OneHotEncoder(),
151-
MultinomialClassifier(), prediction_type=:probabilistic)
150+
SimplePipe = Pipeline(
151+
OneHotEncoder(),
152+
MultinomialClassifier(),
153+
prediction_type=:probabilistic
154+
)
152155
mach = machine(SimplePipe, Xtrain, ytrain)
153-
res = evaluate!(mach; resampling=Holdout(fraction_train=0.9),
154-
measure=cross_entropy)
156+
res = evaluate!(
157+
mach;
158+
resampling=Holdout(fraction_train=0.9),
159+
measure=cross_entropy
160+
)
155161
round(res.measurement[1], sigdigits=3)
156162

157163
# This is the cross entropy on some held-out 10% of the training set.

_literate/EX-wine/Manifest.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ version = "3.5.0+3"
2626

2727
[[deps.ArrayInterface]]
2828
deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
29-
git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
29+
git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
3030
uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
31-
version = "4.0.0"
31+
version = "4.0.1"
3232

3333
[[deps.Artifacts]]
3434
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -181,9 +181,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
181181

182182
[[deps.Distributions]]
183183
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
184-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
184+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
185185
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
186-
version = "0.25.37"
186+
version = "0.25.38"
187187

188188
[[deps.DocStringExtensions]]
189189
deps = ["LibGit2"]

_literate/EX-wine/tutorial.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ describe(df)
3939
# the target is the `Class` column, everything else is a feature; we can
4040
# dissociate the two using the `unpack` function:
4141

42-
y, X = unpack(df, ==(:Class), colname->true);
42+
y, X = unpack(df, ==(:Class));
4343

4444
# ### Setting the scientific type
4545
#
@@ -93,8 +93,10 @@ describe(Xc, :mean, :std)
9393
KNNC = @load KNNClassifier
9494
MNC = @load MultinomialClassifier pkg=MLJLinearModels;
9595

96-
KnnPipe = @pipeline(Standardizer(), KNNC())
97-
MnPipe = @pipeline(Standardizer(), MNC());
96+
KnnPipe = Standardizer() |> KNNC()
97+
MnPipe = Standardizer() |> MNC();
98+
99+
# Note the `|>` syntax, which is syntactic sugar for creating a linear `Pipeline` from component models.
98100

99101
# We can now fit this on a train split of the data setting aside 20% of the data for eventual testing.
100102

@@ -136,7 +138,7 @@ println(rpad("MNC mcr:", 10), round(mcr_m, sigdigits=3))
136138
# One way to get intuition for why the dataset is so easy to classify is to project it onto a 2D space using the PCA and display the two classes to see if they are well separated; we use the arrow-syntax here (if you're on Julia <= 1.2, use the commented-out lines as you won't be able to use the arrow-syntax)
137139

138140
PCA = @load PCA
139-
pca_pipe = @pipeline(Standardizer(), PCA(maxoutdim=2))
141+
pca_pipe = Standardizer() |> PCA(maxoutdim=2)
140142
pca = machine(pca_pipe, Xtrain)
141143
fit!(pca)
142144
W = transform(pca, Xtrain)

_literate/ISL-lab-10/Manifest.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
163163

164164
[[deps.Distributions]]
165165
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
166-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
166+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
167167
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
168-
version = "0.25.37"
168+
version = "0.25.38"
169169

170170
[[deps.DocStringExtensions]]
171171
deps = ["LibGit2"]
@@ -184,9 +184,9 @@ uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
184184
version = "0.3.0"
185185

186186
[[deps.ExprTools]]
187-
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
187+
git-tree-sha1 = "24565044e60bc48a7562e75bcf14f084901dc0b6"
188188
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
189-
version = "0.1.6"
189+
version = "0.1.7"
190190

191191
[[deps.FileIO]]
192192
deps = ["Pkg", "Requires", "UUIDs"]
@@ -273,9 +273,9 @@ version = "1.0.0"
273273

274274
[[deps.JLLWrappers]]
275275
deps = ["Preferences"]
276-
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
276+
git-tree-sha1 = "22df5b96feef82434b07327e2d3c770a9b21e023"
277277
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
278-
version = "1.3.0"
278+
version = "1.4.0"
279279

280280
[[deps.JLSO]]
281281
deps = ["BSON", "CodecZlib", "FilePathsBase", "Memento", "Pkg", "Serialization"]
@@ -596,9 +596,9 @@ version = "1.2.2"
596596

597597
[[deps.Requires]]
598598
deps = ["UUIDs"]
599-
git-tree-sha1 = "8f82019e525f4d5c669692772a6f4b0a58b06a6a"
599+
git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
600600
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
601-
version = "1.2.0"
601+
version = "1.3.0"
602602

603603
[[deps.Rmath]]
604604
deps = ["Random", "Rmath_jll"]
@@ -666,9 +666,9 @@ version = "1.0.0"
666666

667667
[[deps.StaticArrays]]
668668
deps = ["LinearAlgebra", "Random", "Statistics"]
669-
git-tree-sha1 = "88a559da57529581472320892576a486fa2377b9"
669+
git-tree-sha1 = "2ae4fe21e97cd13efd857462c1869b73c9f61be3"
670670
uuid = "90137ffa-7385-5640-81b9-e52037218182"
671-
version = "1.3.1"
671+
version = "1.3.2"
672672

673673
[[deps.StatisticalTraits]]
674674
deps = ["ScientificTypesBase"]

_literate/ISL-lab-10/tutorial.jl

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,21 @@ cumsum(r.principalvars ./ r.tvar)
5454

5555
data = dataset("ISLR", "OJ")
5656

57-
X = select(data, [:PriceCH, :PriceMM, :DiscCH, :DiscMM, :SalePriceMM,
58-
:SalePriceCH, :PriceDiff, :PctDiscMM, :PctDiscCH]);
57+
feature_names = [
58+
:PriceCH, :PriceMM, :DiscCH, :DiscMM, :SalePriceMM, :SalePriceCH,
59+
:PriceDiff, :PctDiscMM, :PctDiscCH
60+
]
61+
62+
X = select(data, feature_names);
5963

6064
# ### PCA pipeline
6165

6266
Random.seed!(1515)
6367

64-
SPCA = @pipeline(Standardizer(),
65-
PCA(pratio=1-1e-4))
68+
SPCA = Pipeline(
69+
Standardizer(),
70+
PCA(pratio=1-1e-4)
71+
)
6672

6773
spca = machine(SPCA, X)
6874
fit!(spca)
@@ -98,9 +104,11 @@ savefig(joinpath(@OUTPUT, "ISL-lab-10-g1.svg")) # hide
98104
Random.seed!(1515)
99105

100106
KMeans = @load KMeans pkg=Clustering
101-
SPCA2 = @pipeline(Standardizer(),
102-
PCA(),
103-
KMeans(k=3))
107+
SPCA2 = Pipeline(
108+
Standardizer(),
109+
PCA(),
110+
KMeans(k=3)
111+
)
104112

105113
spca2 = machine(SPCA2, X)
106114
fit!(spca2)

_literate/ISL-lab-5/Manifest.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
1414

1515
[[deps.ArrayInterface]]
1616
deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
17-
git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
17+
git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
1818
uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
19-
version = "4.0.0"
19+
version = "4.0.1"
2020

2121
[[deps.Artifacts]]
2222
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -169,9 +169,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
169169

170170
[[deps.Distributions]]
171171
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
172-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
172+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
173173
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
174-
version = "0.25.37"
174+
version = "0.25.38"
175175

176176
[[deps.DocStringExtensions]]
177177
deps = ["LibGit2"]

_literate/ISL-lab-5/tutorial.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,10 @@ Xhp = DataFrame(hp1=hp, hp2=hp.^2, hp3=hp.^3);
7171

7272
# Now we can write a simple pipeline where the first step selects the features we want (and with it the degree of the polynomial) and the second is the linear regressor:
7373

74-
LinMod = @pipeline(FeatureSelector(features=[:hp1]),
75-
LR());
74+
LinMod = Pipeline(
75+
FeatureSelector(features=[:hp1]),
76+
LR()
77+
);
7678

7779
# Then we can instantiate and fit 3 models where we specify the features each time:
7880

_literate/ISL-lab-6b/Manifest.toml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
1414

1515
[[deps.ArrayInterface]]
1616
deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
17-
git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
17+
git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
1818
uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
19-
version = "4.0.0"
19+
version = "4.0.1"
2020

2121
[[deps.Artifacts]]
2222
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -169,9 +169,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
169169

170170
[[deps.Distributions]]
171171
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
172-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
172+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
173173
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
174-
version = "0.25.37"
174+
version = "0.25.38"
175175

176176
[[deps.DocStringExtensions]]
177177
deps = ["LibGit2"]
@@ -190,9 +190,9 @@ uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
190190
version = "0.3.0"
191191

192192
[[deps.ExprTools]]
193-
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
193+
git-tree-sha1 = "24565044e60bc48a7562e75bcf14f084901dc0b6"
194194
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
195-
version = "0.1.6"
195+
version = "0.1.7"
196196

197197
[[deps.FileIO]]
198198
deps = ["Pkg", "Requires", "UUIDs"]
@@ -214,9 +214,9 @@ version = "0.12.7"
214214

215215
[[deps.FiniteDiff]]
216216
deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
217-
git-tree-sha1 = "b374f22e8565a01d6e5db1e8640c3c5e3fe7d564"
217+
git-tree-sha1 = "6eae72e9943d8992d14359c32aed5f892bda1569"
218218
uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
219-
version = "2.9.0"
219+
version = "2.10.0"
220220

221221
[[deps.FixedPointNumbers]]
222222
deps = ["Statistics"]
@@ -302,9 +302,9 @@ version = "1.0.0"
302302

303303
[[deps.JLLWrappers]]
304304
deps = ["Preferences"]
305-
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
305+
git-tree-sha1 = "22df5b96feef82434b07327e2d3c770a9b21e023"
306306
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
307-
version = "1.3.0"
307+
version = "1.4.0"
308308

309309
[[deps.JLSO]]
310310
deps = ["BSON", "CodecZlib", "FilePathsBase", "Memento", "Pkg", "Serialization"]
@@ -642,9 +642,9 @@ version = "1.2.2"
642642

643643
[[deps.Requires]]
644644
deps = ["UUIDs"]
645-
git-tree-sha1 = "8f82019e525f4d5c669692772a6f4b0a58b06a6a"
645+
git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
646646
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
647-
version = "1.2.0"
647+
version = "1.3.0"
648648

649649
[[deps.Rmath]]
650650
deps = ["Random", "Rmath_jll"]
@@ -718,9 +718,9 @@ version = "0.5.1"
718718

719719
[[deps.StaticArrays]]
720720
deps = ["LinearAlgebra", "Random", "Statistics"]
721-
git-tree-sha1 = "88a559da57529581472320892576a486fa2377b9"
721+
git-tree-sha1 = "2ae4fe21e97cd13efd857462c1869b73c9f61be3"
722722
uuid = "90137ffa-7385-5640-81b9-e52037218182"
723-
version = "1.3.1"
723+
version = "1.3.2"
724724

725725
[[deps.StatisticalTraits]]
726726
deps = ["ScientificTypesBase"]

_literate/ISL-lab-6b/tutorial.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ scitype(Xc)
9797
#
9898
# Let's first fit a simple pipeline with a standardizer, a one-hot-encoder and a basic linear regression:
9999

100-
model = @pipeline(Standardizer(),
101-
OneHotEncoder(),
102-
LinearRegressor())
100+
model = Pipeline(Standardizer(), OneHotEncoder(), LinearRegressor())
103101

104102
pipe = machine(model, Xc, y)
105103
fit!(pipe, rows=train)

_literate/ISL-lab-8/Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
145145

146146
[[deps.Distributions]]
147147
deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
148-
git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
148+
git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
149149
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
150-
version = "0.25.37"
150+
version = "0.25.38"
151151

152152
[[deps.DocStringExtensions]]
153153
deps = ["LibGit2"]

0 commit comments

Comments
 (0)