avoid using @pipeline macro in tutorials and end-to-end examples (#180)

OkonSamuel · web-flow · commit 56fbb464373c · 2022-01-14T10:27:33.000+01:00
* avoid using deprecated @pipeline macro * avoid deprecated @pipeline macro * avoid deprecated @pipeline syntax in Ex-Wine * remove deprecated @pipeline macro from ISL tutorials
diff --git a/_literate/A-composing-models/Manifest.toml b/_literate/A-composing-models/Manifest.toml
@@ -121,9 +121,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
diff --git a/_literate/A-composing-models/tutorial.jl b/_literate/A-composing-models/tutorial.jl
@@ -29,27 +29,36 @@ scitype(X.age)
 
 # A typical workflow for such data is to one-hot-encode the categorical data and then apply some regression model on the data.
 # Let's say that we want to apply the following steps:
-# 1. standardize the target variable (`:height`)
-# 1. one hot encode the categorical data
-# 1. train a KNN regression model
+# 1. One-hot-encode the categorical features in `X`
+# 1. Standardize the target variable (`:height`)
+# 1. Train a KNN regression model on the one-hot-encoded data and the standardized target.
 
-# The `@pipeline` macro helps you define such a simple (non-branching) pipeline of steps to be applied in order:
+# The `Pipeline` constructor helps you define such a simple (non-branching) pipeline of steps to be applied in order:
 
-pipe = @pipeline(
-    X -> coerce(X, :age=>Continuous),
-    OneHotEncoder(),
-    KNNRegressor(K=3),
-    target = UnivariateStandardizer());
+pipe = Pipeline(
+    coercer = X -> coerce(X, :age=>Continuous),
+    one_hot_encoder = OneHotEncoder(),
+    transformed_target_model = TransformedTargetModel(
+        model = KNNRegressor(K=3);
+        target=UnivariateStandardizer()
+    )
+)
 
 # Note the coercion of the `:age` variable to Continuous since `KNNRegressor` expects `Continuous` input.
-# Note also the `target` keyword where you can specify a transformation of the target variable.
+# Note also the `TransformedTargetModel`, which allows one to learn a transformation (in this case standardization) of the
+# target variable to be passed to the `KNNRegressor`.
 
 # Hyperparameters of this pipeline can be accessed (and set) using dot syntax:
 
-pipe.knn_regressor.K = 2
+pipe.transformed_target_model.model.K = 2
 pipe.one_hot_encoder.drop_last = true;
 
 # Evaluation for a pipe can be done with the `evaluate!` method; implicitly it will construct machines that will contain the fitted parameters etc:
 
-evaluate(pipe, X, height, resampling=Holdout(),
-         measure=rms) |> pprint
+evaluate(
+    pipe,
+    X,
+    height,
+    resampling=Holdout(),
+    measure=rms
+) |> pprint
diff --git a/_literate/EX-horse/Manifest.toml b/_literate/EX-horse/Manifest.toml
@@ -157,9 +157,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
diff --git a/_literate/EX-horse/tutorial.jl b/_literate/EX-horse/tutorial.jl
@@ -147,11 +147,17 @@ ytrain = y[train];
 
 # And let's define a pipeline corresponding to the operations above
 
-SimplePipe = @pipeline(OneHotEncoder(),
-                       MultinomialClassifier(), prediction_type=:probabilistic)
+SimplePipe = Pipeline(
+    OneHotEncoder(),
+    MultinomialClassifier(),
+    prediction_type=:probabilistic
+)
 mach = machine(SimplePipe, Xtrain, ytrain)
-res = evaluate!(mach; resampling=Holdout(fraction_train=0.9),
-                measure=cross_entropy)
+res = evaluate!(
+    mach;
+    resampling=Holdout(fraction_train=0.9),
+    measure=cross_entropy
+)
 round(res.measurement[1], sigdigits=3)
 
 # This is the cross entropy on some held-out 10% of the training set.
diff --git a/_literate/EX-wine/Manifest.toml b/_literate/EX-wine/Manifest.toml
@@ -26,9 +26,9 @@ version = "3.5.0+3"
 
 [[deps.ArrayInterface]]
 deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
-git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
+git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
 uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "4.0.0"
+version = "4.0.1"
 
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -181,9 +181,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
diff --git a/_literate/EX-wine/tutorial.jl b/_literate/EX-wine/tutorial.jl
@@ -39,7 +39,7 @@ describe(df)
 # the target is the `Class` column, everything else is a feature; we can
 # dissociate the two  using the `unpack` function:
 
-y, X = unpack(df, ==(:Class), colname->true);
+y, X = unpack(df, ==(:Class));
 
 # ### Setting the scientific type
 #
@@ -93,8 +93,10 @@ describe(Xc, :mean, :std)
 KNNC = @load KNNClassifier
 MNC = @load MultinomialClassifier pkg=MLJLinearModels;
 
-KnnPipe = @pipeline(Standardizer(), KNNC())
-MnPipe = @pipeline(Standardizer(), MNC());
+KnnPipe = Standardizer |> KNNC
+MnPipe = Standardizer |> MNC
+
+# Note the `|>` syntax, which is syntactic sugar for creating a linear `Pipeline` from component models.
 
 # We can now fit this on a train split of the data setting aside 20% of the data for eventual testing.
 
@@ -136,7 +138,7 @@ println(rpad("MNC mcr:", 10), round(mcr_m, sigdigits=3))
 # One way to get intuition for why the dataset is so easy to classify is to project it onto a 2D space using the PCA and display the two classes to see if they are well separated; we use the arrow-syntax here (if you're on Julia <= 1.2, use the commented-out lines as you won't be able to use the arrow-syntax)
 
 PCA = @load PCA
-pca_pipe = @pipeline(Standardizer(), PCA(maxoutdim=2))
+pca_pipe = Standardizer() |> PCA(maxoutdim=2)
 pca = machine(pca_pipe, Xtrain)
 fit!(pca)
 W = transform(pca, Xtrain)
diff --git a/_literate/ISL-lab-10/Manifest.toml b/_literate/ISL-lab-10/Manifest.toml
@@ -163,9 +163,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
@@ -184,9 +184,9 @@ uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
 version = "0.3.0"
 
 [[deps.ExprTools]]
-git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
+git-tree-sha1 = "24565044e60bc48a7562e75bcf14f084901dc0b6"
 uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
-version = "0.1.6"
+version = "0.1.7"
 
 [[deps.FileIO]]
 deps = ["Pkg", "Requires", "UUIDs"]
@@ -273,9 +273,9 @@ version = "1.0.0"
 
 [[deps.JLLWrappers]]
 deps = ["Preferences"]
-git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
+git-tree-sha1 = "22df5b96feef82434b07327e2d3c770a9b21e023"
 uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
-version = "1.3.0"
+version = "1.4.0"
 
 [[deps.JLSO]]
 deps = ["BSON", "CodecZlib", "FilePathsBase", "Memento", "Pkg", "Serialization"]
@@ -596,9 +596,9 @@ version = "1.2.2"
 
 [[deps.Requires]]
 deps = ["UUIDs"]
-git-tree-sha1 = "8f82019e525f4d5c669692772a6f4b0a58b06a6a"
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
 uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "1.2.0"
+version = "1.3.0"
 
 [[deps.Rmath]]
 deps = ["Random", "Rmath_jll"]
@@ -666,9 +666,9 @@ version = "1.0.0"
 
 [[deps.StaticArrays]]
 deps = ["LinearAlgebra", "Random", "Statistics"]
-git-tree-sha1 = "88a559da57529581472320892576a486fa2377b9"
+git-tree-sha1 = "2ae4fe21e97cd13efd857462c1869b73c9f61be3"
 uuid = "90137ffa-7385-5640-81b9-e52037218182"
-version = "1.3.1"
+version = "1.3.2"
 
 [[deps.StatisticalTraits]]
 deps = ["ScientificTypesBase"]
diff --git a/_literate/ISL-lab-10/tutorial.jl b/_literate/ISL-lab-10/tutorial.jl
@@ -54,15 +54,21 @@ cumsum(r.principalvars ./ r.tvar)
 
 data = dataset("ISLR", "OJ")
 
-X = select(data, [:PriceCH, :PriceMM, :DiscCH, :DiscMM, :SalePriceMM,
-                  :SalePriceCH, :PriceDiff, :PctDiscMM, :PctDiscCH]);
+feature_names = [
+    :PriceCH, :PriceMM, :DiscCH, :DiscMM, :SalePriceMM, :SalePriceCH,
+    :PriceDiff, :PctDiscMM, :PctDiscCH
+]
+
+X = select(data, feature_names);
 
 # ### PCA pipeline
 
 Random.seed!(1515)
 
-SPCA = @pipeline(Standardizer(),
-                 PCA(pratio=1-1e-4))
+SPCA = Pipeline(
+    Standardizer(),
+    PCA(pratio=1-1e-4)
+)
 
 spca = machine(SPCA, X)
 fit!(spca)
@@ -98,9 +104,11 @@ savefig(joinpath(@OUTPUT, "ISL-lab-10-g1.svg")) # hide
 Random.seed!(1515)
 
 KMeans = @load KMeans pkg=Clustering
-SPCA2 = @pipeline(Standardizer(),
-                  PCA(),
-                  KMeans(k=3))
+SPCA2 = Pipeline(
+    Standardizer(),
+    PCA(),
+    KMeans(k=3)
+)
 
 spca2 = machine(SPCA2, X)
 fit!(spca2)
diff --git a/_literate/ISL-lab-5/Manifest.toml b/_literate/ISL-lab-5/Manifest.toml
@@ -14,9 +14,9 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
 
 [[deps.ArrayInterface]]
 deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
-git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
+git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
 uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "4.0.0"
+version = "4.0.1"
 
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -169,9 +169,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
diff --git a/_literate/ISL-lab-5/tutorial.jl b/_literate/ISL-lab-5/tutorial.jl
@@ -71,8 +71,10 @@ Xhp = DataFrame(hp1=hp, hp2=hp.^2, hp3=hp.^3);
 
 # Now we  can write a simple pipeline where the first step selects the features we want (and with it the degree of the polynomial) and the second is the linear regressor:
 
-LinMod = @pipeline(FeatureSelector(features=[:hp1]),
-                   LR());
+LinMod = Pipeline(
+    FeatureSelector(features=[:hp1]),
+    LR()
+);
 
 # Then we can  instantiate and fit 3 models where we specify the features each time:
 
diff --git a/_literate/ISL-lab-6b/Manifest.toml b/_literate/ISL-lab-6b/Manifest.toml
@@ -14,9 +14,9 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
 
 [[deps.ArrayInterface]]
 deps = ["Compat", "IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"]
-git-tree-sha1 = "d0d82f1c0b651173a4f839d84f662d03f3417740"
+git-tree-sha1 = "ffc6588e17bcfcaa79dfa5b4f417025e755f83fc"
 uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-version = "4.0.0"
+version = "4.0.1"
 
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -169,9 +169,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
@@ -190,9 +190,9 @@ uuid = "792122b4-ca99-40de-a6bc-6742525f08b6"
 version = "0.3.0"
 
 [[deps.ExprTools]]
-git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
+git-tree-sha1 = "24565044e60bc48a7562e75bcf14f084901dc0b6"
 uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
-version = "0.1.6"
+version = "0.1.7"
 
 [[deps.FileIO]]
 deps = ["Pkg", "Requires", "UUIDs"]
@@ -214,9 +214,9 @@ version = "0.12.7"
 
 [[deps.FiniteDiff]]
 deps = ["ArrayInterface", "LinearAlgebra", "Requires", "SparseArrays", "StaticArrays"]
-git-tree-sha1 = "b374f22e8565a01d6e5db1e8640c3c5e3fe7d564"
+git-tree-sha1 = "6eae72e9943d8992d14359c32aed5f892bda1569"
 uuid = "6a86dc24-6348-571c-b903-95158fe2bd41"
-version = "2.9.0"
+version = "2.10.0"
 
 [[deps.FixedPointNumbers]]
 deps = ["Statistics"]
@@ -302,9 +302,9 @@ version = "1.0.0"
 
 [[deps.JLLWrappers]]
 deps = ["Preferences"]
-git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
+git-tree-sha1 = "22df5b96feef82434b07327e2d3c770a9b21e023"
 uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
-version = "1.3.0"
+version = "1.4.0"
 
 [[deps.JLSO]]
 deps = ["BSON", "CodecZlib", "FilePathsBase", "Memento", "Pkg", "Serialization"]
@@ -642,9 +642,9 @@ version = "1.2.2"
 
 [[deps.Requires]]
 deps = ["UUIDs"]
-git-tree-sha1 = "8f82019e525f4d5c669692772a6f4b0a58b06a6a"
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
 uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "1.2.0"
+version = "1.3.0"
 
 [[deps.Rmath]]
 deps = ["Random", "Rmath_jll"]
@@ -718,9 +718,9 @@ version = "0.5.1"
 
 [[deps.StaticArrays]]
 deps = ["LinearAlgebra", "Random", "Statistics"]
-git-tree-sha1 = "88a559da57529581472320892576a486fa2377b9"
+git-tree-sha1 = "2ae4fe21e97cd13efd857462c1869b73c9f61be3"
 uuid = "90137ffa-7385-5640-81b9-e52037218182"
-version = "1.3.1"
+version = "1.3.2"
 
 [[deps.StatisticalTraits]]
 deps = ["ScientificTypesBase"]
diff --git a/_literate/ISL-lab-6b/tutorial.jl b/_literate/ISL-lab-6b/tutorial.jl
@@ -97,9 +97,7 @@ scitype(Xc)
 #
 # Let's first fit a simple pipeline with a standardizer, a one-hot-encoder and a basic linear regression:
 
-model = @pipeline(Standardizer(),
-                     OneHotEncoder(),
-                     LinearRegressor())
+model = Pipeline(Standardizer(), OneHotEncoder(), LinearRegressor())
 
 pipe  = machine(model, Xc, y)
 fit!(pipe, rows=train)
diff --git a/_literate/ISL-lab-8/Manifest.toml b/_literate/ISL-lab-8/Manifest.toml
@@ -145,9 +145,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[deps.Distributions]]
 deps = ["ChainRulesCore", "DensityInterface", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Test"]
-git-tree-sha1 = "6a8dc9f82e5ce28279b6e3e2cea9421154f5bd0d"
+git-tree-sha1 = "97e9e9d0b8303bae296f3bdd1c2b0065dcb7e7ef"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.37"
+version = "0.25.38"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
diff --git a/_literate/ISL-lab-8/tutorial.jl b/_literate/ISL-lab-8/tutorial.jl

Original file line number	Diff line number	Diff line change
`@@ -97,9 +97,7 @@ scitype(Xc)`
`97`	`97`	`#`
`98`	`98`	`# Let's first fit a simple pipeline with a standardizer, a one-hot-encoder and a basic linear regression:`
`99`	`99`
`100`		`-model = @pipeline(Standardizer(),`
`101`		`- OneHotEncoder(),`
`102`		`- LinearRegressor())`
	`100`	`+model = Pipeline(Standardizer(), OneHotEncoder(), LinearRegressor())`
`103`	`101`
`104`	`102`	`pipe = machine(model, Xc, y)`
`105`	`103`	`fit!(pipe, rows=train)`