diff --git a/.github/workflows/metaflow.s3_tests.minio.yml b/.github/workflows/metaflow.s3_tests.minio.yml index b3c8f2baa2d..fa6cd67237b 100644 --- a/.github/workflows/metaflow.s3_tests.minio.yml +++ b/.github/workflows/metaflow.s3_tests.minio.yml @@ -21,7 +21,7 @@ jobs: matrix: os: [ubuntu-22.04] ver: ['3.8', '3.9', '3.10', '3.11', '3.12'] - + steps: - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 with: @@ -52,7 +52,7 @@ jobs: export METAFLOW_DATASTORE_SYSROOT_S3=s3://metaflow-test/metaflow/ export AWS_ENDPOINT_URL_S3=http://localhost:9000 export MINIO_TEST=1 - + # Run the same test command as the original workflow cd test/data PYTHONPATH=\$(pwd)/../../ python3 -m pytest --benchmark-skip -s -v diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8fede429e85..c78e77bf344 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -3,7 +3,7 @@ on: release: types: [published] jobs: - test: + test: uses: './.github/workflows/test.yml' test-stubs: uses: './.github/workflows/test-stubs.yml' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4254d532b55..6f7a9da6887 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,8 @@ repos: hooks: - id: check-json - id: check-yaml + - id: trailing-whitespace + exclude: metaflow/plugins/cards/card_modules/main.js - repo: https://github.com/ambv/black rev: 25.12.0 hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3118862097f..e881f89669f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ First off, thanks for taking the time! We'd love to hear from you! Drop us a line in our [chatroom](http://chat.metaflow.org)! If you are interested in contributing to Metaflow, we wrote a [guide](https://docs.metaflow.org/introduction/contributing-to-metaflow#contributing-code-and-issues) -to get you started. +to get you started. We'd appreciate [issue reports](https://github.com/Netflix/metaflow/issues) if you run into trouble using Metaflow. @@ -12,8 +12,8 @@ We'd appreciate [issue reports](https://github.com/Netflix/metaflow/issues) if y Everyone is welcome to join us in our [chatroom](http://chat.metaflow.org)! Please maintain appropriate, professional conduct while participating in our community. This includes all channels of -communication. We take reports of harassment or unwelcoming behavior very seriously. To report such behavior, please -contact us via [email](mailto:help@metaflow.org). +communication. We take reports of harassment or unwelcoming behavior very seriously. To report such behavior, please +contact us via [email](mailto:help@metaflow.org). ### Code style diff --git a/R/DESCRIPTION b/R/DESCRIPTION index 9f4d78a8a9c..bec1abc1290 100644 --- a/R/DESCRIPTION +++ b/R/DESCRIPTION @@ -2,13 +2,13 @@ Package: metaflow Type: Package Title: Metaflow for R-Lang Version: 2.3.0 -Author: Jason Ge [aut] , +Author: Jason Ge [aut] , Savin Goyal [aut, cre] , David Neuzerling [ctb] Maintainer: Jason Ge -Description: Metaflow is a human-friendly R package - that helps scientists and engineers build and manage real-life data science projects. - Metaflow was originally developed at Netflix to boost productivity of data scientists +Description: Metaflow is a human-friendly R package + that helps scientists and engineers build and manage real-life data science projects. + Metaflow was originally developed at Netflix to boost productivity of data scientists who work on a wide variety of projects from classical statistics to state-of-the-art deep learning. Encoding: UTF-8 License: Apache License (>= 2.0) | file LICENSE @@ -30,7 +30,7 @@ Suggests: rmarkdown RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) -Collate: +Collate: 'decorators-aws.R' 'decorators-environment.R' 'decorators-errors.R' diff --git a/R/R/decorators-aws.R b/R/R/decorators-aws.R index 6c06bde39ee..a4ed66e3675 100644 --- a/R/R/decorators-aws.R +++ b/R/R/decorators-aws.R @@ -1,6 +1,6 @@ #' Decorator that configures resources allocated to a step #' -#' @description +#' @description #' These decorators control the resources allocated to step running either #' locally or on _AWS Batch_. The `resources` decorator allocates resources for #' local execution. However, when a flow is executed with the `batch` argument @@ -42,27 +42,27 @@ #' absolutely necessary. A swappiness value of `100` causes pages to be #' swapped very aggressively. Accepted values are whole numbers between `0` #' and `100`. -#' +#' #' @inherit decorator return #' #' @export -#' +#' #' @examples \dontrun{ -#' # This example will generate a large random matrix which takes up roughly +#' # This example will generate a large random matrix which takes up roughly #' # 48GB of memory, and sums the entries. The `batch` decorator forces this #' # step to run in an environment with 60000MB of memory. -#' +#' #' start <- function(self) { #' big_matrix <- matrix(rexp(80000*80000), 80000) #' self$sum <- sum(big_matrix) #' } -#' +#' #' end <- function(self) { #' message( #' "sum is: ", self$sum #' ) #' } -#' +#' #' metaflow("BigSumFlowR") %>% #' step( #' batch(memory=60000, cpu=1), @@ -91,7 +91,7 @@ batch <- function( queue = queue %||% pkg.env$mf$metaflow_config$BATCH_JOB_QUEUE iam_role = iam_role %||% pkg.env$mf$metaflow_config$ECS_S3_ACCESS_IAM_ROLE execution_role = execution_role %||% pkg.env$mf$metaflow_config$ECS_FARGATE_EXECUTION_ROLE - + decorator( "batch", cpu = cpu, diff --git a/R/R/decorators-environment.R b/R/R/decorators-environment.R index 025c3a3a2f0..9d929c0bd36 100644 --- a/R/R/decorators-environment.R +++ b/R/R/decorators-environment.R @@ -6,7 +6,7 @@ #' execution. #' #' @inherit decorator return -#' +#' #' @export #' #' @examples \dontrun{ @@ -14,13 +14,13 @@ #' print(paste("The cutest animal is the", Sys.getenv("CUTEST_ANIMAL"))) #' print(paste("The", Sys.getenv("ALSO_CUTE"), "is also cute, though")) #' } -#' +#' #' metaflow("EnvironmentVariables") %>% -#' step(step="start", +#' step(step="start", #' environment_variables(CUTEST_ANIMAL = "corgi", ALSO_CUTE = "penguin"), -#' r_function=start, +#' r_function=start, #' next_step="end") %>% -#' step(step="end") %>% +#' step(step="end") %>% #' run() #' } environment_variables <- function(...) { @@ -32,7 +32,7 @@ environment_variables <- function(...) { if (is.null(env_vars_names) || "" %in% env_vars_names) { stop("All environment variables must be named") } - + # Note that in this case, "TRUE" does not become Pythonic "True" --- # each environment variable value is immediately coerced to a character. env_var_dict <- lapply( @@ -47,6 +47,6 @@ environment_variables <- function(...) { ) env_var_dict <- paste0("{", paste(env_var_dict, collapse = ", "), "}") } - + decorator("environment", vars = env_var_dict, .convert_args = FALSE) } \ No newline at end of file diff --git a/R/R/decorators-errors.R b/R/R/decorators-errors.R index 454ea0d4eb6..185e1f6e3a7 100644 --- a/R/R/decorators-errors.R +++ b/R/R/decorators-errors.R @@ -1,10 +1,10 @@ #' Decorator that configures a step to retry upon failure -#' -#' @description +#' +#' @description #' Use this decorator to configure a step to retry if it fails. Alternatively, #' retry _any_ failing steps in an entire flow with `run(with = c("retry")`. -#' -#' See \url{https://docs.metaflow.org/v/r/metaflow/failures} for more +#' +#' See \url{https://docs.metaflow.org/v/r/metaflow/failures} for more #' information on how to use this decorator. #' #' @param times Integer number of times to retry this step. Defaults to `3`. Set @@ -13,9 +13,9 @@ #' retried. #' @param minutes_between_retries Integer Number of minutes between retries. #' Defaults to `2`. -#' +#' #' @inherit decorator return -#' +#' #' @export #' #' @examples \dontrun{ @@ -24,23 +24,23 @@ #' start <- function(self){ #' n <- rbinom(n=1, size=1, prob=0.5) #' if (n==0){ -#' stop("Bad Luck!") +#' stop("Bad Luck!") #' } else{ #' print("Lucky you!") #' } #' } -#' +#' #' end <- function(self){ #' print("Phew!") #' } -#' +#' #' metaflow("RetryFlow") %>% -#' step(step="start", +#' step(step="start", #' retry(times=3), -#' r_function=start, +#' r_function=start, #' next_step="end") %>% -#' step(step="end", -#' r_function=end) %>% +#' step(step="end", +#' r_function=end) %>% #' run() #' } retry <- function(times = 3L, minutes_between_retries = 2L) { @@ -53,11 +53,11 @@ retry <- function(times = 3L, minutes_between_retries = 2L) { #' Decorator that configures a step to catch an error #' -#' @description +#' @description #' Use this decorator to configure a step to catch any errors that occur during #' evaluation. For steps that can't be safely retried, it is a good idea to use #' this decorator along with `retry(times = 0)`. -#' +#' #' See \url{https://docs.metaflow.org/v/r/metaflow/failures#catching-exceptions-with-the-catch-decorator} #' for more information on how to use this decorator. #' @@ -65,23 +65,23 @@ retry <- function(times = 3L, minutes_between_retries = 2L) { #' exception. If `NULL` (the default), the exception is not stored. #' @param print_exception Boolean. Determines whether or not the exception is #' printed to stdout when caught. Defaults to `TRUE`. -#' +#' #' @inherit decorator return #' #' @export #' #' @examples \donttest{ -#' +#' #' start <- function(self) { #' stop("Oh no!") #' } -#' +#' #' end <- function(self) { #' message( #' "Error is : ", self$start_failed #' ) #' } -#' +#' #' metaflow("AlwaysErrors") %>% #' step( #' catch(var = "start_failed"), diff --git a/R/R/decorators.R b/R/R/decorators.R index 77690698de9..e2f772e3929 100644 --- a/R/R/decorators.R +++ b/R/R/decorators.R @@ -1,11 +1,11 @@ #' Metaflow Decorator. #' -#' @description +#' @description #' Decorates the `step` with the parameters present in its arguments. For this #' method to work properly, the `...` arguments should be named, and decorator #' type should be the first argument. It may be more convenient to use one of #' the _decorator wrappers_ listed below: -#' +#' #' * \code{\link{resources}} #' * \code{\link{batch}} #' * \code{\link{retry}} @@ -18,16 +18,16 @@ #' @param .convert_args Boolean. If `TRUE` (the default), argument values will #' be converted to analogous Python values, with strings quoted and escaped. #' Disable this if argument values are already formatted for Python. -#' +#' #' @return A object of class "decorator" -#' +#' #' @export -#' +#' #' @examples \dontrun{ #' decorator("catch", print_exception=FALSE) #' decorator("resources", cpu=2, memory=10000) #' } -#' +#' decorator <- function(x, ..., .convert_args = TRUE) { fmt_decorator(x, ..., .convert_args = .convert_args) %>% new_decorator() @@ -55,7 +55,7 @@ new_decorator <- function(x) { #' #' @return character vector #' @keywords internal -#' +#' #' @examples \dontrun{ #' add_decorators(list(decorator("batch", cpu = 4), decorator("retry"))) #' #> c("@batch(cpu=4)", "\n", "@retry", "\n") @@ -66,13 +66,13 @@ add_decorators <- function(decorators) { } #' Format an R decorator as a Python decorator -#' +#' #' @inheritSection add_decorators Python decorators #' #' @param x Decorator name. #' @inheritParams decorator #' -#' @return character vector of length two, in which the first element is the +#' @return character vector of length two, in which the first element is the #' translated decorator and the second element is a new line character. #' @keywords internal #' @@ -118,13 +118,13 @@ decorator_arguments <- function(args, .convert_args = TRUE) { stop("duplicate decorator arguments") } unlist(lapply(seq_along(args), function(x) { - + wrapped_arg <- if (.convert_args) { wrap_argument(args[x]) } else { args[x] } - + if (x != length(args)) { paste0(names(args[x]), "=", wrapped_arg, ",") } else { diff --git a/R/R/flags.R b/R/R/flags.R index 87a6d2e3f8f..1ac4f9d0ef7 100644 --- a/R/R/flags.R +++ b/R/R/flags.R @@ -8,7 +8,7 @@ flags <- function(...) { parse_flags <- function(arguments = commandArgs(TRUE)) { config_name <- Sys.getenv("R_CONFIG_ACTIVE", unset = "default") - configs <- pkg.env$configs + configs <- pkg.env$configs loaded_configs <- list() for (key in names(configs[[config_name]])) { loaded_configs[[key]] <- eval(configs[[config_name]][[key]]) @@ -44,7 +44,7 @@ parse_arguments <- function(arguments = NULL) { if (i <= n){ values$step_functions <- arguments[i] } else { - values$step_functions <- "" + values$step_functions <- "" } next } @@ -143,12 +143,12 @@ split_parameters <- function(flags) { "help", "resume", "max_num_splits", "max_workers", "other_args", "show", "user", - "my_runs", "run_id", + "my_runs", "run_id", "origin_run_id", "with", "tag", # step-functions subcommands and options - "step_functions", + "step_functions", "only_json", "generate_new_token", - "running", "succeeded", "failed", + "running", "succeeded", "failed", "timed_out", "aborted", "namespace", "new_token", "workflow_timeout" ) diff --git a/R/R/install.R b/R/R/install.R index fe282ed14f0..adc545f4357 100644 --- a/R/R/install.R +++ b/R/R/install.R @@ -31,13 +31,13 @@ install_metaflow <- function(method = c("conda", "virtualenv"), env_set <- check_environment(envname) if (method == "conda" && env_set[["virtualenv"]]) { - stop("An existing virtualenv <", envname, "> detected for Metaflow installation.\n", + stop("An existing virtualenv <", envname, "> detected for Metaflow installation.\n", "To continue, remove that environment by executing metaflow::remove_metaflow_env()", " and try installing Metaflow again.", call.=FALSE) } if (method == "virtualenv" && env_set[["conda"]]) { - stop("An existing conda environment <", envname, "> detected for Metaflow installation.\n", + stop("An existing conda environment <", envname, "> detected for Metaflow installation.\n", "To continue, remove that environment by executing metaflow::remove_metaflow_env()", " and try installing Metaflow again.", call.=FALSE) } diff --git a/R/R/run.R b/R/R/run.R index 7a4db20f2fb..ee4c5072665 100644 --- a/R/R/run.R +++ b/R/R/run.R @@ -93,8 +93,8 @@ run_cmd <- function(flow_file, ...) { if ("step_functions" %in% names(flags)) { sfn_cmd <- paste("step-functions", flags$step_functions) # subcommands without an argument - for (subcommand in c("generate_new_token", - "only_json", "running", "succeeded", + for (subcommand in c("generate_new_token", + "only_json", "running", "succeeded", "failed", "timed_out", "aborted")){ if (subcommand %in% names(flags)){ subcommand_valid <- gsub("_", "-", subcommand) @@ -103,7 +103,7 @@ run_cmd <- function(flow_file, ...) { } # subcommands following an argument - for (subcommand in c("authorize", "new_token", "tag", "namespace", + for (subcommand in c("authorize", "new_token", "tag", "namespace", "max_workers", "workflow_timeout")){ if (subcommand %in% names(flags)){ subcommand_valid <- gsub("_", "-", subcommand) @@ -188,8 +188,8 @@ run_cmd <- function(flow_file, ...) { } if ("step_functions" %in% names(flags)){ - cmd <- paste("Rscript", run_path, flow_RDS, - "--no-pylint", package_suffixes, sfn_cmd, + cmd <- paste("Rscript", run_path, flow_RDS, + "--no-pylint", package_suffixes, sfn_cmd, parameters, other_args) } diff --git a/R/README.md b/R/README.md index 51500f37c66..9f70c975216 100644 --- a/R/README.md +++ b/R/README.md @@ -24,6 +24,6 @@ or jump straight into the [docs](https://docs.metaflow.org/v/r). ## Get in Touch There are several ways to get in touch with us: -* Open an issue at: https://github.com/Netflix/metaflow +* Open an issue at: https://github.com/Netflix/metaflow * Email us at: help@metaflow.org -* Chat with us on: http://chat.metaflow.org +* Chat with us on: http://chat.metaflow.org diff --git a/R/check_as_cran.sh b/R/check_as_cran.sh index 464336c4509..e0715250fbb 100755 --- a/R/check_as_cran.sh +++ b/R/check_as_cran.sh @@ -1,7 +1,7 @@ rm -rf cran_check mkdir -p cran_check; -cp -r inst ./cran_check/ -cp -r man ./cran_check/ +cp -r inst ./cran_check/ +cp -r man ./cran_check/ cp -r R ./cran_check/ cp -r vignettes ./cran_check/ cp DESCRIPTION ./cran_check/ diff --git a/R/doc/metaflow.html b/R/doc/metaflow.html index b7a844a38e4..9ea9af9b640 100644 --- a/R/doc/metaflow.html +++ b/R/doc/metaflow.html @@ -119,7 +119,7 @@ background-color: #f7f7f7; border-radius: 3px; color: #333; -white-space: pre-wrap; +white-space: pre-wrap; } pre { border-radius: 3px; @@ -187,18 +187,18 @@ a[href^="https:"] { text-decoration: underline; } -code > span.kw { color: #555; font-weight: bold; } -code > span.dt { color: #902000; } -code > span.dv { color: #40a070; } -code > span.bn { color: #d14; } -code > span.fl { color: #d14; } -code > span.ch { color: #d14; } -code > span.st { color: #d14; } -code > span.co { color: #888888; font-style: italic; } -code > span.ot { color: #007020; } -code > span.al { color: #ff0000; font-weight: bold; } -code > span.fu { color: #900; font-weight: bold; } -code > span.er { color: #a61717; background-color: #e3d2d2; } +code > span.kw { color: #555; font-weight: bold; } +code > span.dt { color: #902000; } +code > span.dv { color: #40a070; } +code > span.bn { color: #d14; } +code > span.fl { color: #d14; } +code > span.ch { color: #d14; } +code > span.st { color: #d14; } +code > span.co { color: #888888; font-style: italic; } +code > span.ot { color: #007020; } +code > span.al { color: #ff0000; font-weight: bold; } +code > span.fu { color: #900; font-weight: bold; } +code > span.er { color: #a61717; background-color: #e3d2d2; } diff --git a/R/inst/run_batch.R b/R/inst/run_batch.R index 5143f2d92f1..95c2c08aded 100644 --- a/R/inst/run_batch.R +++ b/R/inst/run_batch.R @@ -9,7 +9,7 @@ install_dep <- function(dep) { # dependencies for metaflow invisible(lapply(c("R6", "reticulate", "magrittr", "cli", "lubridate", "digest"), install_dep)) -# install numpy and pandas in Python to handle R matrix and data.frame +# install numpy and pandas in Python to handle R matrix and data.frame system("python3 -m pip install numpy pandas -qqq") Sys.setenv(METAFLOW_PYTHON = system("which python3", intern=TRUE)) diff --git a/R/inst/tutorials/00-helloworld/helloworld.R b/R/inst/tutorials/00-helloworld/helloworld.R index 2a993eff16c..a90bf74c5f4 100644 --- a/R/inst/tutorials/00-helloworld/helloworld.R +++ b/R/inst/tutorials/00-helloworld/helloworld.R @@ -3,7 +3,7 @@ library(metaflow) -# This is the 'start' step. All flows must have a step named +# This is the 'start' step. All flows must have a step named # 'start' that is the first step in the flow. start <- function(self){ print("HelloFlow is starting.") @@ -11,22 +11,22 @@ start <- function(self){ # A step for metaflow to introduce itself. hello <- function(self){ - print("Metaflow says: Hi!") + print("Metaflow says: Hi!") } -# This is the 'end' step. All flows must have an 'end' step, +# This is the 'end' step. All flows must have an 'end' step, # which is the last step in the flow. end <- function(self){ print("HelloFlow is all done.") } metaflow("HelloFlow") %>% - step(step = "start", - r_function = start, + step(step = "start", + r_function = start, next_step = "hello") %>% - step(step = "hello", - r_function = hello, + step(step = "hello", + r_function = hello, next_step = "end") %>% - step(step = "end", - r_function = end) %>% + step(step = "end", + r_function = end) %>% run() diff --git a/R/inst/tutorials/01-playlist/playlist.R b/R/inst/tutorials/01-playlist/playlist.R index 8e26d5261b4..5c4a35d2ddb 100644 --- a/R/inst/tutorials/01-playlist/playlist.R +++ b/R/inst/tutorials/01-playlist/playlist.R @@ -10,7 +10,7 @@ library(metaflow) -# Parse the CSV file +# Parse the CSV file start <- function(self){ self$df <- read.csv("./movies.csv", stringsAsFactors=FALSE) } @@ -48,20 +48,20 @@ end <- function(self){ for (i in 1:nrow(self$playlist)){ message(sprintf("Pick %d: %s", i, self$playlist$movie_title[i])) - if (i >= self$top_k) break; + if (i >= self$top_k) break; } } -metaflow("PlayListFlow") %>% - parameter("genre", - help = "Filter movies for a particular genre.", - default = "Sci-Fi") %>% +metaflow("PlayListFlow") %>% + parameter("genre", + help = "Filter movies for a particular genre.", + default = "Sci-Fi") %>% parameter("top_k", help = "The number of movies to recommend in the playlist.", default = 5, type = "int") %>% - step(step = "start", - r_function = start, + step(step = "start", + r_function = start, next_step = c("pick_movie", "bonus_movie")) %>% step(step = "pick_movie", r_function = pick_movie, @@ -73,7 +73,7 @@ metaflow("PlayListFlow") %>% r_function = join, join = TRUE, next_step = "end") %>% - step(step = "end", + step(step = "end", r_function = end) %>% run() diff --git a/R/inst/tutorials/02-statistics/README.md b/R/inst/tutorials/02-statistics/README.md index 523b74f94c5..bc24ee9fd24 100644 --- a/R/inst/tutorials/02-statistics/README.md +++ b/R/inst/tutorials/02-statistics/README.md @@ -10,7 +10,7 @@ plots.** - Plotting results in a Markdown Notebook. #### Before playing this episode: -1. Configure your metadata provider to a user-wise global provider, if you haven't done it already. +1. Configure your metadata provider to a user-wise global provider, if you haven't done it already. ```bash $mkdir -p /path/to/home/.metaflow $export METAFLOW_DEFAULT_METADATA=local diff --git a/R/inst/tutorials/02-statistics/stats.R b/R/inst/tutorials/02-statistics/stats.R index d033e42bbd1..540e7a3f900 100644 --- a/R/inst/tutorials/02-statistics/stats.R +++ b/R/inst/tutorials/02-statistics/stats.R @@ -14,13 +14,13 @@ compute_stats <- function(self){ self$genre <- self$input message("Computing statistics for ", self$genre) - # Find all the movies that have this genre + # Find all the movies that have this genre self$df_by_genre <- self$df[self$df$genre == self$genre, ] gross <- self$df_by_genre$gross # Get some statistics on the gross box office for these titles. - self$median <- median(gross) + self$median <- median(gross) self$mean <- mean(gross) } @@ -30,7 +30,7 @@ join <- function(self, inputs){ "genres" = unlist(lapply(inputs, function(inp){inp$genre})), "median" = unlist(lapply(inputs, function(inp){inp$median})), "mean" = unlist(lapply(inputs, function(inp){inp$mean}))) - + print(head(self$stats)) } @@ -48,5 +48,5 @@ metaflow("MovieStatsFlow") %>% join = TRUE) %>% step(step = "end") %>% run() - + diff --git a/R/inst/tutorials/03-playlist-redux/README.md b/R/inst/tutorials/03-playlist-redux/README.md index 9519c9feea3..ae833382138 100644 --- a/R/inst/tutorials/03-playlist-redux/README.md +++ b/R/inst/tutorials/03-playlist-redux/README.md @@ -7,7 +7,7 @@ #### Before playing this episode: 1. Run 'Episode 02-statistics: Is this Data Science?' -2. Configure your metadata provider to a user-wise global provider, if you haven't done it already. +2. Configure your metadata provider to a user-wise global provider, if you haven't done it already. ```bash $mkdir -p /path/to/home/.metaflow $export METAFLOW_DEFAULT_METADATA=local diff --git a/R/inst/tutorials/03-playlist-redux/playlist.R b/R/inst/tutorials/03-playlist-redux/playlist.R index 13042a1098b..436df342507 100644 --- a/R/inst/tutorials/03-playlist-redux/playlist.R +++ b/R/inst/tutorials/03-playlist-redux/playlist.R @@ -15,7 +15,7 @@ start <- function(self){ self$genre_stats <- run$artifact("stats") } -# Pick some movies from the genre with highest median gross box office +# Pick some movies from the genre with highest median gross box office # which we calculated in MovieStatsFlow pick_movie <- function(self){ sort_order <- order(self$genre_stats$median, decreasing=TRUE) @@ -37,7 +37,7 @@ end <- function(self){ for (i in 1:nrow(self$playlist)){ message(sprintf("Pick %d: %s", i, self$playlist$movie_title[i])) - if (i >= self$top_k) break; + if (i >= self$top_k) break; } } @@ -46,13 +46,12 @@ metaflow("PlayListReduxFlow") %>% help = "The number of movies to recommend in the playlist.", default = 5, type = "int") %>% - step(step = "start", - r_function = start, + step(step = "start", + r_function = start, next_step = "pick_movie") %>% step(step = "pick_movie", r_function = pick_movie, next_step = "end") %>% - step(step = "end", + step(step = "end", r_function = end) %>% run() - \ No newline at end of file diff --git a/R/inst/tutorials/04-helloaws/helloaws.R b/R/inst/tutorials/04-helloaws/helloaws.R index 8a3cd576f86..9dcc4f20aea 100644 --- a/R/inst/tutorials/04-helloaws/helloaws.R +++ b/R/inst/tutorials/04-helloaws/helloaws.R @@ -3,7 +3,7 @@ library(metaflow) -# This is the 'start' step. All flows must have a step named +# This is the 'start' step. All flows must have a step named # 'start' that is the first step in the flow. start <- function(self){ message("HelloAWS is starting.") @@ -13,25 +13,25 @@ start <- function(self){ # A step for metaflow to introduce itself. hello <- function(self){ self$message <- "We're on the cloud! Metaflow says: Hi!" - print(self$message) + print(self$message) message("Using metadata provider: ", get_metadata()) } -# This is the 'end' step. All flows must have an 'end' step, +# This is the 'end' step. All flows must have an 'end' step, # which is the last step in the flow. end <- function(self){ message("HelloAWS is all done.") } metaflow("HelloAWSFlow") %>% - step(step = "start", - r_function = start, + step(step = "start", + r_function = start, next_step = "hello") %>% - step(step = "hello", + step(step = "hello", decorator("retry", times=2), decorator("batch", cpu=2, memory=2048), - r_function = hello, + r_function = hello, next_step = "end") %>% - step(step = "end", - r_function = end) %>% + step(step = "end", + r_function = end) %>% run() diff --git a/R/inst/tutorials/05-statistics-redux/README.md b/R/inst/tutorials/05-statistics-redux/README.md index 22142fcc1c0..2576b4b5f15 100644 --- a/R/inst/tutorials/05-statistics-redux/README.md +++ b/R/inst/tutorials/05-statistics-redux/README.md @@ -28,9 +28,9 @@ In a terminal: If you are using RStudio, you can replace the last line `run()` with ```R run(batch=TRUE, max_workers=4, package_suffixes=".R,.csv,") -``` +``` and run by `source("stats.R")`. ##### Inspect the results: Open the R markdown file ```02-statistics/stats.Rmd``` in your RStudio and re-run the cells. You can access -the artifacts stored in AWS S3 from your local RStudio session. \ No newline at end of file +the artifacts stored in AWS S3 from your local RStudio session. \ No newline at end of file diff --git a/R/inst/tutorials/06-worldview/README.md b/R/inst/tutorials/06-worldview/README.md index 8b500187a26..3f896e12f9a 100644 --- a/R/inst/tutorials/06-worldview/README.md +++ b/R/inst/tutorials/06-worldview/README.md @@ -11,4 +11,4 @@ monitor all of your Metaflow flows.** #### To play this episode: 1. ```cd tutorials/06-worldview/``` -2. Open ```worldview.Rmd``` in RStudio on your local computer \ No newline at end of file +2. Open ```worldview.Rmd``` in RStudio on your local computer \ No newline at end of file diff --git a/R/inst/tutorials/06-worldview/worldview.Rmd b/R/inst/tutorials/06-worldview/worldview.Rmd index 07896ccf041..1089386da8a 100644 --- a/R/inst/tutorials/06-worldview/worldview.Rmd +++ b/R/inst/tutorials/06-worldview/worldview.Rmd @@ -18,9 +18,9 @@ set_namespace(NULL) flow_names <- metaflow::list_flows() for (name in unlist(flow_names)){ flow <- flow_client$new(name) - + run <- run_client$new(flow, flow$latest_run) - + message("Run id: ", run$id, " Last run: ", run$finished_at, " Successful: ", run$successful) } ``` diff --git a/R/inst/tutorials/07-autopilot/README.md b/R/inst/tutorials/07-autopilot/README.md index 0ccd1ee0f94..864e9dbb086 100644 --- a/R/inst/tutorials/07-autopilot/README.md +++ b/R/inst/tutorials/07-autopilot/README.md @@ -1,11 +1,11 @@ # Episode 07-autopilot: Scheduling Compute in the Cloud. -**This example revisits 'Episode 05-statistics-redux: Computing in the Cloud'. +**This example revisits 'Episode 05-statistics-redux: Computing in the Cloud'. With Metaflow, you don't need to make any code changes to schedule your flow in the cloud. In this example we will schedule the 'stats.R' workflow -using the 'step-functions create' command line argument. This instructs -Metaflow to schedule your flow on AWS Step Functions without changing any code. -You can execute your flow on AWS Step Functions by using the +using the 'step-functions create' command line argument. This instructs +Metaflow to schedule your flow on AWS Step Functions without changing any code. +You can execute your flow on AWS Step Functions by using the 'step-functions trigger' command line argument. You can use a notebook to setup a simple dashboard to monitor all of your Metaflow flows.** @@ -24,16 +24,16 @@ In a terminal: 2. ```Rscript stats.R --package-suffixes=.R,.csv step-functions create --max-workers 4``` 3. ```Rscript stats.R --package-suffixes=.R,.csv step-functions trigger``` -If you are using RStudio, you can replace the last line `run()` by +If you are using RStudio, you can replace the last line `run()` by ```R run(package_suffixes=".R,.csv", step_functions="create", max_workers=4) ``` -for SFN create, and +for SFN create, and ```R run(package_suffixes=".R,.csv", step_functions="trigger") ``` -for SFN trigger. You can then directly run `source("stats.R`)` in RStudio. +for SFN trigger. You can then directly run `source("stats.R`)` in RStudio. ##### Inspect the results: Open the R Markdown file```07-autopilot/stats.Rmd``` in your RStudio and re-run the cells. You can access -the artifacts stored in AWS S3 from your local RStudio session. \ No newline at end of file +the artifacts stored in AWS S3 from your local RStudio session. \ No newline at end of file diff --git a/R/inst/tutorials/07-autopilot/autopilot.Rmd b/R/inst/tutorials/07-autopilot/autopilot.Rmd index 54e21a5b98a..fbde4639d29 100644 --- a/R/inst/tutorials/07-autopilot/autopilot.Rmd +++ b/R/inst/tutorials/07-autopilot/autopilot.Rmd @@ -12,7 +12,7 @@ message("Current metadata provider: ", metaflow::get_metadata()) ``` ## Plot a timeline view of a scheduled run of MovieStatsFlow -When you triggered your flow on AWS Step Functions using `step-functions trigger`, you would have seen an output similar to - +When you triggered your flow on AWS Step Functions using `step-functions trigger`, you would have seen an output similar to - ```{bash} ... Workflow MovieStatsFlow triggered on AWS Step Functions (run-id sfn-dolor-sit-amet). diff --git a/R/man/batch.Rd b/R/man/batch.Rd index 5286e63645f..7df1ceb72f4 100644 --- a/R/man/batch.Rd +++ b/R/man/batch.Rd @@ -78,7 +78,7 @@ from all decorators is used. } \examples{ \dontrun{ -# This example will generate a large random matrix which takes up roughly +# This example will generate a large random matrix which takes up roughly # 48GB of memory, and sums the entries. The `batch` decorator forces this # step to run in an environment with 60000MB of memory. diff --git a/R/man/environment_variables.Rd b/R/man/environment_variables.Rd index d7d855637f7..9fac318d9fc 100644 --- a/R/man/environment_variables.Rd +++ b/R/man/environment_variables.Rd @@ -25,11 +25,11 @@ start <- function(self) { } metaflow("EnvironmentVariables") \%>\% - step(step="start", + step(step="start", environment_variables(CUTEST_ANIMAL = "corgi", ALSO_CUTE = "penguin"), - r_function=start, + r_function=start, next_step="end") \%>\% - step(step="end") \%>\% + step(step="end") \%>\% run() } } diff --git a/R/man/retry.Rd b/R/man/retry.Rd index 11cfde429d1..056c7fc0c3d 100644 --- a/R/man/retry.Rd +++ b/R/man/retry.Rd @@ -32,7 +32,7 @@ information on how to use this decorator. start <- function(self){ n <- rbinom(n=1, size=1, prob=0.5) if (n==0){ - stop("Bad Luck!") + stop("Bad Luck!") } else{ print("Lucky you!") } @@ -43,12 +43,12 @@ end <- function(self){ } metaflow("RetryFlow") \%>\% - step(step="start", + step(step="start", retry(times=3), - r_function=start, + r_function=start, next_step="end") \%>\% - step(step="end", - r_function=end) \%>\% + step(step="end", + r_function=end) \%>\% run() } } diff --git a/R/tests/contexts.json b/R/tests/contexts.json index 1e1fb02812c..2d4b83c48da 100644 --- a/R/tests/contexts.json +++ b/R/tests/contexts.json @@ -25,7 +25,7 @@ }, "python": "python3", "top_options": [ - "batch = TRUE", + "batch = TRUE", "max_workers = 16", "package_suffixes = c('.R', '.py', '.csv')", "metadata='service'", diff --git a/R/tests/testthat/test-decorators-aws.R b/R/tests/testthat/test-decorators-aws.R index d3eb007db34..f7b8ae6df62 100644 --- a/R/tests/testthat/test-decorators-aws.R +++ b/R/tests/testthat/test-decorators-aws.R @@ -14,7 +14,7 @@ test_that("@batch parses correctly", { test_that("@resources wrapper parsed correctly", { skip_if_no_metaflow() - + actual <- resources()[1] expected <- paste0("@resources(", "cpu=1, ", @@ -23,7 +23,7 @@ test_that("@resources wrapper parsed correctly", { "shared_memory=None", ")") expect_equal(actual, expected) - + expect_match(resources(gpu = 1)[1], "gpu=1") expect_match(resources(memory = 60000)[1], "memory=60000") }) @@ -36,7 +36,7 @@ test_that("@batch wrapper parsed correctly", { pkg.env$mf$metaflow_config$BATCH_JOB_QUEUE <- "foo" pkg.env$mf$metaflow_config$ECS_S3_ACCESS_IAM_ROLE <- "bar" pkg.env$mf$metaflow_config$ECS_FARGATE_EXECUTION_ROLE <- "baz" - + actual <- batch()[1] expected <- paste0("@batch(", "cpu=1, ", @@ -51,7 +51,7 @@ test_that("@batch wrapper parsed correctly", { "swappiness=None", ")") expect_equal(actual, expected) - + expect_match(batch(gpu = 1)[1], "gpu=1") expect_match(batch(iam_role = "cassowary")[1], "iam_role='cassowary'") }) diff --git a/R/tests/testthat/test-decorators-environment.R b/R/tests/testthat/test-decorators-environment.R index 451e44015c1..7b8e50232cf 100644 --- a/R/tests/testthat/test-decorators-environment.R +++ b/R/tests/testthat/test-decorators-environment.R @@ -1,6 +1,6 @@ test_that("@environment parses correctly", { skip_if_no_metaflow() - + actual <- decorator("retry", times = 3)[1] expected <- "@retry(times=3)" expect_equal(actual, expected) @@ -8,15 +8,15 @@ test_that("@environment parses correctly", { test_that("@environment wrapper parses correctly", { skip_if_no_metaflow() - + actual <- environment_variables(foo = "red panda")[1] expected <- "@environment(vars={'foo': 'red panda'})" expect_equal(actual, expected) - + actual <- environment_variables(foo = "red panda", bar = "corgi")[1] expected <- "@environment(vars={'foo': 'red panda', 'bar': 'corgi'})" expect_equal(actual, expected) - + # Note that in this case, "TRUE" does not become Pythonic "True" --- # each environment variable value is immediately coerced to a character. actual <- environment_variables(foo = "TRUE")[1] diff --git a/R/tests/testthat/test-decorators-error.R b/R/tests/testthat/test-decorators-error.R index fa6e0f3644f..cce97622ffe 100644 --- a/R/tests/testthat/test-decorators-error.R +++ b/R/tests/testthat/test-decorators-error.R @@ -1,6 +1,6 @@ test_that("@retry parses correctly", { skip_if_no_metaflow() - + actual <- decorator("retry", times = 3)[1] expected <- "@retry(times=3)" expect_equal(actual, expected) @@ -8,11 +8,11 @@ test_that("@retry parses correctly", { test_that("@retry wrapper parses correctly", { skip_if_no_metaflow() - + actual <- retry(times = 3)[1] expected <- "@retry(times=3, minutes_between_retries=2)" expect_equal(actual, expected) - + actual <- retry(times = 3, minutes_between_retries=0)[1] expected <- "@retry(times=3, minutes_between_retries=0)" expect_equal(actual, expected) @@ -20,7 +20,7 @@ test_that("@retry wrapper parses correctly", { test_that("@catch parses correctly", { skip_if_no_metaflow() - + actual <- decorator("catch", var = "red_panda")[1] expected <- "@catch(var='red_panda')" expect_equal(actual, expected) @@ -28,11 +28,11 @@ test_that("@catch parses correctly", { test_that("@catch wrapper parses correctly", { skip_if_no_metaflow() - + actual <- catch(var = "red_panda")[1] expected <- "@catch(var='red_panda', print_exception=True)" expect_equal(actual, expected) - + actual <- catch(var = "red_panda", print_exception = FALSE)[1] expected <- "@catch(var='red_panda', print_exception=False)" expect_equal(actual, expected) diff --git a/R/tests/testthat/test-decorators.R b/R/tests/testthat/test-decorators.R index f14540128b2..9675fb12c28 100644 --- a/R/tests/testthat/test-decorators.R +++ b/R/tests/testthat/test-decorators.R @@ -7,19 +7,19 @@ test_that("error on duplicate arguments", { test_that("decorator arguments parsed correctly", { skip_if_no_metaflow() - + actual <- decorator_arguments(list(cpu = 10)) expected <- "cpu=10" expect_equal(actual, expected) - + actual <- decorator_arguments(list(memory = 60000, cpu = 10)) expected <- "memory=60000, cpu=10" expect_equal(actual, expected) - + actual <- decorator_arguments(list(memory = 60000, image = NULL)) expected <- "memory=60000, image=None" expect_equal(actual, expected) - + actual <- decorator_arguments(list(abc = "red panda"), .convert_args = FALSE) expected <- "abc=red panda" # invalid Python because we're not converting expect_equal(actual, expected) diff --git a/R/tests/testthat/test-sfn-cli-parsing.R b/R/tests/testthat/test-sfn-cli-parsing.R index 9906953490a..5eea9483eed 100644 --- a/R/tests/testthat/test-sfn-cli-parsing.R +++ b/R/tests/testthat/test-sfn-cli-parsing.R @@ -8,7 +8,7 @@ test_that("SFN create", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions create" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions create" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) @@ -23,7 +23,7 @@ test_that("SFN create --help", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --help" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --help" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) @@ -38,7 +38,7 @@ test_that("SFN create --package-suffixes", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint --package-suffixes=.csv,.RDS,.R step-functions create" + expected <- "--flowRDS=flow.RDS --no-pylint --package-suffixes=.csv,.RDS,.R step-functions create" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) @@ -53,7 +53,7 @@ test_that("SFN create --generate-new-token", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --generate-new-token" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --generate-new-token" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) @@ -68,7 +68,7 @@ test_that("SFN create --generate-new-token --max-workers 100 --lr 0.01", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --generate-new-token --max-workers 100 --lr 0.01" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions create --generate-new-token --max-workers 100 --lr 0.01" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) }) @@ -83,7 +83,7 @@ test_that("SFN trigger", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions trigger" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions trigger" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) @@ -99,7 +99,7 @@ test_that("SFN list-runs --running", { run_cmd <- strsplit(trimws(readRDS("run_cmd.RDS")), split=" ")[[1]] actual <- paste(run_cmd[3:length(run_cmd)], collapse=" ") - expected <- "--flowRDS=flow.RDS --no-pylint step-functions list-runs --running" + expected <- "--flowRDS=flow.RDS --no-pylint step-functions list-runs --running" expect_equal(actual, expected) on.exit(file.remove("run_cmd.RDS")) diff --git a/README.md b/README.md index 9349613e8cb..ce8443b67ff 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Metaflow provides a simple and friendly pythonic [API](https://docs.metaflow.org 2. [Effortlessly scale horizontally and vertically in your cloud](https://docs.metaflow.org/scaling/remote-tasks/introduction), utilizing both CPUs and GPUs, with [fast data access](https://docs.metaflow.org/scaling/data) for running [massive embarrassingly parallel](https://docs.metaflow.org/metaflow/basics#foreach) as well as [gang-scheduled](https://docs.metaflow.org/scaling/remote-tasks/distributed-computing) compute workloads [reliably](https://docs.metaflow.org/scaling/failures) and [efficiently](https://docs.metaflow.org/scaling/checkpoint/introduction). 3. [Easily manage dependencies](https://docs.metaflow.org/scaling/dependencies) and [deploy with one-click](https://docs.metaflow.org/production/introduction) to highly available production orchestrators with built in support for [reactive orchestration](https://docs.metaflow.org/production/event-triggering). -For full documentation, check out our [API Reference](https://docs.metaflow.org/api) or see our [Release Notes](https://github.com/Netflix/metaflow/releases) for the latest features and improvements. +For full documentation, check out our [API Reference](https://docs.metaflow.org/api) or see our [Release Notes](https://github.com/Netflix/metaflow/releases) for the latest features and improvements. ## Getting started @@ -37,11 +37,11 @@ Alternatively, using [conda-forge](https://anaconda.org/conda-forge/metaflow): conda install -c conda-forge metaflow ``` -Once installed, a great way to get started is by following our [tutorial](https://docs.metaflow.org/getting-started/tutorials). It walks you through creating and running your first Metaflow flow step by step. +Once installed, a great way to get started is by following our [tutorial](https://docs.metaflow.org/getting-started/tutorials). It walks you through creating and running your first Metaflow flow step by step. For more details on Metaflow’s features and best practices, check out: -- [How Metaflow works](https://docs.metaflow.org/metaflow/basics) -- [Additional resources](https://docs.metaflow.org/introduction/metaflow-resources) +- [How Metaflow works](https://docs.metaflow.org/metaflow/basics) +- [Additional resources](https://docs.metaflow.org/introduction/metaflow-resources) If you need help, don’t hesitate to reach out on our [Slack community](http://slack.outerbounds.co/)! @@ -50,8 +50,8 @@ If you need help, don’t hesitate to reach out on our [Slack community](http:// -While you can get started with Metaflow easily on your laptop, the main benefits of Metaflow lie in its ability to [scale out to external compute clusters](https://docs.metaflow.org/scaling/remote-tasks/introduction) -and to [deploy to production-grade workflow orchestrators](https://docs.metaflow.org/production/introduction). To benefit from these features, follow this [guide](https://outerbounds.com/engineering/welcome/) to +While you can get started with Metaflow easily on your laptop, the main benefits of Metaflow lie in its ability to [scale out to external compute clusters](https://docs.metaflow.org/scaling/remote-tasks/introduction) +and to [deploy to production-grade workflow orchestrators](https://docs.metaflow.org/production/introduction). To benefit from these features, follow this [guide](https://outerbounds.com/engineering/welcome/) to configure Metaflow and the infrastructure behind it appropriately. diff --git a/SECURITY.md b/SECURITY.md index dc4503118f9..3c8ce7a0f03 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,6 +1,6 @@ # Security Policy -We currently accept reports for vulnerabilities on all published versions of the project. +We currently accept reports for vulnerabilities on all published versions of the project. ## Reporting a Vulnerability diff --git a/devtools/Tiltfile b/devtools/Tiltfile index ccf724950b6..10a2c9650bb 100644 --- a/devtools/Tiltfile +++ b/devtools/Tiltfile @@ -21,7 +21,7 @@ JOBSET_VERSION = os.getenv("JOBSET_VERSION", "v0.8.2") ARGO_WORKFLOWS_HELM_CHART_VERSION = os.getenv("ARGO_WORKFLOWS_HELM_CHART_VERSION", "0.45.2") # Helm chart version ARGO_WORKFLOWS_IMAGE_TAG = os.getenv("ARGO_WORKFLOWS_IMAGE_TAG", "v3.6.0") # Argo Workflows application version -# Argo Events versions +# Argo Events versions ARGO_EVENTS_HELM_CHART_VERSION = os.getenv("ARGO_EVENTS_HELM_CHART_VERSION", "2.4.8") # Helm chart version ARGO_EVENTS_IMAGE_TAG = os.getenv("ARGO_EVENTS_IMAGE_TAG", "v1.9.2") # Argo Events application version @@ -580,7 +580,7 @@ if "jobset" in enabled_components: ) metaflow_config["METAFLOW_KUBERNETES_JOBSET_ENABLED"] = "true" - + config_resources.append('jobset-controller-manager') # ClusterRole for jobset operations diff --git a/devtools/pick_services.sh b/devtools/pick_services.sh index 697ddb26a93..2d6be9d262f 100755 --- a/devtools/pick_services.sh +++ b/devtools/pick_services.sh @@ -7,9 +7,9 @@ COLOR="214" LOGO=" ______ ________________________________________ __________ __ ___ |/ /__ ____/__ __/__ |__ ____/__ / __ __ \_ | / / -__ /|_/ /__ __/ __ / __ /| |_ /_ __ / _ / / /_ | /| / / -_ / / / _ /___ _ / _ ___ | __/ _ /___/ /_/ /__ |/ |/ / -/_/ /_/ /_____/ /_/ /_/ |_/_/ /_____/\____/ ____/|__/ +__ /|_/ /__ __/ __ / __ /| |_ /_ __ / _ / / /_ | /| / / +_ / / / _ /___ _ / _ ___ | __/ _ /___/ /_/ /__ |/ |/ / +/_/ /_/ /_____/ /_/ /_/ |_/_/ /_____/\____/ ____/|__/ " SERVICE_OPTIONS=( @@ -33,7 +33,7 @@ gum style "Select services to deploy (press enter to select all):" \ pretty_print() { local items=("$@") - + if [ "${#items[@]}" -eq 1 ]; then echo "${items[0]}" return diff --git a/docs/Environment escape.md b/docs/Environment escape.md index c0b03354872..1ab377349b3 100644 --- a/docs/Environment escape.md +++ b/docs/Environment escape.md @@ -225,7 +225,7 @@ everything to the server: to do so. The server is thus started by the client, and the client is responsible for - terminating the server when it dies. A big part of the client and server code + terminating the server when it dies. A big part of the client and server code consist in loading the configuration for the emulated module, particularly the overrides. diff --git a/docs/cards.md b/docs/cards.md index ac4de55b847..1d14bf887ae 100644 --- a/docs/cards.md +++ b/docs/cards.md @@ -2,13 +2,13 @@ Metaflow Cards make it possible to produce human-readable report cards automatically from any Metaflow tasks. You can use the feature to observe results of Metaflow runs, visualize models, and share outcomes with non-technical stakeholders. -While Metaflow comes with a built-in default card that shows all outputs of a task without any changes in the code, the most exciting use cases are enabled by custom cards: With a few additional lines of Python code, you can change the structure and the content of the report to highlight data that matters to you. For more flexible or advanced reports, you can create custom card templates that generate arbitrary HTML. +While Metaflow comes with a built-in default card that shows all outputs of a task without any changes in the code, the most exciting use cases are enabled by custom cards: With a few additional lines of Python code, you can change the structure and the content of the report to highlight data that matters to you. For more flexible or advanced reports, you can create custom card templates that generate arbitrary HTML. Anyone can create card templates and share them as standard Python packages. Cards can be accessed via the Metaflow CLI even without an internet connection, making it possible to use them in security-conscious environments. Cards are also integrated with the latest release of the Metaflow GUI, allowing you to enrich the existing task view with application-specific information. ## Technical Details -### Table Of Contents +### Table Of Contents * [@card decorator](#card-decorator) * [Parameters](#parameters) * [Usage Semantics](#usage-semantics) @@ -27,7 +27,7 @@ Anyone can create card templates and share them as standard Python packages. Car Metaflow cards can be created by placing an [`@card` decorator](#@card-decorator) over a `@step`. Cards are created after a metaflow task ( instantiation of each `@step` ) completes execution. You can have multiple `@card` decorators for an individual `@step`. Each decorator takes a `type` argument which defaults to the value `default`. The `type` argument corresponds the [MetaflowCard.type](#metaflowcard). On task completion ,every `@card` decorator creates a separate subprocess to call the [card create cli command](#card-cli). This command will create and [store](#carddatastore) the HTML page for the card. -Since the cards are stored in the datastore we can access them via the `view/get` commands in the [card_cli](#card-cli) or by using the `get_cards` [function](../metaflow/plugins/cards/card_client.py). +Since the cards are stored in the datastore we can access them via the `view/get` commands in the [card_cli](#card-cli) or by using the `get_cards` [function](../metaflow/plugins/cards/card_client.py). Metaflow ships with a [DefaultCard](#defaultcard) which visualizes artifacts, images, and `pandas.Dataframe`s. Metaflow also ships custom components like `Image`, `Table`, `Markdown` etc. These can be added to a card at `Task` runtime. Cards can also be edited from `@step` code using the [current.card](#editing-metaflowcard-from-@step-code) interface. `current.card` helps add `MetaflowCardComponent`s from `@step` code to a `MetaflowCard`. `current.card` offers methods like `current.card.append` or `current.card['myid']` to helps add components to a card. Since there can be many `@card`s over a `@step`, `@card` also comes with an `id` argument. The `id` argument helps disambiguate the card a component goes to when using `current.card`. For example, setting `@card(id='myid')` and calling `current.card['myid'].append(x)` will append `MetaflowCardComponent` `x` to the card with `id='myid'`. @@ -35,10 +35,10 @@ Metaflow ships with a [DefaultCard](#defaultcard) which visualizes artifacts, im The `@card` [decorator](../metaflow/plugins/cards/card_decorator.py) is implemented by inheriting the `StepDecorator`. The decorator can be placed over `@step` to create an HTML file visualizing information from the task. #### Parameters -- `type` `(str)` [Defaults to `default`]: The `type` of `MetaflowCard` to create. More details on `MetaflowCard`s is provided [later in this document](#metaflowcard). -- `options` `(dict)` : options to instantiate a `MetaflowCard`. `MetaflowCard`s will be instantiated with the `options` keyword argument. The value of this argument will be this dictionary. -- `timeout` `(int)` [Defaults to `45`]: Amount of time to wait before killing the card subprocess -- `save_errors` `(bool)` [Defaults to `True`]: If set to `True` then any failure on rendering a `MetaflowCard` will generate an `ErrorCard` instead with the full stack trace of the failure. +- `type` `(str)` [Defaults to `default`]: The `type` of `MetaflowCard` to create. More details on `MetaflowCard`s is provided [later in this document](#metaflowcard). +- `options` `(dict)` : options to instantiate a `MetaflowCard`. `MetaflowCard`s will be instantiated with the `options` keyword argument. The value of this argument will be this dictionary. +- `timeout` `(int)` [Defaults to `45`]: Amount of time to wait before killing the card subprocess +- `save_errors` `(bool)` [Defaults to `True`]: If set to `True` then any failure on rendering a `MetaflowCard` will generate an `ErrorCard` instead with the full stack trace of the failure. #### Usage Semantics @@ -63,7 +63,7 @@ class ModelTrainingFlow(FlowSpec): import numpy as np self.loss = np.random.randn(100,100)*100 self.next(self.end) - + @step def end(self): print("Done Computation") @@ -75,25 +75,25 @@ if __name__ == "__main__": ### `CardDatastore` -The [CardDatastore](../metaflow/plugins/cards/card_datastore.py) is used by the [card_cli](#card-cli) and the [metaflow card client](#access-cards-in-notebooks) (`get_cards`). It exposes methods to get metadata about a card and the paths to cards for a `pathspec`. +The [CardDatastore](../metaflow/plugins/cards/card_datastore.py) is used by the [card_cli](#card-cli) and the [metaflow card client](#access-cards-in-notebooks) (`get_cards`). It exposes methods to get metadata about a card and the paths to cards for a `pathspec`. ### Card CLI Methods exposed by the [card_cli](../metaflow/plugins/cards/.card_cli.py). : -- `create` : Creates the card in the datastore for a `Task`. Adding a `--render-error-card` will render a `ErrorCard` upon failure to render the card of the selected `type`. If `--render-error-card` is not passed then the CLI will fail loudly with the exception. +- `create` : Creates the card in the datastore for a `Task`. Adding a `--render-error-card` will render a `ErrorCard` upon failure to render the card of the selected `type`. If `--render-error-card` is not passed then the CLI will fail loudly with the exception. ```sh -# python myflow.py card create --type --timeout --options "{}" +# python myflow.py card create --type --timeout --options "{}" python myflow.py card create 100/stepname/1000 --type default --timeout 10 --options '{"only_repr":false}' --render-error-card ``` -- `view/get` : Calling the `view` CLI method will open the card associated for the pathspec in a browser. The `get` method gets the HTML for the card and prints it. You can call the command in the following way. Adding `--follow-resumed` as argument will retrieve the card for the origin resumed task. +- `view/get` : Calling the `view` CLI method will open the card associated for the pathspec in a browser. The `get` method gets the HTML for the card and prints it. You can call the command in the following way. Adding `--follow-resumed` as argument will retrieve the card for the origin resumed task. ```sh -# python myflow.py card view --hash --type -python myflow.py card view 100/stepname/1000 --hash ads34 --type default --follow-resumed +# python myflow.py card view --hash --type +python myflow.py card view 100/stepname/1000 --hash ads34 --type default --follow-resumed ``` ### Access cards in notebooks -Metaflow also exposes a `get_cards` client that helps resolve cards outside the CLI. Example usage is shown below : +Metaflow also exposes a `get_cards` client that helps resolve cards outside the CLI. Example usage is shown below : ```python from metaflow import Task from metaflow.cards import get_cards @@ -112,7 +112,7 @@ html = card_iterator[0].get() ### `MetaflowCard` -The [MetaflowCard](../metaflow/plugins/cards/card_modules/card.py) class is the base class to create custom cards. All subclasses require implementing the `render` function. The `render` function is expected to return a string. Below is an example snippet of usage : +The [MetaflowCard](../metaflow/plugins/cards/card_modules/card.py) class is the base class to create custom cards. All subclasses require implementing the `render` function. The `render` function is expected to return a string. Below is an example snippet of usage : ```python from metaflow.cards import MetaflowCard # path to the custom html file which is a `mustache` template. @@ -140,21 +140,21 @@ class CustomCard(MetaflowCard): return pt.render(html_template,data) ``` -The class consists of the `_get_mustache` method that returns [chevron](https://github.com/noahmorrison/chevron) object ( a `mustache` based [templating engine](http://mustache.github.io/mustache.5.html) ). Using the `mustache` templating engine you can rewrite HTML template file. In the above example the `PATH_TO_CUSTOM_HTML` is the file that holds the `mustache` HTML template. +The class consists of the `_get_mustache` method that returns [chevron](https://github.com/noahmorrison/chevron) object ( a `mustache` based [templating engine](http://mustache.github.io/mustache.5.html) ). Using the `mustache` templating engine you can rewrite HTML template file. In the above example the `PATH_TO_CUSTOM_HTML` is the file that holds the `mustache` HTML template. #### Attributes - `type (str)` : The `type` of card. Needs to ensure correct resolution. -- `ALLOW_USER_COMPONENTS (bool)` : Setting this to `True` will make the card be user editable. More information on user editable cards can be found [here](#editing-metaflowcard-from-@step-code). +- `ALLOW_USER_COMPONENTS (bool)` : Setting this to `True` will make the card be user editable. More information on user editable cards can be found [here](#editing-metaflowcard-from-@step-code). #### `__init__` Parameters -- `components` `(List[str])`: `components` is a list of `render`ed `MetaflowCardComponent`s created at `@step` runtime. These are passed to the `card create` cli command via a tempfile path in the `--component-file` argument. +- `components` `(List[str])`: `components` is a list of `render`ed `MetaflowCardComponent`s created at `@step` runtime. These are passed to the `card create` cli command via a tempfile path in the `--component-file` argument. - `graph` `(Dict[str,dict])`: The DAG associated to the flow. It is a dictionary of the form `stepname:step_attributes`. `step_attributes` is a dictionary of metadata about a step , `stepname` is the name of the step in the DAG. -- `options` `(dict)`: helps control the behavior of individual cards. - - For example, the `DefaultCard` supports `options` as dictionary of the form `{"only_repr":True}`. Here setting `only_repr` as `True` will ensure that all artifacts are serialized with `reprlib.repr` function instead of native object serialization. +- `options` `(dict)`: helps control the behavior of individual cards. + - For example, the `DefaultCard` supports `options` as dictionary of the form `{"only_repr":True}`. Here setting `only_repr` as `True` will ensure that all artifacts are serialized with `reprlib.repr` function instead of native object serialization. ### `MetaflowCardComponent` -The `render` function of the `MetaflowCardComponent` class returns a `string` or `dict`. It can be called in the `MetaflowCard` class or passed during runtime execution. An example of using `MetaflowCardComponent` inside `MetaflowCard` can be seen below : +The `render` function of the `MetaflowCardComponent` class returns a `string` or `dict`. It can be called in the `MetaflowCard` class or passed during runtime execution. An example of using `MetaflowCardComponent` inside `MetaflowCard` can be seen below : ```python from metaflow.cards import MetaflowCard,MetaflowCardComponent @@ -196,32 +196,32 @@ class CustomCard(MetaflowCard): data = data ) html_template = self.HTML - + return pt.render(html_template,data) ``` ### `DefaultCard` -The [DefaultCard](../metaflow/plugins/cards/card_modules/basic.py) is a default card exposed by metaflow. This will be used when the `@card` decorator is called without any `type` argument or called with `type='default'` argument. It will also be the default card used with cli. The card uses an [HTML template](../metaflow/plugins/cards/card_modules/base.html) along with a [JS](../metaflow/plugins/cards/card_modules/main.js) and a [CSS](../metaflow/plugins/cards/card_modules/bundle.css) files. +The [DefaultCard](../metaflow/plugins/cards/card_modules/basic.py) is a default card exposed by metaflow. This will be used when the `@card` decorator is called without any `type` argument or called with `type='default'` argument. It will also be the default card used with cli. The card uses an [HTML template](../metaflow/plugins/cards/card_modules/base.html) along with a [JS](../metaflow/plugins/cards/card_modules/main.js) and a [CSS](../metaflow/plugins/cards/card_modules/bundle.css) files. -The [HTML](../metaflow/plugins/cards/card_modules/base.html) is a template which works with [JS](../metaflow/plugins/cards/card_modules/main.js) and [CSS](../metaflow/plugins/cards/card_modules/bundle.css). +The [HTML](../metaflow/plugins/cards/card_modules/base.html) is a template which works with [JS](../metaflow/plugins/cards/card_modules/main.js) and [CSS](../metaflow/plugins/cards/card_modules/bundle.css). -The JS and CSS are created after building the JS and CSS from the [cards-ui](../metaflow/plugins/cards/ui/README.md) directory. [cards-ui](../metaflow/plugins/cards/ui/README.md) consists of the JS app that generates the HTML view from a JSON object. +The JS and CSS are created after building the JS and CSS from the [cards-ui](../metaflow/plugins/cards/ui/README.md) directory. [cards-ui](../metaflow/plugins/cards/ui/README.md) consists of the JS app that generates the HTML view from a JSON object. ### Default `MetaflowCardComponent` -`DefaultCard`/`BlankCard` can be given `MetaflowCardComponent` from `@step` code. The following are the main `MetaflowCardComponent`s available via `metaflow.cards`. -- `Artifact` : A component to help log artifacts at task runtime. +`DefaultCard`/`BlankCard` can be given `MetaflowCardComponent` from `@step` code. The following are the main `MetaflowCardComponent`s available via `metaflow.cards`. +- `Artifact` : A component to help log artifacts at task runtime. - Example : `Artifact(some_variable,compress=True)` -- `Table` : A component to create a table in the card HTML. Consists of convenience methods : +- `Table` : A component to create a table in the card HTML. Consists of convenience methods : - `Table.from_dataframe(df)` to make a table from a dataframe. -- `Image` : A component to create an image in the card HTML: +- `Image` : A component to create an image in the card HTML: - `Image(bytearr,"my Image from bytes")`: to directly from `bytes` - `Image.from_pil_image(pilimage,"From PIL Image")` : to create an image from a `PIL.Image` - `Image.from_matplotlib(plot,"My matplotlib plot")` : to create an image from a plot -- `Error` : A wrapper subcomponent to display errors. Accepts an `exception` and a `title` as arguments. +- `Error` : A wrapper subcomponent to display errors. Accepts an `exception` and a `title` as arguments. - `Markdown` : A component that renders markdown in the HTML template ### Editing `MetaflowCard` from `@step` code -`MetaflowCard`s can be edited from `@step` code using the `current.card` interface. The `current.card` interface will only be active when a `@card` decorator is placed over a `@step`. To understand the workings of `current.card` consider the following snippet. +`MetaflowCard`s can be edited from `@step` code using the `current.card` interface. The `current.card` interface will only be active when a `@card` decorator is placed over a `@step`. To understand the workings of `current.card` consider the following snippet. ```python @card(type='blank',id='a') @card(type='default') @@ -241,32 +241,32 @@ In the above scenario there are two `@card` decorators which are being customize #### `current.card` (`CardComponentCollector`) -The `CardComponentCollector` is the object responsible for resolving a `MetaflowCardComponent` to the card referenced in the `@card` decorator. +The `CardComponentCollector` is the object responsible for resolving a `MetaflowCardComponent` to the card referenced in the `@card` decorator. Since there can be many cards, `CardComponentCollector` has a `_finalize` function. The `_finalize` function is called once the **last** `@card` decorator calls `task_pre_step`. The `_finalize` function will try to find the **default editable card** from all the `@card` decorators on the `@step`. The default editable card is the card that can access the `current.card.append`/`current.card.extend` methods. If there are multiple editable cards with no `id` then `current.card` will throw warnings when users call `current.card.append`. This is done because `current.card` cannot resolve which card the component belongs. -The `@card` decorator also exposes another argument called `customize=True`. **Only one `@card` decorator over a `@step` can have `customize=True`**. Since cards can also be added from CLI when running a flow, adding `@card(customize=True)` will set **that particular card** from the decorator as default editable. This means that `current.card.append` will append to the card belonging to `@card` with `customize=True`. If there is more than one `@card` decorator with `customize=True` then `current.card` will throw warnings that `append` won't work. +The `@card` decorator also exposes another argument called `customize=True`. **Only one `@card` decorator over a `@step` can have `customize=True`**. Since cards can also be added from CLI when running a flow, adding `@card(customize=True)` will set **that particular card** from the decorator as default editable. This means that `current.card.append` will append to the card belonging to `@card` with `customize=True`. If there is more than one `@card` decorator with `customize=True` then `current.card` will throw warnings that `append` won't work. -One important feature of the `current.card` object is that it will not fail. Even when users try to access `current.card.append` with multiple editable cards, we throw warnings but don't fail. `current.card` will also not fail when a user tries to access a card of a non-existing id via `current.card['mycard']`. Since `current.card['mycard']` gives reference to a `list` of `MetaflowCardComponent`s, `current.card` will return a non-referenced `list` when users try to access the dictionary interface with a nonexistent id (`current.card['my_non_existant_card']`). +One important feature of the `current.card` object is that it will not fail. Even when users try to access `current.card.append` with multiple editable cards, we throw warnings but don't fail. `current.card` will also not fail when a user tries to access a card of a non-existing id via `current.card['mycard']`. Since `current.card['mycard']` gives reference to a `list` of `MetaflowCardComponent`s, `current.card` will return a non-referenced `list` when users try to access the dictionary interface with a nonexistent id (`current.card['my_non_existant_card']`). -Once the `@step` completes execution, every `@card` decorator will call `current.card._serialize` (`CardComponentCollector._serialize`) to get a JSON serializable list of `str`/`dict` objects. The `_serialize` function internally calls all [component's](#metaflowcardcomponent) `render` function. This list is `json.dump`ed to a `tempfile` and passed to the `card create` subprocess where the `MetaflowCard` can use them in the final output. +Once the `@step` completes execution, every `@card` decorator will call `current.card._serialize` (`CardComponentCollector._serialize`) to get a JSON serializable list of `str`/`dict` objects. The `_serialize` function internally calls all [component's](#metaflowcardcomponent) `render` function. This list is `json.dump`ed to a `tempfile` and passed to the `card create` subprocess where the `MetaflowCard` can use them in the final output. -### Creating Custom Installable Cards +### Creating Custom Installable Cards Custom cards can be installed with the help of the `metaflow_extensions` namespace package. Every `metaflow_extensions` module having custom cards should follow the below directory structure. You can see an example cookie-cutter card over [here](https://github.com/outerbounds/metaflow-card-html). ``` your_package/ # the name of this dir doesn't matter ├ setup.py -├ metaflow_extensions/ -│ └ organizationA/ # NO __init__.py file, This is a namespace package. -│ └ plugins/ # NO __init__.py file, This is a namespace package. -│ └ cards/ # NO __init__.py file, This is a namespace package. +├ metaflow_extensions/ +│ └ organizationA/ # NO __init__.py file, This is a namespace package. +│ └ plugins/ # NO __init__.py file, This is a namespace package. +│ └ cards/ # NO __init__.py file, This is a namespace package. │ └ my_card_module/ # Name of card_module │ └ __init__.py. # This is the __init__.py is required to recognize `my_card_module` as a package -│ └ somerandomfile.py. # Some file as a part of the package. +│ └ somerandomfile.py. # Some file as a part of the package. . ``` -The `__init__.py` of the `metaflow_extensions.organizationA.plugins.cards.my_card_module`, requires a `CARDS` attribute which needs to be a `list` of objects inheriting `MetaflowCard` class. For Example, in the below `__init__.py` file exposes a `MetaflowCard` of `type` "y_card2". +The `__init__.py` of the `metaflow_extensions.organizationA.plugins.cards.my_card_module`, requires a `CARDS` attribute which needs to be a `list` of objects inheriting `MetaflowCard` class. For Example, in the below `__init__.py` file exposes a `MetaflowCard` of `type` "y_card2". ```python from metaflow.cards import MetaflowCard @@ -285,7 +285,7 @@ class YCard(MetaflowCard): CARDS = [YCard] ``` -Having this `metaflow_extensions` module present in the PYTHONPATH can also work. Custom cards can also be created by reusing components provided by metaflow. For Example : +Having this `metaflow_extensions` module present in the PYTHONPATH can also work. Custom cards can also be created by reusing components provided by metaflow. For Example : ```python from metaflow.cards import BlankCard from metaflow.cards import Artifact,Table @@ -293,7 +293,7 @@ from metaflow.cards import Artifact,Table class MyCustomCard(BlankCard): type = 'my_custom_card' - + def render(self, task): art_com = [ Table( diff --git a/docs/concurrency.md b/docs/concurrency.md index ed058830ab5..10380882f36 100644 --- a/docs/concurrency.md +++ b/docs/concurrency.md @@ -80,7 +80,7 @@ Set the environment variable `METAFLOW_DEBUG_SUBCOMMAND=1` to see the exact command line that is used to launch a subcommand task. You can re-execute the task simply by re-executing the command line manually. However, be careful when re-executing commands from real runs, as you -will rewrite data in the datastore. To be safe, preferably rerun only +will rewrite data in the datastore. To be safe, preferably rerun only commands executed with `--datastore=local` and `--metadata=local`. You can observe running subprocesses with `ps` and attach to them using @@ -118,9 +118,9 @@ than TCP. We send heart beats to metadata service in a sidecar, `heartbeat.py` to detect whether the task is alive. Since heart beats are purely informational, -we didn't want to increase the latency of the main process due to these -service calls, nor we wanted to fail the whole parent process in case of a -request failing. A sidecar that handles communication with the metadata +we didn't want to increase the latency of the main process due to these +service calls, nor we wanted to fail the whole parent process in case of a +request failing. A sidecar that handles communication with the metadata service was a perfect solution. #### How to Observe @@ -206,8 +206,8 @@ tasks is very easy and practically zero-cost. #### Example Uses -Many sidecars, e.g. `heartbeat.py`, use a separate worker thread to make -sure that the main process consuming messages from the parent will not +Many sidecars, e.g. `heartbeat.py`, use a separate worker thread to make +sure that the main process consuming messages from the parent will not block for an extended amount of time. ### 5. Multiprocessing diff --git a/docs/datastore.md b/docs/datastore.md index 4f080df0eff..d66f741c455 100644 --- a/docs/datastore.md +++ b/docs/datastore.md @@ -110,7 +110,7 @@ additional operations: identical blobs of data will only be stored once) - transforms the data prior to storing; we currently only compress the data but other operations are possible. - + Data is always de-duplicated, but you can choose to skip the transformation step by telling the content address store that the data should be stored `raw` (ie: with no transformation). Note that the de-duplication logic happens *prior* to diff --git a/docs/lifecycle.dot b/docs/lifecycle.dot index 961aca6e5cc..83e8fe342f2 100644 --- a/docs/lifecycle.dot +++ b/docs/lifecycle.dot @@ -10,7 +10,7 @@ digraph Metaflow { lightgoldenrod1: metadata lightpink2: function call grey78: event / change in control - + */ graph [fontsize=10, fontname="Noto Mono"] @@ -145,7 +145,7 @@ digraph Metaflow { validate_env_deuce -> flow_init_deuce flow_init_deuce -> step_init_deuce step_init_deuce -> choose_command_deuce - + /* package */ validate_dag -> init_environment init_environment -> package_init diff --git a/docs/sidecars.md b/docs/sidecars.md index 132d81dd603..0d9c0d95a76 100644 --- a/docs/sidecars.md +++ b/docs/sidecars.md @@ -2,22 +2,22 @@ ## Purpose -There are several use cases around logging, monitoring, and -possibly other “tier 2” features that would benefit from a nonblocking implementation. -So anything running within a sidecar should be able to be executed asynchronously from the main process, -with no strong consistency requirement between it and the main process. This will help ensure that errors -in non-critical flows do not cause the whole workflow to fail and reduces the latency overhead added +There are several use cases around logging, monitoring, and +possibly other “tier 2” features that would benefit from a nonblocking implementation. +So anything running within a sidecar should be able to be executed asynchronously from the main process, +with no strong consistency requirement between it and the main process. This will help ensure that errors +in non-critical flows do not cause the whole workflow to fail and reduces the latency overhead added by the platform itself. ## Design/Architecture -Sidecars are run under a separate subprocess (sidecar worker) that engages in one-way communication with -the main process (sidecar class) via -[pipes](https://www.tutorialspoint.com/inter_process_communication/inter_process_communication_pipes.htm). -The sidecar worker consumes messages from the main process via stdin and logs debug and error messages to stderr. -Note that since metaflow blocks the completion of a task until the termination of stdout (to collect the logs), +Sidecars are run under a separate subprocess (sidecar worker) that engages in one-way communication with +the main process (sidecar class) via +[pipes](https://www.tutorialspoint.com/inter_process_communication/inter_process_communication_pipes.htm). +The sidecar worker consumes messages from the main process via stdin and logs debug and error messages to stderr. +Note that since metaflow blocks the completion of a task until the termination of stdout (to collect the logs), the stdout for sidecars is directed to dev/nul instead of inheriting the stdout of the parent process to ensure the process is non-blocking. - + #### Interface @@ -29,7 +29,7 @@ Every implementation of sidecar needs to implement the following two methods: #### `def shutdown()` -- Defines the "best effort" shutdown mechanism for the subprocess. +- Defines the "best effort" shutdown mechanism for the subprocess. ## Specific Implementations @@ -37,10 +37,9 @@ Every implementation of sidecar needs to implement the following two methods: We send heart beats to metadata service in a sidecar, `heartbeat.py` to detect whether the task is alive. Since heart beats are purely informational, -we didn't want to increase the latency of the main process due to these -service calls, nor we wanted to fail the whole parent process in case of a -request failing. A sidecar that handles communication with the metadata +we didn't want to increase the latency of the main process due to these +service calls, nor we wanted to fail the whole parent process in case of a +request failing. A sidecar that handles communication with the metadata service was a perfect solution. - \ No newline at end of file diff --git a/metaflow/_vendor/__init__.py b/metaflow/_vendor/__init__.py index ae7b11a6298..30011733fda 100644 --- a/metaflow/_vendor/__init__.py +++ b/metaflow/_vendor/__init__.py @@ -1,10 +1,10 @@ """ -metaflow._vendor is for vendoring dependencies of metaflow. Files -inside of metaflow._vendor should be considered immutable and -should only be updated to versions from upstream. +metaflow._vendor is for vendoring dependencies of metaflow. Files +inside of metaflow._vendor should be considered immutable and +should only be updated to versions from upstream. This folder is generated by `python vendor.py` -If you would like to debundle the vendored dependencies, please +If you would like to debundle the vendored dependencies, please reach out to the maintainers at chat.metaflow.org """ diff --git a/metaflow/_vendor/yaml/parser.py b/metaflow/_vendor/yaml/parser.py index 13a5995d292..ee290856226 100644 --- a/metaflow/_vendor/yaml/parser.py +++ b/metaflow/_vendor/yaml/parser.py @@ -482,7 +482,7 @@ def parse_flow_sequence_entry(self, first=False): token = self.peek_token() raise ParserError("while parsing a flow sequence", self.marks[-1], "expected ',' or ']', but got %r" % token.id, token.start_mark) - + if self.check_token(KeyToken): token = self.peek_token() event = MappingStartEvent(None, None, True, diff --git a/metaflow/_vendor/yaml/scanner.py b/metaflow/_vendor/yaml/scanner.py index 7437ede1c60..7718fc30b78 100644 --- a/metaflow/_vendor/yaml/scanner.py +++ b/metaflow/_vendor/yaml/scanner.py @@ -313,7 +313,7 @@ def remove_possible_simple_key(self): # Remove the saved possible key position at the current flow level. if self.flow_level in self.possible_simple_keys: key = self.possible_simple_keys[self.flow_level] - + if key.required: raise ScannerError("while scanning a simple key", key.mark, "could not find expected ':'", self.get_mark()) @@ -362,11 +362,11 @@ def fetch_stream_start(self): # Read the token. mark = self.get_mark() - + # Add STREAM-START. self.tokens.append(StreamStartToken(mark, mark, encoding=self.encoding)) - + def fetch_stream_end(self): @@ -380,7 +380,7 @@ def fetch_stream_end(self): # Read the token. mark = self.get_mark() - + # Add STREAM-END. self.tokens.append(StreamEndToken(mark, mark)) @@ -388,7 +388,7 @@ def fetch_stream_end(self): self.done = True def fetch_directive(self): - + # Set the current indentation to -1. self.unwind_indent(-1) @@ -515,7 +515,7 @@ def fetch_block_entry(self): self.tokens.append(BlockEntryToken(start_mark, end_mark)) def fetch_key(self): - + # Block context needs additional checks. if not self.flow_level: @@ -565,7 +565,7 @@ def fetch_value(self): # It must be a part of a complex key. else: - + # Block context needs additional checks. # (Do we really need them? They will be caught by the parser # anyway.) @@ -1017,14 +1017,14 @@ def scan_block_scalar(self, style): # Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification: - + if folded and line_break == '\n' \ and leading_non_space and self.peek() not in ' \t': if not breaks: chunks.append(' ') else: chunks.append(line_break) - + # This is Clark Evans's interpretation (also in the spec # examples): # diff --git a/metaflow/datastore/datastore_set.py b/metaflow/datastore/datastore_set.py index 80cc4c690a4..697c86c70bb 100644 --- a/metaflow/datastore/datastore_set.py +++ b/metaflow/datastore/datastore_set.py @@ -7,7 +7,7 @@ """ TaskDataStoreSet allows you to prefetch multiple (read) datastores into a -cache and lets you access them. As a performance optimization it also lets you +cache and lets you access them. As a performance optimization it also lets you prefetch select data artifacts leveraging a shared cache. """ @@ -69,7 +69,7 @@ def __iter__(self): """ This class ensures that blobs that correspond to artifacts that -are common to all datastores in this set are only loaded once +are common to all datastores in this set are only loaded once """ diff --git a/metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py b/metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py index 33b91b22b77..f8fdb5b7c1f 100644 --- a/metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +++ b/metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py @@ -120,15 +120,15 @@ def _sanitize_and_add_entry_to_result(k, v): """ These are the exceptions that can be raised by the AWS SDK: - + SecretsManager.Client.exceptions.ResourceNotFoundException SecretsManager.Client.exceptions.InvalidParameterException SecretsManager.Client.exceptions.InvalidRequestException SecretsManager.Client.exceptions.DecryptionFailure SecretsManager.Client.exceptions.InternalServiceError - + Looks pretty informative already, so we won't catch here directly. - + 1/27/2023(jackie) - We will evolve this over time as we learn more. """ response = secrets_manager_client.get_secret_value(SecretId=secret_id) diff --git a/metaflow/plugins/cards/card_cli.py b/metaflow/plugins/cards/card_cli.py index 9cb8b4bbb9d..a44c31bfc98 100644 --- a/metaflow/plugins/cards/card_cli.py +++ b/metaflow/plugins/cards/card_cli.py @@ -221,7 +221,7 @@ def list_available_cards( cpr = """ Card Id: %s Card Type: %s - Card Hash: %s + Card Hash: %s Card Path: %s """ % ( path_tuple.id, diff --git a/metaflow/plugins/cards/card_modules/base.html b/metaflow/plugins/cards/card_modules/base.html index 76ff0b87d2d..0654653930d 100644 --- a/metaflow/plugins/cards/card_modules/base.html +++ b/metaflow/plugins/cards/card_modules/base.html @@ -12,7 +12,7 @@ - + @@ -36,7 +36,7 @@ {{/RENDER_COMPLETE}} {{^RENDER_COMPLETE}} - {{/RENDER_COMPLETE}} diff --git a/metaflow/plugins/cards/card_modules/test_cards.py b/metaflow/plugins/cards/card_modules/test_cards.py index 67a2ee533da..2cb81c26294 100644 --- a/metaflow/plugins/cards/card_modules/test_cards.py +++ b/metaflow/plugins/cards/card_modules/test_cards.py @@ -109,7 +109,7 @@ def render(self, task): REFRESHABLE_HTML_TEMPLATE = """ -
{#if collapsible} -
- + {#if !isCollapsed}
{json_string}
@@ -90,7 +90,7 @@ margin: 0.5rem 0; overflow: hidden; } - + .json-header { display: flex; justify-content: space-between; @@ -101,7 +101,7 @@ font-size: 0.875rem; font-weight: 500; } - + .collapse-button { display: flex; align-items: center; @@ -113,25 +113,25 @@ font-size: 0.875rem; font-weight: 500; } - + .collapse-button:hover { color: #111827; } - + .collapse-icon { transition: transform 0.2s ease; font-size: 0.75rem; } - + .collapse-icon.collapsed { transform: rotate(-90deg); } - + .json-label { color: #374151; font-weight: 500; } - + .copy-button { background: #3b82f6; color: white; @@ -142,20 +142,20 @@ cursor: pointer; transition: all 0.2s ease; } - + .copy-button:hover { background: #2563eb; } - + .copy-button.success { background: #10b981; } - + .json-content { overflow: auto; max-height: 400px; /* Default max height */ } - + .json-code { margin: 0; padding: 0; @@ -163,7 +163,7 @@ border: none; overflow: visible; } - + .json-code code { display: block; padding: 1rem; @@ -176,37 +176,37 @@ word-break: break-word; border: none; } - + /* Let Prism.js handle all token styling - no custom overrides */ - + /* Responsive adjustments */ @media (max-width: 640px) { .json-header { padding: 0.375rem 0.5rem; font-size: 0.8125rem; } - + .json-code { padding: 0.75rem 0.5rem; font-size: 0.75rem; } - + .copy-button { padding: 0.1875rem 0.375rem; font-size: 0.6875rem; } } - + /* Table context adjustments */ :global(table .json-viewer) { margin: 0.25rem 0; font-size: 0.75rem; } - + :global(table .json-content) { max-height: 200px; } - + :global(table .json-code) { padding: 0.5rem; font-size: 0.6875rem; diff --git a/metaflow/plugins/cards/ui/src/components/log.svelte b/metaflow/plugins/cards/ui/src/components/log.svelte index 2a6d2f4b808..5a4cbde2f58 100644 --- a/metaflow/plugins/cards/ui/src/components/log.svelte +++ b/metaflow/plugins/cards/ui/src/components/log.svelte @@ -11,7 +11,7 @@ $: el ? highlightCode() : null; -
 
+
   
     {componentData.data}
   
diff --git a/metaflow/plugins/cards/ui/src/components/modal.svelte b/metaflow/plugins/cards/ui/src/components/modal.svelte
index e475bb71187..1a72bb024e6 100644
--- a/metaflow/plugins/cards/ui/src/components/modal.svelte
+++ b/metaflow/plugins/cards/ui/src/components/modal.svelte
@@ -1,4 +1,4 @@
-
 
@@ -13,13 +13,13 @@
     {#if title}
       

{title}

{/if} - +
{displayValue}
- + {#if subtitle}

{subtitle}

{/if} - + {#if change_indicator}
{change_indicator}
{/if} @@ -37,11 +37,11 @@ display: flex; align-items: center; } - + .value-box-content { width: 100%; } - + .value-box-title { font-size: 0.875rem; font-weight: 500; @@ -50,7 +50,7 @@ text-transform: uppercase; letter-spacing: 0.025em; } - + .value-box-value { font-size: 2rem; font-weight: 700; @@ -58,13 +58,13 @@ line-height: 1.2; margin: 0 0 0.5rem 0; } - + .value-box-subtitle { font-size: 0.875rem; color: #6b7280; margin: 0 0 0.5rem 0; } - + .value-box-change { font-size: 0.75rem; font-weight: 500; @@ -72,94 +72,94 @@ text-transform: uppercase; letter-spacing: 0.025em; } - + /* Theme variants */ .value-box.default { background: white; border-color: #e5e7eb; } - + .value-box.bg-gradient-indigo-purple { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none; } - + .value-box.bg-gradient-indigo-purple .value-box-title, .value-box.bg-gradient-indigo-purple .value-box-subtitle { color: rgba(255, 255, 255, 0.8); } - + .value-box.bg-gradient-indigo-purple .value-box-value { color: white; } - + .value-box.bg-gradient-indigo-purple .value-box-change { color: rgba(255, 255, 255, 0.9); } - - + + .value-box.success { background: #f0fdf4; border-color: #bbf7d0; } - + .value-box.success .value-box-value { color: #065f46; } - + .value-box.success .value-box-change { color: #059669; } - + .value-box.warning { background: #fffbeb; border-color: #fed7aa; } - + .value-box.warning .value-box-value { color: #92400e; } - + .value-box.warning .value-box-change { color: #d97706; } - + .value-box.danger { background: #fef2f2; border-color: #fecaca; } - + .value-box.danger .value-box-value { color: #991b1b; } - + .value-box.danger .value-box-change { color: #dc2626; } - + /* Responsive adjustments */ @media (max-width: 640px) { .value-box { padding: 1rem; min-height: 100px; } - + .value-box-value { font-size: 1.5rem; } - + } - + /* Table context adjustments */ :global(table .value-box) { min-height: auto; padding: 0.75rem; margin: 0.25rem 0; } - + :global(table .value-box-value) { font-size: 1.25rem; } - + diff --git a/metaflow/plugins/cards/ui/src/components/vega-chart.svelte b/metaflow/plugins/cards/ui/src/components/vega-chart.svelte index 90b988a05e7..60ff82ecf9b 100644 --- a/metaflow/plugins/cards/ui/src/components/vega-chart.svelte +++ b/metaflow/plugins/cards/ui/src/components/vega-chart.svelte @@ -8,7 +8,7 @@ {#if data && spec} {:else} - + {/if} diff --git a/metaflow/plugins/cards/ui/src/components/yaml-viewer.svelte b/metaflow/plugins/cards/ui/src/components/yaml-viewer.svelte index 33e7ce70021..bb65d50b86b 100644 --- a/metaflow/plugins/cards/ui/src/components/yaml-viewer.svelte +++ b/metaflow/plugins/cards/ui/src/components/yaml-viewer.svelte @@ -1,16 +1,16 @@
{#if collapsible} -
- + {#if !isCollapsed}
{yaml_string}
@@ -90,7 +90,7 @@ margin: 0.5rem 0; overflow: hidden; } - + .yaml-header { display: flex; justify-content: space-between; @@ -101,7 +101,7 @@ font-size: 0.875rem; font-weight: 500; } - + .collapse-button { display: flex; align-items: center; @@ -113,25 +113,25 @@ font-size: 0.875rem; font-weight: 500; } - + .collapse-button:hover { color: #111827; } - + .collapse-icon { transition: transform 0.2s ease; font-size: 0.75rem; } - + .collapse-icon.collapsed { transform: rotate(-90deg); } - + .yaml-label { color: #374151; font-weight: 500; } - + .copy-button { background: #3b82f6; color: white; @@ -142,20 +142,20 @@ cursor: pointer; transition: all 0.2s ease; } - + .copy-button:hover { background: #2563eb; } - + .copy-button.success { background: #10b981; } - + .yaml-content { overflow: auto; max-height: 400px; /* Default max height */ } - + .yaml-code { margin: 0; padding: 0; @@ -163,7 +163,7 @@ border: none; overflow: visible; } - + .yaml-code code { display: block; padding: 1rem; @@ -176,37 +176,37 @@ word-break: break-word; border: none; } - + /* Let Prism.js handle all token styling - no custom overrides */ - + /* Responsive adjustments */ @media (max-width: 640px) { .yaml-header { padding: 0.375rem 0.5rem; font-size: 0.8125rem; } - + .yaml-code { padding: 0.75rem 0.5rem; font-size: 0.75rem; } - + .copy-button { padding: 0.1875rem 0.375rem; font-size: 0.6875rem; } } - + /* Table context adjustments */ :global(table .yaml-viewer) { margin: 0.25rem 0; font-size: 0.75rem; } - + :global(table .yaml-content) { max-height: 200px; } - + :global(table .yaml-code) { padding: 0.5rem; font-size: 0.6875rem; diff --git a/test/README.md b/test/README.md index b857959df0d..4b500d73658 100644 --- a/test/README.md +++ b/test/README.md @@ -43,15 +43,15 @@ for more information about how to execute `pytest` tests. ## The Integration Test Harness for Metaflow The integration test harness for the core Metaflow at `test/core` -generates and executes synthetic Metaflow flows, exercising all -aspects of Metaflow. The test suite is executed using -[tox](http://tox.readthedocs.io) as configured in `tox.ini`. -You can run the tests by hand using `pytest` or +generates and executes synthetic Metaflow flows, exercising all +aspects of Metaflow. The test suite is executed using +[tox](http://tox.readthedocs.io) as configured in `tox.ini`. +You can run the tests by hand using `pytest` or `run_tests.py` as described below. What happens when you execute `python helloworld.py run`? The execution -involves multiple layers of the Metaflow stack. The stack looks like -following, starting from the most fundamental layer all the way to the +involves multiple layers of the Metaflow stack. The stack looks like +following, starting from the most fundamental layer all the way to the user interface: 0. Python interpreter (`python2`, `python3`) diff --git a/test/core/graphs/branch_in_switch.json b/test/core/graphs/branch_in_switch.json index 387947d5078..d1c05336133 100644 --- a/test/core/graphs/branch_in_switch.json +++ b/test/core/graphs/branch_in_switch.json @@ -1,7 +1,7 @@ { "name": "branch_in_switch", "graph": { - "start": { + "start": { "switch": {"process": "process_branch", "skip": "skip_path"}, "condition": "mode", "quals": ["start-branch-in-switch"] diff --git a/test/extensions/README.md b/test/extensions/README.md index f135475d26a..4421f3d8108 100644 --- a/test/extensions/README.md +++ b/test/extensions/README.md @@ -1,5 +1,5 @@ -# Extensions Testing Framework. +# Extensions Testing Framework. What does this framework do ? It installs the extensions and then runs the test suite which leverages the extensions. -Currently installs the cards related packages. \ No newline at end of file +Currently installs the cards related packages. \ No newline at end of file diff --git a/test/extensions/packages/card_via_init/README.md b/test/extensions/packages/card_via_init/README.md index 2ac3fcae4d8..143dcba1a7c 100644 --- a/test/extensions/packages/card_via_init/README.md +++ b/test/extensions/packages/card_via_init/README.md @@ -1,3 +1,3 @@ # card_via_init -This test checks if card extensions directly with a `plugins/cards` directory structure work as planned. \ No newline at end of file +This test checks if card extensions directly with a `plugins/cards` directory structure work as planned. \ No newline at end of file diff --git a/test_runner b/test_runner index 69ca8a8d921..e43de33e2ae 100755 --- a/test_runner +++ b/test_runner @@ -21,7 +21,7 @@ run_tests() { # We run realtime cards tests separately because there these tests validate the asynchronous updates to the # information stored in the datastore. So if there are other processes starving resources then these tests will -# surely fail since a lot of checks have timeouts. +# surely fail since a lot of checks have timeouts. run_runtime_card_tests() { CARD_GRAPHS="small-foreach,small-parallel,nested-branches,single-linear-step,simple-foreach" cd test/core && PYTHONPATH=`pwd`/../../ python3 run_tests.py --num-parallel 8 --contexts python3-all-local-cards-realtime --graphs $CARD_GRAPHS && cd ../../