From d8dd6196309e72996e6ae2cae241e15b9299e8cd Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 17:37:58 +0200 Subject: [PATCH 01/24] Format README --- README.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ff0303bd..795cae5b 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,16 @@ # se-ml.github.io -Software Engineering for Machine Learning Homepage +> Software Engineering for Machine Learning Homepage +### Prerequisites -### Prereq Install [jekyll](https://jekyllrb.com/docs/). ### Development: -#### Install dependencies: -``` +#### Install dependencies + +```sh $ git clone https://github.com/SE-ML/se-ml.github.io $ cd se-ml.github.io $ git fetch --all @@ -17,18 +18,20 @@ $ git checkout -b source origin/source $ bundle install ``` -#### Run dev server: -``` -$ bundle exec jekyll serve -``` +#### Run development server -### Deploy to production: +```sh +bundle exec jekyll serve ``` + +### Deploy to production + +```sh $ ./bin/deploy -u se-ml ``` +#### Kudos -#### Kudos: - this webpage uses the [al-folio](https://alshedivat.github.io/al-folio/) theme. - some icons are downloaded from [Freepik](https://www.freepik.com/). 
From 98c4921def067124a73faaaf1807be02ac69b089 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 17:39:14 +0200 Subject: [PATCH 02/24] Add missing dependency --- Gemfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Gemfile b/Gemfile index 8e842b1e..b52e91ac 100644 --- a/Gemfile +++ b/Gemfile @@ -8,3 +8,5 @@ group :jekyll_plugins do gem 'jemoji' gem 'unicode_utils' end + +gem "webrick", "~> 1.7" From a5717cc210e7cd67f37646846bd396b478a7045f Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 17:39:46 +0200 Subject: [PATCH 03/24] Fix Jekyll warnings --- README.md | 3 ++- _config.yml | 1 + _pages/publications.md | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 795cae5b..a2b3a676 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,9 @@ $ bundle install #### Run development server ```sh -bundle exec jekyll serve +bundle exec jekyll serve --incremental ``` +> The website will be available at [localhost:4000](http://localhost:4000) ### Deploy to production diff --git a/_config.yml b/_config.yml index 33277bb5..78a728bf 100644 --- a/_config.yml +++ b/_config.yml @@ -2,6 +2,7 @@ # Site settings # ----------------------------------------------------------------------------- name: SE-ML +title: SE-ML email: j.m.w.visser@liacs.leidenuniv.nl description: > # this means to ignore newlines until "url:" Webpage for the Software Engineering for Machine Learning diff --git a/_pages/publications.md b/_pages/publications.md index 49916b2c..89045b78 100644 --- a/_pages/publications.md +++ b/_pages/publications.md @@ -7,7 +7,7 @@ permalink: /publications
    - {% assign sorted_pub = (site.pub | sort: 'date') | reverse %} + {% assign sorted_pub = site.pub | sort: 'date' | reverse %} {% for item in sorted_pub %}
  • {{item.title}} From 399600348daed24838f2c388dcde24731096103b Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 19:35:39 +0200 Subject: [PATCH 04/24] Fix off-by-one error in practice counter The value of site.best_practices[0] is null because the first best practice is at site.best_practices[1]. --- _layouts/practice.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/_layouts/practice.html b/_layouts/practice.html index fd40bdb5..3bd2c28d 100644 --- a/_layouts/practice.html +++ b/_layouts/practice.html @@ -10,8 +10,7 @@

    {{ page.name }}

    - {% assign sorted_practices = site.best_practices | sort:"index" %} - {% assign sorted_practices = site.best_practices | where_exp: "item", "item.index > 0" %} + {% assign sorted_practices = site.best_practices | where_exp: "item", "item.index > 0" | sort:"index" %} {% if page.index == 1 %} {% assign next_pr_index = page.index | plus:1 %} @@ -52,7 +51,7 @@

    {{ page.name }}

    - {{page.index }} / {{sorted_practices.size}} • + {{ page.index }} / {{ sorted_practices.size | minus:1 }} {{page.category}} @@ -249,7 +248,7 @@

    Read more



    - {{page.index }} / {{sorted_practices.size}} • + {{ page.index }} / {{ sorted_practices.size | minus:1 }} {{page.category}} From 37a96734faae05f999324f4cead1be5d767399cf Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 20:10:20 +0200 Subject: [PATCH 05/24] Add caching best practice --- _best_practices/04-cache.md | 31 +++++++++++++++++++++++++++++++ _references/cmmd.md | 6 ++++++ _references/van2021.md | 6 ++++++ 3 files changed, 43 insertions(+) create mode 100644 _best_practices/04-cache.md create mode 100644 _references/cmmd.md create mode 100644 _references/van2021.md diff --git a/_best_practices/04-cache.md b/_best_practices/04-cache.md new file mode 100644 index 00000000..e492d7d5 --- /dev/null +++ b/_best_practices/04-cache.md @@ -0,0 +1,31 @@ +--- +layout: practice +author: András Schmelczer +name: Cache Production Predictions +title: Cache Production Predictions +category: Deployment +unique_id: deployment_cache +index: 35 +difficulty: "na" +references: [van2021, CMMD] +comments: True +description: +image: # +photocredit: # + +intent: Improve performance, allow more flexibility for the clients, and reduce the deployment's carbon footprint. +motivation: Avoiding the recomputation of results, which is especially expensive for Deep Learning models, can lead to lower latency, lower costs, and an overall more socially conscious deployment. +applicability: Caching should be implemented in production-level ML applications where repeating input values may occur. +related: [deployment_log] +dependencies: # +survey_question: + +labels: [quality] + +--- + +Sustainability is an increasingly crucial concern of ethical AI, and avoiding wasting computing resources is a part of it. To this end, caching the results of expensive operations has to be considered in any ML deployment. + +Caching is a well-known technique for improving the latency of repeated responses. By using it, we can avoid recomputing results that we have already calculated. 
However, extra care has to be taken in order not to expose private data to third parties; therefore, access control must be thoroughly considered. + +If the ML service's clients can rely on virtually instant responses to repeated queries, that can open up opportunities for different, new access patterns. This freedom can result in a more developer-friendly API and a better developer experience. diff --git a/_references/cmmd.md b/_references/cmmd.md new file mode 100644 index 00000000..a89119af --- /dev/null +++ b/_references/cmmd.md @@ -0,0 +1,6 @@ +--- +title: 'Caching for ML Model Deployments' +acronym: CMMD +type: GL - Tier 3 +webpage: 'https://www.tekhnoal.com/caching-for-ml-models.html' +--- \ No newline at end of file diff --git a/_references/van2021.md b/_references/van2021.md new file mode 100644 index 00000000..ca182d6e --- /dev/null +++ b/_references/van2021.md @@ -0,0 +1,6 @@ +--- +title: 'Sustainable AI: AI for sustainability and the sustainability of AI' +acronym: van2021sustainable +type: AL +webpage: 'https://link.springer.com/article/10.1007/s43681-021-00043-6' +--- \ No newline at end of file From 16145ff549ff12e2aa3401ce8d0055d67c62f25d Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 20:11:15 +0200 Subject: [PATCH 06/24] Mention great-ai as a source for some of the practices --- practices/index.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/practices/index.html b/practices/index.html index a3b072e8..186b0caa 100644 --- a/practices/index.html +++ b/practices/index.html @@ -14,9 +14,9 @@

    Engineering best practices for Machine Learning

    The list below gathers a set of engineering best practices for developing software systems with machine learning (ML) components.

    -These practices were identified by engaging with ML engineering teams and -reviewing relevant
    academic and - grey literature. +These practices were identified by engaging with ML engineering teams, +reviewing relevant academic and + grey literature, and through conducting case studies on a production ML pipeline. We are continuously running a global survey among ML engineering teams to measure the adoption of these practices.

    From 114c51e49c119ef50f3995423b6667f87bcb93e5 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 18 Sep 2022 20:32:44 +0200 Subject: [PATCH 07/24] Fix syntax error stopping Disqus from loading --- _layouts/post.html | 6 +----- _layouts/practice.html | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/_layouts/post.html b/_layouts/post.html index c203111e..01a50b6c 100644 --- a/_layouts/post.html +++ b/_layouts/post.html @@ -27,11 +27,7 @@

    {{ page.title }}