diff --git a/.eslintignore b/.eslintignore
new file mode 100644
index 000000000..68abfffe0
--- /dev/null
+++ b/.eslintignore
@@ -0,0 +1,6 @@
+**/node_modules/**
+**/deps/**
+**/build/**
+**/3rd_party/**
+extension/
+modules/lo_dash_react_components/lo_dash_react_components/
diff --git a/.eslintrc.json b/.eslintrc.json
new file mode 100644
index 000000000..ae54b838c
--- /dev/null
+++ b/.eslintrc.json
@@ -0,0 +1,9 @@
+{
+ "extends": "standard",
+ "rules": {
+ "semi": ["error", "always"]
+ },
+ "env": {
+ "jasmine": true
+ }
+}
\ No newline at end of file
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000..f8cfe2162
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,35 @@
+name: Lint
+
+on: [push]
+
+jobs:
+ lint-python:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Install Make
+ run: sudo apt-get install -y make
+
+ - name: Lint files
+ run: make linting-python
+ lint-node:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+ - name: Set up Node
+ uses: actions/setup-node@v4
+ with:
+ node-version: '22.x'
+
+ - name: Install Make
+ run: sudo apt-get install -y make
+
+ - name: Lint files
+ run: make linting-node
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 000000000..490d33e0e
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,51 @@
+name: Test packages
+
+on: [push]
+
+jobs:
+ test-packages:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ package: ['learning_observer/', 'modules/writing_observer/']
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Install Make
+ run: sudo apt-get install -y make
+
+ - name: Get list of changed files
+ id: changes
+ run: |
+ git fetch origin master
+ git diff --name-only origin/master HEAD > changed_files.txt
+
+ - name: Check if package has changes
+ id: package_check
+ run: |
+ if grep -qE "^${{ matrix.package }}" changed_files.txt; then
+ echo "run_tests=true" >> $GITHUB_ENV
+ else
+ echo "run_tests=false" >> $GITHUB_ENV
+ fi
+
+ - name: Skip tests if no changes
+ if: env.run_tests == 'false'
+ run: echo "Skipping tests for ${{ matrix.package }} as there are no changes."
+
+ - name: Install the base Learning Observer
+ if: env.run_tests == 'true'
+ run: make install
+
+ - name: Install the package with pip
+ if: env.run_tests == 'true'
+ run: pip install -e ${{ matrix.package }}
+
+ - name: Run tests
+ if: env.run_tests == 'true'
+ run: make test PKG=${{ matrix.package }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..65002d617
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,42 @@
+*~
+\#*
+.\#*
+*__pycache__*
+webapp/logs
+webapp/static_data/teachers.yaml
+creds.yaml
+CREDS.YAML
+uncommitted
+extension.crx
+extension.pem
+extension.zip
+*egg-info*
+public_key
+*/dist
+learning_observer/learning_observer/static_data/teachers.yaml
+learning_observer/learning_observer/logs/
+learning_observer/learning_observer/static/3rd_party/
+learning_observer/learning_observer/static_data/course_lists/
+learning_observer/learning_observer/static_data/course_rosters/
+learning_observer/learning_observer/static_data/repos/
+learning_observer/learning_observer/static_data/dash_assets/
+learning_observer/learning_observer/static_data/courses.json
+learning_observer/learning_observer/static_data/students.json
+learning_observer/passwd*
+--*
+.venv/
+.vscode/
+build/
+dist/
+node_modules
+*.orig
+lo_event*tgz
+*.log
+LanguageTool-stable.zip
+LanguageTool-5.4
+package-lock.json
+learning_observer/learning_observer/static_data/google/
+learning_observer/learning_observer/static_data/admins.yaml
+.ipynb_checkpoints/
+.eggs/
+.next/
\ No newline at end of file
diff --git a/.nvmrc b/.nvmrc
new file mode 100644
index 000000000..b6a7d89c6
--- /dev/null
+++ b/.nvmrc
@@ -0,0 +1 @@
+16
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 000000000..de6952502
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,29 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+ os: ubuntu-24.04
+ tools:
+ python: "3.11"
+ # You can also specify other tool versions:
+ # nodejs: "19"
+ # rust: "1.64"
+ # golang: "1.19"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+ configuration: autodocs/conf.py
+
+# If using Sphinx, optionally build your docs in additional formats such as PDF
+# formats:
+# - pdf
+
+# Optionally declare the Python requirements required to build your docs
+python:
+ install:
+ - requirements: autodocs/requirements.txt
diff --git a/.stylelintrc.json b/.stylelintrc.json
new file mode 100644
index 000000000..7a9e48b5a
--- /dev/null
+++ b/.stylelintrc.json
@@ -0,0 +1,18 @@
+{
+ "ignoreFiles": [
+ "./**/node_modules/**",
+ "./**/deps/**",
+ "./**/build/**",
+ "./**/3rd_party/**",
+ "./extension/**"
+ ],
+ "extends": "stylelint-config-standard",
+ "customSyntax": "postcss-scss",
+ "plugins": [
+ "stylelint-scss"
+ ],
+ "rules": {
+ "at-rule-no-unknown": null,
+ "scss/at-rule-no-unknown": true
+ }
+}
\ No newline at end of file
diff --git a/CONTRIBUTORS.TXT b/CONTRIBUTORS.TXT
new file mode 100644
index 000000000..ed669f735
--- /dev/null
+++ b/CONTRIBUTORS.TXT
@@ -0,0 +1,4 @@
+Piotr Mitros
+Oren Livne
+Paul Deane
+Bradley Erickson
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..61cdb7d65
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.10
+RUN git config --global --add safe.directory /app
+WORKDIR /app
+
+# TODO start redis in here
+# see about docker loopback
+RUN apt-get update && \
+ apt-get install -y python3-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+COPY . /app
+
+RUN make install
+CMD ["make", "run"]
diff --git a/LICENSE.TXT b/LICENSE.TXT
new file mode 100644
index 000000000..be3f7b28e
--- /dev/null
+++ b/LICENSE.TXT
@@ -0,0 +1,661 @@
+ GNU AFFERO GENERAL PUBLIC LICENSE
+ Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+ A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+ The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+ An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU Affero General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Remote Network Interaction; Use with the GNU General Public License.
+
+ Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software. This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..a45c90b55
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,110 @@
+# TODO rename these packages to something else
+PACKAGES ?= wo
+
+help:
+ @echo "Available commands:"
+ @echo ""
+ @echo " run Run the learning_observer Python application."
+ @echo " install-pre-commit-hook Install the pre-commit git hook."
+ @echo " install Install the learning_observer package in development mode."
+ @echo " install-dev Install dev dependencies (requires additional setup)."
+ @echo " install-packages Install specific packages: [${PACKAGES}]."
+ @echo " test Run tests for the specified package (PKG=path/to/module)."
+ @echo " linting-setup Setup linting tools and dependencies."
+ @echo " linting-python Lint Python files using pycodestyle and pylint."
+ @echo " linting-node Lint Node files (JS, CSS, and unused CSS detection)."
+ @echo " linting Perform all linting tasks (Python and Node)."
+ @echo " build-wo-chrome-extension Build the writing-process extension."
+ @echo " build-python-distribution Build a distribution for the specified package (PKG=path/to/package)."
+ @echo ""
+ @echo "Note: All commands are executed in the current shell environment."
+ @echo " Ensure your virtual environment is activated if desired, as installs and actions"
+ @echo " will occur in the environment where the 'make' command is run."
+ @echo ""
+ @echo "Use 'make <command>' to execute a command. For example: make run"
+
+run:
+ # If you haven't done so yet, run: make install
+ # we need to make sure we are on the virtual env when we do this
+ cd learning_observer && python learning_observer
+
+# Install commands
+install-pre-commit-hook:
+ # Adding pre-commit.sh to Git hooks
+ cp scripts/hooks/pre-commit.sh .git/hooks/pre-commit
+ chmod +x .git/hooks/pre-commit
+
+install: install-pre-commit-hook
+ # The following only works with specified packages
+ # we need to install learning_observer in dev mode to
+ # more easily pass in specific files we need, such as creds
+ pip install --no-cache-dir -e learning_observer/
+
+ # Installing Learning Observer (LO) Dash React Components
+ # TODO properly fetch the current version of lodrc.
+ # We have a symbolic link between `lodrc-current` and the most
+ # recent version. We would like to directly fetch `lodrc-current`,
+ # however, the fetch only returns the name of the file it's
+ # linked to. We do an additional fetch for the linked file.
+ @LODRC_CURRENT=$$(curl -s https://raw.githubusercontent.com/ETS-Next-Gen/lo_assets/main/lo_dash_react_components/lo_dash_react_components-current.tar.gz); \
+ pip install https://raw.githubusercontent.com/ETS-Next-Gen/lo_assets/main/lo_dash_react_components/$${LODRC_CURRENT}
+
+install-dev:
+ # TODO create a dev requirements file
+ pip install --no-cache-dir -e learning_observer/[${PACKAGES}]
+ . ${HOME}/.nvm/nvm.sh && nvm use && pip install -v -e modules/lo_dash_react_components/
+
+install-packages:
+ pip install -e learning_observer/[${PACKAGES}]
+
+# Testing commands
+test:
+ @if [ -z "$(PKG)" ]; then echo "No module specified, please try again with \"make test PKG=path/to/module\""; exit 1; fi
+ ./test.sh $(PKG)
+
+# Linting commands
+linting-python:
+ # Linting Python modules
+ pip install pycodestyle pylint
+ pycodestyle --ignore=E501,W503 $$(git ls-files 'learning_observer/*.py' 'modules/*.py')
+ pylint -d W0613,W0511,C0301,R0913,too-few-public-methods $$(git ls-files 'learning_observer/*.py' 'modules/*.py')
+
+linting-node:
+ npm install
+ # TODO each of these has lots of errors and blocks
+ # the next item from running
+ # Starting to lint Node modules
+ # Linting Javascript
+ npm run lint:js
+ # Linting CSS
+ npm run lint:css
+ # Finding any unused CSS files
+ npm run find-unused-css
+
+linting: linting-setup linting-python linting-node
+ # Finished linting
+
+# Build commands
+build-wo-chrome-extension:
+ # Installing LO Event
+ cd modules/lo_event && npm install && npm link lo_event
+ # Building extension
+ cd extension/writing-process && npm install && npm run build
+
+build-python-distribution:
+ # Building distribution for package
+ pip install build
+ # Switching to package directory
+ cd $(PKG) && python -m build
+
+# TODO we may want to have a separate command for uploading to testpypi
+upload-python-package-to-pypi: build-python-distribution
+ pip install twine
+ # TODO we currently only upload to testpypi
+ # TODO we need to include `TWINE_USERNAME=__token__`
+ # and `TWINE_PASSWORD={ourTwineToken}` to authenticate
+ #
+ # TODO We have not fully tested the following commands.
+ # Try out the following steps and fix any bugs so the
+ # Makefile can do it automatically.
+ # cd $(PKG) && twine upload -r testpypi dist/*
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..8c56ebba8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,109 @@
+# Writing Observer and Learning Observer
+
+
+
+This repository is part of a project to provide an open source
+learning analytics dashboard to help instructors be able to manage
+student learning processes, and in particular, student writing
+processes.
+
+
+
+## Learning Observer
+
+_Learning Observer_ is designed as an open source, open science learning
+process data dashboarding framework. You write reducers to handle
+per-student writing data, and aggregators to make dashboards. We've
+tested this in math and writing, but our focus is on writing process
+data.
+
+At a high level, Learning Observer functions as an application platform.
+The primary `learning_observer` module bootstraps the system: it loads
+configuration, connects to storage and messaging back ends, and brokers
+communication with data sources. Other modules plug into that
+infrastructure to define the specific reducers, dashboards, and other
+items that users interact with, letting teams experiment with new
+features without having to reimplement the platform core.
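+
+As a purely illustrative sketch (the function name, event fields, and
+calling convention below are hypothetical, not the actual Learning
+Observer API), a module's reducer is conceptually a small function that
+folds each incoming event into per-student state:
+
+```python
+def count_keystrokes(event, state=None):
+    """Hypothetical reducer: accumulate a per-student keystroke count.
+
+    `event` is one incoming event dict; `state` is the previously stored
+    state for this student, or None on the first event.
+    """
+    state = state or {'keystroke_count': 0}
+    if event.get('type') == 'keystroke':
+        state['keystroke_count'] += 1
+    return state
+```
+
+The real reducers shipped with the system live in modules such as
+`modules/writing_observer/`.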
+
+It's not finished, but it's moving along quickly.
+
+## Writing Observer
+
+_Writing Observer_ is a plug-in for Google Docs that visualizes writing
+data for teachers. Our immediate goal was to provide a dashboard that
+gives rapid, actionable insights to educators supporting remote
+learning during the pandemic. We're working to expand this to support
+a broad range of write-to-learn and collaborative learning techniques.
+
+## Status
+
+There isn't much to see here for external collaborators yet. This
+repository has a series of prototypes to confirm we can:
+
+* collect the data we want;
+* extract what we need from it; and
+* route it to where we want it to go (there's _a lot_ of data, with
+  complex dependencies, so this is actually a nontrivial problem).
+
+This mitigates most of the technical risk. We also now integrate with
+Google Classroom, and we have prototype APIs for making dashboards,
+along with a few prototype dashboards.
+
+For this to be useful, we'll need to provide some basic documentation
+for developers to be able to navigate this repo (in particular,
+explaining _why_ this approach works).
+
+This system is designed to be _massively_ scalable, but it is not
+currently implemented to be so (mostly for trivial reasons;
+e.g. scaffolding code which uses static files as a storage model). It
+will take work to iron out all of these performance issues, but we'd
+like to do that work once we better understand what we're doing and are
+confident that the core approach and APIs are correct.
+
+## Getting Started
+
+We have a short guide to [installing the system](docs/tutorials/install.md).
+Getting the base system working is pretty easy. To create a new module
+for the system to use, check out our [cookiecutter module guide](docs/tutorials/cookiecutter-module.md).
+
+### System requirements
+
+It depends on what you're planning to use the system for.
+
+The core _Learning Observer_ system works fine on an AWS nano
+instance, and that's how we do most of our testing and small-scale
+pilots. These instances have 512MB of RAM, and minimal CPU. It's
+important that this configuration remains usable.
+
+For deployment and more sophisticated uses (e.g. NLP) in larger
+numbers of classrooms, we need **heavy** metal. As we're playing with
+algorithms, deep learning is turning out to work surprisingly well,
+and at the same time, requires surprisingly large amounts of computing
+power. A GPGPU with plenty of RAM is helpful if you want to work with
+more sophisticated algorithms, and is likely to be a requirement for
+many types of uses.
+
+All _Learning Observer_ development has been on Linux-based platforms
+(including Ubuntu and RHEL). There are folks outside of the core team
+who have tried to run it on Mac or on WSL, with some success.
+
+A typical deployment on RHEL uses the following services:
+
+* redis
+* nginx
+
+#### bcrypt
+
+A note on bcrypt: the code uses bcrypt for internal password
+management. There is a mess of incompatible bcrypt packages; if you
+install it any way other than the official install path, be careful
+to get the right one.
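+
+As a minimal sketch of what this looks like in practice (assuming the
+standalone `bcrypt` package from PyPI, i.e. `pip install bcrypt`, rather
+than one of the older, incompatible forks):
+
+```python
+import bcrypt
+
+# Hash a password for storage; the salt is generated and embedded in the hash.
+hashed = bcrypt.hashpw('s3cret'.encode('utf-8'), bcrypt.gensalt())
+
+# Later, verify a login attempt against the stored hash.
+assert bcrypt.checkpw('s3cret'.encode('utf-8'), hashed)
+```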
+
+## Contributing or learning more
+
+We're still a small team, and the easiest way to get involved is to shoot
+us a quick email. We'll gladly walk you through anything you're interested in.
+
+Contact/core maintainer: Piotr Mitros
+
+Licensing: Open source / free software. License: AGPL.
diff --git a/VERSION b/VERSION
new file mode 100644
index 000000000..82977005b
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.1.0+2026.02.03T15.21.57.253Z.1310c89e.berickson.20260130.execution.dag.single.value
diff --git a/autodocs/.gitignore b/autodocs/.gitignore
new file mode 100644
index 000000000..90045b728
--- /dev/null
+++ b/autodocs/.gitignore
@@ -0,0 +1,4 @@
+_build/
+generated/
+apidocs/
+module_readmes/
diff --git a/autodocs/Makefile b/autodocs/Makefile
new file mode 100644
index 000000000..d4bb2cbb9
--- /dev/null
+++ b/autodocs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/autodocs/api.rst b/autodocs/api.rst
new file mode 100644
index 000000000..2d744d115
--- /dev/null
+++ b/autodocs/api.rst
@@ -0,0 +1,7 @@
+API
+===
+
+.. toctree::
+ :maxdepth: 4
+
+ apidocs/index
diff --git a/autodocs/concepts.rst b/autodocs/concepts.rst
new file mode 100644
index 000000000..8c0002830
--- /dev/null
+++ b/autodocs/concepts.rst
@@ -0,0 +1,49 @@
+Concepts
+=============
+
+Explanations of key ideas, principles, and background knowledge.
+Follow this recommended sequence to build context before diving into
+implementation details:
+
+- :doc:`History <docs/concepts/history>` - establishes the background and
+  problem space the project is addressing.
+- :doc:`System Design <docs/concepts/system_design>` - explains how the product
+  strategy and user needs translate into an overall system approach.
+- :doc:`Architecture <docs/concepts/architecture>` - outlines the concrete
+  architecture that implements the system design.
+- :doc:`Technologies <docs/concepts/technologies>` - surveys the primary tools
+  and platforms we rely on to realize the architecture.
+- :doc:`System Settings <docs/concepts/system_settings>` - describes how the
+  system loads global and cascading settings.
+- :doc:`Events <docs/concepts/events>` - introduces the event model that drives
+  data flowing through the system.
+- :doc:`Reducers <docs/concepts/reducers>` - details how incoming events are
+  aggregated into the state our experiences depend on.
+- :doc:`Communication Protocol <docs/concepts/communication_protocol>` - discusses
+  how the system queries data from reducers for dashboards.
+- :doc:`Student Identity Mapping <docs/concepts/student_identity_mapping>` - explains
+  how learner information is mapped across integrations.
+- :doc:`Scaling <docs/concepts/scaling>` - covers strategies for growing the
+  system once the fundamentals are in place.
+- :doc:`Auth <docs/concepts/auth>` - describes authentication considerations
+  that secure access to the system.
+- :doc:`Privacy <docs/concepts/privacy>` - documents how we protect learner data
+  and comply with privacy expectations.
+
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :titlesonly:
+
+ docs/concepts/history
+ docs/concepts/system_design
+ docs/concepts/architecture
+ docs/concepts/technologies
+ docs/concepts/system_settings
+ docs/concepts/events
+ docs/concepts/reducers
+ docs/concepts/communication_protocol
+ docs/concepts/student_identity_mapping
+ docs/concepts/scaling
+ docs/concepts/auth
+ docs/concepts/privacy
diff --git a/autodocs/conf.py b/autodocs/conf.py
new file mode 100644
index 000000000..3af5b92ef
--- /dev/null
+++ b/autodocs/conf.py
@@ -0,0 +1,190 @@
+import os
+import pathlib
+import re
+import shutil
+import sphinx.util
+import sys
+import unicodedata
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'Learning Observer'
+copyright = '2020-2025, Bradley Erickson'
+author = 'Bradley Erickson'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+sys.path.insert(0, os.path.abspath('../'))
+
+extensions = [
+ 'autodoc2',
+ 'myst_parser',
+]
+
+autodoc2_packages = [
+ '../learning_observer/learning_observer',
+ '../modules/writing_observer/writing_observer'
+]
+
+autodoc2_output_dir = 'apidocs'
+autodoc2_member_order = 'bysource'
+
+source_suffix = {
+ '.rst': 'restructuredtext',
+ '.md': 'markdown',
+ '.txt': 'markdown'
+}
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = 'alabaster'
+html_static_path = ['_static']
+
+LOGGER = sphinx.util.logging.getLogger(__name__)
+
+
+_MARKDOWN_IMAGE_PATTERN = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
+_RST_IMAGE_PATTERNS = [
+ re.compile(r'\.\.\s+image::\s+([^\s]+)'),
+ re.compile(r'\.\.\s+figure::\s+([^\s]+)'),
+]
+
+
+def _extract_local_assets(text):
+ """Return relative asset paths referenced in the provided README text."""
+
+ asset_paths = set()
+ for match in _MARKDOWN_IMAGE_PATTERN.findall(text):
+ asset_paths.add(match)
+ for pattern in _RST_IMAGE_PATTERNS:
+ asset_paths.update(pattern.findall(text))
+
+ filtered_assets = set()
+ for raw_path in asset_paths:
+ candidate = raw_path.strip()
+ if not candidate:
+ continue
+ # Remove optional titles ("path "optional title"") and URL fragments
+ candidate = candidate.split()[0]
+ candidate = candidate.split('#', maxsplit=1)[0]
+ candidate = candidate.split('?', maxsplit=1)[0]
+
+ if candidate.startswith(('http://', 'https://', 'data:')):
+ continue
+ if candidate.startswith('#'):
+ continue
+
+ filtered_assets.add(candidate)
+
+ return sorted(filtered_assets)
+
+
+def _copy_module_assets(readme_path, destination_dir):
+ """Copy image assets referenced by ``readme_path`` into ``destination_dir``."""
+
+ module_dir = readme_path.parent.resolve()
+ readme_text = readme_path.read_text(encoding='utf-8')
+ asset_paths = _extract_local_assets(readme_text)
+ for asset in asset_paths:
+ relative_posix_path = pathlib.PurePosixPath(asset)
+ if relative_posix_path.is_absolute():
+ LOGGER.warning(
+ "Skipping absolute image path %s referenced in %s", asset, readme_path
+ )
+ continue
+
+ normalized_relative_path = pathlib.Path(*relative_posix_path.parts)
+ source_path = (module_dir / normalized_relative_path).resolve(strict=False)
+
+ try:
+ source_path.relative_to(module_dir)
+ except ValueError:
+ LOGGER.warning(
+ "Skipping image outside module directory: %s referenced in %s",
+ asset,
+ readme_path,
+ )
+ continue
+
+ if not source_path.exists():
+ LOGGER.warning(
+ "Referenced image %s in %s was not found", asset, readme_path
+ )
+ continue
+
+ destination_path = destination_dir / normalized_relative_path
+ destination_path.parent.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(source_path, destination_path)
+
+
+def _extract_readme_title(readme_path: pathlib.Path) -> str:
+ """Return the first Markdown heading in ``readme_path``.
+
+ Defaults to the parent directory name if no heading can be found.
+ """
+
+ try:
+ for line in readme_path.read_text(encoding='utf-8').splitlines():
+ stripped = line.strip()
+ if stripped.startswith('#'):
+ title = stripped.lstrip('#').strip()
+ if title:
+ return title
+ except OSError as exc: # pragma: no cover - filesystem error propagation
+ LOGGER.warning("unable to read %s: %s", readme_path, exc)
+
+ return readme_path.parent.name
+
+
+def _slugify(text: str) -> str:
+ """Convert ``text`` to a lowercase filename-safe slug."""
+
+ normalized = unicodedata.normalize('NFKD', text)
+ without_diacritics = ''.join(ch for ch in normalized if not unicodedata.combining(ch))
+ slug = re.sub(r'[^a-z0-9]+', '-', without_diacritics.casefold()).strip('-')
+ return slug or 'module'
+
+
+def _copy_module_readmes(app):
+ """Populate ``module_readmes`` with module README files and assets."""
+
+ docs_root = pathlib.Path(__file__).parent.resolve()
+ modules_root = docs_root.parent / 'modules'
+ destination_root = docs_root / 'module_readmes'
+
+ if not modules_root.exists():
+ LOGGER.warning("modules directory %s was not found", modules_root)
+ return
+
+ if destination_root.exists():
+ shutil.rmtree(destination_root)
+ destination_root.mkdir(parents=True, exist_ok=True)
+
+ readme_info = []
+ for readme_path in modules_root.glob('*/README.md'):
+ title = _extract_readme_title(readme_path)
+ readme_info.append((title, readme_path))
+
+ readme_info.sort(key=lambda item: item[0].casefold())
+
+ for title, readme_path in readme_info:
+ module_name = readme_path.parent.name
+ slug = _slugify(title)
+ module_destination = destination_root / f'{slug}--{module_name}'
+ module_destination.mkdir(parents=True, exist_ok=True)
+ destination_path = module_destination / "README.md"
+ shutil.copy2(readme_path, destination_path)
+ _copy_module_assets(readme_path, module_destination)
+
+
+def setup(app):
+ app.connect('builder-inited', _copy_module_readmes)
diff --git a/autodocs/docs b/autodocs/docs
new file mode 120000
index 000000000..6246dffc3
--- /dev/null
+++ b/autodocs/docs
@@ -0,0 +1 @@
+../docs/
\ No newline at end of file
diff --git a/autodocs/how-to.rst b/autodocs/how-to.rst
new file mode 100644
index 000000000..9b97f0992
--- /dev/null
+++ b/autodocs/how-to.rst
@@ -0,0 +1,31 @@
+How-to
+=============
+
+Practical instructions for achieving specific goals within Learning Observer. Use these guides when you know what outcome you need and want a proven recipe to follow:
+
+- :doc:`Communication Protocol <docs/how-to/communication_protocol>` - How to query data from reducers or system endpoints for dashboards.
+- :doc:`Build Dashboards <docs/how-to/dashboards>` - Walk through creating dashboards from reducer outputs, including layout choices and data wiring.
+- :doc:`Offline Reducer Replay <docs/how-to/offline_replay>` - Explain how to repopulate reducer content with study logs.
+- :doc:`Serve as LTI application <docs/how-to/lti>` - Cover how to install Learning Observer as an LTI application.
+- :doc:`Connect LO Blocks to Canvas via Learning Observer <docs/how-to/connect_lo_blocks_to_canvas>` - Show how to launch LO Blocks through Learning Observer from within Canvas.
+- :doc:`Configure Multiple Roster Sources <docs/how-to/multiple_roster_sources>` - Allow the system to dynamically choose a roster source given a user's context.
+- :doc:`Run with Docker <docs/how-to/docker>` - Learn how to containerize the stack, manage images, and operate the project using Docker Compose.
+- :doc:`Writing Observer Extension <docs/how-to/extension>` - Install, configure, and validate the Writing Observer browser extension for capturing events.
+- :doc:`Interactive Environments <docs/how-to/interactive_environments>` - Connect Learning Observer to Jupyter and other live coding setups for iterative development.
+- :doc:`Impersonate Users <docs/how-to/impersonation>` - Start and stop acting as another user while keeping dashboards informed.
+
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :titlesonly:
+
+ docs/how-to/communication_protocol.md
+ docs/how-to/dashboards.md
+ docs/how-to/offline_replay.md
+ docs/how-to/lti.md
+ docs/how-to/connect_lo_blocks_to_canvas.md
+ docs/how-to/multiple_roster_sources.md
+ docs/how-to/docker.md
+ docs/how-to/extension.md
+ docs/how-to/interactive_environments.md
+ docs/how-to/impersonation.md
diff --git a/autodocs/index.rst b/autodocs/index.rst
new file mode 100644
index 000000000..10bedee6f
--- /dev/null
+++ b/autodocs/index.rst
@@ -0,0 +1,42 @@
+.. Learning Observer documentation master file, created by
+ sphinx-quickstart on Mon May 1 13:11:55 2023.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Learning Observer
+=================
+
+Learning Observer is designed as an open source, open science learning
+process data dashboarding framework. You write reducers to handle
+per-student writing data, and aggregators to make dashboards. We've
+tested this in math and writing, but our focus is on writing process
+data.
+
+At a high level, Learning Observer operates as an application platform:
+the core :mod:`learning_observer` package boots the system, loads
+configured modules, and manages shared data services, while each module
+provides the specific dashboards, reducers, and other artifacts that
+users interact with.
+
+Our documentation is organized into four main categories, each serving a different purpose. You can explore them below:
+
+- :doc:`Tutorials <tutorials>` - Step-by-step guides to help you learn by doing.
+- :doc:`Concepts <concepts>` - Explanations of key ideas and background knowledge.
+- :doc:`How-To <how-to>` - Practical instructions to solve specific goals.
+- :doc:`Reference <reference>` - Detailed API/configuration information.
+
+.. toctree::
+ :hidden:
+ :maxdepth: 3
+
+ tutorials
+ concepts
+ how-to
+ reference
+
+Additional Information
+----------------------
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/autodocs/make.bat b/autodocs/make.bat
new file mode 100644
index 000000000..32bb24529
--- /dev/null
+++ b/autodocs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/autodocs/modules.rst b/autodocs/modules.rst
new file mode 100644
index 000000000..a8395ea34
--- /dev/null
+++ b/autodocs/modules.rst
@@ -0,0 +1,9 @@
+Modules
+-----------
+The module READMEs are collected automatically during the Sphinx build.
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ module_readmes/*/README
diff --git a/autodocs/reference.rst b/autodocs/reference.rst
new file mode 100644
index 000000000..ba358f17b
--- /dev/null
+++ b/autodocs/reference.rst
@@ -0,0 +1,27 @@
+Reference
+=============
+
+Detailed, structured information about APIs, configurations, and technical details. Consult these resources when you need definitive answers about how the system behaves or how to integrate with it:
+
+- :doc:`Code Quality Standards <docs/reference/code_quality>` - Understand our expectations for readability, style, and continuous improvement.
+- :doc:`System Settings <docs/reference/system_settings>` - Review what each system setting does and where it is used.
+- :doc:`Documentation Conventions <docs/reference/documentation>` - Learn how we structure docs, what tools we use, and how to contribute updates.
+- :doc:`Linting Rules <docs/reference/linting>` - Review the automated checks that keep the codebase healthy and how to run them locally.
+- :doc:`Testing Strategy <docs/reference/testing>` - Explore the testing layers we rely on and guidelines for writing reliable tests.
+- :doc:`Versioning and Releases <docs/reference/versioning>` - See how we tag releases, manage dependencies, and maintain backward compatibility.
+- :doc:`Module Reference <modules>` - Dive into the autogenerated API reference for Python modules within Learning Observer.
+- :doc:`API Reference <api>` - Inspect the internal functionality of the system.
+
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :titlesonly:
+
+ docs/reference/code_quality.md
+ docs/reference/system_settings.md
+ docs/reference/documentation.md
+ docs/reference/linting.md
+ docs/reference/testing.md
+ docs/reference/versioning.md
+ modules
+ api
diff --git a/autodocs/requirements.txt b/autodocs/requirements.txt
new file mode 100644
index 000000000..719fea2af
--- /dev/null
+++ b/autodocs/requirements.txt
@@ -0,0 +1,3 @@
+myst_parser
+sphinx
+sphinx-autodoc2
diff --git a/autodocs/tutorials.rst b/autodocs/tutorials.rst
new file mode 100644
index 000000000..0d6771b53
--- /dev/null
+++ b/autodocs/tutorials.rst
@@ -0,0 +1,15 @@
+Tutorials
+=============
+
+Step-by-step guides that teach by doing. Follow these tutorials to get hands-on experience with core workflows:
+
+- :doc:`Install Learning Observer <docs/tutorials/install>` - Set up the development environment, install dependencies, and verify your deployment.
+- :doc:`Create a Module with Cookiecutter <docs/tutorials/cookiecutter-module>` - Generate a new module scaffold, customize it, and understand the key files produced by the template.
+
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :titlesonly:
+
+ docs/tutorials/install.md
+ docs/tutorials/cookiecutter-module.md
diff --git a/devops/README.md b/devops/README.md
new file mode 100644
index 000000000..352ae056f
--- /dev/null
+++ b/devops/README.md
@@ -0,0 +1,13 @@
+Dev-ops scripts
+===============
+
+This contains machinery for spinning up, shutting down, and managing
+Learning Observer servers. It's usable, but very much not done yet. We
+can spin up, spin down, and list machines, but this ought to be more
+fault-tolerant, better logged, less hard-coded, etc.
+
+We would like to be cross-platform, and eventually support both
+Debian-based distros and RPM-based distros, as well as multiple cloud
+providers. We're not there yet on either front. However, we probably
+won't accept PRs which move us away from these goals.
\ No newline at end of file
diff --git a/devops/ansible/files/default b/devops/ansible/files/default
new file mode 100644
index 000000000..4c633a57b
--- /dev/null
+++ b/devops/ansible/files/default
@@ -0,0 +1,74 @@
+server {
+ # We listen for HTTP on port 80. This is helpful for debugging
+ listen 80 default_server;
+ listen [::]:80 default_server;
+
+ # We listen for HTTPS on port 443 too. This is managed when we set up certbot.
+
+ # Set this up when installing:
+ server_name {SERVER_NAME};
+
+ # We're mostly not using static web files right now, but it's good to have these around.
+ root /var/www/html;
+ index index.html index.htm index.nginx-debian.html;
+
+ # We will eventually want to split our (non-CORS) data intake and our (CORS) dashboards
+ location / {
+ # First attempt to serve request as file, then
+ # as directory, then fall back to displaying a 404.
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+
+ try_files $uri $uri/ =404;
+ }
+
+ location /app/ {
+ # For now, this is for debugging and development. We'd like to be able to launch arbitrary
+ # web apps. In the longer-term, it's likely the whole system might move here (and who knows
+ # if this comment will update).
+ #
+ # Note we don't add CORS headers for now, but we eventually will need to. We'll need to sort
+ # through where we add them, though.
+ proxy_pass http://localhost:8080/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ #rewrite ^/app/(.*)$ $1 last;
+ if ($request_method = OPTIONS ) {
+ return 200;
+ }
+ }
+
+ # This is our HTTP API
+ # Note that we disable CORS. We may want to have a version with and without CORS
+ location /webapi/ {
+ proxy_pass http://localhost:8888/webapi/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ if ($request_method = OPTIONS ) {
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+ return 200;
+ }
+ }
+
+ # And our websockets API
+ # We are migrating our streaming analytics to web sockets.
+ location /wsapi/ {
+ proxy_pass http://localhost:8888/wsapi/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_read_timeout 86400;
+
+ if ($request_method = OPTIONS ) {
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+ return 200;
+ }
+ }
+}
diff --git a/devops/ansible/files/nginx-locations b/devops/ansible/files/nginx-locations
new file mode 100644
index 000000000..3bb4a0a75
--- /dev/null
+++ b/devops/ansible/files/nginx-locations
@@ -0,0 +1,30 @@
+
+ location /webapi/ {
+ proxy_pass http://localhost:8888/webapi/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ if ($request_method = OPTIONS ) {
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+ return 200;
+ }
+ }
+
+ location /wsapi/ {
+ proxy_pass http://localhost:8888/wsapi/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_read_timeout 86400;
+
+ if ($request_method = OPTIONS ) {
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+ return 200;
+ }
+ }
+
diff --git a/devops/ansible/local.yaml b/devops/ansible/local.yaml
new file mode 100644
index 000000000..3d502ca93
--- /dev/null
+++ b/devops/ansible/local.yaml
@@ -0,0 +1,6 @@
+- name: Provision writing analysis server
+ hosts: localhost
+ connection: local
+ tasks:
+ - include: tasks/writing-apt.yaml
+
diff --git a/devops/ansible/scripts/add_nginx_locations.py b/devops/ansible/scripts/add_nginx_locations.py
new file mode 100644
index 000000000..f695f9396
--- /dev/null
+++ b/devops/ansible/scripts/add_nginx_locations.py
@@ -0,0 +1,45 @@
+"""
+This script adds the locations for our web API to nginx. It adds
+them after the default location.
+"""
+
+import sys
+import shutil
+import datetime
+
+lines = open("/etc/nginx/sites-enabled/default", "r").readlines()
+
+# If we've already added these, do nothing.
+for line in lines:
+ if "webapi" in line:
+ print("Already configured!")
+ sys.exit(-1)
+
+# We will accumulate the new file into this variable
+output = ""
+
+# We step through the file until we find the first 'location' line, and
+# keep cycling until we find a single "}" ending that section.
+#
+# At that point, we add the new set of locations.
+
+location_found = False
+added = False
+for line in lines:
+ output += line
+ if line.strip().startswith("location"):
+ print("Found")
+ location_found = True
+ if location_found and line.strip() == "}" and not added:
+ output += open("../files/nginx-locations").read()
+ added = True
+
+
+backup_file = "/etc/nginx/sites-enabled-default-" + \
+ datetime.datetime.utcnow().isoformat()
+shutil.move("/etc/nginx/sites-enabled/default", backup_file)
+
+with open("/etc/nginx/sites-enabled/default", "w") as fp:
+ fp.write(output)
+
+print(output)
diff --git a/devops/ansible/scripts/rhel b/devops/ansible/scripts/rhel
new file mode 100644
index 000000000..130a66c0f
--- /dev/null
+++ b/devops/ansible/scripts/rhel
@@ -0,0 +1 @@
+yum install ansible emacs nginx redis curl git links lynx screen whois postgresql
diff --git a/devops/ansible/tasks/writing-apt.yaml b/devops/ansible/tasks/writing-apt.yaml
new file mode 100644
index 000000000..e00d6b134
--- /dev/null
+++ b/devops/ansible/tasks/writing-apt.yaml
@@ -0,0 +1,57 @@
+- apt: upgrade=dist update_cache=yes
+
+- name: Basic utils
+ apt: name={{ item }}
+ with_items:
+ - curl
+ - emacs
+ - git
+ - git-core
+ - links
+ - lynx
+ - mosh
+ - nmap
+ - whois
+ - screen
+ - wipe
+ - build-essential
+ - net-tools
+
+# We don't need all of this per se, but it's convenient. If nothing
+# else, it gives prereqs for `pip`
+- name: Python
+ apt: name={{ item }}
+ with_items:
+ - ipython3
+ - libxml2-dev
+ - libxslt1-dev
+ - python3-boto
+ - python3-bson
+ - python3-dev
+ - python3-matplotlib
+ - python3-numpy
+ - python3-pandas
+ - python3-pip
+ - python3-scipy
+ - python3-setuptools
+ - python3-sklearn
+ - virtualenvwrapper
+ - libjpeg-dev
+ - python3-opencv
+ - python3-virtualenv
+ - python3-aiohttp
+ - python3-aiohttp-cors
+ - python3-tornado
+ - python3-yaml
+ - python3-asyncpg
+ - python3-bcrypt
+
+- name: Server
+ apt: name={{ item }}
+ with_items:
+ - redis
+ - nginx
+ - certbot
+ - apache2-utils
+ - fcgiwrap
+ - python3-certbot-nginx
\ No newline at end of file
diff --git a/devops/requirements.txt b/devops/requirements.txt
new file mode 100644
index 000000000..e548f11b9
--- /dev/null
+++ b/devops/requirements.txt
@@ -0,0 +1,6 @@
+chevron
+boto3
+pyyaml
+fabric
+invoke
+filetype
diff --git a/devops/single_server_instances/README.md b/devops/single_server_instances/README.md
new file mode 100644
index 000000000..f2123849b
--- /dev/null
+++ b/devops/single_server_instances/README.md
@@ -0,0 +1,74 @@
+# Learning Observer — Instance Control Scripts
+
+This directory contains two bash scripts to start and stop multiple instances of the `learning_observer` application.
+
+## Files
+
+* **`start_lo_instances.sh`** — Launches one or more instances of the app on sequential ports, creates log files, and stores process IDs in a PID directory.
+* **`stop_lo_instances.sh`** — Stops all running instances recorded in the PID directory.
+
+## Configuration
+
+Before use, edit the scripts to match your system:
+
+* `LEARNING_OBSERVER_LOC` — Path to your project code.
+* `VIRTUALENV_PATH` — Path to your Python virtual environment.
+* `LOGFILE_DEST` — Directory for logs (default `/var/log/learning_observer`).
+* `START_PORT` — First port to use.
+* `SCRIPT_NAME` — Command or Python file to run.
+
+## Usage
+
+Start instances (default: 1):
+
+```bash
+./start_lo_instances.sh
+./start_lo_instances.sh 3 # start 3 instances
+```
+
+Stop all instances:
+
+```bash
+./stop_lo_instances.sh
+```
+
+Logs are saved in `LOGFILE_DEST`, and PIDs are stored in `LOGFILE_DEST/pids`.
+You may need to change paths or permissions depending on your environment.
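+
+For example, with the default `LOGFILE_DEST` above, you can check which
+instances are running and follow the most recent log. This is a rough
+sketch; adjust the paths and file patterns to your configuration:
+
+```bash
+# Confirm each recorded PID is still alive
+for pidfile in /var/log/learning_observer/pids/*.pid; do
+    pid=$(cat "$pidfile")
+    if kill -0 "$pid" 2>/dev/null; then
+        echo "$pid running ($pidfile)"
+    else
+        echo "$pid not running ($pidfile)"
+    fi
+done
+
+# Follow the newest log file
+tail -f "$(ls -t /var/log/learning_observer/*.log | head -n 1)"
+```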
+
+## Nginx Settings
+
+The file `nginx.conf.example` provides a sample configuration for Nginx when you start 4 instances of LO.
+These settings split incoming events and all other traffic between 2 upstream groups.
+Each upstream group balances connections between 2 instances of Learning Observer.
+
+```text
+Incoming Request
+ │
+ ▼
++---------------+
+| NGINX |
++---------------+
+ │
+ ▼
+ Path starts
+ with "/wsapi/in/"?
+ ┌───────────────┐
+ Yes│ │No
+ ▼ ▼
++------------------+ +-----------------+
+| wsapi_in_backend | | general_backend |
+| | | |
++-------+----------+ +--------+--------+
+ │ │
+ +----+----+ +----+----+ Balanced by least
+ | App 1 | | App 3 | connections `least_conn`
+ | :9001 | | :9003 |
+ +---------+ +---------+
+ +----+----+ +----+----+
+ | App 2 | | App 4 |
+ | :9002 | | :9004 |
+ +---------+ +---------+
+```
+
+Note: these are settings to add to your nginx configuration.
+You will likely have other settings, such as SSL certificates.
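+
+After adding them, one common way to validate and apply the changes
+(assuming an nginx managed by systemd) is:
+
+```bash
+sudo nginx -t                  # check the combined configuration for errors
+sudo systemctl reload nginx    # apply it without dropping connections
+```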
diff --git a/devops/single_server_instances/nginx.conf.example b/devops/single_server_instances/nginx.conf.example
new file mode 100644
index 000000000..571ba0f2d
--- /dev/null
+++ b/devops/single_server_instances/nginx.conf.example
@@ -0,0 +1,55 @@
+# Upstreams
+upstream wsapi_in_backend {
+ least_conn;
+ server 127.0.0.1:9001;
+ server 127.0.0.1:9002;
+}
+
+upstream general_backend {
+ least_conn;
+ server 127.0.0.1:9003;
+ server 127.0.0.1:9004;
+}
+
+# Simple CORS preflight detection
+map $request_method $cors_preflight {
+ default 0;
+ OPTIONS 1;
+}
+
+server {
+ # Common proxy headers for everything
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+
+ # --- Split route for /wsapi/in/ ---
+ location /wsapi/in/ {
+ proxy_pass http://wsapi_in_backend;
+ proxy_read_timeout 86400;
+
+ add_header 'Access-Control-Allow-Origin' '*';
+ add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS, HEAD';
+ add_header 'Access-Control-Allow-Headers' 'Authorization, Origin, X-Requested-With, Content-Type, Accept';
+
+ if ($cors_preflight) {
+ return 200;
+ }
+ }
+
+ # --- Everything else goes to general backend ---
+ location / {
+ proxy_pass http://general_backend;
+ proxy_read_timeout 86400;
+
+ add_header 'Access-Control-Allow-Origin' '*';
+ add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS, HEAD';
+ add_header 'Access-Control-Allow-Headers' 'Authorization, Origin, X-Requested-With, Content-Type, Accept';
+
+ if ($cors_preflight) {
+ return 200;
+ }
+ }
+}
diff --git a/devops/single_server_instances/start_lo_instances.sh b/devops/single_server_instances/start_lo_instances.sh
new file mode 100755
index 000000000..c872430dc
--- /dev/null
+++ b/devops/single_server_instances/start_lo_instances.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# === Config ===
+NUM_SERVERS=${1:-1} # default 1 server instance
+START_PORT=9001
+LOGFILE_DEST="/var/log/learning_observer"
+PIDFILE_DIR="$LOGFILE_DEST/pids"
+LEARNING_OBSERVER_LOC="/path/to/your/code"
+VIRTUALENV_PATH="/path/to/your/venv"
+SCRIPT_NAME="learning_observer"
+
+# Create log + pid dirs if they don't exist
+mkdir -p "$LOGFILE_DEST"
+mkdir -p "$PIDFILE_DIR"
+
+# Timestamp for log grouping
+LOG_DATE=$(date "+%m-%d-%Y--%H-%M-%S")
+
+# === Start Servers ===
+echo "Starting $NUM_SERVERS instances of $SCRIPT_NAME..."
+
+cd "$LEARNING_OBSERVER_LOC"
+source "$VIRTUALENV_PATH/bin/activate"
+
+for ((i=0; i<NUM_SERVERS; i++)); do
+  PORT=$((START_PORT + i))
+  LOGFILE_NAME="$LOGFILE_DEST/${SCRIPT_NAME}_${PORT}_${LOG_DATE}.log"
+  PIDFILE_NAME="$PIDFILE_DIR/${SCRIPT_NAME}_${PORT}.pid"
+  echo "Starting $SCRIPT_NAME on port $PORT -> Log: $LOGFILE_NAME"
+ nohup python $SCRIPT_NAME --port $PORT > "$LOGFILE_NAME" 2>&1 &
+ PROCESS_ID=$!
+ echo $PROCESS_ID > "$PIDFILE_NAME"
+ echo " -> PID $PROCESS_ID logged to $PIDFILE_NAME"
+done
+
+echo "✅ All servers started."
+echo "Run ./scripts/stop_lo_instances.sh to stop server processes."
diff --git a/devops/single_server_instances/stop_lo_instances.sh b/devops/single_server_instances/stop_lo_instances.sh
new file mode 100755
index 000000000..2a3243445
--- /dev/null
+++ b/devops/single_server_instances/stop_lo_instances.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# === Config ===
+LOGFILE_DEST="/var/log/learning_observer"
+PIDFILE_DIR="$LOGFILE_DEST/pids"
+SCRIPT_NAME="learning_observer"
+
+# === Stop All Servers ===
+echo "Stopping all $SCRIPT_NAME servers..."
+
+if [ ! -d "$PIDFILE_DIR" ]; then
+ echo "PID directory not found. Nothing to stop."
+ exit 1
+fi
+
+for PIDFILE in "$PIDFILE_DIR"/*.pid; do
+ if [ -f "$PIDFILE" ]; then
+ PID=$(cat "$PIDFILE")
+ if kill -0 "$PID" 2>/dev/null; then
+ echo "Stopping PID $PID from $PIDFILE"
+ kill "$PID"
+ else
+ echo "PID $PID not running, skipping."
+ fi
+ rm -f "$PIDFILE"
+ fi
+done
+
+echo "✅ All servers stopped."
diff --git a/devops/tasks/README.md b/devops/tasks/README.md
new file mode 100644
index 000000000..156977314
--- /dev/null
+++ b/devops/tasks/README.md
@@ -0,0 +1,124 @@
+Deployment Scripts
+==================
+
+Our goals are:
+
+* We'd like to have a flock of LO servers for dynamic assessment,
+ Writing Observer, random demos, etc. These should have a common
+ configuration, with variations.
+* We'd like to have a log of how these are configured at every point
+ in time, and any changes, so we can have context for any process
+ data we collect.
+* We'd like this representation to be interoperable with our process
+  data storage formats.
+* We'd like configuration data to be moderately secure. Device
+  configuration won't allow exploits in itself, but it can make
+  vulnerabilities more serious. While things like IDs and locations of
+  resources don't present an attack vector in themselves, knowing them
+  is sometimes the limiting factor on being able to exploit an attack
+  vector (for example, if I have an exploit where I can read one
+  arbitrary file on your system, being able to leverage that attack
+  hinges on knowing what files you have where).
+* However, configuration data also sometimes needs to store things
+  which are super-sensitive, like security tokens and similar.
+* Making changes should be fast and easy. This happens all the time.
+* Digging into archives doesn't need to be easy, just possible. For
+ research, only a few types of analysis need it. For operations, you
+ usually only need it for debugging or disaster recovery.
+
+Our **planned** architecture is:
+
+* A set of `fabric` scripts which can spin up / spin down / update
+  machines (with appropriate logging)
+* A baseline configuration in `ansible`.
+* Deltas from that configuration stored in an independent `git` repo
+* Security tokens stored in a separate TBD data store. We'll populate
+  these with templates.
+* Log files of when new versions are updated/deployed/brought down, in
+  the same system as our process data
+* Tagging process data with `git` hashes of the state the system
+  was in when that data was generated.
+
+We're making the baseline `ansible` configuration pretty featureful,
+since as a research project, it's helpful to be able to `ssh` into
+machines, and e.g. run `Python` scripts locally.
+
+Whether or not we need `ansible`, `fabric`, or both is a bit of an
+open question.
+
+Where we are
+------------
+
+This will be out-of-date quickly, but as of this writing:
+
+* We can provision, terminate, and update machines with a baseline
+ configuration.
+* A lot of stuff is hardcoded, which would make this difficult for
+ others to use (e.g. learning-observer.org).
+* We install packages, grab things from `git`, etc., but don't handle
+  configuration well yet.
+* We don't log.
+
+We orchestrate servers with [invoke](https://www.pyinvoke.org/):
+
+* `inv list` will show a listing of deployed machines
+* `inv provision [machine]` will spin up a new AWS machine
+* `inv update` will update all machines
+* `inv terminate [machine]` will shut down a machine
+* `inv connect [machine]` will open up an `ssh` session to a machine
+* `inv configure [machine]` is typically run after provision, and
+ will place configuration files (which might vary
+ machine-by-machine) (mostly finished)
+* `inv certbot [machine]` will set up SSL (unfinished)
+* `inv downloadconfig [machine]` will copy the configuration back.
+* `inv create [machine]` is a shortcut to do everything for a new instance in one step (provision, configure, certbotify, and download the SSL config)
+
+A lot of this is unfinished, but still, it's already ahead of the AWS
+GUI and doing things by hand. The key functionality missing is:
+
+* High-quality logging
+* Fault recovery
+* Version control of configurations
+
+To set up a new machine, run:
+
+```
+inv provision [machine]
+inv configure [machine]
+inv certbot [machine]
+inv downloadconfig [machine]
+```
+
+From there, edit configuration files in `config`. To update the
+machine to a new version, run:
+
+```
+inv configure [machine]
+```
+
+Debugging
+---------
+
+The most annoying part of this setup is getting `systemd` working,
+which is poorly documented, inconsistent, and poorly-engineered. The
+tools are `journalctl -xe | tail -100`, looking at `lo.err` (currently
+in `/home/ubuntu/`, but it should move to `/var/log/` eventually), and
+`systemctl status --full learning_observer`. The most common issues
+are permissions (e.g. running as the wrong user, log files generated
+as `root:root` at some point, etc.), running from the wrong directory,
+and similar sorts of environment issues.
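+
+In practice, the debugging loop usually looks something like this (the
+same commands as above, collected for copy-paste):
+
+```
+journalctl -xe | tail -100
+systemctl status --full learning_observer
+tail -100 /home/ubuntu/lo.err
+```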
+
+Logging
+-------
+
+We are logging system configuration with `git`. Note that this is
+**NOT** atomic or thread-safe. This is perhaps a bug, and perhaps by
+design:
+
+* Tasks take a _while_ to run, and they need to run in parallel when
+ managing many machines.
+* A better (much more complex) approach would use branches or do
+  atomic commits at the end (e.g. download to a temporary dir, and
+  move right before the commit).
+* However, it is possible to reverse-engineer exactly what happened,
+  and roughly when. This is good enough for now.
\ No newline at end of file
diff --git a/devops/tasks/config/creds.yaml b/devops/tasks/config/creds.yaml
new file mode 100644
index 000000000..dab0fe6d8
--- /dev/null
+++ b/devops/tasks/config/creds.yaml
@@ -0,0 +1,33 @@
+hostname: {{hostname}}.{{domain}}
+xmpp:
+ sink: # Receives messages. We'll need many of these.
+ jid: sink@localhost
+ password: {{RANDOM1}}
+ source: # Sends messages.
+ jid: source@localhost
+ password: {{RANDOM1}}
+ stream: # For debugging
+ jid: stream@localhost
+ password: {{RANDOM1}}
+auth:
+ password_file: passwd.lo
+pubsub:
+ type: redis
+kvs:
+ type: redis
+roster_data:
+ source: all
+aio:
+ session_secret: {{RANDOM2}}
+ session_max_age: 3600
+config:
+ run_mode: dev
+ debug: []
+theme:
+ server_name: Learning Observer
+ front_page_pitch: Learning Observer is an experimental dashboard. If you'd like to be part of the experiment, please contact us. If you're already part of the experiment, log in!
+ logo_big: /static/media/logo-clean.jpg
+event_auth:
+ local_storage:
+ userfile: students.yaml
+ allow_guest: true
\ No newline at end of file
diff --git a/devops/tasks/config/hostname b/devops/tasks/config/hostname
new file mode 100644
index 000000000..8c9fff80a
--- /dev/null
+++ b/devops/tasks/config/hostname
@@ -0,0 +1 @@
+{{hostname}}
\ No newline at end of file
diff --git a/devops/tasks/config/init.d b/devops/tasks/config/init.d
new file mode 100644
index 000000000..6d8b816ad
--- /dev/null
+++ b/devops/tasks/config/init.d
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# The world's simplest, stupidest init script.
+#
+# THIS IS CURRENTLY UNUSED, SINCE WE USE A SYSTEMD SCRIPT
+
+### BEGIN INIT INFO
+# Provides: learning_observer
+# Required-Start: mountkernfs $local_fs
+# Required-Stop:
+# Should-Start:
+# X-Start-Before:
+# Default-Start: S
+# Default-Stop:
+# Short-Description: Runs the Learning Observer platform
+# Description: This is a part of a larger dev-ops infrastructure. This is unlikely to work in isolation.
+### END INIT INFO
+#
+# written by Piotr Mitros
+
+
+case "$1" in
+start)
+ cd /home/ubuntu/writing_observer/learning_observer/
+ setsid -f su ubuntu ./lo.sh
+;;
+status)
+ printf "For status, run: ps aux | grep learning_observer\n"
+;;
+stop)
+ pkill -f learning_observer
+;;
+
+restart)
+ $0 stop
+ $0 start
+;;
+
+*)
+ echo "Usage: $0 {status|start|stop|restart}"
+ exit 1
+esac
diff --git a/devops/tasks/config/lo.sh b/devops/tasks/config/lo.sh
new file mode 100644
index 000000000..d9a05d850
--- /dev/null
+++ b/devops/tasks/config/lo.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/bash
+
+# This is a script to start up Learning Observer with its own process
+# name. This is convenient for being able to start / stop the process.
+
+. /usr/share/virtualenvwrapper/virtualenvwrapper.sh
+workon learning_observer
+cd /home/ubuntu/writing_observer/learning_observer
+bash -c "exec -a learning_observer python learning_observer" >> /home/ubuntu/lo.log 2>> /home/ubuntu/lo.err
diff --git a/devops/tasks/config/nginx b/devops/tasks/config/nginx
new file mode 100644
index 000000000..229ba6c4a
--- /dev/null
+++ b/devops/tasks/config/nginx
@@ -0,0 +1,40 @@
+server {
+ # We listen for HTTP on port 80. When we set up certbot, this changes to 443.
+ listen 80 default_server;
+ listen [::]:80 default_server;
+
+ server_name {{hostname}}.{{domain}};
+
+ location / {
+ # Generally, used to configure permissions. E.g. http basic auth, allow/deny
+ # IP blocks, etc. Note that for deploy, this should be broken out into several
+ # blocks (e.g. incoming event, dashboards, etc.)
+ {{nginx_root_options}}
+
+ proxy_pass http://localhost:8888/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+ # We disable CORS globally. This should be more granular.
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+ }
+ location /wsapi/ {
+ proxy_pass http://localhost:8888/wsapi/;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_read_timeout 86400;
+
+ add_header "Access-Control-Allow-Origin" *;
+ add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD";
+ add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept";
+
+ if ($request_method = OPTIONS ) {
+ return 200;
+ }
+ }
+}
\ No newline at end of file
diff --git a/devops/tasks/config/passwd.lo b/devops/tasks/config/passwd.lo
new file mode 100644
index 000000000..e69de29bb
diff --git a/devops/tasks/config/postuploads b/devops/tasks/config/postuploads
new file mode 100644
index 000000000..a8dcdc67d
--- /dev/null
+++ b/devops/tasks/config/postuploads
@@ -0,0 +1,10 @@
+sudo hostnamectl set-hostname {hostname}
+sudo rm -f /etc/nginx/sites-available/default
+sudo rm -f /etc/nginx/sites-enabled/default
+if [ -f /etc/nginx/sites-available/{hostname} ]; then sudo ln -f /etc/nginx/sites-available/{hostname} /etc/nginx/sites-enabled/{hostname}; else echo "WARNING: Failed to make symlink in /etc/nginx/sites-available (config/postupload)"; fi
+
+sudo chown -R ubuntu:ubuntu /home/ubuntu/writing_observer
+sudo systemctl daemon-reload
+sudo service learning_observer stop
+sudo service learning_observer start
+sudo service nginx restart
diff --git a/devops/tasks/config/rsyslog.conf b/devops/tasks/config/rsyslog.conf
new file mode 100644
index 000000000..47ee22ba5
--- /dev/null
+++ b/devops/tasks/config/rsyslog.conf
@@ -0,0 +1 @@
+if $programname == 'learning_observer' then /var/log/lo.log
\ No newline at end of file
diff --git a/devops/tasks/config/sync.csv b/devops/tasks/config/sync.csv
new file mode 100644
index 000000000..e3592df21
--- /dev/null
+++ b/devops/tasks/config/sync.csv
@@ -0,0 +1,6 @@
+creds.yaml,root:root,644,/home/ubuntu/writing_observer/learning_observer/creds.yaml,"Learning Observer settings file"
+nginx,root:root,644,/etc/nginx/sites-enabled/{hostname},"nginx site configuration"
+passwd.lo,root:root,644,/home/ubuntu/writing_observer/learning_observer/passwd.lo,"(Generally blank) passwords file"
+lo.sh,ubuntu:ubuntu,744,/home/ubuntu/writing_observer/learning_observer/lo.sh,"Script to start Learning Observer with a nice process name"
+systemd,root:root,644,/etc/systemd/system/learning_observer.service,"Systemd init script"
+rsyslog.conf,root:root,644,/etc/rsyslog.d/learning_observer.conf,"rsyslog script (for stdout/stderr)"
\ No newline at end of file
diff --git a/devops/tasks/config/systemd b/devops/tasks/config/systemd
new file mode 100644
index 000000000..673d3ffa2
--- /dev/null
+++ b/devops/tasks/config/systemd
@@ -0,0 +1,11 @@
+[Unit]
+Description=Learning Observer
+
+[Service]
+ExecStart=/home/ubuntu/writing_observer/learning_observer/lo.sh
+Type=simple
+StandardOutput=syslog
+StandardError=syslog
+SyslogIdentifier=learning_observer
+User=ubuntu
+Group=ubuntu
\ No newline at end of file
diff --git a/devops/tasks/orchlib/__init__.py b/devops/tasks/orchlib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/devops/tasks/orchlib/aws.py b/devops/tasks/orchlib/aws.py
new file mode 100644
index 000000000..ce76b14ba
--- /dev/null
+++ b/devops/tasks/orchlib/aws.py
@@ -0,0 +1,248 @@
+'''
+Tools to bring up an AWS nano instance, and to connect it to DNS via
+Route 53. We do not want to be AWS-specific, and this file should be
+the only place where we import boto.
+'''
+
+import time
+import yaml
+
+import boto3
+
+import orchlib.config
+import orchlib.fabric_flock
+from orchlib.logger import system
+
+
+session = boto3.session.Session()
+ec2 = session.resource('ec2')
+ec2client = boto3.client('ec2')
+r53 = boto3.client('route53')
+
+UBUNTU_20_04 = "ami-09e67e426f25ce0d7"
+
+def create_instance(name):
+ '''
+ Launch a machine on EC2. Return the boto instance object.
+ '''
+ blockDeviceMappings = [
+ {
+ "DeviceName": "/dev/xvda",
+ "Ebs": {
+ "DeleteOnTermination": True,
+ "VolumeSize": 32,
+ "VolumeType": "gp2"
+ }
+ }
+ ]
+
+ # Baseline set of tags....
+ tags = [
+ {
+ 'Key': 'Name',
+ 'Value': name
+ },
+ {
+ 'Key': 'Owner',
+ 'Value': orchlib.config.creds['owner']
+ },
+ {
+ 'Key': 'deploy-group',
+ 'Value': orchlib.config.creds['deploy-group']
+ }
+ ]
+
+ # And we allow extra tags from the config file.
+ #
+ # This should be handled more nicely at some point. We want
+ # a global config, with per-machine overrides, and we want
+ # this config common for all templates, etc.
+ for (key, value) in orchlib.config.creds.get("ec2_tags", {}).items():
+ tags.append({
+ 'Key': key,
+ 'Value': value
+ })
+
+ # This is kind of a mess.
+ # Good command to help guide how to make this:
+ # `aws ec2 describe-instances > template`
+ # It doesn't correspond 1:1, but it's a good starting
+ # point.
+ response = ec2.create_instances(
+ ImageId=UBUNTU_20_04,
+ InstanceType='t2.small',
+ BlockDeviceMappings=blockDeviceMappings,
+ KeyName=orchlib.config.creds['aws_keyname'],
+ MinCount=1,
+ MaxCount=1,
+ Placement={
+ "AvailabilityZone": "us-east-1b"
+ },
+ NetworkInterfaces=[
+ {
+ 'SubnetId': orchlib.config.creds['aws_subnet_id'],
+ 'DeviceIndex': 0,
+ 'AssociatePublicIpAddress': True,
+ 'Groups': [orchlib.config.creds['aws_security_group']]
+ }
+ ],
+ TagSpecifications=[
+ {
+ 'ResourceType': 'instance',
+ 'Tags': tags
+ }
+ ]
+ )
+
+ instance = response[0]
+ instance.wait_until_running()
+ # Reload, to update with assigned IP, etc.
+ instance = ec2.Instance(instance.instance_id)
+
+ # Switch to IMDS v2, hopefully, due to security improvements
+ ec2client.modify_instance_metadata_options(
+ InstanceId=instance.instance_id,
+ HttpTokens='required',
+ HttpEndpoint='enabled'
+ )
+
+ print("Launched ", instance.instance_id)
+ print("IP: ", instance.public_ip_address)
+ return instance
+
+
+def list_instances():
+ '''
+ List all of the `learning-observer` instances, in a compact
+ format, with just the:
+
+ * Instance ID
+ * Tags
+ * Public IP Address
+ '''
+ reservations = ec2client.describe_instances(Filters=[
+ {
+ 'Name': 'tag:deploy-group',
+ 'Values': [orchlib.config.creds['deploy-group']]
+ },
+ ])['Reservations']
+ instances = sum([i['Instances'] for i in reservations], [])
+ summary = [{
+ 'InstanceId': i['InstanceId'],
+ 'Tags': {tag['Key']: tag['Value'] for tag in i['Tags']},
+ 'PublicIpAddress': i.get('PublicIpAddress', "--.--.--.--")
+ } for i in instances]
+ return summary
+
+def terminate_instances(name):
+ '''
+    Terminate all instances with the given name.
+
+    Returns the number of instances terminated. We might kill more
+    than one if several instances were assigned the same name.
+
+ Also, wipes their associated DNS.
+ '''
+ instances = list_instances()
+ print("All instances: ", instances)
+ matching_instances = [
+ i for i in instances if i['Tags']['Name'] == name
+ ]
+ # Set to `None` so we don't accidentally touch this again!
+ instances = None
+ print("Matching instances: ", matching_instances)
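+    # Count down before doing anything destructive, so the operator has a
+    # chance to Ctrl-C if the matching list looks wrong.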
+ for i in range(10):
+ print(10-i)
+ time.sleep(1)
+ print("Removing DNS")
+ for instance in matching_instances:
+ register_dns(
+ name,
+ orchlib.config.creds['domain'],
+ instance['PublicIpAddress'],
+ unregister=True
+ )
+ print("Terminating")
+ ec2client.terminate_instances(
+ InstanceIds = [i['InstanceId'] for i in matching_instances]
+ )
+ system("ssh-keygen -R {host}.{domain}".format(
+ host=name,
+ domain=orchlib.config.creds['domain']
+ ))
+ return len(matching_instances)
+
+
+def register_dns(subdomain, domain, ip, unregister=False):
+ '''
+ Assign a domain name to a machine.
+ '''
+ action = 'UPSERT'
+ if unregister:
+ action = 'DELETE'
+ zones = r53.list_hosted_zones_by_name(
+ DNSName=domain
+ )['HostedZones']
+
+    # AWS seems to ignore DNSName=domain, so we filter down to the right
+    # domain ourselves. AWS includes a dot at the end
+    # (e.g. 'learning-observer.org.'), and we don't right now
+    # (e.g. `learning-observer.org`). We don't need the first test,
+    # but we include it so we don't break if we ever do pass in a
+    # domain with the dot.
+ zones = [
+ z for z in zones # Take all the zone where....
+ if z['Name'].upper() == domain.upper() # The domain name is correct
+ or z['Name'].upper() == (domain+".").upper() # With a dot at the end
+ ]
+
+    if len(zones) != 1:
+ raise Exception("Wrong number of hosted zones!")
+ zoneId = zones[0]['Id']
+ request = r53.change_resource_record_sets(
+ HostedZoneId = zoneId,
+ ChangeBatch = {
+ 'Changes': [
+ {
+ 'Action': action,
+ 'ResourceRecordSet' : {
+ 'Name' : '{subdomain}.{domain}.'.format(
+ subdomain=subdomain,
+ domain=domain
+ ),
+ 'Type' : 'A',
+ 'TTL' : 15,
+ 'ResourceRecords' : [
+ {'Value': ip}
+ ]
+ }
+ },
+ ]
+ }
+ )
+
+ # If we're setting DNS, wait for changes to propagate, so we
+ # can use DNS later in the script
+    while not unregister:
+ print("Propagating DNS....", request['ChangeInfo']['Status'])
+ time.sleep(1)
+ id = request['ChangeInfo']['Id']
+ request = r53.get_change(Id=id)
+ if request['ChangeInfo']['Status'] == 'INSYNC':
+ break
+ return True
+
+
+def name_to_group(machine_name):
+ '''
+ For a machine name, return a fabric ssh group of machines with
+ that name.
+ '''
+ pool = [
+ i['PublicIpAddress']
+ for i in list_instances()
+ if i['Tags']['Name'] == machine_name
+ ]
+ print(pool)
+ group = orchlib.fabric_flock.machine_group(*pool)
+ return group
diff --git a/devops/tasks/orchlib/config.py b/devops/tasks/orchlib/config.py
new file mode 100644
index 000000000..330de195c
--- /dev/null
+++ b/devops/tasks/orchlib/config.py
@@ -0,0 +1,100 @@
+import os
+import os.path
+
+import json
+import yaml
+
+creds_file = "settings/CREDS.YAML"
+
+if not os.path.exists(creds_file):
+ print("No credentials file. I'll need a bit of info from you")
+ print("to make one.")
+ info = {
+ "user": "Your username on the remote machine (probably ubuntu)",
+ "key_filename": "Your AWS key filename (something like /home/me/.ssh/aws.pem)",
+ "aws_keyname": "Your AWS key id (as AWS knows it; e.g. aws.pem)",
+ "aws_subnet_id": "AWS subnet (e.g. subnet-012345abc)",
+ "aws_security_group": "AWS security group (e.g. sg-012345abc)",
+ "owner": "Your name",
+ "email": "Your email",
+ "domain": "Domain name (e.g. learning-observer.org)",
+ "flock-config": "Path to git repo where we'll store machine config.",
+ "deploy-group": "Tag to identify all machines (typically, learning-observer)",
+ "ec2_tags": "JSON dictionary of any additional tags you'd like on your machines. If you're not sure, type {}"
+ }
+ print("I'll need:")
+ for key, value in info.items():
+ print("* {value}".format(value=value))
+ print("Let's get going")
+ d = {}
+ for key, value in info.items():
+ print(value)
+ d[key] = input("{key}: ".format(key=key)).strip()
+ d['ec2_tags'] = json.loads(d['ec2_tags'])
+ if not os.path.exists(d['flock-config']):
+ os.system("git init {path}".format(path=d['flock-config']))
+ os.mkdir(os.path.join(d['flock-config'], "config"))
+ with open("settings/CREDS.YAML", "w") as fp:
+ yaml.dump(d, fp)
+
+creds = yaml.safe_load(open(creds_file))
+
+def config_filename(machine_name, file_suffix, create=False):
+ '''
+    Search for the name of a config file, checking:
+    * Per-machine config
+    * System-wide defaults
+    * Defaults for the Learning Observer (defined in this repo)
+
+ Absolute paths (e.g. beginning with '/') are returned as-is.
+ '''
+ if file_suffix.startswith("/"):
+ return file_suffix
+
+ paths = [
+ # First, we try per-machine configuration
+ os.path.join(
+ creds["flock-config"], "config", machine_name, file_suffix
+ ),
+ # Next, we try the per-machine override
+ os.path.join(
+ creds["flock-config"], "config", machine_name, file_suffix+".base"
+ ),
+ # Then, system-wide configuration
+ os.path.join(
+ creds["flock-config"], "config", file_suffix
+ ),
+ # And finally, as a fallback, default files
+ os.path.join(
+ "config", file_suffix
+ )
+ ]
+
+ # For making new versions, always return the per-machine git repo
+ # directory
+ if create == True:
+ return paths[0]
+
+ for fn in paths:
+ print(fn)
+ if os.path.exists(fn):
+ return fn
+
+
+def config_lines(machine_name, file_suffix):
+ '''
+ Kind of like a smart `open().readlines()` for reading config files.
+
+ Handle paths, prefixes, missing files (return nothing),
+ `strip()`ing lines, comments, etc.
+ '''
+ fn = config_filename(machine_name, file_suffix)
+ # No config file
+ if fn is None:
+ print("Skipping; no file for: ", file_suffix)
+ return
+ print("Config file: ", fn)
+ for line in open(fn).readlines():
+ line = line.strip()
+ if len(line) > 0:
+ yield line
diff --git a/devops/tasks/orchlib/fabric_flock.py b/devops/tasks/orchlib/fabric_flock.py
new file mode 100644
index 000000000..0876dcd77
--- /dev/null
+++ b/devops/tasks/orchlib/fabric_flock.py
@@ -0,0 +1,81 @@
+'''
+These are baseline scripts to help orchestrate a flock of machines
+via ssh. This is a thin wrapper around `fabric`.
+'''
+
+import yaml
+import fabric
+
+import orchlib.config
+import orchlib.logger
+
+def machine_group(*pool):
+ # Skip terminated machines.
+ # Sadly, also skips recently-created machines....
+ pool = [ip for ip in pool if ip!="--.--.--.--"]
+ group = fabric.SerialGroup(
+ *pool,
+ user=orchlib.config.creds['user'],
+ connect_kwargs={"key_filename": orchlib.config.creds['key_filename']}
+ )
+
+ class GroupWrapper:
+ '''
+ This is a thin wrapper, designed for logging commands, and in the
+ future, perhaps return values.
+ '''
+ def __init__(self, group):
+ self._group = group
+
+ def run(self, command):
+ command = "source ~/.profile; " + command
+ orchlib.logger.grouplog(
+ "run",
+ [command],
+ {}
+ )
+
+ self._group.run(command)
+
+ def get(self, *args, **kwargs):
+ orchlib.logger.grouplog(
+ "get",
+ args,
+ kwargs
+ )
+ self._group.get(*args, **kwargs)
+
+ def put(self, *args, **kwargs):
+ orchlib.logger.grouplog(
+ "put",
+ args,
+ kwargs
+ )
+ self._group.put(*args, **kwargs)
+
+ def sudo(self, *args, **kwargs):
+ orchlib.logger.grouplog(
+ "sudo",
+ args,
+ kwargs
+ )
+ self._group.sudo(*args, **kwargs)
+
+ wrapper = GroupWrapper(group)
+
+ return wrapper
+
+
+def connection_group(pool = None):
+ '''
+ Return a Fabric connection group
+ '''
+ if pool is None:
+ pool = machine_pool()
+
+    return fabric.SerialGroup(
+        *pool,
+        user=orchlib.config.creds['user'],
+        connect_kwargs={"key_filename": orchlib.config.creds['key_filename']}
+    )
+
diff --git a/devops/tasks/orchlib/logger.py b/devops/tasks/orchlib/logger.py
new file mode 100644
index 000000000..2622b4d62
--- /dev/null
+++ b/devops/tasks/orchlib/logger.py
@@ -0,0 +1,41 @@
+'''
+We'd like to log which actions we take.
+
+This isn't done, but it's a start
+'''
+
+import os
+
+import orchlib.config
+
+log = [
+]
+
+def system(command):
+ '''
+ Run a command on the local system (`os.system`)
+
+ Log the command and return code.
+ '''
+ rc = os.system(command)
+ log.append({
+ 'event': 'system',
+ 'command': command,
+ 'return_code': rc
+ })
+ return rc
+
+def grouplog(command, args, kwargs):
+ log.append({
+ 'event': 'group',
+ 'command': command,
+ 'args': args,
+ 'kwargs': kwargs
+ })
+
+
+def exitlog():
+ '''
+ Not done.
+ '''
+ os.path.join(
+ orchlib.config.creds["flock-config"], "logs"
+ )
diff --git a/devops/tasks/orchlib/repos.py b/devops/tasks/orchlib/repos.py
new file mode 100644
index 000000000..5ec3ea0e0
--- /dev/null
+++ b/devops/tasks/orchlib/repos.py
@@ -0,0 +1,54 @@
+import os
+
+import orchlib.config
+
+import remote_scripts.gitpaths
+
+
+# Working command: GIT_SSH_COMMAND="ssh -i KEY.pem" git --git-dir=/tmp/foo/.git push -f --mirror ssh://ubuntu@SOME_SERVER/home/ubuntu/baregit/foo
+
+
+# This command will forcefully push a local repo to a remote server, including all branches
+GIT_PUSH ='''
+GIT_SSH_COMMAND="ssh -i {key} -o 'StrictHostKeyChecking no'" git
+ --git-dir={localrepo}/.git
+ push -f
+ --mirror
+ ssh://ubuntu@{mn}.{domain}/home/ubuntu/baregit/{reponame}
+'''.strip().replace('\n', '')
+
+
+def force_push(machine, localrepo):
+ print("LOCAL REPO: ", localrepo)
+ command = GIT_PUSH.format(
+ mn=machine,
+ domain=orchlib.config.creds['domain'],
+ key=orchlib.config.creds['key_filename'],
+ localrepo=localrepo,
+ reponame=remote_scripts.gitpaths.gitpath_to_name(localrepo)
+ )
+ print(command)
+ os.system(command)
+
+
+def remote_invoke(group, command):
+ remote_command = "cd writing_observer/devops/tasks/remote_scripts; inv {command}".format(command=command)
+ print(remote_command)
+ group.run(remote_command)
+
+
+def update(group, machine_name):
+ # In most cases, these would correspond to static sites, or
+ # Learning Observer modules
+ print("Grabbing public git packages")
+ for package in orchlib.config.config_lines(machine_name, "gitclone"):
+ remote_invoke(group, "cloneupdate {package}".format(package=package))
+ print("Pushing private git packages")
+
+ # We can only push to bare repos.
+ for package in orchlib.config.config_lines(machine_name, "gitpush"):
+ print("Configuring: ", package)
+ remote_invoke(group, "init {package}".format(package=package))
+ print("Force pushing: ", package)
+ force_push(machine_name, package)
+ remote_invoke(group, "cloneupdatelocal {package}".format(package=package))
diff --git a/devops/tasks/orchlib/templates.py b/devops/tasks/orchlib/templates.py
new file mode 100644
index 000000000..bce617079
--- /dev/null
+++ b/devops/tasks/orchlib/templates.py
@@ -0,0 +1,138 @@
+import base64
+import io
+import os.path
+import uuid
+
+import chevron
+import filetype
+
+import orchlib.config
+
+def secure_guid():
+ '''
+ Mix up a few entropy sources with a few system identifiers...
+
+ This should really be built-in.
+ '''
+ os_random = str(base64.b64encode(os.urandom(32)))
+ uuid1 = uuid.uuid1()
+ uuid4 = uuid.uuid4().hex
+ return uuid.uuid5(uuid1, uuid4).hex
+
+
+def render_file_for_transfer(filename, config):
+ '''
+ This converts a filename and a dictionary into a file-like
+ object, ready for upload.
+ '''
+ # We don't render binary files. This is not a complete set, and we might extend this
+ # later
+ BINARY_TYPES = filetype.audio_matchers + filetype.image_matchers + filetype.video_matchers
+ endings = [".js", ".css", ".ttf", ".ogg", ".jpg", ".png", ".webm", ".mp4"]
+ def skip_encode(filename):
+ '''We don't want to run most binary files, code, etc. through our
+ templating engine. These are heuristics.
+
+ We probably should be explicit and add a field to the config
+ file, so we don't need heuristics. This is a little bit more
+ complex and ad-hoc than I like.
+ '''
+ for e in endings:
+ if filename.endswith(e):
+ return True
+ if filetype.guess(filename) in BINARY_TYPES:
+ return True
+ return False
+
+ if skip_encode(filename):
+ return open(filename, "rb")
+
+ # Other files, we run through our templating engine
+ with open(filename) as fp:
+ # We convert to bytes as a hack-around for this bug: https://github.com/paramiko/paramiko/issues/1133
+ return io.BytesIO(chevron.render(fp, config).encode('utf-8'))
+
+
+def upload(
+ group,
+ machine_name,
+ filename,
+ remote_filename,
+ config,
+ username=None,
+ permissions=None):
+ '''
+ This will upload a file to an AWS machine. It will:
+
+ * Find the right file. It might be a system-wide default,
+ or a machine-specific one.
+ * Generate a set of secure tokens for use in templates (e.g. for
+ initial passwords)
+ * Render the file through `mustache` templates, based on the
+ configuration
+ * Upload to the server
+ * Move to the right place, and set permissions.
+ '''
+ # We can use these for security tokens in templates.
+ # We should save these at some point
+ for i in range(10):
+ key = "RANDOM"+str(i)
+ if key not in config:
+ config["RANDOM"+str(i)] = secure_guid()
+
+ local_filename = orchlib.config.config_filename(machine_name, filename)
+
+ # This seems like an odd place, but latest `fabric` has no way
+ # to handle uploads as root.
+ group.put(
+ render_file_for_transfer(
+ local_filename,
+ config
+ ),
+ "/tmp/inv-upload-tmp"
+ )
+
+ group.run("sudo mv /tmp/inv-upload-tmp {remote_filename}".format(
+ remote_filename=remote_filename,
+ mn=machine_name
+ ))
+ if username is not None:
+ group.run("sudo chown {username} {remote_filename}".format(
+ username=username,
+ remote_filename=remote_filename
+ ))
+ if permissions is not None:
+ group.run("sudo chmod {permissions} {remote_filename}".format(
+ permissions=permissions,
+ remote_filename=remote_filename
+ ))
+
+
+def download(
+ group,
+ machine_name,
+ filename,
+ remote_filename):
+ '''
+ This will download a configuration file from an AWS machine, as
+ specified in the machine configuration. It's a simple parallel
+ to `upload`
+ '''
+ print("Remote file: ", remote_filename)
+
+ local_filename = orchlib.config.config_filename(
+ machine_name,
+ filename,
+ create=True
+ )
+
+ print("Local filename: ", local_filename)
+
+    pathname = os.path.split(local_filename)[0]
+    if not os.path.exists(pathname):
+        os.mkdir(pathname)
+
+ group.get(
+ remote_filename,
+ local_filename
+ )
diff --git a/devops/tasks/orchlib/ubuntu.py b/devops/tasks/orchlib/ubuntu.py
new file mode 100644
index 000000000..af4327d40
--- /dev/null
+++ b/devops/tasks/orchlib/ubuntu.py
@@ -0,0 +1,55 @@
+'''
+These are scripts for preparing an Ubuntu 20.04 machine to run the
+Learning Observer
+'''
+
+import sys
+
+import fabric.exceptions
+
+import orchlib.fabric_flock
+import orchlib.config
+
+
+def run_script(scriptfile):
+ '''
+ Helper which executes a series of commands on set of machines
+ '''
+ script = open("scripts/{fn}.fab".format(fn=scriptfile)).read()
+ def run(*machines):
+ group = orchlib.fabric_flock.machine_group(*machines)
+
+ for line in ['hostname'] + script.split("\n"):
+ line = line.strip()
+ if len(line) > 0 and line[0] != "#":
+ print(line)
+ group.run(line)
+ return run
+
+update = run_script("update")
+baseline_packages = run_script("baseline_packages")
+python_venv = run_script("python_venv")
+
+
+def reboot(machine):
+ '''
+ Run the reboot script. We expect an exception since the remote machine
+ reboots while Fabric is connected.
+ '''
+ try:
+        print("Trying to reboot (this doesn't always work)")
+ run_script("reboot")(machine)
+ except fabric.exceptions.GroupException:
+ pass
+
+def provision(ip):
+    '''
+    Run the full provisioning sequence against a single machine IP.
+    '''
+    update(ip)
+    baseline_packages(ip)
+    python_venv(ip)
+    reboot(ip)
+
+
+if __name__ == '__main__':
+    provision(sys.argv[1])
diff --git a/devops/tasks/remote_scripts/__init__.py b/devops/tasks/remote_scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/devops/tasks/remote_scripts/gitpaths.py b/devops/tasks/remote_scripts/gitpaths.py
new file mode 100644
index 000000000..80801ca85
--- /dev/null
+++ b/devops/tasks/remote_scripts/gitpaths.py
@@ -0,0 +1,56 @@
+import os
+import os.path
+import sys
+
+
+WORKING_REPO_PATH='/home/ubuntu/'
+BARE_REPO_PATH='/home/ubuntu/baregit/'
+
+
+def gitpath_to_name(packagepath):
+ '''
+ Convert a git path to the name of the repo. For example:
+
+ `https://github.com/ETS-Next-Gen/writing_observer.git` ==> `writing_observer`
+ '''
+ package = os.path.split(packagepath)[1]
+ if package.endswith(".git"):
+ return package[:-4]
+ else:
+ return package
+
+
+def working_repopath(repo=None):
+ '''
+    Switch to the path where the *working* `git` repo is located, e.g. one
+ with a working tree, if it exists.
+ '''
+ if repo is None:
+ os.chdir(WORKING_REPO_PATH)
+ return WORKING_REPO_PATH
+
+ path = os.path.join(WORKING_REPO_PATH, repo)
+ if os.path.exists(path):
+ os.chdir(path)
+ return path
+ return False
+
+
+def bare_repopath(repo=None):
+ '''
+ Switch to the path where the *bare* `git` repo is located, i.e. one
+ without a working tree, for pushing and pulling.
+ '''
+ # If we don't have a path for bare repos, create it.
+ if(os.system("mkdir -p "+BARE_REPO_PATH)):
+ print("Error creating or accessing bare repository directory")
+ sys.exit(-1)
+
+ if repo is None:
+ os.chdir(BARE_REPO_PATH)
+ return BARE_REPO_PATH
+
+ path = os.path.join(BARE_REPO_PATH, repo)
+ if os.path.exists(path):
+ os.chdir(path)
+ return path
+ return False
diff --git a/devops/tasks/remote_scripts/tasks.py b/devops/tasks/remote_scripts/tasks.py
new file mode 100644
index 000000000..73b697427
--- /dev/null
+++ b/devops/tasks/remote_scripts/tasks.py
@@ -0,0 +1,123 @@
+'''
+This is a remote script for random `git` operations (e.g. running
+on machines in the Learning Observer flock).
+
+This is a bit awkward, but we maintain:
+
+- Public `git` repositories in `/home/ubuntu/`
+- Private `git` repositories in `/home/ubuntu/baregit` cloned into
+ `/home/ubuntu`
+
+The reason for this design is:
+
+- Pushing a nonpublic repo to a remote server is a bit awkward. Versions
+ of `git` in current distros do *not* support `push`ing into a non-bare
+ repo (although this functionality was added to bleeding-edge git). If
+ we're pushing, we want to push into a bare repo.
+- For use (e.g. for running the server), we want a working tree, so the
+ bare repos are cloned into `/home/ubuntu`.
+
+We would like to do this (relatively) statelessly, so that if a repo
+exists, we can do an update. If it's up-to-date, we can do nothing. If
+it's not there, we create it.
+
+As of this writing, this is not fully tested. We're going to test more
+fully by finishing the side from where we're orchestrating.
+
+Note that these scripts are designed to be as flexible as possible in terms
+of how a path is specified. E.g.:
+
+ inv init https://gitserver.example.com/a/foo.git
+ inv init /temp/foo
+ inv init foo
+
+Will all do the same thing. They will go into the bare repo path, and create
+an empty repository called `foo` if one doesn't already exist, ready for
+pushing.
+
+In the future, we should have a desired version and perhaps give warnings if
+the wrong one is used.
+'''
+
+import os
+import os.path
+
+import sys
+
+from invoke import task
+
+
+# We would like to use these on the remote machine, but also on the local
+# machine.
+try:
+ from gitpaths import bare_repopath, working_repopath, gitpath_to_name
+except ImportError:
+ from orchlib.gitpaths import bare_repopath, working_repopath, gitpath_to_name
+
+
+@task
+def branch(c, repo, branch):
+ '''
+ Switch to a branch in a repo.
+ '''
+ repo = gitpath_to_name(repo)
+ print("Going to to: ", working_repopath(repo))
+ command = "git checkout "+branch
+ print(command)
+ os.system(command)
+
+
+@task
+def init(c, repo):
+ '''
+ Create a new bare repo, if one does not exist already.
+
+ Otherwise, continue on silently.
+
+ This is for force pushes of remote repos.
+ '''
+ repo = gitpath_to_name(repo)
+ path = bare_repopath(repo)
+ if not path:
+ bare_repopath()
+ command = "git --bare init "+repo
+ print(command)
+ os.system(command)
+ print(bare_repopath(repo))
+
+
+@task
+def cloneupdate(c, fullrepo):
+ '''
+ Clone a remote repo.
+ '''
+ repo = gitpath_to_name(fullrepo)
+ barepath = bare_repopath(repo)
+
+ working_repopath()
+ if not working_repopath(repo):
+ print("Cloning...")
+ command = "git clone "+fullrepo
+ print(command)
+ os.system(command)
+ working_repopath(repo)
+
+ print("Updating all branches")
+ os.system("git fetch --all")
+ os.system("git pull")
+
+@task
+def cloneupdatelocal(c, repo):
+ '''
+ Clone or update a working copy from the local *bare* repo.
+ '''
+ repo = gitpath_to_name(repo)
+ cloneupdate(c, bare_repopath(repo))
+
+
+@task
+def pull(c, repo):
+ '''
+ Update a repo to the latest version.
+ '''
+ path = working_repopath(repo)
+ command = "git pull --all"
+ print(command)
+ os.system(command)
+ return path
diff --git a/devops/tasks/scripts/baseline_packages.fab b/devops/tasks/scripts/baseline_packages.fab
new file mode 100644
index 000000000..fbd88ece4
--- /dev/null
+++ b/devops/tasks/scripts/baseline_packages.fab
@@ -0,0 +1,4 @@
+cd
+sudo apt-get -y install git ansible awscli
+git clone https://github.com/ETS-Next-Gen/writing_observer.git
+cd writing_observer/devops/ansible ; sudo ansible-playbook local.yaml
diff --git a/devops/tasks/scripts/python_venv.fab b/devops/tasks/scripts/python_venv.fab
new file mode 100644
index 000000000..629c773eb
--- /dev/null
+++ b/devops/tasks/scripts/python_venv.fab
@@ -0,0 +1,7 @@
+cd
+echo . /usr/share/virtualenvwrapper/virtualenvwrapper.sh >> ~/.profile
+source ~/.profile; mkvirtualenv learning_observer
+echo workon learning_observer >> ~/.profile
+source ~/.profile; pip install --upgrade pip
+source ~/.profile; cd writing_observer/ ; pip install -r requirements.txt
+source ~/.profile; cd ~/writing_observer/learning_observer/; python setup.py develop
diff --git a/devops/tasks/scripts/reboot.fab b/devops/tasks/scripts/reboot.fab
new file mode 100644
index 000000000..4abead54f
--- /dev/null
+++ b/devops/tasks/scripts/reboot.fab
@@ -0,0 +1 @@
+sudo init 6
diff --git a/devops/tasks/scripts/update.fab b/devops/tasks/scripts/update.fab
new file mode 100644
index 000000000..96978f997
--- /dev/null
+++ b/devops/tasks/scripts/update.fab
@@ -0,0 +1,5 @@
+sudo apt-get update
+sleep 1
+sudo apt-get -y upgrade
+sleep 1
+sudo apt-get -y dist-upgrade
diff --git a/devops/tasks/settings/README.md b/devops/tasks/settings/README.md
new file mode 100644
index 000000000..55e5591ea
--- /dev/null
+++ b/devops/tasks/settings/README.md
@@ -0,0 +1,2 @@
+Add a file called CREDS.YAML here and add your security tokens. Docs in
+progress
diff --git a/devops/tasks/tasks.py b/devops/tasks/tasks.py
new file mode 100644
index 000000000..9339e993d
--- /dev/null
+++ b/devops/tasks/tasks.py
@@ -0,0 +1,351 @@
+import atexit
+import csv
+import datetime
+import itertools
+import os
+import shlex
+import sys
+
+from invoke import task
+
+import fabric.exceptions
+
+import orchlib.aws
+import orchlib.config
+import orchlib.fabric_flock
+import orchlib.templates
+import orchlib.ubuntu
+import orchlib.repos
+from orchlib.logger import system
+
+import remote_scripts.gitpaths
+
+
+@task
+def list(c):
+ '''
+ Give a human-friendly listing of all provisioned machines
+ '''
+ for instance in orchlib.aws.list_instances():
+ print("{:21} {:21} {:16} {}".format(
+ instance['InstanceId'],
+ instance['Tags']['Name'],
+ instance['PublicIpAddress'],
+ instance['Tags'].get("use", "")
+ ))
+
+
+@task
+def provision(c, machine_name):
+ '''
+ Set up a baseline image with all the packages needed for
+ Learning Observer. Note that this will **not** configure
+ the machine.
+ '''
+ print("Provisioning...")
+ machine_info = orchlib.aws.create_instance(machine_name)
+ print("Updating...")
+ ip = machine_info.public_ip_address
+ print("DNS....")
+ orchlib.aws.register_dns(machine_name, orchlib.config.creds['domain'], ip)
+ print("IP", ip)
+ orchlib.ubuntu.update(ip)
+ print("Baseline...")
+ orchlib.ubuntu.baseline_packages(ip)
+ print("Venv...")
+ orchlib.ubuntu.python_venv(ip)
+
+
+@task
+def update(c):
+ '''
+ Update all machines with the latest systems updates and security
+ patches
+ '''
+ addresses = [i['PublicIpAddress'] for i in orchlib.aws.list_instances()]
+ # Machines without IPs don't get updates
+ addresses = [i for i in addresses if i != "--.--.--.--"]
+ print(addresses)
+ orchlib.ubuntu.run_script("update")(*addresses)
+
+
+@task
+def create(c, machine_name):
+ '''
+ Create a machine end-to-end. This is a shortcut for:
+ * Provision
+ * Configure
+ * Certbot
+ * Download
+ * Reboot
+ '''
+ print("Provisioning EC2 instance")
+ provision(c, machine_name)
+ print("Configuring the Learning Observer")
+ configure(c, machine_name)
+ print("Setting up SSL")
+ certbot(c, machine_name)
+ print("Saving config")
+ downloadconfig(c, machine_name)
+ print("Rebooting")
+ reboot(c, machine_name)
+
+
+@task
+def terminate(c, machine_name):
+ '''
+ Shut down a machine.
+ '''
+ a = input("Are you sure? ")
+ if a.strip().lower() not in ['y', 'yes']:
+ sys.exit(-1)
+ orchlib.aws.terminate_instances(machine_name)
+
+
+@task
+def connect(c, machine_name):
+ '''
+ `ssh` to a machine
+ '''
+ command = "ssh -i {key} ubuntu@{machine_name}".format(
+ key=orchlib.config.creds['key_filename'],
+ machine_name = machine_name+"."+orchlib.config.creds['domain']
+ )
+ print(command)
+ system(command)
+
+
+@task
+def configure(c, machine_name):
+ '''
+ Configure a machine
+ '''
+ group = orchlib.aws.name_to_group(machine_name)
+
+ # We start by setting up `git` repos. This will fail if done later,
+ # since we need these to install pip packages, etc.
+ orchlib.repos.update(group, machine_name)
+
+ # Set up Python packages. We need git repos for this, but we might
+ # want to use these in scripts later.
+ print("Installing Python packages")
+ for package in orchlib.config.config_lines(machine_name, "pip"):
+ group.run("source ~/.profile; pip install {package}".format(
+ package=package
+ ))
+
+ template_config = {
+ "nginx_root_options": "",
+ "hostname": machine_name,
+ "domain": orchlib.config.creds['domain']
+ }
+
+ print("Uploading files")
+ uploads = [
+ l.strip().split(',')
+ for l in itertools.chain(
+ orchlib.config.config_lines(machine_name, "sync.csv"),
+ orchlib.config.config_lines(machine_name, "uploads.csv"),
+ )
+ ]
+ # We should consider switching back to csvreader, so we handle commas in
+ # the description
+ for [local_file, owner, perms, remote_file, description] in uploads:
+ print("Uploading: ", description)
+ remote_path = os.path.dirname(remote_file)
+ group.run("mkdir -p "+remote_path)
+ orchlib.templates.upload(
+ group=group,
+ machine_name=machine_name,
+ filename=local_file,
+ remote_filename=remote_file.format(**template_config),
+ config=template_config,
+ username=owner,
+ permissions=perms
+ )
+
+ for command in open("config/postuploads").readlines():
+ group.run(command.format(**template_config).strip())
+
+
+@task
+def downloadconfig(c, machine_name):
+ '''
+ After setting up certbot, it's helpful to download the nginx config
+ file. We also don't want to make changes remotely directly in deploy
+ settings, but if we have, we want to capture those changes.
+ '''
+ template_config = {
+ "nginx_root_options": "",
+ "hostname": machine_name,
+ "domain": orchlib.config.creds['domain']
+ }
+
+ group = orchlib.aws.name_to_group(machine_name)
+ downloads = [
+ l.strip().split(',')
+ for l in itertools.chain(
+ orchlib.config.config_lines(machine_name, "sync.csv"),
+ orchlib.config.config_lines(machine_name, "downloads.csv"),
+ )
+ ]
+ # We should consider switching back to csvreader, so we handle commas in
+ # the description
+ for [local_file, owner, perms, remote_file, description] in downloads:
+ print("Downloading: ", description)
+ try:
+ orchlib.templates.download(
+ group=group,
+ machine_name=machine_name,
+ filename=local_file,
+ remote_filename=remote_file.format(**template_config)
+ )
+ except fabric.exceptions.GroupException:
+ # This usually means the file is not found. In most cases,
+ # this happens when we've added a new file to the config,
+ # and we're grabbing from an old server.
+ #
+ # We should handle this more gracefully. How is TBD
+ print("Could not download file!")
+
+@task
+def certbot(c, machine_name):
+ '''
+ This sets up SSL. Note that:
+ - SSL will generally NOT work until everything else is set up
+ - This changes the nginx config. You don't want to override config
+ files later.
+ - This is untested :)
+ '''
+ group = orchlib.aws.name_to_group(machine_name)
+ CERT_CMD = "sudo certbot -n --nginx --agree-tos --redirect " \
+ "--email {email} --domains {hostname}.{domain}"
+ group.run(CERT_CMD.format(
+ email=orchlib.config.creds['email'],
+ hostname = machine_name,
+ domain=orchlib.config.creds['domain']
+ ))
+
+
+@task
+def reboot(c, machine_name):
+ '''
+ Untested: This doesn't seem to work yet....
+ '''
+ print("Trying to reboot... no promises.")
+ orchlib.ubuntu.reboot(machine_name)
+
+
+@task
+def downloadfile(c, machine_name, remote_filename, local_filename):
+ '''
+ Helper to download a single file.
+
+ This is verbose, and doesn't do wildcards. Perhaps better a helper to
+ `scp`? Don't use this in scripts until we've figured this out....
+ '''
+ group = orchlib.aws.name_to_group(machine_name)
+ group.get(
+ remote_filename,
+ local_filename
+ )
+
+
+@task
+def uploadfile(c, machine_name, remote_filename, local_filename):
+ '''
+ Helper to upload a single file.
+
+ This is verbose, and doesn't do wildcards. Perhaps better a helper to
+ `scp`? Don't use this in scripts until we've figured this out....
+ '''
+ group = orchlib.aws.name_to_group(machine_name)
+ # fabric's put() takes (local, remote)
+ group.put(
+ local_filename,
+ remote_filename
+ )
+
+
+@task
+def runcommand(c, machine_name, command):
+ '''
+ Run a remote command. Don't forget quotes!
+ '''
+ group = orchlib.aws.name_to_group(machine_name)
+ group.run(command)
+
+
+@task
+def hello(c):
+ '''
+ For testing!
+
+ For example, hooks.
+ '''
+ print("Hello, world!")
+
+
+@task
+def backup(c, machine_name, target):
+ '''
+ Grab a backup of a given directory by name
+ '''
+ targets = {
+ 'nginx': "/var/log/nginx/",
+ 'certs': "/etc/letsencrypt/"
+ }
+
+ if target not in targets:
+ print("Invalid target. Should be one of:")
+ print("\n".join(targets))
+ sys.exit(-1)
+
+ ts = datetime.datetime.utcnow().isoformat().replace(":", "-")
+ filebase = "{ts}-{mn}-{tg}".format(
+ ts=ts,
+ mn=machine_name,
+ tg=target
+ )
+
+ command = "tar /tmp/{filebase} {target}".format(
+ filebase=filebase,
+ target=target
+ )
+
+ group = orchlib.aws.name_to_group(machine_name)
+ group.get(
+ remote_filename,
+ local_filename
+ )
+
+
+@task
+def commit(c, msg):
+ '''
+ This should probably not be a task but a utility function. It's
+ helpful for debugging, though.
+ '''
+ system(
+ "cd {gitpath} ; git add -A; git commit -m {msg}".format(
+ gitpath=orchlib.config.creds["flock-config"],
+ msg=msg
+ )
+ )
+
+
+START_TIME = datetime.datetime.utcnow().isoformat()
+
+def committer():
+ '''
+ On exit, commit changes to repo. This code is not finished.
+ '''
+ command_options = shlex.quote(" ".join(sys.argv))
+ stop_time = datetime.datetime.utcnow().isoformat()
+ log = {
+ 'start_time': START_TIME,
+ 'stop_time': stop_time,
+ 'command_options': command_options
+ }
+
+
+atexit.register(committer)
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 000000000..ca86761b2
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.8'
+services:
+ app:
+ build: .
+ volumes:
+ - ./:/app
+ image: learning-observer-image:latest
+ stdin_open: true # Keep standard input open
+ tty: true # Allocate a pseudo-TTY
+ restart: always
+ ports:
+ - 8888:8888
+ depends_on:
+ - redis
+ redis:
+ image: redis:latest
diff --git a/docs/_images/block.png b/docs/_images/block.png
new file mode 100755
index 000000000..734a1c4c8
Binary files /dev/null and b/docs/_images/block.png differ
diff --git a/docs/_images/block.svg b/docs/_images/block.svg
new file mode 100755
index 000000000..9eda47d49
Binary files /dev/null and b/docs/_images/block.svg differ
diff --git a/docs/_images/lo_block.png b/docs/_images/lo_block.png
new file mode 100755
index 000000000..cb5bb92c4
Binary files /dev/null and b/docs/_images/lo_block.png differ
diff --git a/docs/_images/lo_block.svg b/docs/_images/lo_block.svg
new file mode 100755
index 000000000..df6dd3236
Binary files /dev/null and b/docs/_images/lo_block.svg differ
diff --git a/docs/_images/mmnd.png b/docs/_images/mmnd.png
new file mode 100755
index 000000000..360459dcd
Binary files /dev/null and b/docs/_images/mmnd.png differ
diff --git a/docs/_images/mmnd.svg b/docs/_images/mmnd.svg
new file mode 100755
index 000000000..24b0a6d73
Binary files /dev/null and b/docs/_images/mmnd.svg differ
diff --git a/docs/backlog.md b/docs/backlog.md
new file mode 100644
index 000000000..e2b7de766
--- /dev/null
+++ b/docs/backlog.md
@@ -0,0 +1,59 @@
+Project Backlog
+===============
+
+* Figure out why LO doesn't start on reboot, or how to make it restart
+ on crashes
+* Figure out if/when document ID is missing
+* Switch to the annotated canvas
+* Be able to pull a document associated with a specific assignment in
+ Google Classroom
+* Implement roll-offs for whole-document operations (e.g. long-running
+ NLP operations, which should be run periodically)
+ - Implement simple algorithm, comment on complex algorithms
+
+Robustness
+----------
+
+* Confirm what happens with students working in groups
+* How do we capture formatting?
+* How do we handle an Outline view?
+* What happens with large documents?
+* What happens with editing outside of the system
+
+Plumbing
+-------
+
+* Robust queues client-side
+* Client auth/auth
+* Handle server disconnects
+* Proper test frameworks
+ - Replay
+* Refactor rosters
+
+Additional features
+-------------------
+
+* How do we handle peer groups?
+* Create more dashboards
+1. Flagging students in need of help?
+2. Providing information about use of academic vocabulary?
+
+APIs
+----
+
+* Generate dashboards with generic aggregate operations
+* Handle client config robustly
+* Figure out how to integrate slooow NLP algorithm calls into the
+ real-time server architecture
+
+Logging
+-------
+
+* Implement robust data store
+
+Scaling
+-------
+
+* Database / database schemas for user management if we wish to move
+ beyond pilot
+* Online settings management?
\ No newline at end of file
diff --git a/docs/concepts/architecture.md b/docs/concepts/architecture.md
new file mode 100644
index 000000000..084a65170
--- /dev/null
+++ b/docs/concepts/architecture.md
@@ -0,0 +1,74 @@
+# Architecture
+
+Piotr Mitros
+
+## Introduction
+
+Like all such documents, this document should be taken with a grain of
+salt. It may be out-of-date, or not fully implemented.
+
+## Overview
+
+1. Events come from a web browser over a web socket.
+2. The server performs a reduce operation of some kind on these
+ events. This operation maintains a per-student state (for each
+ plug-in) inside of a KVS.
+3. A subset of the internal state is used to compute state as sent to
+ an educator dashboard.
+4. Whenever an event is processed, consumers are notified via a pubsub.
+5. Consumers can aggregate these notifications, inspect the external state,
+ and make a dashboard.
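+
+A minimal sketch of the reduce step (step 2), using invented names; the
+real reducers are registered through decorators in the `learning_observer`
+package, and their state is persisted to the KVS for steps 3-5:
+
+```python
+# Hypothetical per-student reducer. `state` is whatever this reducer last
+# stored in the KVS for this student; `event` is one incoming JSON event.
+async def count_keystrokes(event, state):
+    if state is None:
+        state = {'keystroke_count': 0}
+    if event.get('event') == 'keystroke':
+        state['keystroke_count'] += 1
+    # The updated state goes back into the KVS; a subset of it is what
+    # dashboards eventually see (steps 3-5).
+    return state
+```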
+
+## Application platform structure
+
+Learning Observer acts as the shared platform that hosts and coordinates
+modules. The core `learning_observer` package owns the boot process: it
+loads configuration, establishes connections to databases and pub/sub
+systems, and exposes the APIs modules use to register reducers,
+dashboards, and other artifacts. Individual modules focus on defining
+those artifacts, relying on the platform to handle data ingestion and
+communication so new functionality can be added without duplicating the
+runtime infrastructure.
+
+### Technology choices
+
+1. Generic student information (e.g. names, auth, etc.) can live in
+ flat files on the disk, sqlite, or postgres. As of this writing, this
+ is not built.
+2. The KVS for the reduce can either be an in-memory queue or
+ redis. Redis can be persistent (for deploy) or ephemeral (for
+ development). As of this writing, all three work.
+3. The pub-sub can be an in-memory queue (for development), redis (for
+ easy deployment), or xmpp (for scalable deployment). As of this writing,
+ all three work, but xmpp is buggy/unfinished.
+4. The front-end uses bulma and d3.js.
+
+### Architectural Constraints
+
+1. By design, this system should be in a usable (although not
+ necessarily scalable or reliable) state with just a `pip
+ install`. There should be no complex webs of dependencies.
+2. However, there can be a complex web of dependencies for robust,
+ scalable deployment. For example, we might use an in-memory
+ queue in a base system, and switch to a high-performance data
+ store for deployment.
+3. For most abstractions, we want to initially build 2-3 plug-ins. For
+ example, we're initially building this with 2-3 streaming
+ modules. We support 2-3 pubsubs, and 2-3 key-value stores. This is
+ enough to, in most cases, guarantee the abstractions aren't
+ specific to one thing. However, it's small enough we can change
+ both sides of an API boundary.
+4. Once we know we have the right set of abstractions, we can open up
+ the flood gates to more plug-ins.
+
+### Process constraints
+
+It's better to say "no" to a feature than to break the
+architecture. We're in this for the long haul. It's okay to have
+scaffolding, though. Half-built things are okay if they're in the
+right place, and can be incrementally evolved to be right.
+
+We try to avoid any technical debt which carries high interest (higher
+maintenance costs down the line) -- bad APIs, etc. We don't mind
+low-interest technical debt nearly as much (things which need to get
+finished later, but won't blow up).
diff --git a/docs/concepts/auth.md b/docs/concepts/auth.md
new file mode 100644
index 000000000..a919799e6
--- /dev/null
+++ b/docs/concepts/auth.md
@@ -0,0 +1,114 @@
+Authentication Framework
+========================
+
+We have two types of authentication:
+
+* We would like to know that events coming into the system are coming
+ from where we believe they are.
+* We would like to know that users log into the system who can view
+ student data are who we think they are.
+
+For the most part, these have very different security profiles. If a
+user can spoof events, the worst-case outcome is:
+
+* A disrupted study
+* A disrupted teacher dashboard
+
+In small-scale studies, demos, and similar, a high level of security
+is not required, especially when running on `localhost`, VPN, or in an
+IP-restricted domain.
+
+On the other hand, we **cannot** leak student data. Authenticating
+teachers and staff requires a high level of security.
+
+Event authentication
+--------------------
+
+Events are authenticated in the file `events.py`. This is
+semi-modular. We have several authentication schemes, most of which
+rely on a special header. We used to include auth information with
+each event, and we have some backwards-compatibility code there as
+well.
+
+Event authentication isn't super-modular yet; it's all in one file,
+but the schemes are pluggable. Schemes include:
+
+* `guest`. Each session is assigned a unique guest ID. This is nice
+ for demos, studies, and coglabs.
+* `local_storage`. Designed for Chromebooks. Each user is given a
+ unique token, usually stored in the extension's local storage. The
+ header sends a unique, secure token mapping to one user.
+* `chromebook`. The Chromebook sends a user ID. This is *not secure*
+ and vulnerable to spoofing. It can be combined with `local_storage`
+ to be secure.
+* `hash_identify`. User sends an identity, which is not
+ authenticated. This is typically for small coglabs, where we might
+ have a URL like `http://myserver/user-study-5/#user=zihan`
+* `testcase_auth`. Quick, easy, and insecure for running testcases.
+
+We do maintain provenance for events, so we can tell which ones came
+from secure or insecure sources.
+
+We need Google OAuth.
+
+Teacher authentication
+----------------------
+
+As authentication schemes, we support:
+
+* Password authentication
+* Trusting HTTP basic auth from nginx
+* Google OAuth
+
+We need to be particularly careful with the second of
+these. Delegating authentication to `nginx` means that we need to
+have nginx properly configured, or we can be attacked.
+
+User authentication is intended to be fully modular, and we intend to
+support more schemes in the future. Right now, each scheme is in its
+own file, with `handlers.py` defining a means to log users in, out, as
+well as a middleware which annotates the request with user
+information.
+
+Session framework
+-----------------
+
+We keep track of users through
+[aiohttp_session](https://aiohttp-session.readthedocs.io/en/stable/). We
+store tokens encrypted client-side, which eliminates the need for
+database fields.
+
+User information
+----------------
+
+We keep track of user information in a dictionary. Eventually, this will
+probably be a dictionary-like object.
+
+Current fields:
+
+* `name`: We keep full name, since not all languages have a first name /
+ last name order and breakdown.
+* `nick`: Short name. For a teacher, this might be "Mrs. Q" or "李老师."
+ For a student, this might be "Timmy." In the future, we might think
+ through contexts and relationships (e.g. a person might be a teacher,
+ a coworker, and student)
+* `user_id`: Our internal user ID. In most cases, this is the authentication
+ scheme, followed by the ID within that scheme. For example, Google user
+ 1234, we might call 'gc-1234.' Test case user 65432, we might call
+ `tc-65432`
+* `safe_user_id`: An escaped or scrubbed version of the above. In some cases,
+ we have data from unauthenticated sources, and we don't want injection
+ attacks. There is an open question as to which of these is canonical,
+ and whether these ought to be swapped (e.g. `source_user_id` and
+ `user_id`). It depends on where we do JOIN-style operations more often.
+* `picture`: Avatar or photo for the user
+* `google_id`: Google username
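+
+As a purely hypothetical example, a teacher record using only the fields
+listed above might look like this (all values invented for illustration):
+
+```python
+user = {
+    'name': 'Maria Quintero',
+    'nick': 'Mrs. Q',
+    'user_id': 'gc-1234',       # auth scheme prefix + ID within that scheme
+    'safe_user_id': 'gc-1234',  # escaped / scrubbed version of user_id
+    'picture': 'https://example.org/avatar.png',
+    'google_id': 'maria.quintero'
+}
+```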
+
+We will want to think about how to handle joins. Users often have multiple
+accounts which should be merged:
+
+* A user signs up through two mechanisms (e.g. signs up with passwords and
+ then Google)
+* Users are autoenrolled (e.g. through two educational institutions)
+* Automatic accounts convert into permanent accounts (e.g. data begins
+ streaming in for an unauthenticated / guest user)
diff --git a/docs/concepts/communication_protocol.md b/docs/concepts/communication_protocol.md
new file mode 100644
index 000000000..3738286a4
--- /dev/null
+++ b/docs/concepts/communication_protocol.md
@@ -0,0 +1,131 @@
+# Communication Protocol
+
+The communication protocol is Learning Observer's query and transport
+layer. It allows dashboards, notebooks, and other clients to request
+aggregated data from the key-value store and supporting services by
+submitting a declarative *execution DAG* (directed acyclic graph). The
+server evaluates the DAG node-by-node, resolves the required
+parameters, executes reducers or helper functions, and returns the
+assembled result. This document explains how that process fits
+together, the core building blocks you can use in a query, and the
+helper utilities that make it easier to integrate those queries into
+applications.
+
+## Lifecycle of a Request
+
+1. **Query construction** - A client builds a nested query description
+ in Python (or another language) with the helpers in
+ `learning_observer.communication_protocol.query`. The helpers mirror
+ relational concepts such as parameters, joins, and projections and
+ produce JSON-serialisable dictionaries. (See: query.py L1-L123)
+2. **Flattening** - Before execution, the DAG is normalised so every
+ node has a unique identifier and can reference other nodes via
+ `variable` pointers. The `flatten` utility rewrites nested
+ structures such as `select(keys(...))` into separate nodes to make
+ evaluation straightforward. (See: util.py L1-L59)
+3. **Execution** - The executor walks the flattened DAG, dispatching
+ each node type to a registered handler. Nodes can call Python
+ functions, fetch keys from the key-value store, join intermediate
+ datasets, or map functions across collections. The executor
+ assembles the final payload and enforces error handling through the
+ `DAGExecutionException` type. (See: executor.py L1-L145, L147-L220)
+4. **Exports** - Queries expose named *exports* that identify the DAG
+ nodes clients may request. The integration layer can bind those
+ exports to callables so dashboards or notebooks can invoke them as
+ regular async functions. (See: util.py L64-L104, integration.py L38-L102)
+
+This flow supports both server-defined queries and open-ended
+exploration. Production deployments typically offer curated, predefined
+queries while development tooling exposes the full language for
+experimentation. (See: README.md L11-L36)
+
+## Core Node Types
+
+Every node in the execution DAG has a `dispatch` type that determines
+how the executor evaluates it. The query helper functions generate the
+correct shape for each node type. (See: query.py L19-L123) The most common nodes are:
+
+- **`parameter`** - Declares a runtime argument. Parameters can be
+ required or optional, and the executor substitutes provided values or
+ defaults before downstream nodes run. (See: query.py L33-L42, executor.py L114-L144)
+- **`variable`** - References the output of another node in the DAG.
+ These indirections are automatically inserted during flattening but
+ can also be used explicitly when wiring complex queries. (See: query.py L45-L52, util.py L13-L61)
+- **`call`** - Invokes a published Python function on the server.
+ Functions are registered with `publish_function`, which ensures every
+ callable has a unique name. Called functions may be synchronous or
+ asynchronous; the executor awaits results as needed. (See: query.py L55-L67, executor.py L61-L112, integration.py L21-L47)
+- **`keys`** - Produces the key descriptions required to fetch reducer
+ outputs from the key-value store. Keys nodes typically wrap the
+ outputs of roster or metadata queries so downstream `select` nodes
+ can retrieve the associated reducer documents. (See: query.py L114-L123, util.py L72-L102)
+- **`select`** - Retrieves documents from the key-value store for the
+ provided keys. You can request all fields or limit to specific
+ projections via `SelectFields` enumerations. (See: query.py L70-L83)
+- **`join`** - Merges two lists of dictionaries on matching keys using
+ dotted-path lookups. Left rows are preserved even without a matching
+ right-hand record, making it straightforward to enrich reducer
+ outputs with roster data. (See: query.py L86-L96, executor.py L147-L220)
+- **`map`** - Applies a published function to each value in a list,
+ optionally in parallel, returning the transformed collection. This is
+ useful for server-side post-processing or feature extraction before a
+ result is exported. (See: query.py L99-L111)
+
+## Building Queries Efficiently
+
+Writing DAGs by hand is verbose, so the protocol provides shorthands
+for common access patterns. For example,
+`generate_base_dag_for_student_reducer` returns an execution DAG that
+retrieves the latest reducer output for every student in a course,
+including roster metadata and a preconfigured export entry. Dashboards
+use this helper to quickly expose reducer results without writing the
+full DAG each time. (See: util.py L63-L101)
+
+The `integration` module can also bind exports directly to a module so
+code can call `await module.student_event_counter_export(course_id=...)`
+instead of manually constructing requests. This keeps the protocol's
+flexibility while offering ergonomic entry points for UI
+components. (See: integration.py L49-L102)
+
+## WebSocket Endpoint
+
+Dashboards and other clients interact with the communication protocol
+through a dedicated WebSocket endpoint exposed at
+`/wsapi/communication_protocol`. The aiohttp application wires that path
+to `websocket_dashboard_handler`, making the protocol available to
+browser sessions and backend consumers alike. (See: learning_observer/routes.py L195-L213)
+
+When a client connects, the handler waits for a JSON payload describing
+one or more queries. Each entry typically includes the flattened
+`execution_dag`, a list of `target_exports` to stream, and optional
+`kwargs` that provide runtime parameters. Whenever the client submits a
+new payload, the server builds the requested DAG generators, executes
+them, and schedules reruns based on the provided settings. Responses are
+batched into arrays of `{op, path, value}` records so the client can
+efficiently apply partial updates to its local state. (See: learning_observer/dashboard.py L331-L411)
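+
+A schematic aiohttp client for this endpoint, assuming the payload and
+response shapes described above; the exact envelope the handler expects
+(for example, how multiple queries are keyed) may differ from this sketch:
+
+```python
+import asyncio
+import json
+
+import aiohttp
+
+
+async def stream_dashboard(url, query_payload):
+    # query_payload carries the fields described above: a flattened
+    # `execution_dag`, the `target_exports` to stream, and any `kwargs`.
+    async with aiohttp.ClientSession() as session:
+        async with session.ws_connect(url) as ws:
+            await ws.send_json(query_payload)
+            async for msg in ws:
+                if msg.type != aiohttp.WSMsgType.TEXT:
+                    break
+                # Responses arrive as arrays of {op, path, value} records.
+                for record in json.loads(msg.data):
+                    print(record.get('op'), record.get('path'), record.get('value'))
+
+# asyncio.run(stream_dashboard(
+#     'ws://localhost:8888/wsapi/communication_protocol', my_payload))
+```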
+
+## Tooling and Debugging
+
+Two exploratory tools live alongside the protocol implementation:
+
+- `debugger.py` - Provides an interface for submitting ad-hoc queries
+ and inspecting intermediate results.
+- `explorer.py` - Lists predefined queries already published on the
+ server so you can execute them interactively.
+
+Because the protocol is evolving, these tools occasionally require
+updates when the underlying schema changes. Keeping the communication
+protocol documented and covered by tests makes it easier to spot and
+fix those regressions quickly. (See: README.md L45-L72)
+
+## Security Considerations
+
+Production deployments default to predefined queries so clients can
+only request vetted datasets. Open-query mode should be restricted to
+trusted environments—such as local notebooks or read replicas—because
+it allows arbitrary function calls and joins that may expose sensitive
+information or stress backing stores. (See: README.md L11-L36)
+
+Understanding these concepts makes it easier to extend the protocol,
+design new reducers, and reason about the performance characteristics
+of dashboards built on Learning Observer.
diff --git a/docs/concepts/events.md b/docs/concepts/events.md
new file mode 100644
index 000000000..d4b1d12d2
--- /dev/null
+++ b/docs/concepts/events.md
@@ -0,0 +1,162 @@
+# Event Format
+
+Our event format is inspired in part by IMS Caliper, xAPI/Tincan, and the edX
+tracking log events. None of these standards are quite right for our
+application, but several are close. They're pretty good standards!
+
+## Limitations of Industry Formats
+
+* **Verbosity.** Both Caliper and xAPI require a lot of cruft to be appended to
+ events. For example, we have random GUIDs, URLs, and other redundancy on each
+ event. Having a little bit of context (e.g. a header) or a little rationale
+ (e.g. IDs which point into a data store) is sometimes good, but too much is a
+ problem. With too much redundancy, events can get massive:
+ * Our speed in handling large data scales with data size. Megabytes can be
+ done instantly, gigabytes in minutes, and terabytes in hours. Cutting data
+ sizes makes working with data easier.
+ * Repeating oneself can lead to inconsistent data. Data formats where data
+ goes in one place (or where redundancy is intentional and engineered for
+ data correction) are more robust and less bug-prone.
+* **Envelopes.** Caliper payloads are bundled in JSON envelopes. This is a
+ horrible format because:
+ * It results in a lot of additional parsing...
+ * ... of very large JSON objects.
+ * If there's an error or incompatibility anywhere, you can easily lose a
+ whole block of data.
+ * You can't process events in realtime, for example, for formative feedback.
+
+Text files with one JSON event per line are more robust and more scalable:
+
+* They can be processed as a stream, without loading the whole file.
+* Individual corrupt events don't break the entire pipeline-you can skip bad
+ events.
+* They can be streamed over a network.
+* They can be preprocessed without decoding. For example, you can filter a file
+ for a particular type of event, student ID, or otherwise with a plain text
+ search. The primary goal of first-stage preprocessing is simply to quickly cut
+ down data size, so it doesn't need to reject 100% of irrelevant events.
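+
+As a small illustration, a first-stage filter over such a log needs nothing
+more than line-by-line string matching (the file name and search term here
+are made up):
+
+```python
+def rough_filter(path, needle):
+    # First-pass preprocessing: keep lines mentioning `needle` (an event
+    # type, student ID, etc.) without parsing any JSON. It only needs to
+    # cut the data down, not to be exact.
+    with open(path) as events:
+        for line in events:
+            if needle in line:
+                yield line
+
+# for line in rough_filter('events.log', 'keystroke'):
+#     ...
+```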
+
+* **Details.** In many cases, the details of a format are inappropriate for a
+ given purpose. There are event types which are in neither Tincan/xAPI nor
+ Caliper, and don't fit neatly into their frameworks. For example:
+ * Formats specify timestamps with great precision, while coarse events (such
+ as a student graduating) don't maintain that precision.
+ * In one of our clients, events are generated without a user identifier, which
+ is then added by the server once the user is authenticated. For these
+ events, validation fails.
+ * Related to the above, fields are sometimes repeated (e.g. client-side
+ timestamp, server-side timestamp, and further timestamps as the event is
+ processed by downstream systems). Much of this fits into security;
+ downstream systems should not trust data from upstream systems. For example,
+ a student shouldn't be able to fake submitting a homework assignment earlier
+ than they did, and a school should not be able to backdate a state exam
+ response.
+
+There are similar minor mismatches for group events, very frequent events (such
+as typing), and other types of events not fully anticipated when the standards
+were created.
+
+I'd like to emphasize that, in contrast to most industry formats, these are
+quite good. They're not fundamentally broken.
+
+## How We'd Like to Leverage Industry Formats
+
+Fortunately, we don't need 100% compatibility for pretty good interoperability.
+Our experience is that event formats are almost never interchangeable between
+systems; even with standardized formats, the meaning changes based on the
+pedagogical design. This level of compatibility is enough to give interoperability
+without being constrained by details of these formats.
+
+Our goal is to be compatible where convenient. Pieces we'd like to borrow:
+
+* Critically, the universe has converged on events as JSON lines. This already
+ allows for common data pipelines.
+* We can borrow vocabulary-verbs, nouns, and similar.
+* We can borrow field formats, where sensible.
+
+With this level of standardization, adapting to data differences is typically
+already less work than adapting to differences in underlying pedagogy.
+
+## Where We Are
+
+We have not yet done more careful engineering of our event format. Aside from a
+JSON-event-per-line, the above level of compatibility is mostly aspirational.
+
+## Incoming Event Flow
+
+Incoming events reach the Learning Observer through `/wsapi/in/`, which
+establishes a long-lived websocket for each client session. The websocket stream
+is processed through a series of generators that progressively enrich and
+validate each message before it reaches reducers.
+
+1. **Initial decode and logging.** Every websocket frame is decoded by
+ `event_decoder_and_logger`, which writes the raw payloads to per-session log
+ files. When the Merkle feature flag is enabled, the same routine also commits
+ events to the Merkle store using the configured backend. This stage ensures
+ that we always have an immutable audit log of the stream.
+2. **Lock fields and metadata.** Clients typically send a `lock_fields` event
+ first to declare metadata such as the `source`, `activity`, or other
+ immutable context. These fields are cached and injected into subsequent
+ events so downstream reducers receive consistent metadata. Server-side
+ metadata like IP and user agent is added separately via `compile_server_data`
+ and cannot be spoofed by the client.
+3. **Authentication.** The pipeline buffers events until
+ `learning_observer.auth.events.authenticate` confirms the session. Successful
+ authentication attaches the derived `auth` context-containing identifiers
+ like the `user_id`-to each event before it continues. The websocket
+ acknowledges authentication so the client can react if credentials are
+ missing or invalid.
+4. **Protection stages.** Events flow through guardrails that:
+ * stop processing on an explicit `terminate` event and close the associated
+ log files,
+ * block sources that appear on the runtime blacklist, notifying the client
+ when a block occurs, and
+ * handle optional blob storage interactions (`save_blob` and `fetch_blob`)
+ that reducers can request.
+5. **Reducer preparation.** After authentication and metadata are in place we
+ call `handle_incoming_client_event`. This builds a pipeline from the declared
+ client `source`. Each source maps to a set of stream analytics modules that
+ expose coroutine reducers. Reducers are partially applied with metadata
+ (including the authenticated user) so they can maintain per-student state.
+6. **Reducer execution.** Every canonicalized event passes through the prepared
+ reducers. Events are logged a second time-now with server metadata and
+ authentication context-and reducers update their internal state. If the
+ reducer definitions change during a session (e.g. due to a hot reload in
+ development) the pipeline is rebuilt on the next event.
+
+This staged processing allows us to maintain separate concerns for logging,
+authentication, safety checks, and analytics while keeping the event format
+itself lightweight. Clients only need to agree on the JSON structure of events,
+while the server handles durability and routing responsibilities on their
+behalf.
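+
+A purely illustrative client-side sketch of this contract; the exact shape
+of a `lock_fields` message, and of events generally, is not specified here,
+so the field layout below is an assumption:
+
+```python
+import json
+
+# Stage 2 above: declare immutable context once (assumed field layout).
+lock_fields = {
+    'event': 'lock_fields',
+    'source': 'org.example.client',  # hypothetical source identifier
+    'activity': 'essay-draft'        # hypothetical activity label
+}
+# Ordinary events then follow, one JSON object per websocket message.
+events = [
+    {'event': 'keystroke', 'character': 'a'},
+    {'event': 'keystroke', 'character': 'b'}
+]
+
+for message in [lock_fields] + events:
+    # In a real client: await ws.send_str(json.dumps(message))
+    print(json.dumps(message))
+```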
+
+## Configuring domain-based event blacklisting
+
+Incoming events can be blacklisted through PMSS rules so that specific domains
+either continue streaming, are told to retry later, or drop events entirely.
+The `blacklist_event_action` setting controls the action and defaults to
+`TRANSMIT`. Define rules under the `incoming_events` namespace and include a
+`domain` attribute to scope the behavior per organization. When the action is
+`MAINTAIN`, the `blacklist_time_limit` setting controls whether the client
+should wait a short time or stop sending forever.
+
+```pmss
+incoming_events {
+ blacklist_event_action: TRANSMIT;
+}
+
+incoming_events[domain="example.org"] {
+ blacklist_event_action: DROP;
+}
+
+incoming_events[domain="pilot.example.edu"] {
+ blacklist_event_action: MAINTAIN;
+ blacklist_time_limit: DAYS;
+}
+```
+
+When a client connects, the server extracts a candidate domain from the event
+payload and uses it to resolve the `blacklist_event_action` setting. If
+a rule returns `DROP`, the client is instructed to stop sending events.
+`MAINTAIN` asks the client to retain events and retry after a delay (as defined
+by `blacklist_time_limit`), while `TRANSMIT` streams events normally.
diff --git a/docs/concepts/history.md b/docs/concepts/history.md
new file mode 100644
index 000000000..5b3e89e97
--- /dev/null
+++ b/docs/concepts/history.md
@@ -0,0 +1,29 @@
+History
+=======
+
+Second prototype
+-------
+
+The second prototype integrated with Google Classroom, and presented a
+(less pretty, more cluttered) consolidated view with:
+
+* Current student typing
+* Time-on-task, idle time, and text length
+
+First prototype
+-------
+
+Our first version of the tool was a UX mockup (with real front-end
+code, but no backend). We had five tabs, of which two are shown. The
+typing view showed a block of text around the student cursor in
+real-time
+
+The outline view showed section titles, and how much text students had
+written in each section.
+
+In addition, we had a view which showed summary stats (e.g. amount of
+text written), contact info for students, as well as a visualization
+of the students' writing process. Teachers wanted a streamlined view
+which showed just text around the cursor and three of the summary
+stats (amount of text written, idle time, and time-on-task). Most of
+the other stuff felt like too much.
diff --git a/docs/concepts/key_value_store.md b/docs/concepts/key_value_store.md
new file mode 100644
index 000000000..5d994724f
--- /dev/null
+++ b/docs/concepts/key_value_store.md
@@ -0,0 +1,107 @@
+# Key-value store
+
+Learning Observer reducers and dashboards communicate through a key-value store (KVS).
+Reducers write internal state and dashboard-facing summaries to the store, while
+queries and presentation layers read those JSON blobs back. The
+[`learning_observer.kvs` module](../../learning_observer/learning_observer/kvs.py)
+wraps the different storage backends behind a common async API.
+
+## Router and lifecycle
+
+The `KVSRouter` constructed during startup owns every configured backend. When
+`learning_observer.prestartup` runs it reads the `kvs` section from system
+settings, instantiates the requested implementations, and exposes them through
+the module-level `KVS` callable. Most code imports `learning_observer.kvs.KVS`
+and invokes it to obtain the default backend:
+
+```python
+from learning_observer import kvs
+
+store = kvs.KVS() # returns the default backend configured in settings
+value = await store[key]
+```
+
+Reducers obtain the store implicitly through decorators such as
+`kvs_pipeline` and `student_event_reducer`. Those helpers capture the module
+context, derive the reducer keys, and persist the reducer's internal and
+external state back to the configured KVS.
+
+The router also exposes named backends as attributes. If a module needs to
+store data in a non-default backend it can call `kvs.KVS.<name>()` where
+`<name>` matches the identifier from configuration.
+
+## Configuring backends
+
+The `kvs` block in `creds.yaml` (or an equivalent PMSS overlay) declares each
+backend. Settings accept either a mapping or an array of key/value tuples.
+Every entry must provide a `type` that matches one of the built-in
+implementations:
+
+```yaml
+kvs:
+ default:
+ type: filesystem
+ path: .lo_kvs
+ redis_cache:
+ type: redis_ephemeral
+ expiry: 30
+```
+
+During startup the router validates the configuration and raises a
+`StartupCheck` error if a backend is missing required parameters or references
+an unknown type. Once the process finishes booting, the resulting callable is
+available to the rest of the system as `learning_observer.kvs.KVS`.
+
+### Supported types
+
+| Type | Class | Persistence behavior | Required settings |
+|------------------|-----------------------------|------------------------------------------------------------------------|-----------------------------------------------|
+| `stub` | `InMemoryKVS` | Data lives only in process memory and disappears on restart. | None |
+| `redis_ephemeral`| `EphemeralRedisKVS` | Uses Redis with a per-key TTL for temporary caches. | `expiry` (seconds) |
+| `redis` | `PersistentRedisKVS` | Stores data in Redis without an expiry; persistence depends on Redis. | Redis connection parameters in system config. |
+| `filesystem` | `FilesystemKVS` | Serializes each key to JSON on disk for simple local persistence. | `path`; optional `subdirs` boolean |
+
+All backends share the async API: `await store[key]`, `await store.set(key, value)`,
+`await store.keys()`, and `await store.dump()` for debugging.
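+
+For example, a small debugging routine that exercises this API against the
+configured default backend (the key and value are made up):
+
+```python
+import asyncio
+
+from learning_observer import kvs
+
+
+async def demo():
+    store = kvs.KVS()
+    await store.set('demo-key', {'hello': 'world'})
+    print(await store['demo-key'])  # -> {'hello': 'world'}
+    print(await store.keys())       # every key in the backend
+    print(await store.dump())       # full contents, for debugging
+
+# asyncio.run(demo())  # needs a configured Learning Observer environment
+```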
+
+### Filesystem layout
+
+The filesystem implementation writes JSON documents under the configured path.
+If `subdirs` is true it mirrors slash-separated key prefixes into nested
+folders while prefixing directory names with underscores to avoid collisions.
+This backend is convenient for workshops or debugging because state survives
+restarts as long as the directory remains intact, but it is not designed for
+large-scale deployments.
+
+### Redis variants
+
+Both Redis implementations rely on the shared connection utilities in
+`learning_observer.redis_connection`. The ephemeral variant requires an
+`expiry` value so it can set a TTL when calling `SET`, making it suitable for
+integration tests or scratch environments. The persistent variant omits the TTL
+so keys remain until explicitly deleted or until the Redis server evicts them.
+Ensure the Redis instance has persistence enabled (`appendonly` or RDB
+snapshots) if the deployment expects reducer state to survive reboots.
+
+### In-memory stub
+
+The stub backend keeps a Python dictionary in memory. It is useful for unit
+tests or prototype scripts but should not be used when the process restarts or
+scales across workers. The module exposes a `clear()` helper to wipe the store
+between tests.
+
+## Working with reducer state
+
+Reducer keys follow the pattern `<scope>,<module>,<selector>`, where the scope
+captures whether the state is internal or external, the module identifies the
+producer, and the selector encodes the entity (for example, a student ID). When
+reducers process new events they read the previous state from the KVS, compute
+an updated value, and call `set()` to write it back. Dashboards and protocol
+adapters then fetch the external state by constructing the same key or by using
+higher-level query helpers that wrap the KVS API.
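+
+A purely illustrative sketch of that pattern; real keys are produced by the
+reducer decorators and query helpers, and the scope label, module name, and
+student ID below are invented:
+
+```python
+from learning_observer import kvs
+
+
+async def read_external_state():
+    # <scope>,<module>,<selector>, per the pattern described above
+    key = ','.join(['External', 'writing_observer.time_on_task', 'student-42'])
+    store = kvs.KVS()
+    return await store[key]
+```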
+
+If a dashboard appears empty after restarting the server, confirm which backend
+is active. In-memory and ephemeral Redis stores start empty on boot, so the
+system needs a fresh stream of events to repopulate reducer state. Filesystem
+and persistent Redis backends retain data unless their underlying storage was
+cleared.
diff --git a/docs/concepts/privacy.md b/docs/concepts/privacy.md
new file mode 100644
index 000000000..05a7649a0
--- /dev/null
+++ b/docs/concepts/privacy.md
@@ -0,0 +1,292 @@
+# Privacy
+
+Piotr Mitros
+
+**Disclaimer:** This is a work-in-progress. It is a draft for
+discussion. It should not be confused with a legal document (policy,
+contract, license, or otherwise). It has no legal weight. As of the
+time of this disclaimer, it has not been reviewed by anyone other than
+Piotr Mitros, who does not speak for either ETS or anyone else. I'm
+soliciting feedback from collaborators (hence, it is here), but it
+shouldn't be confused with any sort of policy (yet). It's a working
+document to figure stuff out.
+
+This was written when we were sprinting to respond to COVID19 remote
+learning in spring 2020.
+
+## Introduction
+
+It is our goal to treat educational data with a hybrid model between
+that of data as
+[public good](https://en.wikipedia.org/wiki/Public_good_(economics))
+and that of personal data belonging to the individuals to whom the data
+pertains. Specifically, we would like to balance the demands of:
+
+* Family rights and student privacy;
+* Transparency and the use of data for advancement of public policy
+ and education; and
+* Scientific integrity and replicability
+
+This approach contrasts with the current industry trend of treating
+student data as a proprietary corporate resource for the maximization
+of profit.
+
+These are fundamental tensions between the demands on any student data
+framework. For example, family rights and student privacy suggest that
+we should remove student data when asked. On the other hand,
+scientific replicability suggests that we maintain data which went
+into experiments so people can independently confirm research results.
+
+Building out both the technology and legal frameworks to do this will
+take more time than possible for a pilot project. Until we have built
+out such frameworks, student data should be governed by a standard
+research Privacy framework, along the lines of what's outlined below.
+
+If and when appropriate frameworks are available, we hope to
+transition to the extended research privacy framework described
+below. Our thought was that we would define a set of guiding
+principles and boundaries right now. If we can find a way to respect
+those (build out computer code, legal code, and funding), we would
+transition over to this, notifying schools and/or families, giving an
+opportunity to opt-out. Should we be unable to implement this
+framework within five years, or should we decide to build a different
+privacy framework, student data will move over only on an opt-in basis.
+
+## Standard Research Privacy Framework
+
+In the short term, this dashboard is intended to address immediate
+needs related to COVID19. During the sprint to bring this dashboard to
+a pilot, we cannot build out the legal and technical frameworks for
+student data management and family rights (e.g. to inspect and erase
+data). We would initially use a simple, conservative data policy:
+
+* Until and unless we have the framework described below (“Extended
+ Framework”) in place, all student data will be destroyed at most
+ five years after it was collected.
+* The data will live on secure machines controlled by the research
+ team (currently, ETS and NCSU).
+* For the purposes of this project, we can and will share student data
+ with the student's school. Beyond the school, the parents, and the
+ student, we would not share your data with anyone outside of the
+ research team, except as required by law (e.g. in the case of
+ subpoenas or law enforcement warrants).
+* We may perform research and publish based on such data, but only to
+ the extent that any published results are aggregate to such a level
+ that it is impossible to re-identify students.
+
+## Extended Research Privacy Framework
+
+In order to keep data beyond the five-year window, we would have
+technological and organizational frameworks to provide for:
+
+1. The right of guardians (defined below) to inspect all student data.
+2. The right of guardians to have student data removed upon request.
+3. The right of guardians to understand how data is used, both at
+ a high level and at a code level.
+4. Reasonable and non-discriminatory access to deidentified data with
+ sufficient protections to preserve privacy (for example, for
+ purposes such as research on how students learn or policy-making)
+5. Transparent and open governance of such data
+6. Checks-and-balances to ensure data is managed primarily for the
+ purpose of helping students and student learning (as opposed to
+ e.g. as a proprietary corporate resource)
+7. An opportunity for guardians to review these frameworks, and to
+ opt-out if they choose.
+8. Review by the ETS IRB.
+
+Helping students is broadly defined, and includes, for example:
+
+1. Driving interventions for individual students (for example,
+ student and teacher dashboards)
+2. Allowing interoperability of student records (for example, if a
+ student completes an assignment in one system, allowing another
+ system to know about it).
+3. Research for the purpose of school improvements (for example,
+ providing for insights about how students learn, or how different
+ school systems compare, in ways analogous to state exams, NAEP, or
+ PISA).
+
+It does not include advertising or commercial sale of data (although
+it does include basic cost recovery, for example on a cost-plus
+basis).
+
+Depending on context, 'guardian' may refer to:
+
+1. The student who generated the data;
+2. The student's legal parent/guardian; or
+3. The student's school / educational institution (for example, acting
+ as the parent/guardian's agent, as per COPPA)
+
+We would reserve the right to make the determination of who acts as
+the guardian at our own judgement, based on the context.
+
+## Any other changes
+
+Any changes to the privacy policy which do not follow all of the above
+would require affirmative **opt-in** by the guardian.
+
+## Rationale and Discussion
+
+To help contextualize and interpret the above policies.
+
+### Definitions of Deidentification, anonymization, and aggregation
+
+* Student data is **deidentified** by removing obvious identifiers,
+ such as names, student ID numbers, or social security numbers. Note
+ that deidentified learning data can often be reidentified through
+ sophisticated algorithms, for example comparing writing style,
+ skills, typing patterns, etc., often correlating with other
+ sources. Although such techniques are complex, they tend to be
+ available as automated tools once discovered.
+
+* Student data is **anonymized** by removing any data from which a
+ student may be reidentified. Anonymization involves sophisticated
+ techniques, such as the use of protocols like k-anonymity/
+ l-diversity, or maintaining privacy budgets.
+
+* Student data may be **aggregated** by providing statistics about
+ students, for example in the form of averages and standard
+ deviations. Some care must still be maintained that those
+ aggregations cannot be combined to make deductions about individual
+ students.
+
+For learning data, simple deidentification **cannot** be counted on to
+provide security. With data of any depth, it is possible to
+re-identify students. However, such obfuscation of obvious identifiers
+can still significantly reduce risk in some contexts since it prevents
+casual, careless errors (such as a researcher accidentally including
+the name of a student in a paper, or chancing upon someone they know
+in a dataset). With obfuscated identifiers, re-identifying students
+generally requires affirmative effort.
+
+### Scientific integrity and open science
+
+Over the past few decades, there have been significant changes in
+scientific methodology. Two key issues include:
+
+* **The ability to replicate results.** When a paper is published,
+ scientist need access to both data and methods (source code) to be
+ able to confirm results.
+
+* **Understanding the history of research.** Confidence in results
+ depends not just on the final data and its analysis, but the steps
+ taken to get there. Scientists need to understand steps taken on
+ data prior to final publication.
+
+These suggest maintaining a log of all analyses performed on the data
+(which in turn suggests open source code).
+
+### Educational transparency
+
+Historically, the general public has had a lot of access to
+educational information:
+
+* PPRA provides for families to have access to school curricular
+ materials.
+* FERPA provides for families to have access to student records, as
+ well as the ability to correct errors in such records.
+* Public records laws (FOIA and state equivalents) provide for
+ access to substantially everything which does not impact
+ student privacy or the integrity of assessments.
+* In Europe, GDPR provides for people to have the right to
+ inspect their data, to understand how it is processed, and
+ to have data removed.
+
+While FERPA, PPRA, and FOIA were drafted in the seventies (with only
+modest reforms since) and do not apply cleanly in digital settings,
+these laws were grounded in a philosophy that the general
+public ought to be able to understand school systems. State exams,
+NAEP, PISA, and similar exams were likewise created to provide for
+transparency.
+
+This level of transparency has led to improvements to both the
+learning experiences of individual students and to school systems as a
+whole, by enabling academic researchers, parent advocates,
+policymakers, journalists, and others to understand our schools.
+
+One of our goals is to translate and to reassert these rights as
+education moves into the digital era. With digital learning materials,
+in many cases, parents, researchers, and others have lost the ability
+to meaningfully inspect student records (which are often treated as
+corporate property) or curricular materials (which sit on corporate
+servers). Increasingly, students' lives are governed by machine
+models, to which families have no access.
+
+Again, this dictates that analysis algorithms (including ML models
+where possible without violating student privacy) ought to be open to
+inspection, both at a high level (human-friendly descriptions) as well
+as at a detailed level (source code). In addition, there ought to be a
+way to analyze student data, to the extent this is possible without
+violating student privacy.
+
+### Guardianship and Proxy
+
+Guardianship is a complex question, and hinges on several issues:
+
+* Age. For example, young elementary school students are unlikely to
+  be able to make sense of educational data, or the complex issues
+  therein. High school students may be able to explore such issues in
+ greater depth, but may have limited legal rights as minors.
+
+* Identity. Releasing data to an unauthorized party carries high
+ risk. Robust online identity verification is potentially expensive
+ and / or brittle. Working through institutions with whom we have
+ relationships, and who in turn have relationships with students and
+ families can mitigate that risk.
+
+* COPPA provides for
+ [schools to act on behalf of parents](https://www.ftc.gov/tips-advice/business-center/guidance/complying-coppa-frequently-asked-questions#Schools).
+ First, schools frequently have legal resources and expertise (either
+ acting individually or in consortia) which parents lack. Second,
+ reviewing the number of technologies typical students interact with
+ would be overwhelming to parents.
+
+However, ultimately, there is a strong argument that access ought to
+rest as close to the individual as possible. Where schools act as
+agents for families, and parents for students, there is a growing
+level of security and competence. On the other hand, there is also a
+growing level of trust required that those parties are acting in the
+best interests of those they are representing. It is incumbent on us,
+at all levels, to ensure we have appropriate transparency, incentive
+structures, and organizational structures to guarantee that proxies do
+act for stakeholder benefit, and to balance these based on the
+context.
+
+### Minimizing security perimeter
+
+Even when all parties act in good faith, broad data sharing exposes
+students to high levels of risk of data compromises, whether through
+deliberate attacks, disgruntled employees, or human error.
+
+### Models for data access
+
+In light of the above constraints, several models for data access
+have emerged which allow for complete transparency while protecting
+student privacy.
+
+* In the FSRDC model, deidentified (but not anonymized) data would be
+  kept in a physically-secure facility. People could visit the
+  facility and crunch data within the facility. Visitors would be
+  bound, both contractually and by physical security, not to remove
+  data, except for results aggregated sufficiently to make
+  reidentification impossible. Access is provided on a cost-plus
+  basis.
+
+* People can develop algorithms on synthetic data, and upload
+ algorithms to a data center, where those algorithms run on student
+ data. Both code and data are inspected prior to releasing results,
+ again, on a cost-plus basis.
+
+* Corporations can run real-time algorithms (such as to drive learning
+ dashboards) in a data center on a cost-plus basis. Educational
+ applications can work on shared models of student expertise, without
+ providing access to student data to the organizations which
+ developed them.
+
+* If a student (or proxy thereof) asks to have data removed, that
+ data is removed within some timeframe. However, for scientific
+ replicability, there is a before-and-after snapshot of how study
+ results changed when student data was removed. Note that this has
+  implications for both performance and re-identification.
+
+... to be continued
\ No newline at end of file
diff --git a/docs/concepts/reducers.md b/docs/concepts/reducers.md
new file mode 100644
index 000000000..2139974dd
--- /dev/null
+++ b/docs/concepts/reducers.md
@@ -0,0 +1,94 @@
+# Event Reducers
+
+The Learning Observer project uses a reducer system to handle events from various event sources, process them, and provide aggregated data for use in dashboards. This page describes the reducer system's architecture, how it processes events, and its components.
+
+## Reducer System Architecture
+
+The reducer system is designed to be modular and flexible, allowing for the addition and removal of event sources, aggregators, and dashboards as needed. The overall system diagram is as follows:
+
+```text
++---------------+
+| | +-------------+
+| Event Source ---| | Key-Value |
+| | | | Store |
+| | | | |
++---------------+ | +-------------+
++---------------+ | +-----------+ <------|-- Internal |
+| | | | | -------|-> State | +---------------+ +------------+
+| Event Source --------|---->| Reducer | | |------>| | | |
+| | | | | | +-------------+ | Communication |----> | Dashboard |
++---------------+ | | +-----------+ | Protocol | | |
++---------------+ | | +---------------+ +------------+
+| | | |
+| Event Source ----| |
+| | |
++---------------+ v
+ +------------+
+ | |
+ | Archival |
+ | Repository |
+ | |
+ +------------+
+```
+
+The reducer system consists of the following components:
+
+1. **Event Sources**: These are the sources of events that need to be processed. Each event source is responsible for generating events based on user activities, such as clicks, page views, or interactions with learning materials.
+
+2. **Reducer**: The reducer is the central component that processes the events from all event sources. It takes the incoming events and applies a specific reduction function to transform the event data into a more concise and meaningful form. The reducer is created using the `student_event_reducer` decorator, which enables modularity and flexibility in defining reduction functions.
+
+3. **Key-Value Store**: This component stores the internal and external state generated by the reducer. The internal state is used for the reducer's internal processing, while the external state is shared with other components, such as aggregators and dashboards.
+
+4. **Communication Protocol**: The communication protocol handles fetching and transforming data from the key-value store using an SQL-like structure.
+
+5. **Dashboard**: The dashboard is the user interface that displays the data from the communication protocol, providing insights into user activities and learning outcomes.
+
+6. **Archival Repository**: This component is responsible for archiving event data, ensuring that historical data is available for analysis and reporting purposes.
+
+## Using the Reducer System
+
+To create a new reducer, use the `student_event_reducer` decorator. This allows you to define custom reduction functions that process events and transform them into meaningful insights. As the system evolves, it will be possible to plug in different aggregators, state types, and keys (e.g., per-student, per-resource) to the reducer system.
+
+In the long term, the goal is to have pluggable, independent modules that can be connected to create a versatile and extensible analytics system. The current reducer system serves as a foundation for building such a system.
+
+An example of a simple reducer to count events can be defined as
+
+```python
+# import student scope reducer decorator
+from learning_observer.stream_analytics.helpers import student_event_reducer
+
+@student_event_reducer(null_state={"count": 0})
+async def student_event_counter(event, internal_state):
+ # do something with the internal state, such as increment
+ state = {"count": internal_state.get('count', 0) + 1}
+
+ # return internal state, external state (no longer used)
+ return state, state
+```
+
+To add a reducer to a module, we must define a `REDUCERS` section in a module's `module.py` file like so
+
+```python
+# module.py
+# ...other items
+
+REDUCERS = [
+ {
+ 'context': 'org.mitros.writing_analytics',
+ 'scope': Scope([KeyField.STUDENT]),
+ 'function': module.path.to.reducers.student_event_counter,
+ 'default': {'count': 0}
+ }
+]
+```
+
+The `context` value must match the `source` string attached to incoming
+events. When an event arrives on the websocket it advertises a `source`
+identifier (for example `org.mitros.writing_analytics`). The stream
+analytics loader uses that identifier to look up which reducers to
+execute. If the reducer `context` does not match the event `source`, the
+event will never reach your reducer. Double-check that any new event
+emitters (such as browser extensions or test scripts) populate the same
+`source` string that your module registers here.
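+
+For example, an incoming event whose `source` matches this `context`
+might look like the following (a hypothetical payload; field names
+other than `source` are illustrative):
+
+```python
+# Hypothetical event payload; `source` must match the reducer `context`.
+event = {
+    'source': 'org.mitros.writing_analytics',
+    'event': 'keystroke',
+    'timestamp': '2024-01-01T12:00:00Z'
+}
+```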
+
+NOTE: the `default` defined in the `module.py` file is for handling defaults when queries are made, while the `null_state` defined in the reducer decorator is used for initializing state of a new incoming event stream (e.g. a new student started sending events).
diff --git a/docs/concepts/scaling.md b/docs/concepts/scaling.md
new file mode 100644
index 000000000..a7301573c
--- /dev/null
+++ b/docs/concepts/scaling.md
@@ -0,0 +1,116 @@
+# Scaling Architecture
+
+The goal is for the Learning Observer to be:
+
+* Fully horizontally scalable in large-scale settings
+* Simple to run in small-scale settings
+
+It is worth noting that some uses of Learning Observer require
+long-running processes (e.g. NLP), but the vast majority are small,
+simple reducers of the type which would work fine on an 80386
+(e.g. event count, time-on-task, or logging scores / submission).
+
+## Basic use case
+
+In the basic use case, there is a single Learning Observer process
+running. It is either using redis or, if unavailable, disk/memory as a
+storage back-end.
+
+## Horizontally-scalable use-case
+
+LO needs to handle a high volume of incoming data. Fortunately,
+reducers are sharded on a key. In the present system, the key is
+always a student. However, in the future, we may have per-resource,
+per-class, etc. reducers.
+
+A network roundtrip is typically around 30ms, which we would like to
+avoid. We would therefore like reducers to be able to run keeping
+state in-memory (and simply writing the state out to our KVS either
+with each event, or periodically, e.g. every second). That, in turn,
+means pinning each key to a fixed process so that reducers can run
+without reads.
+
+Our eventual architecture here is:
+
+```
+incoming event --> load balancer routing based on key --> process pool
+```
+
+Events for the same key (typically, the same student) should always
+land on the same process.
+
+Eventually, we will likely want a custom load balancer / router, but
+this can likely be accomplished off-the-shelf, for example by
+including the key in an HTTP header or in the URL.
+
+**HACK**: At present, if several web sockets hit a server even with a
+ common process, they may not share the same in-memory storage. We
+ should fix this.
+
+## Task-scalable use-case
+
+A second issue is that we would like to be able to split work by
+reducer, module, or similar (e.g. incoming data versus dashboards).
+
+Our eventual architecture here is:
+
+```
+incoming event --> load balancer routing based on module / reducer --> process pool
+```
+
+The key reason for this is robustness. We expect to have many modules,
+at different levels of performance and maturity. If one module is
+unstable, uses excessive resources, etc. we'd like it to not be able
+to take down the rest of the system.
+
+This is also true for different views. For example, we might want to
+have servers dedicated to:
+
+* Archiving events into the Merkle tree (must be 100% reliable)
+* Other reducers
+* Dashboards
+
+## Rerouting
+
+In the future, we expect modules to be able to send messages to each
+other.
+
+## Implementation path
+
+At some point, we expect we will likely need to implement our own
+router. However, for now, we hope to be able to use sticky routing and
+content-based routing in existing load balancers. This may involve
+communication protocol changes, such as:
+
+- Moving auth information from the websocket stream to the header
+- Moving information into the URL (e.g. `http://server/in#uid=1234`)
+
+Note that these are short-term solutions, as in the long-term, only
+the server will know which modules handle a particular event. Once we
+route on modules, an event might need to go to several servers. At
+that point, we will likely need our own custom router / load balancer.
+
+In the short-term:
+
+* [Amazon](https://aws.amazon.com/elasticloadbalancing/application-load-balancer/?nc=sn&loc=2&dn=2)
+supports sticky sessions and content-based routing. This can work on data in the headers.
+* nginx can be configured to route to different servers based on headers and URLs. This is slightly manual, but would work as well; see the sketch below.
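+
+As an illustration, a minimal nginx sketch (an assumed configuration,
+not our production setup) that keeps all traffic for a given key on the
+same backend process, assuming the key arrives as a `uid` query
+parameter or an `X-LO-Key` header (the websocket path is illustrative):
+
+```
+# Hash on a per-student key so the same key always lands on the same process.
+upstream lo_event_pool {
+    hash $arg_uid consistent;    # or: hash $http_x_lo_key consistent;
+    server 127.0.0.1:8001;
+    server 127.0.0.1:8002;
+}
+
+server {
+    listen 80;
+
+    location /wsapi/in {
+        proxy_pass http://lo_event_pool;
+        # WebSocket upgrade headers
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+    }
+}
+```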
+
+## Homogeneous servers
+
+Our goal is to continue to maintain homogeneous servers as much as
+possible. The same process can handle incoming sockets of data, render
+dashboards, etc. The division is handled in devops and in the load
+balancer, e.g. by:
+
+- Installing LO modules only on specific servers
+- Routing events to specific servers
+
+The goal is to continue to support the single server use-case.
+
+## To do
+
+We need to further think through:
+
+- Long-running processes (e.g. NLP)
+- Batched tasks (e.g. nightly processes)
diff --git a/docs/concepts/student_identity_mapping.md b/docs/concepts/student_identity_mapping.md
new file mode 100644
index 000000000..73d4fc123
--- /dev/null
+++ b/docs/concepts/student_identity_mapping.md
@@ -0,0 +1,37 @@
+# Student Identity Mapping
+
+This document describes the current approach for reconciling a student's identity across the Google Workspace context used by Writing Observer and external platforms that only surface an email address (for example when the application is launched as an LTI tool).
+
+## Why the mapping exists
+
+When Writing Observer runs inside Google Workspace we naturally have access to the Google user identifier that shows up in event payloads. However, when the product is embedded as an LTI application we receive the learner's email address but do not receive the Google identifier. Many downstream reducers and dashboards expect to look students up by the Google identifier that is emitted by Google Docs events. Without an explicit bridge between those two identifiers we would be unable to join activity data with roster or profile information for LTI launches.
+
+## Data sources involved
+
+Two pieces of infrastructure cooperate to keep an email-to-Google-ID lookup table available:
+
+1. **`student_profile` reducer** – The `student_profile` KVS pipeline in `writing_analysis.py` stores the latest email address and Google identifier (`safe_user_id`) observed for each student. The reducer only updates its state when either value changes. The resulting records live in the reducer's internal key-value namespace and therefore need to be copied to a place where other services can access them (see `modules/writing_observer/writing_observer/writing_analysis.py`, lines 233-253).
+2. **`map_emails_to_ids_in_kvs.py` script** – This maintenance script scans the reducer's internal keys, extracts any records that contain both `email` and `google_id`, and writes a dedicated `email-studentID-mapping:{email}` entry to the key-value store. The explicit mapping gives any service that only knows the email address a way to recover the Google identifier (see `scripts/map_emails_to_ids_in_kvs.py`).
+
+This flow is intentionally simple: the reducer captures whatever the client reports, and the script copies the data to keys that other components already know how to query.
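+
+A minimal sketch of what that pass does (assuming a plain redis client
+and JSON-encoded reducer records; the key pattern is illustrative, and
+the real logic lives in `scripts/map_emails_to_ids_in_kvs.py`):
+
+```python
+import json
+
+import redis
+
+r = redis.Redis(decode_responses=True)
+
+
+def copy_email_mappings(pattern='*student_profile*'):
+    """Copy email -> Google ID pairs into dedicated lookup keys."""
+    for key in r.scan_iter(match=pattern):
+        try:
+            record = json.loads(r.get(key))
+        except (TypeError, ValueError):
+            continue  # skip keys that do not hold JSON documents
+        if not isinstance(record, dict):
+            continue
+        email = record.get('email')
+        google_id = record.get('google_id')
+        if email and google_id:
+            # The lookup key format other services already query.
+            r.set(f'email-studentID-mapping:{email}', google_id)
+```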
+
+## Operating the script
+
+The email mapping script is normally run in the same environment as other KVS maintenance tasks. It requires access to the same credentials file that the main server uses. Thus, we need to run the script from the `learning_observer` directory. A manual run looks like this:
+
+```bash
+cd learning_observer/
+python ../scripts/map_emails_to_ids_in_kvs.py
+```
+
+The script performs a full scan every time it runs, so it is safe to execute multiple times or to schedule as a recurring job.
+
+## Limitations and future direction
+
+The current reducer-plus-script approach fills an immediate gap but remains a stopgap solution:
+
+* **Tight coupling to Google identity** – The reducer only records the Google identifier surfaced by Google Docs. If we ingest events from another platform, there is no canonical place to persist its identifiers.
+* **No user object abstraction** – Each consumer must know which KVS keys to query. A shared user object (or identity service) would allow the system to attach multiple external identifiers, roles, and profile attributes to a learner and to expose them through a stable API.
+* **Operational overhead** – Because the mapping lives in the KVS, we must remember to run the maintenance script anywhere we expect the lookup table to be fresh.
+
+In the future we plan to introduce a formal user object that encapsulates identifiers, roles, and cross-system metadata. That abstraction would make this lookup process unnecessary by giving every component a single source of truth for student identity. Until then, this document serves as a reference for the current mapping workflow.
diff --git a/docs/concepts/system_design.md b/docs/concepts/system_design.md
new file mode 100644
index 000000000..e4a15ae4e
--- /dev/null
+++ b/docs/concepts/system_design.md
@@ -0,0 +1,56 @@
+Learning Observer System Design
+-------------------------------
+
+Piotr Mitros. 2021-07-11
+
+This lays out the system design, as planned. This design does not
+fully reflect the current implementation, yet.
+
+Our goal is to build a system which will:
+
+* Take in process data from a diversity of sources
+
+* Perform real-time processing on that data, in order to support
+ teachers in real-time. This is done through a series of pluggable
+ analytics modules.
+
+* Archive that data for research purposes and archival analysis
+
+* Provide open science tools to log such analyses
+
+In other words:
+
+
+
+Internally, the system takes a stream of events from each learner, and
+routes it to one or more analytics modules. Each of these modules
+performs a `reduce` operation over that stream in realtime. The
+reduced state is stored in a KVS (currently `redis`, although this is
+pluggable). These modules run as asynchronous Python co-routines,
+which makes them quite scalable. We ought to be able to handle large
+numbers of simultaneous connections.
+
+While an instructor is connected, such data is periodically
+aggregated from redis and sent back to the instructor. This would be
+a logical place to be more clever about scaling; ideally, we'd cycle
+through instructors for such an aggregation, and only aggregate where
+data has changed, so that with large numbers of instructors, the
+system merely updates dashboards less quickly:
+
+
+
+Although at present, reduce operations are per-student, and
+aggregations per-class, in the future, we envision:
+
+* Other ways to shard (e.g. per-resource, per-document, etc.).
+* Being able to cascade events, either by generating new events, or in
+ much the same way as we handle the current aggregation
+* Potentially, being more clever about routing the same student to a
+ common process each time. Right now, we're connected per-session,
+ but we may have concurrency issues if a student connects twice.
+
+Data will be stored in a git-like Merkle tree format:
+
+
+
+We'll document this in more detail later.
\ No newline at end of file
diff --git a/docs/concepts/system_settings.md b/docs/concepts/system_settings.md
new file mode 100644
index 000000000..f1749dc40
--- /dev/null
+++ b/docs/concepts/system_settings.md
@@ -0,0 +1,149 @@
+# System settings
+
+Learning Observer depends on a single source of truth for everything from
+server ports to which pieces of modules are enabled. We rely on the
+[PMSS](https://github.com/ETS-Next-Gen/pmss) registry because it gives us a
+predictable, type-checked way to describe those concerns once and reuse them
+across the whole stack. This article explains why the settings layer exists,
+how `creds.yaml` fits into the picture, and why we support cascading
+``*.pmss`` files that behave a bit like CSS for configuration.
+
+To view all settings and what they do, check out the [System Settings Reference](../reference/system_settings.md).
+
+## Why centralize configuration?
+
+* **Shared vocabulary.** Modules, reducers, and integrations all speak the same
+ language when they ask for `hostname`, `redis_connection.port`, or
+ `modules.writing_observer.use_nlp`. PMSS enforces the field definitions so we
+ can freely move code between services without wondering what the knobs are
+ called.
+* **Type safety and validation.** Every field is registered with a type and
+ optional parser. PMSS refuses to start if a value is missing or malformed,
+ surfacing errors during boot instead of in the middle of a request.
+* **Operational portability.** Teams deploy Learning Observer to wildly
+ different environments. A single registry allows a site to describe network
+ boundaries, third-party credentials, or feature flags in one place and keep
+ those choices under version control.
+
+## Defining which settings files to use
+
+To load alternate or additional PMSS rulesets, start the server with
+`--pmss-rulesets` and pass one or more file paths or a directory. The startup
+logic expands directories into sorted file lists, then loads each file as a
+`YAMLFileRuleset` when it ends in `.yaml`/`.yml` or a `PMSSFileRuleset` when it
+ends in `.pmss`. Any other file suffix is skipped with a warning so you can
+keep README files or notes alongside the rulesets without breaking startup.
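+
+For example (the entry point shown is an assumption; substitute however
+you normally launch the server, e.g. via the Makefile):
+
+```bash
+# Load a directory of overlays plus one district-specific file.
+# `config/` and `district.pmss` are illustrative paths.
+python learning_observer --pmss-rulesets config/ district.pmss
+```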
+
+## The role of `creds.yaml`
+
+Most installations load configuration from `creds.yaml`. When the process
+starts, `learning_observer.settings` initializes PMSS with a
+`YAMLFileRuleset`, parses that file, and registers the run mode and other core
+fields. The YAML mirrors the namespace hierarchy, so options live exactly where
+operators expect to see them:
+
+```yaml
+server:
+ port: 5000
+modules:
+ writing_observer:
+ use_nlp: true
+```
+
+`creds.yaml` gives us a stable default: it travels with the deployment, can be
+checked into private infrastructure repositories, and is easy to audit during
+reviews. Even when we introduce additional sources, the YAML baseline remains
+the anchor that documents the "intent" of the environment.
+
+## Cascading ``*.pmss`` overlays
+
+While one file covers global defaults, we often need tweaks that depend on
+**who** is asking for data or **where** a request originates. PMSS supports
+multiple rule sets, so we extend the base YAML with optional ``*.pmss``
+overlays. Each overlay is a small PMSS file whose contents look just like the
+YAML fragment they augment, but they add selectors that encode *specificity*.
+
+Think of these selectors like CSS. We start with the low-specificity default
+rule and then layer on increasingly precise matches:
+
+```pmss
+roster_data {
+ source: all;
+}
+
+roster_data[domain="learning-observer.org"] {
+ source: google;
+}
+```
+
+When `learning_observer/learning_observer/rosters.py` asks for `roster_data`
+it supplies attributes such as the caller's email domain or provider. PMSS
+walks the rule set, finds the most specific block that matches the request, and
+returns that value. In the example above a teacher from
+`learning-observer.org` would receive the `google` roster source, while any
+other user would keep the global `all` default. Additional selectors can layer
+on top for providers, schools, or classrooms with each more specific rule
+overriding the broader ones.
+
+At runtime we still assemble a deterministic cascade:
+
+1. Load the global `creds.yaml` defaults.
+2. Apply any environment overlays (for example, a `district.pmss` file that
+ swaps OAuth credentials for that customer).
+3. Resolve request-scoped overlays that match the supplied selectors, letting
+ the most specific rule win.
+
+PMSS tracks the provenance of each value so developers can inspect which file
+supplied the final answer when troubleshooting. Because overlays reuse the
+same registered fields, we retain all of the type checking that protects the
+base configuration.
+
+## How code consumes the cascade
+
+Once the cascade is assembled, code does not care whether a value came from the
+YAML baseline or an overlay. Components call
+`learning_observer.settings.pmss_settings.<field_name>()` (optionally via
+`module_setting()` helpers) and PMSS resolves the field using the active rule
+stack. That means a request handled for an instructor can pick up
+instructor-specific defaults while a system job, using the same accessor, still
+observes the site-wide configuration.
+
+### What context we pass today
+
+Every call into `pmss_settings` names the setting through the `types` list. We
+build that list from the canonical namespace of the setting—`['server']` for the
+public port, `['redis_connection']` for Redis, `['modules', module_name]` for
+module flags, and so on. Because the list mirrors the hierarchy defined in
+`creds.yaml`, we get deterministic lookups even when overlays layer additional
+rules on top.
+
+Selectors (the `attributes` argument) are rarer. Only features that genuinely
+vary per request provide them today. For example, roster resolution passes the
+requesting user's email domain and the LTI provider so the `roster_data`
+configuration can pick the correct backend, and the dashboard logging toggle
+adds the user's domain to honour tenant-specific overrides. Most other settings
+still rely solely on the namespace lookup.
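+
+To make the shape concrete, a hypothetical lookup (the accessor names
+and exact signature are assumptions; the `types` and `attributes`
+arguments are the ones described above):
+
+```python
+import learning_observer.settings
+
+# Namespace-only lookup: the public server port.
+port = learning_observer.settings.pmss_settings.port(types=['server'])
+
+# Request-scoped lookup: roster backend for a particular user.
+roster_source = learning_observer.settings.pmss_settings.source(
+    types=['roster_data'],
+    attributes={'domain': 'learning-observer.org', 'provider': 'google'}
+)
+```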
+
+### Where we want to go
+
+We want every lookup that depends on request context to assemble the same
+attribute payload in the same place. Rather than sprinkling ad-hoc conditionals
+around the codebase, helpers should gather the domain, provider, role, or other
+selectors once and pass them through every relevant PMSS call. This keeps the
+setting definitions declarative, makes it obvious which selectors operators can
+target in overlays, and avoids drift between different parts of the system.
+
+## Extending the system settings surface
+
+Adding a new capability follows a consistent pattern:
+
+1. Register the field with PMSS, giving it a name, type, description, and
+ default if appropriate.
+2. Update `creds.yaml` (or the reference documentation) to teach operators what
+ the new setting does.
+3. Optionally create overlay files where the value should vary by tenant, user,
+ or integration partner.
+
+By keeping configuration declarative and cascading, we get the flexibility to
+serve many partners without branching the codebase, all while preserving the
+predictability administrators expect from a single system settings registry.
diff --git a/docs/concepts/technologies.md b/docs/concepts/technologies.md
new file mode 100644
index 000000000..aac422a7f
--- /dev/null
+++ b/docs/concepts/technologies.md
@@ -0,0 +1,46 @@
+# Technologies in the _Learning Observer_
+
+## Technologies
+
+
+You are welcome to use your own instance of redis; however, `docker compose` allows us to spin up an instance of Redis and connect to it. See the Docker Compose section for more information.
+
+The provided run commands all have watchdog enabled, to speed up re-running the application during development.
+
+
+Several potential contributors have asked for a list of technologies
+needed to be productive helping developing the *Learning Observer* or
+modules for the *Learning Observer*. A short list:
+
+* We use [Python](https://www.python.org/) on the server side, and JavaScript on the client side. We do rely on current Python (dev systems are mostly 3.10 as of this writing).
+* Since we're managing large numbers of web socket connections, we make heavy use of [asynchronous Python](https://docs.python.org/3/library/asyncio.html). If you haven't done async programming before, there is deep theory behind it. However, we again recommend any short tutorial for aiohttp, and then learning in context.
+* Our web framework is [aiohttp](https://docs.aiohttp.org/en/stable/).
+* We are moving towards [react](https://react.dev/) and [redux](https://redux.js.org/).
+* Simple dashboards can be built with [plot.ly](https://plotly.com/python/)
+* Our main database is the original [redis](https://redis.io/), but we plan to switch to a different redis due to licensing and other nasty changes by a company which coopted this from the open source community. We have a simple key-value store abstraction, so this is easy to swap out.
+* We make heavy use of `git`, as well as of data structures which are `git`-like. I recommend reading [Git Internals](https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain)
+ and following [Write Yourself a Git](https://wyag.thb.lt/)
+* Our CSS framework is currently [Bulma](https://bulma.io/), but that may change.
+* Our icon library is [Font Awesome](https://fontawesome.com/)
+* For rapid prototyping, we use [P5.js](https://p5js.org/), although we hope to avoid this beyond the prototype phase. This is super-easy to learn (even for little kids), and super-fast to develop in. It isn't suited to production-grade software, though (responsiveness, i18n, a11y, testability, etc.). The best way to learn this is by helping a child do the Khan Academy JavaScript courses :)
+* Our web server is [nginx](https://nginx.org/en/), but that's easy to
+ change.
+* Our dev-ops framework is home baked, but uses [boto](http://boto.cloudhackers.com/), [invoke](https://www.pyinvoke.org/), [Fabric](https://www.fabfile.org/), and a
+ little bit of [ansible](https://docs.ansible.com/ansible/latest/dev_guide/developing_python_3.html).
+* We recommend Debian/Ubuntu, but the system also runs on Fedora/Red Hat. People have successfully run this on MacOS and on Windows/WSL, but this is not well-tested.
+* At some point, we do plan to add [postgresql](https://www.postgresql.org/).
+* For a while, when we thought we'd need queues, we used an XMPP server. I don't think we need queues, but if we do, it will come back.
+
+For grad students, interns, student volunteers, and other contributors who are here primarily to learn: One of the fun things here is that most of these are _deeply interesting tools_ with a strong theoretical basis in their design.
+
+On the whole, our goal is to keep a *small set of dependencies*. To add a new tool to the system, it will need to do something _substantially_ different than what's in the system already. We do plan on adding Postgresql once needed, but not too much beyond that.
+
+Note that some modules within the system (including and especially the _Writing Observer_) do have more extensive dependencies. The _Writing Observer_ uses _a lot_ of different NLP libraries, and until we streamline that, can be quite annoying to install.
+
+## Deprecations
+
+* We are deprecating [D3](https://d3js.org/) for displaying data in
+  real-time on the client, and otherwise, as a front-end framework. D3
+  is a relatively small and simple library with a fairly steep
+  learning curve (in much the same way that the game of Go is small
+  and simple, yet hard to master). Much of its use is obsoleted by our
+  use of react.
\ No newline at end of file
diff --git a/docs/how-to/communication_protocol.md b/docs/how-to/communication_protocol.md
new file mode 100644
index 000000000..a0725329a
--- /dev/null
+++ b/docs/how-to/communication_protocol.md
@@ -0,0 +1,286 @@
+# How to Build and Run Communication Protocol Queries
+
+This guide explains the end-to-end workflow for turning a reporting idea into a runnable query on the Learning Observer communication protocol. Follow the steps in order—both humans and language models can use this as a checklist when creating or automating queries.
+
+## 1. Frame the Data Task
+
+1. Write a one-sentence description of the insight or dataset you need (e.g., *“Return the latest writing sample for each student in a course”*).
+2. Identify the reducers, helper functions, or key-value store documents that expose the data. Concept docs provide summaries of the executor lifecycle, node types, and utilities for transforming reducer outputs (see `docs/concepts/communication_protocol.md`).
+3. Check whether an existing helper (e.g., `generate_base_dag_for_student_reducer`) already provides most of the DAG structure. Reusing helpers keeps queries consistent and concise (see `docs/concepts/communication_protocol.md`).
+
+## 2. Confirm Your Goal and Required Data
+
+1. Identify the data source(s):
+
+ * **Reducers** - Aggregated documents stored in the key-value store.
+ * **Helper functions** - Python callables published with `publish_function`.
+ * **Roster/metadata** - Collections that need to be joined with reducer data.
+2. Decide which fields must appear in the output. Note whether you need the entire document or only specific fields.
+3. List all runtime values (course ID, time range, student list, etc.). These become `parameter` nodes later.
+
+Document these choices (in comments or metadata) so you can refer to them in later steps.
+
+## 3. Declare Parameters and Defaults
+
+Each runtime input must be expressed as a `parameter` node. Parameters can be required or optional and may include default values:
+
+```python
+course_id = query.parameter("course_id", required=True)
+student_id = query.parameter("student_id", required=False, default=None)
+```
+
+For each parameter, document:
+
+* **Name** - Identifier passed to the DAG node.
+* **Type** - String, UUID, ISO date, etc.
+* **Required** - Boolean flag.
+* **Default** - Optional fallback value.
+
+> Tip: Emit parameter declarations first so later steps can reuse variables like `course_id["variable"]` consistently.
+
+For fixed values (e.g., reducer names or field lists), define constants once near where they are used.
+
+## 4. Plan the Data Flow (DAG Skeleton)
+
+Translate the goal into a linear sequence of operations. A typical reducer query involves:
+
+1. Fetching roster metadata or other context.
+2. Producing keys for each entity (`keys` nodes).
+3. Retrieving reducer documents with `select`.
+4. Joining reducer outputs with metadata.
+5. (Optional) Post-processing with `map` or `call`.
+
+Example outline:
+
+```
+roster = call("get_course_roster", course_id)
+reducer_keys = keys(reducer_name, roster.students)
+reducer_docs = select(reducer_keys, fields=[...])
+enriched = join(reducer_docs, roster, left_on="student.id", right_on="id")
+export enriched
+```
+
+Verify that every step depends only on earlier outputs, and adjust until the flow is acyclic.
+
+## 5. Construct Nodes with Query Helpers
+
+Use `query.py` helpers to implement the skeleton:
+
+```python
+from learning_observer.communication_protocol import query
+
+roster = query.call("get_course_roster", args={"course_id": course_id})
+reducer_keys = query.keys(
+ "reading_fluency",
+ scope_fields={
+ "student": {"values": query.variable(roster), "path": "user_id"},
+ },
+)
+reducer_docs = query.select(
+ keys=reducer_keys,
+ fields=query.SelectFields.SUMMARY,
+)
+enriched = query.join(
+ left=reducer_docs,
+ right=query.variable(roster),
+ left_on="student_id",
+ right_on="id",
+)
+```
+
+Guidelines:
+
+* Use `query.variable(node, path=None)` for downstream access to prior outputs.
+* Encapsulate repeated or complex logic in functions for reuse and testing.
+* Use explicit names and keyword arguments—avoid positional arguments for clarity.
+
+### Defining reducer scopes for `keys` (preferred vs. legacy)
+
+Reducers define a scope (e.g., student, student+document, student+document+tab). When
+building a `keys` node, pass scope values that align with the reducer scope so the
+executor can build the right Redis keys.
+
+**Preferred: `scope_fields` (supports arbitrary scopes)**
+
+Use `scope_fields` to supply each scope axis with either a `values` iterable or a
+single value (applied across all items), plus an optional `path` into each item.
+The scope field names should match the reducer scope: `student`,
+`doc_id`, `tab_id`, `page_id`, etc.
+
+```python
+reducer_keys = query.keys(
+    "writing_observer.some_tabbed_reducer",
+    scope_fields={
+        "student": {"values": query.variable("roster"), "path": "user_id"},
+        "doc_id": {"values": query.variable("documents"), "path": "doc_id"},
+        "tab_id": {"values": query.variable("tabs"), "path": "tab_id"},
+        # or supply a single value for an axis, e.g.:
+        # "student": "bobs_user_id",
+    },
+)
+```
+
+**Legacy: `STUDENTS`/`RESOURCES`**
+
+The older hack only supported student-only or student+document scopes. It is still
+accepted for backward compatibility, but prefer `scope_fields` for new work.
+
+```python
+reducer_keys = query.keys(
+ "writing_observer.last_document",
+ STUDENTS=query.variable("roster"),
+ STUDENTS_path="user_id",
+ RESOURCES=query.variable("documents"),
+ RESOURCES_path="doc_id",
+)
+```
+
+## 6. Define Exports and Integrations
+
+Choose which nodes should be externally accessible:
+
+```python
+exports = {
+ "reading_fluency": query.export("reading_fluency", enriched)
+}
+```
+
+If integrating with the async helper layer, pass `exports` to `learning_observer.communication_protocol.integration.bind_exports`.
+
+Document required parameters and defaults with the export definitions.
+
+## 7. Flatten, Validate, and Serialise
+
+1. Convert the nested DAG into executor-ready form:
+
+ ```python
+ from learning_observer.communication_protocol import util
+ dag = util.flatten(exports)
+ ```
+
+2. Confirm all node IDs are unique and reference earlier nodes. Inspect the flattened DAG if generated automatically.
+
+3. Serialise to JSON (e.g., `json.dumps(dag)`) when sending over the wire.
+
+4. Add automated tests—at minimum a smoke test against a fixture store (see the sketch below).
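+
+A minimal smoke-test sketch (the module path `my_module.queries` is
+hypothetical; `util.flatten` is the call shown in step 1):
+
+```python
+import json
+
+from learning_observer.communication_protocol import util
+# `exports` is assumed to be importable from wherever you defined it.
+from my_module.queries import exports
+
+
+def test_flattened_dag_is_json_serialisable():
+    dag = util.flatten(exports)
+    json.dumps(dag)  # should not raise
+```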
+
+## 8. Expose the DAG to Clients
+
+To make the DAG discoverable over the websocket interface:
+
+* Define `EXECUTION_DAG` in the module file and register it with the loader.
+* On server start, the DAG will be advertised under the module’s namespace.
+
+Production deployments should prefer predefined DAGs for security. Open-query mode is optional and must be explicitly enabled.
+
+## 9. Execute the Query
+
+Submit the flattened DAG to the communication protocol endpoint with runtime parameters:
+
+```json
+{
+ "parameters": {
+ "course_id": "course-123",
+ "start_date": "2023-09-01"
+ },
+ "exports": ["reading_fluency"],
+ "dag": { ... flattened nodes ... }
+}
+```
+
+On success, the response includes export payloads keyed by export name. Inspect `DAGExecutionException` for error details.
+
+The executor validates each requested export before any DAG work begins. If an
+export name is unknown - or if its declared `returns` node cannot be found - the
+server responds with a `DAGExecutionException` describing the missing export or
+node. Surfacing these errors in logs or UI telemetry helps diagnose typos and
+stale configuration quickly.
+
+When using integration bindings, call the generated async function with the same parameters.
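+
+For example (hypothetical names; the exact shape of what `bind_exports`
+returns may differ, so adapt this to the actual integration API):
+
+```python
+import asyncio
+
+from learning_observer.communication_protocol import integration
+from my_module.queries import exports   # hypothetical module from earlier steps
+
+
+async def main():
+    # Assumption: bind_exports returns a mapping of export name -> async callable.
+    bound = integration.bind_exports(exports)
+    print(await bound['reading_fluency'](course_id='course-123'))
+
+asyncio.run(main())
+```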
+
+## 10. Construct Websocket Requests
+
+Clients interact with `/wsapi/communication_protocol` via JSON messages. Each message contains:
+
+* `execution_dag` - Name of a predefined DAG or a full DAG object.
+* `target_exports` - List of exports to run.
+* `kwargs` - Runtime parameters.
+
+Example:
+
+```json
+{
+ "docs_request": {
+ "execution_dag": "writing_observer",
+ "target_exports": ["docs_with_roster"],
+ "kwargs": { "course_id": "COURSE-123" }
+ }
+}
+```
+
+The server streams back updates in messages shaped like:
+
+```json
+[
+ {
+ "op": "update",
+ "path": "students.student-1",
+ "value": { "text": "...", "provenance": { ... } }
+ }
+]
+```
+
+If `rerun_dag_delay` is set, the server automatically re-executes the DAG and pushes updates.
+
+### Manual testing with the generic websocket dashboards
+
+Two helper scripts live in `scripts/` for exercising websocket flows without running a full dashboard UI:
+
+* `generic_websocket_dashboard.py` (Python + `aiohttp`)
+* `generic_websocket_dashboard.js` (Node.js + `ws`)
+
+Both scripts ship with a template payload under the `REQUEST` constant. Update the payload to target the exports and parameters you want to test—for example, changing `execution_dag`, `target_exports`, or `kwargs.course_id`.
+
+To run the Python version:
+
+```bash
+python scripts/generic_websocket_dashboard.py
+```
+
+The script opens a websocket to `/wsapi/communication_protocol`, sends the JSON request, and pretty-prints any responses. Install dependencies with `pip install aiohttp` if needed.
+
+The Node.js version follows the same pattern. After adjusting `REQUEST`, run:
+
+```bash
+node scripts/generic_websocket_dashboard.js
+```
+
+If you copy the script into a browser console, delete the `require('ws')` line so the native `WebSocket` implementation is used.
+
+Use these scripts to confirm executor behaviour during development—for example, to observe partial updates or to verify that query parameters are wired correctly before embedding a request in a Dash dashboard.
+
+## 11. Iterate and Maintain
+
+* Profile slow queries; large joins may need new helpers or precomputed reducers.
+* Keep DAGs version-controlled. Update dependent queries when reducers or helpers change.
+* Review security before exposing exports to untrusted clients.
+
+## 12. Test End-to-End
+
+* **Unit-test** reducers and helpers independently.
+* **Reference** `learning_observer/learning_observer/communication_protocol/test_cases.py` for DAG tests.
+* **Exercise websocket flows** manually or with automated integration tests.
+
+## 13. Document Parameters and Outputs
+
+Update module documentation with:
+
+* Export descriptions, parameter types, and return structures.
+* Sample request payloads.
+* Notes on authentication or runtime context.
+
+Good documentation ensures developers and tooling can invoke queries reliably.
+
+### Summary
+
+Following this workflow ensures queries are consistent, testable, and safe to expose across dashboards, notebooks, and automation tools.
diff --git a/docs/how-to/connect_lo_blocks_to_canvas.md b/docs/how-to/connect_lo_blocks_to_canvas.md
new file mode 100644
index 000000000..ef99b2f8a
--- /dev/null
+++ b/docs/how-to/connect_lo_blocks_to_canvas.md
@@ -0,0 +1,364 @@
+# Setting up LO Blocks with Canvas Integration
+
+This guide walks you through integrating LO Blocks with Learning Observer and Canvas via LTI. This setup allows Canvas users to access LO Blocks dashboards while maintaining proper authentication and data flow between all three systems.
+
+## Prerequisites
+
+You'll need access to the following systems:
+
+- **Learning Observer** - Base platform installation
+- **LO Blocks** - Dashboard application
+- **Canvas** - LMS instance with administrative rights
+
+## Part 1: Canvas Configuration
+
+### Initial Setup
+
+1. **Sign in to Canvas** with administrative privileges
+
+2. **Create test environment** (recommended for initial setup):
+ - Create a sample course
+ - Add sample students
+
+ > **Note for local testing**: If running Canvas locally via Docker Compose:
+ > - No email server is configured by default
+ > - All "sent" emails print to the console
+ > - You must find confirmation email URLs in the console when adding users
+
+### Configure LTI Application
+
+Follow the [detailed LTI configuration guide](https://learning-observer.readthedocs.io/en/latest/docs/how-to/lti.html) in our documentation.
+
+Within Canvas, you'll want to:
+
+1. Navigate to the Admin portal
+2. Click `Developer Keys`, then click `+ Developer Key`
+3. Select `LTI Key`
+4. Populate the configuration
+5. Save the key
+6. Enable the key for use
+
+> **Note**: You may need to revisit these settings after completing the Learning Observer configuration (see "Putting it all together" section below).
+
+## Part 2: Learning Observer Configuration
+
+### Base Installation
+
+1. **Install Learning Observer** base platform using the [Tutorial: Install](../tutorials/install.md)
+
+### Module Setup
+
+2. **Create a module** to connect to a reducer:
+ - [Tutorial: Build and Run a Module from the Cookiecutter Template](../tutorials/cookiecutter-module.md)
+ - Match the `context` in your module to the `source` in your LO Event (see LO Blocks)
+ - Add an endpoint in `COURSE_DASHBOARDS` defining the connection to the LO Blocks server
+
+3. **Configure authentication**:
+ - Set up password file login using `scripts/lo_passwd.py` (place the outputted file within the `learning_observer/` directory)
+
+### Canvas Integration Settings
+
+4. **Modify roster source settings**
+
+ Edit `learning_observer/rosters.py` to update available PMSS values for `roster_source`.
+
+ ```python
+ pmss.parser('roster_source', parent='string', choices=['google', 'demo-canvas', 'schoology', 'all', 'test', 'filesystem'], transform=None)
+ ```
+
+5. **Update core settings** in your configuration:
+
+ ```yaml
+ auth:
+ lti:
+ demo-canvas: # Allows users to sign in via LTI
+
+ event_auth:
+ lti_session: # Allows websocket events from LTI-authenticated users
+
+ feature_flags:
+ canvas_routes: true # Enables Canvas LTI API calls
+
+ roster_data: # See roster PMSS configuration below
+ ```
+
+ > **Important**: Replace `demo-canvas` with an identifier specific to your Canvas instance (e.g., `middleton-canvas`, `easttownhigh-canvas`)
+
+6. **Create roster PMSS file** (`rosters.pmss`):
+
+ ```pmss
+ roster_data {
+ source: all;
+ }
+
+ roster_data[provider="demo-canvas"] {
+ source: demo-canvas;
+ }
+ ```
+
+   Any user with the provider `demo-canvas` will use the roster source `demo-canvas`, whereas all other users will use the roster source `all`.
+
+7. **Register PMSS file** in `learning_observer/settings.py`:
+
+ ```python
+ pmss_settings = pmss.init(
+ prog=__name__,
+ description="A system for monitoring",
+ epilog="For more information, see PMSS documentation.",
+ rulesets=[
+ pmss.YAMLFileRuleset(filename=learning_observer.paths.config_file()),
+ pmss.PMSSFileRuleset(filename='rosters.pmss')
+ ]
+ )
+ ```
+
+**TODO**: Document any other settings needed "for data to flow properly" (referenced but incomplete in original document)
+
+## Part 3: LO Blocks Configuration
+
+1. **Configure websocket connection**
+
+ Inside of `src/lib/state/store.ts` check the following:
+
+ - `WEBSOCKET_URL` points to the Learning Observer instance
+ - `websocketLogger` is included in our list of available `loggers`
+ - Ensure the `lo_event.init` function uses the same source as defined in the reducer created earlier
+
+2. **Build the application**:
+
+ ```bash
+ npx next build
+ ```
+
+3. **Start the application**:
+
+ ```bash
+ npx next start
+ ```
+
+## Part 4: Putting It All Together
+
+### Understanding the Architecture
+
+LO Blocks is a Next.js application with both client and server-side components. This means:
+
+- We cannot serve it directly from Learning Observer (which requires static builds)
+- We must run both applications side-by-side on the same machine
+- We need a reverse proxy to route traffic between them
+
+### Nginx Reverse Proxy Configuration
+
+We'll use Nginx to route traffic between Learning Observer and LO Blocks.
+
+#### Routing Strategy
+
+- **Default traffic** → Learning Observer
+- **`/lo-blocks` path** → LO Blocks application
+- Users navigate to LO Blocks via a course dashboard link in Learning Observer
+
+#### Step 1: Configure Next.js Base Path
+
+Edit your `next.config.js` to set the base path:
+
+```javascript
+const nextConfig = {
+ basePath: '/lo-blocks'
+};
+
+export default nextConfig;
+```
+
+Then rebuild LO Blocks:
+
+```bash
+npx next build
+```
+
+> **Important**: The `basePath` setting only affects `next/link` and `next/router`. It does NOT affect `fetch()` calls made by the application.
+
+#### Step 2: Configure Nginx
+
+Create an Nginx configuration to handle both applications:
+
+```
+upstream learning_observer {
+ server localhost:8002;
+}
+
+upstream lo_blocks {
+ server localhost:3000;
+}
+
+proxy_cache_path /var/cache/nginx/auth levels=1:2 keys_zone=auth_cache:10m max_size=100m inactive=60m;
+
+map $http_upgrade $connection_upgrade {
+ default upgrade;
+ '' close;
+}
+
+server {
+ listen 8001;
+ server_name localhost;
+
+ # Default: primary service
+ location / {
+ proxy_pass http://learning_observer;
+
+ # WebSocket bits
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection $connection_upgrade;
+
+ # Common headers
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ proxy_set_header Cookie $http_cookie;
+ }
+
+    # 1) Next.js app mounted at /lo-blocks
+    location /lo-blocks {
+        # auth for anything under /lo-blocks
+ auth_request /auth-check;
+
+ auth_request_set $user_id $upstream_http_x_user_id;
+ auth_request_set $user_email $upstream_http_x_user_email;
+ auth_request_set $user_name $upstream_http_x_user_name;
+
+        proxy_pass http://lo_blocks;  # passes /lo-blocks[...] as-is to Next
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ proxy_set_header X-User-ID $user_id;
+ proxy_set_header X-User-Email $user_email;
+ proxy_set_header X-User-Name $user_name;
+
+ proxy_set_header Authorization $http_authorization;
+ }
+
+    # 2) Route /api/* -> Next's /lo-blocks/api/*
+    #
+    # We don't use 'rewrite' here; we let proxy_pass do the path
+    # mapping via its trailing-slash behavior.
+    location /api/ {
+        # Apply the same auth behavior as /lo-blocks
+ auth_request /auth-check;
+
+ auth_request_set $user_id $upstream_http_x_user_id;
+ auth_request_set $user_email $upstream_http_x_user_email;
+ auth_request_set $user_name $upstream_http_x_user_name;
+
+        # Map:
+        #   /api/content     -> /lo-blocks/api/content
+        #   /api/foo/bar?x=1 -> /lo-blocks/api/foo/bar?x=1
+        proxy_pass http://lo_blocks/lo-blocks/api/;
+
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ proxy_set_header X-User-ID $user_id;
+ proxy_set_header X-User-Email $user_email;
+ proxy_set_header X-User-Name $user_name;
+
+ proxy_set_header Authorization $http_authorization;
+ }
+
+ # Internal location for auth checking
+ location = /auth-check {
+ internal;
+
+ proxy_pass http://learning_observer/auth/userinfo;
+ proxy_pass_request_body off;
+ proxy_set_header Content-Length "";
+ proxy_set_header X-Original-URI $request_uri;
+
+ proxy_set_header Authorization $http_authorization;
+ proxy_set_header Cookie $http_cookie;
+
+ proxy_cache auth_cache;
+ proxy_cache_valid 200 5m;
+ proxy_cache_key "$http_authorization$cookie_session";
+ }
+
+ location /auth/userinfo {
+ proxy_pass http://learning_observer/auth/userinfo;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ }
+
+ error_page 401 403 = @auth_error;
+
+    location @auth_error {
+        default_type application/json;
+        return 401 '{"error": "Authentication required or user does not exist"}';
+    }
+}
+```
+
+The configuration should include:
+
+- Proxy pass rules for Learning Observer (default)
+- Proxy pass rules for `/lo-blocks` → LO Blocks
+- **API fetch() workaround**: Rewrite `/api/*` requests to include the `/lo-blocks` prefix
+ - This is necessary because Next.js `fetch()` calls don't respect `basePath`
+ - Safe because Learning Observer doesn't use `/api` routes
+
+> **Assumption**: LO Blocks runs on port 3000, Learning Observer runs on port 8002
+> To change the Learning Observer port: Add `--port 8002` to the Makefile run command or adjust in `creds.yaml`
+
+### HTTPS Configuration for Canvas
+
+Canvas requires HTTPS for LTI integrations. For local development:
+
+#### Option 1: Cloudflare Tunnel (Recommended for local testing)
+
+1. **Create a secure tunnel**:
+
+ ```bash
+ cloudflared tunnel --url http://localhost:8001
+ ```
+
+ This creates a tunnel between your local port and a public HTTPS URL.
+
+2. **Update configurations** with the tunnel URL:
+
+ **In `creds.yaml`**:
+
+ ```yaml
+ hostname: your-tunnel-url.trycloudflare.com # Without https://
+
+ auth:
+ lti:
+ demo-canvas:
+ redirect_uri: https://your-tunnel-url.trycloudflare.com/auth/lti/callback
+ ```
+
+ **In Canvas LTI configuration**:
+ - Update all URL fields with the proper domain `https://your-tunnel-url.trycloudflare.com`
+ - `target_link_uri: domain/lti/demo-canvas/login`
+ - `oidc_initiation_url: domain/lti/demo-canvas/login`
+ - `redirect_uris: domain/lti/demo-canvas/launch`
+
+## Verification and Testing
+
+Once everything is configured:
+
+1. **Canvas → Learning Observer connection**:
+ - Users should be able to launch the LTI tool from Canvas
+ - Authentication should work via LTI
+
+2. **Navigation to LO Blocks**:
+ - Users should see the dashboard link in Learning Observer
+ - Clicking should navigate to LO Blocks interface
+
+3. **Data persistence**:
+ - User progress in LO Blocks should save properly
+ - Data flows through websocket connection to Learning Observer
diff --git a/docs/how-to/dashboards.md b/docs/how-to/dashboards.md
new file mode 100644
index 000000000..f0f767fc5
--- /dev/null
+++ b/docs/how-to/dashboards.md
@@ -0,0 +1,281 @@
+# Dashboards
+
+We can create custom dashboards for the system.
+
+## Dash
+
+Dash is a package for writing and serving web applications directly in Python. In Dash, there are two primary concepts: (1) page components, such as headers, divs, and spans, and (2) callbacks.
+
+### Getting Started with Dash
+
+Page components can be set up similarly to other `html` layouts, like so
+
+```python
+from dash import html
+
+layout = html.Div([
+ html.H1(children='This is a header'),
+ html.Div(id='A'),
+ html.Div(id='B'),
+ html.Input(id='input')
+])
+# html version
+#