diff --git a/.gitignore b/.gitignore index fa05a12..3fa7193 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,12 @@ dist/ raw/ wiki/ inbox/ + +# Allow vault content in examples +!examples/**/.kb/ +!examples/**/raw/ +!examples/**/wiki/ +!examples/**/inbox/ *.tgz .env .env.* diff --git a/ROADMAP.md b/ROADMAP.md index 120a4e2..49c3f53 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -171,7 +171,7 @@ What's built, what's next, and what's deferred. - [x] `docs/skill-authoring.md` — how to create custom skills - [x] `docs/provider-config.md` — LLM provider setup guide - [x] `docs/architecture.md` — codebase architecture for contributors -- [ ] Example vaults in `examples/` directory (ML research, software docs, reading list) +- [x] Example vaults in `examples/` directory (ML research, software docs, reading list) - [ ] Blog post / launch announcement ### Testing & Quality diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..fc38058 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,53 @@ +# Example Vaults + +Pre-built kib vaults demonstrating different use cases. Each vault includes raw sources, compiled wiki articles, a knowledge graph, and a complete manifest — ready to explore with `kib search`, `kib query`, and `kib chat`. + +## Vaults + +### [`ml-research/`](ml-research/) + +A machine learning research vault built from seminal papers and tutorials. Demonstrates how kib distills technical content into interconnected concept articles. + +- **3 sources**: Attention Is All You Need, RL overview, backpropagation paper +- **7 articles**: attention mechanism, transformers, reinforcement learning, backpropagation, gradient descent, MDPs, researcher reference +- **Tags**: deep-learning, nlp, optimization, reinforcement-learning + +### [`software-docs/`](software-docs/) + +A software engineering knowledge base covering APIs, version control, CI/CD, and cloud-native patterns. Shows how kib organizes practical development knowledge. 
+ +- **4 sources**: REST API tutorial, Git repo, CI article, Twelve-Factor App +- **8 articles**: REST API, HTTP methods, Git, version control, CI, CD, API design patterns, twelve-factor app +- **Tags**: api, git, ci-cd, devops, architecture + +### [`reading-list/`](reading-list/) + +A personal reading notes vault from non-fiction books and articles. Demonstrates kib as a tool for synthesizing ideas across books into a personal knowledge graph. + +- **4 sources**: Thinking Fast and Slow, Design of Everyday Things, Mental Models blog, Atomic Habits +- **10 articles**: cognitive biases, System 1/2, mental models, affordances, habit formation, decision making, design principles, atomic habits framework, Kahneman, Don Norman +- **Tags**: psychology, thinking, design, habits, decision-making + +## Try It + +```bash +# Browse a vault +cd examples/ml-research +kib status +kib search "attention" +kib query "How do transformers work?" + +# Or point kib at it from anywhere +kib --vault examples/reading-list search "cognitive biases" +``` + +## Using as a Starting Point + +These vaults are fully functional but static (the raw sources are excerpts, not full documents). 
To build your own vault from scratch: + +```bash +kib init my-vault +cd my-vault +kib ingest https://example.com/article +kib compile +``` diff --git a/examples/ml-research/.kb/config.toml b/examples/ml-research/.kb/config.toml new file mode 100644 index 0000000..3f9b170 --- /dev/null +++ b/examples/ml-research/.kb/config.toml @@ -0,0 +1,21 @@ +[provider] +default = "anthropic" +model = "claude-sonnet-4-20250514" +fast_model = "claude-haiku-4-5-20251001" + +[compile] +auto_index = true +auto_graph = true +max_sources_per_pass = 10 +categories = ["concepts", "topics", "references", "outputs"] +enrich_cross_refs = true + +[search] +engine = "hybrid" +max_results = 20 + +[skills] +[skills.hooks] +post-compile = [] +post-ingest = [] +post-lint = [] diff --git a/examples/ml-research/.kb/manifest.json b/examples/ml-research/.kb/manifest.json new file mode 100644 index 0000000..010828d --- /dev/null +++ b/examples/ml-research/.kb/manifest.json @@ -0,0 +1,146 @@ +{ + "version": "1", + "vault": { + "name": "ml-research", + "created": "2026-03-01T09:00:00.000Z", + "lastCompiled": "2026-03-15T14:30:00.000Z", + "provider": "anthropic", + "model": "claude-sonnet-4-20250514" + }, + "sources": { + "src_att001": { + "hash": "a1b2c3d4e5f60001", + "ingestedAt": "2026-03-01T09:05:00.000Z", + "lastCompiled": "2026-03-15T14:30:00.000Z", + "sourceType": "web", + "originalUrl": "https://arxiv.org/abs/1706.03762", + "producedArticles": ["attention-mechanism", "transformer-architecture", "vaswani-et-al"], + "metadata": { + "title": "Attention Is All You Need", + "author": "Vaswani et al.", + "date": "2017-06-12", + "wordCount": 9200 + } + }, + "src_rl002": { + "hash": "a1b2c3d4e5f60002", + "ingestedAt": "2026-03-05T11:00:00.000Z", + "lastCompiled": "2026-03-15T14:30:00.000Z", + "sourceType": "web", + "originalUrl": "https://lilianweng.github.io/posts/2018-02-19-rl-overview/", + "producedArticles": ["reinforcement-learning", "markov-decision-process"], + "metadata": { + "title": "A (Long) 
Peek into Reinforcement Learning", + "author": "Lilian Weng", + "date": "2018-02-19", + "wordCount": 12000 + } + }, + "src_bp003": { + "hash": "a1b2c3d4e5f60003", + "ingestedAt": "2026-03-10T08:30:00.000Z", + "lastCompiled": "2026-03-15T14:30:00.000Z", + "sourceType": "pdf", + "originalUrl": "https://www.nature.com/articles/323533a0", + "producedArticles": ["backpropagation", "gradient-descent"], + "metadata": { + "title": "Learning representations by back-propagating errors", + "author": "Rumelhart, Hinton, Williams", + "date": "1986-10-09", + "wordCount": 4500 + } + } + }, + "articles": { + "attention-mechanism": { + "hash": "wiki_hash_0001", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_att001"], + "backlinks": ["transformer-architecture", "vaswani-et-al"], + "forwardLinks": ["transformer-architecture", "backpropagation"], + "tags": ["deep-learning", "nlp", "attention"], + "summary": "Mechanism that lets models dynamically focus on relevant parts of the input", + "wordCount": 380, + "category": "concept" + }, + "backpropagation": { + "hash": "wiki_hash_0002", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_bp003"], + "backlinks": ["attention-mechanism", "gradient-descent"], + "forwardLinks": ["gradient-descent"], + "tags": ["optimization", "neural-networks", "training"], + "summary": "Algorithm for computing gradients in neural networks via the chain rule", + "wordCount": 350, + "category": "concept" + }, + "gradient-descent": { + "hash": "wiki_hash_0003", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_bp003"], + "backlinks": ["backpropagation"], + "forwardLinks": ["backpropagation"], + "tags": ["optimization", "training"], + "summary": "Iterative optimization algorithm that follows the negative gradient to minimize loss", + "wordCount": 290, + "category": "concept" + }, + 
"transformer-architecture": { + "hash": "wiki_hash_0004", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_att001"], + "backlinks": ["attention-mechanism", "vaswani-et-al"], + "forwardLinks": ["attention-mechanism", "reinforcement-learning", "vaswani-et-al"], + "tags": ["deep-learning", "nlp", "architecture"], + "summary": "Neural network architecture based entirely on self-attention, replacing recurrence and convolutions", + "wordCount": 520, + "category": "topic" + }, + "reinforcement-learning": { + "hash": "wiki_hash_0005", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_rl002"], + "backlinks": ["transformer-architecture"], + "forwardLinks": ["markov-decision-process"], + "tags": ["reinforcement-learning", "agents", "rewards"], + "summary": "Learning paradigm where agents learn optimal behavior through trial-and-error interaction with an environment", + "wordCount": 480, + "category": "topic" + }, + "markov-decision-process": { + "hash": "wiki_hash_0006", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_rl002"], + "backlinks": ["reinforcement-learning"], + "forwardLinks": ["reinforcement-learning"], + "tags": ["reinforcement-learning", "math", "probability"], + "summary": "Mathematical framework for modeling sequential decision-making under uncertainty", + "wordCount": 310, + "category": "concept" + }, + "vaswani-et-al": { + "hash": "wiki_hash_0007", + "createdAt": "2026-03-15T14:30:00.000Z", + "lastUpdated": "2026-03-15T14:30:00.000Z", + "derivedFrom": ["src_att001"], + "backlinks": ["transformer-architecture"], + "forwardLinks": ["attention-mechanism", "transformer-architecture"], + "tags": ["researchers", "google-brain"], + "summary": "Authors of the 2017 'Attention Is All You Need' paper that introduced the transformer", + "wordCount": 200, + "category": "reference" + } + }, + "stats": { + "totalSources": 3, + "totalArticles": 7, + 
"totalWords": 2530, + "lastLintAt": "2026-03-15T15:00:00.000Z" + } +} diff --git a/examples/ml-research/README.md b/examples/ml-research/README.md new file mode 100644 index 0000000..d09cad7 --- /dev/null +++ b/examples/ml-research/README.md @@ -0,0 +1,29 @@ +# ML Research Vault + +A machine learning research knowledge base built from foundational papers and tutorials. + +## Sources + +| Source | Type | Articles Produced | +|--------|------|-------------------| +| Attention Is All You Need (Vaswani et al., 2017) | web | attention-mechanism, transformer-architecture, vaswani-et-al | +| A (Long) Peek into Reinforcement Learning (Lilian Weng) | web | reinforcement-learning, markov-decision-process | +| Learning representations by back-propagating errors (Rumelhart et al., 1986) | pdf | backpropagation, gradient-descent | + +## Knowledge Graph + +``` +attention-mechanism ─── transformer-architecture + │ │ + backpropagation reinforcement-learning + │ │ + gradient-descent markov-decision-process +``` + +## Try It + +```bash +kib search "attention" +kib query "Explain how transformers replaced RNNs" +kib skill run explain --args '{"topic": "backpropagation", "level": "beginner"}' +``` diff --git a/examples/ml-research/raw/articles/attention-is-all-you-need.md b/examples/ml-research/raw/articles/attention-is-all-you-need.md new file mode 100644 index 0000000..aa2a922 --- /dev/null +++ b/examples/ml-research/raw/articles/attention-is-all-you-need.md @@ -0,0 +1,47 @@ +--- +title: "Attention Is All You Need" +source_type: web +url: "https://arxiv.org/abs/1706.03762" +author: "Vaswani et al." +date: "2017-06-12" +ingested: "2026-03-01" +word_count: 9200 +--- + +# Attention Is All You Need + +The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. 
We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. + +## Architecture + +The Transformer follows an encoder-decoder structure using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. + +### Encoder + +The encoder is composed of a stack of N=6 identical layers. Each layer has two sub-layers: a multi-head self-attention mechanism and a simple position-wise fully connected feed-forward network. We employ a residual connection around each of the two sub-layers, followed by layer normalization. + +### Decoder + +The decoder is also composed of a stack of N=6 identical layers. In addition to the two sub-layers in each encoder layer, the decoder inserts a third sub-layer which performs multi-head attention over the output of the encoder stack. + +## Attention + +An attention function maps a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum of the values, where the weight assigned to each value is computed by a compatibility function of the query with the corresponding key. + +### Scaled Dot-Product Attention + +We compute the attention function on a set of queries simultaneously, packed together into a matrix Q. The keys and values are also packed together into matrices K and V: + +Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) V + +### Multi-Head Attention + +Instead of performing a single attention function, we found it beneficial to linearly project the queries, keys and values h times with different, learned linear projections. On each of these projected versions we perform the attention function in parallel, yielding d_v-dimensional output values. + +## Results + +The Transformer achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results by over 2 BLEU. 
On the English-to-French translation task, our model achieves 41.0 BLEU, outperforming all previously published single models. + +## Training + +We trained on the WMT 2014 English-German dataset consisting of about 4.5 million sentence pairs. Training took 3.5 days on 8 P100 GPUs. diff --git a/examples/ml-research/raw/articles/rl-overview.md b/examples/ml-research/raw/articles/rl-overview.md new file mode 100644 index 0000000..90b2147 --- /dev/null +++ b/examples/ml-research/raw/articles/rl-overview.md @@ -0,0 +1,55 @@ +--- +title: "A (Long) Peek into Reinforcement Learning" +source_type: web +url: "https://lilianweng.github.io/posts/2018-02-19-rl-overview/" +author: "Lilian Weng" +date: "2018-02-19" +ingested: "2026-03-05" +word_count: 12000 +--- + +# A (Long) Peek into Reinforcement Learning + +Reinforcement learning (RL) is one approach to machine learning where an agent learns to make decisions by interacting with an environment. Unlike supervised learning, the agent isn't told what the correct action is — instead it discovers which actions yield the most reward through trial and error. + +## Key Concepts + +### Agent and Environment + +The agent is the learner and decision-maker. The environment is everything the agent interacts with. At each time step, the agent observes the current state, takes an action, and receives a reward signal along with the next state. + +### Markov Decision Process + +A Markov Decision Process (MDP) provides the mathematical framework for RL. An MDP is defined by: +- S: a set of states +- A: a set of actions +- P(s'|s,a): transition probability function +- R(s,a): reward function +- γ: discount factor (0 ≤ γ ≤ 1) + +The Markov property states that the future depends only on the current state, not the history of states. + +### Policy + +A policy π(a|s) defines the agent's behavior — the probability of taking action a in state s. The goal is to find a policy that maximizes the expected cumulative reward. 
+ +### Value Functions + +The state-value function V_π(s) represents the expected return starting from state s and following policy π. The action-value function Q_π(s,a) represents the expected return starting from state s, taking action a, then following policy π. + +## Methods + +### Model-Free Methods + +**Q-Learning**: An off-policy method that learns the optimal Q-function directly. The update rule: +Q(s,a) ← Q(s,a) + α[r + γ max_a' Q(s',a') - Q(s,a)] + +**SARSA**: An on-policy variant that updates based on the action actually taken. + +**Policy Gradient**: Instead of learning a value function, directly optimize the policy. REINFORCE is the simplest policy gradient method, using Monte Carlo sampling to estimate the gradient. + +### Deep Reinforcement Learning + +DQN (Deep Q-Network) uses a neural network to approximate the Q-function, enabling RL in high-dimensional state spaces like raw pixels. Key innovations include experience replay and target networks for stability. + +Policy gradient methods scale naturally with deep networks. Actor-critic methods combine value function estimation with policy optimization — the critic evaluates, the actor improves. diff --git a/examples/ml-research/raw/papers/backprop-paper.md b/examples/ml-research/raw/papers/backprop-paper.md new file mode 100644 index 0000000..b115382 --- /dev/null +++ b/examples/ml-research/raw/papers/backprop-paper.md @@ -0,0 +1,45 @@ +--- +title: "Learning representations by back-propagating errors" +source_type: pdf +url: "https://www.nature.com/articles/323533a0" +author: "Rumelhart, Hinton, Williams" +date: "1986-10-09" +ingested: "2026-03-10" +word_count: 4500 +--- + +# Learning representations by back-propagating errors + +We describe a new learning procedure, back-propagation, for networks of neurone-like units. 
The procedure repeatedly adjusts the weights of the connections in the network so as to minimize a measure of the difference between the actual output vector of the net and the desired output vector. + +## The Learning Procedure + +The total input x_j to unit j is a linear function of the outputs y_i of the units connected to j and the weights w_ji on these connections: + +x_j = Σ_i y_i w_ji + +The output of a unit is a non-linear function of its total input: + +y_j = 1 / (1 + e^(-x_j)) + +We define a measure of error E as: + +E = 1/2 Σ_j (y_j - d_j)^2 + +where d_j is the desired output of unit j. To minimize E by gradient descent, we need the partial derivative of E with respect to each weight: + +∂E/∂w_ji = ∂E/∂x_j · ∂x_j/∂w_ji = δ_j · y_i + +For output units, δ_j = (y_j - d_j) · y_j(1 - y_j). + +For hidden units, we back-propagate: δ_j = y_j(1 - y_j) Σ_k δ_k w_kj + +This allows us to compute the gradient for weights in any layer, enabling training of multi-layer networks. + +## Results + +We demonstrate the procedure on several tasks, showing that back-propagation creates useful internal representations in the hidden units. The network discovers features that are not explicitly present in the input or output — it learns to represent the structure of the task. + +## Significance + +Before this work, there was no known efficient method for training multi-layer neural networks. Backpropagation made deep networks trainable and is still the foundation of modern deep learning. 
diff --git a/examples/ml-research/wiki/GRAPH.md b/examples/ml-research/wiki/GRAPH.md new file mode 100644 index 0000000..82fb585 --- /dev/null +++ b/examples/ml-research/wiki/GRAPH.md @@ -0,0 +1,9 @@ +# Knowledge Graph + +attention-mechanism -> transformer-architecture, backpropagation +backpropagation -> gradient-descent +gradient-descent -> backpropagation +markov-decision-process -> reinforcement-learning +reinforcement-learning -> markov-decision-process +transformer-architecture -> attention-mechanism, reinforcement-learning, vaswani-et-al +vaswani-et-al -> attention-mechanism, transformer-architecture diff --git a/examples/ml-research/wiki/INDEX.md b/examples/ml-research/wiki/INDEX.md new file mode 100644 index 0000000..2b1cc0d --- /dev/null +++ b/examples/ml-research/wiki/INDEX.md @@ -0,0 +1,21 @@ +# Knowledge Base Index + +> 7 articles | 2530 words | Last compiled: 2026-03-15T14:30:00.000Z + +## Concepts + +- **[Attention Mechanism](concepts/attention-mechanism.md)** — Mechanism that lets models dynamically focus on relevant parts of the input `#deep-learning` `#nlp` `#attention` +- **[Backpropagation](concepts/backpropagation.md)** — Algorithm for computing gradients in neural networks via the chain rule `#optimization` `#neural-networks` `#training` +- **[Gradient Descent](concepts/gradient-descent.md)** — Iterative optimization algorithm that follows the negative gradient to minimize loss `#optimization` `#training` +- **[Markov Decision Process](concepts/markov-decision-process.md)** — Mathematical framework for modeling sequential decision-making under uncertainty `#reinforcement-learning` `#math` `#probability` + +## Topics + +- **[Reinforcement Learning](topics/reinforcement-learning.md)** — Learning paradigm where agents learn optimal behavior through trial-and-error interaction with an environment `#reinforcement-learning` `#agents` `#rewards` +- **[Transformer Architecture](topics/transformer-architecture.md)** — Neural network architecture based 
entirely on self-attention, replacing recurrence and convolutions `#deep-learning` `#nlp` `#architecture` + +## References + +- **[Vaswani et al.](references/vaswani-et-al.md)** — Authors of the 2017 'Attention Is All You Need' paper that introduced the transformer `#researchers` `#google-brain` + +## Outputs diff --git a/examples/ml-research/wiki/concepts/attention-mechanism.md b/examples/ml-research/wiki/concepts/attention-mechanism.md new file mode 100644 index 0000000..c543b83 --- /dev/null +++ b/examples/ml-research/wiki/concepts/attention-mechanism.md @@ -0,0 +1,44 @@ +--- +title: "Attention Mechanism" +slug: attention-mechanism +category: concept +tags: [deep-learning, nlp, attention] +sources: [src_att001] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Mechanism that lets models dynamically focus on relevant parts of the input" +--- + +# Attention Mechanism + +The attention mechanism allows neural networks to dynamically focus on the most relevant parts of the input when producing each element of the output. Rather than compressing an entire input sequence into a single fixed-length vector, attention computes a weighted combination of all input positions, with weights determined by relevance to the current output step. + +## Scaled Dot-Product Attention + +The core operation computes attention scores between queries (Q) and keys (K), then uses those scores to weight the values (V): + +``` +Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) V +``` + +The scaling factor `sqrt(d_k)` prevents dot products from growing too large in high-dimensional spaces, which would push softmax into regions with vanishingly small gradients. + +## Multi-Head Attention + +Instead of computing a single attention function, the [[transformer-architecture]] uses multiple "heads" that attend to different representation subspaces. 
Each head independently computes attention with its own learned projections, and the results are concatenated and projected back. + +This enables the model to simultaneously attend to information from different positions and different feature dimensions. + +## Self-Attention vs Cross-Attention + +- **Self-attention**: queries, keys, and values all come from the same sequence. Each position attends to every other position in the same input. +- **Cross-attention**: queries come from one sequence (e.g., decoder), while keys and values come from another (e.g., encoder output). + +## Training + +Attention weights are learned end-to-end through [[backpropagation]] — no explicit supervision is needed to tell the model what to attend to. The model discovers useful attention patterns from the task loss signal alone. + +## See Also + +- [[transformer-architecture]] +- [[backpropagation]] diff --git a/examples/ml-research/wiki/concepts/backpropagation.md b/examples/ml-research/wiki/concepts/backpropagation.md new file mode 100644 index 0000000..5073a1e --- /dev/null +++ b/examples/ml-research/wiki/concepts/backpropagation.md @@ -0,0 +1,42 @@ +--- +title: "Backpropagation" +slug: backpropagation +category: concept +tags: [optimization, neural-networks, training] +sources: [src_bp003] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Algorithm for computing gradients in neural networks via the chain rule" +--- + +# Backpropagation + +Backpropagation (back-propagation of errors) is the algorithm that makes training deep neural networks practical. It efficiently computes the gradient of a loss function with respect to every weight in the network by applying the chain rule of calculus, layer by layer, from output back to input. + +## How It Works + +1. **Forward pass**: Input flows through the network, each layer computing activations, producing a final output and a loss value. +2. 
**Backward pass**: Starting from the loss, compute the gradient at each layer by multiplying local gradients backward through the network. +3. **Weight update**: Use the computed gradients with [[gradient-descent]] (or a variant like Adam) to adjust weights in the direction that reduces the loss. + +## The Key Insight + +For a hidden unit j, the error signal δ_j is computed by propagating errors backward from the layers above: + +``` +δ_j = f'(x_j) · Σ_k δ_k · w_kj +``` + +This recursive formula means we can compute gradients for every weight in an arbitrarily deep network in a single backward pass — the same computational cost as the forward pass. + +## Historical Significance + +Rumelhart, Hinton, and Williams popularized backpropagation in their 1986 Nature paper. Before this work, there was no known efficient method to train multi-layer networks. Backpropagation showed that hidden layers could learn useful internal representations without explicit supervision, unlocking the power of deep learning. + +## Modern Usage + +Every modern deep learning framework (PyTorch, TensorFlow, JAX) implements automatic differentiation, which generalizes backpropagation to arbitrary computation graphs. The core idea remains identical. + +## See Also + +- [[gradient-descent]] diff --git a/examples/ml-research/wiki/concepts/gradient-descent.md b/examples/ml-research/wiki/concepts/gradient-descent.md new file mode 100644 index 0000000..8539644 --- /dev/null +++ b/examples/ml-research/wiki/concepts/gradient-descent.md @@ -0,0 +1,44 @@ +--- +title: "Gradient Descent" +slug: gradient-descent +category: concept +tags: [optimization, training] +sources: [src_bp003] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Iterative optimization algorithm that follows the negative gradient to minimize loss" +--- + +# Gradient Descent + +Gradient descent is the foundational optimization algorithm in machine learning. 
It iteratively adjusts parameters in the direction that most rapidly decreases the loss function — the negative gradient. + +## Update Rule + +``` +θ ← θ - α · ∇L(θ) +``` + +Where θ represents the model parameters, α is the learning rate, and ∇L(θ) is the gradient of the loss function. The gradient tells us which direction increases the loss fastest, so we step in the opposite direction. + +## Variants + +### Stochastic Gradient Descent (SGD) + +Instead of computing the gradient over the entire dataset, SGD estimates it from a single random sample (or mini-batch). This is noisier but dramatically faster per step, and the noise actually helps escape shallow local minima. + +### Adam + +Adaptive Moment Estimation combines momentum (exponential moving average of gradients) with adaptive per-parameter learning rates (based on the second moment of gradients). It's the default optimizer for most deep learning tasks due to its robustness. + +### Learning Rate Schedules + +The learning rate α is crucial — too large and training diverges, too small and it stalls. Common schedules include linear warmup, cosine decay, and step decay. Modern approaches like the 1-cycle policy vary the learning rate dynamically throughout training. + +## Relationship to Backpropagation + +Gradient descent specifies *what to do* with gradients (step downhill). [[Backpropagation]] specifies *how to compute* them efficiently. Together they form the training loop for neural networks. 
+ +## See Also + +- [[backpropagation]] diff --git a/examples/ml-research/wiki/concepts/markov-decision-process.md b/examples/ml-research/wiki/concepts/markov-decision-process.md new file mode 100644 index 0000000..a48d8c2 --- /dev/null +++ b/examples/ml-research/wiki/concepts/markov-decision-process.md @@ -0,0 +1,36 @@ +--- +title: "Markov Decision Process" +slug: markov-decision-process +category: concept +tags: [reinforcement-learning, math, probability] +sources: [src_rl002] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Mathematical framework for modeling sequential decision-making under uncertainty" +--- + +# Markov Decision Process + +A Markov Decision Process (MDP) is the formal mathematical framework underlying [[reinforcement-learning]]. It models an agent making a sequence of decisions in an environment where outcomes are partly random and partly under the agent's control. + +## Definition + +An MDP is defined by the tuple (S, A, P, R, γ): + +- **S** — a set of states the environment can be in +- **A** — a set of actions available to the agent +- **P(s'|s,a)** — transition function giving the probability of reaching state s' after taking action a in state s +- **R(s,a)** — reward function, the immediate signal received after taking action a in state s +- **γ** — discount factor (0 ≤ γ ≤ 1), controlling how much future rewards are worth relative to immediate rewards + +## The Markov Property + +The defining assumption: the future depends only on the current state, not on how the agent got there. Formally: P(s_{t+1} | s_t, a_t) = P(s_{t+1} | s_t, a_t, s_{t-1}, a_{t-1}, ...). This "memoryless" property makes the math tractable. + +## Value Functions + +The **state-value function** V_π(s) gives the expected cumulative discounted reward from state s under policy π. The **action-value function** Q_π(s,a) gives the expected return from taking action a in state s, then following π. 
The Bellman equations express these recursively, forming the basis for most RL algorithms. + +## See Also + +- [[reinforcement-learning]] diff --git a/examples/ml-research/wiki/references/vaswani-et-al.md b/examples/ml-research/wiki/references/vaswani-et-al.md new file mode 100644 index 0000000..30d53c6 --- /dev/null +++ b/examples/ml-research/wiki/references/vaswani-et-al.md @@ -0,0 +1,34 @@ +--- +title: "Vaswani et al." +slug: vaswani-et-al +category: reference +tags: [researchers, google-brain] +sources: [src_att001] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Authors of the 2017 'Attention Is All You Need' paper that introduced the transformer" +--- + +# Vaswani et al. + +Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, and Illia Polosukhin authored "Attention Is All You Need" (2017), the paper that introduced the [[transformer-architecture]]. + +## The Paper + +Published at NeurIPS 2017, the paper proposed replacing recurrent and convolutional layers entirely with [[attention-mechanism]] for sequence transduction. It achieved state-of-the-art results on English-German and English-French translation benchmarks while being significantly more parallelizable and faster to train. + +## Impact + +As of 2026, the paper has over 130,000 citations, making it one of the most cited papers in computer science history. The Transformer architecture it introduced is the foundation of virtually every modern large language model (GPT, BERT, PaLM, Claude, Llama, etc.) and has been adapted for vision, audio, proteins, and robotics. 
+ +## Notable Authors + +- **Ashish Vaswani**: Co-founded Essential AI +- **Noam Shazeer**: Co-founded Character.AI, returned to Google +- **Aidan Gomez**: Co-founded Cohere +- **Illia Polosukhin**: Co-founded NEAR Protocol + +## See Also + +- [[attention-mechanism]] +- [[transformer-architecture]] diff --git a/examples/ml-research/wiki/topics/reinforcement-learning.md b/examples/ml-research/wiki/topics/reinforcement-learning.md new file mode 100644 index 0000000..599a300 --- /dev/null +++ b/examples/ml-research/wiki/topics/reinforcement-learning.md @@ -0,0 +1,55 @@ +--- +title: "Reinforcement Learning" +slug: reinforcement-learning +category: topic +tags: [reinforcement-learning, agents, rewards] +sources: [src_rl002] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Learning paradigm where agents learn optimal behavior through trial-and-error interaction with an environment" +--- + +# Reinforcement Learning + +Reinforcement learning (RL) is a machine learning paradigm where an agent learns to make decisions by interacting with an environment. Unlike supervised learning (where correct answers are provided) or unsupervised learning (where no feedback exists), RL agents receive reward signals that indicate how good their actions were — but must discover the best strategy through exploration. + +## Core Loop + +1. Agent observes state s_t from the environment +2. Agent selects action a_t based on its policy +3. Environment transitions to state s_{t+1} and emits reward r_t +4. Agent updates its policy to maximize cumulative future reward + +This interaction is formalized as a [[markov-decision-process]]. + +## Exploration vs Exploitation + +The fundamental tension in RL: should the agent exploit what it already knows works, or explore new actions that might yield higher rewards? Common strategies include ε-greedy (random action with probability ε), UCB (upper confidence bounds), and entropy regularization. 
+
+## Key Methods
+
+### Value-Based Methods
+
+Learn a value function (Q-values) that estimates expected return for each state-action pair. The agent acts greedily with respect to these values.
+
+- **Q-Learning**: off-policy, learns optimal Q directly
+- **DQN**: Q-learning with neural network function approximation, experience replay, and target networks
+
+### Policy-Based Methods
+
+Directly optimize the policy function π(a|s) using gradient ascent on expected return.
+
+- **REINFORCE**: Monte Carlo policy gradient
+- **PPO**: Proximal Policy Optimization, clips the policy probability ratio in its surrogate objective to prevent destructive updates — the most widely used policy gradient method
+
+### Actor-Critic
+
+Combines both: an actor (policy) decides actions, a critic (value function) evaluates them. The critic reduces variance of the policy gradient estimate, stabilizing training.
+
+## Modern RL
+
+RLHF (Reinforcement Learning from Human Feedback) applies RL to align language models with human preferences. A reward model trained on human comparisons replaces the environment reward, and PPO fine-tunes the LLM to maximize it. 
+ +## See Also + +- [[markov-decision-process]] diff --git a/examples/ml-research/wiki/topics/transformer-architecture.md b/examples/ml-research/wiki/topics/transformer-architecture.md new file mode 100644 index 0000000..ac494ec --- /dev/null +++ b/examples/ml-research/wiki/topics/transformer-architecture.md @@ -0,0 +1,48 @@ +--- +title: "Transformer Architecture" +slug: transformer-architecture +category: topic +tags: [deep-learning, nlp, architecture] +sources: [src_att001] +created: "2026-03-15T14:30:00.000Z" +updated: "2026-03-15T14:30:00.000Z" +summary: "Neural network architecture based entirely on self-attention, replacing recurrence and convolutions" +--- + +# Transformer Architecture + +The Transformer, introduced by [[vaswani-et-al]] in 2017, is a neural network architecture that processes sequences using only [[attention-mechanism]] — no recurrence, no convolutions. It has become the dominant architecture in NLP and is increasingly adopted in vision, audio, and multimodal tasks. + +## Architecture + +### Encoder-Decoder Structure + +The original Transformer uses an encoder-decoder layout: + +- **Encoder**: 6 identical layers, each with multi-head self-attention followed by a position-wise feed-forward network. Residual connections and layer normalization wrap each sub-layer. +- **Decoder**: 6 identical layers with an additional cross-attention sub-layer that attends to encoder output. The self-attention is masked to prevent attending to future positions (causal masking). + +### Positional Encoding + +Since self-attention is permutation-invariant (order-agnostic), the model needs explicit position information. The original Transformer uses sinusoidal positional encodings added to the input embeddings. Modern variants use learned position embeddings or relative position schemes (RoPE, ALiBi). 
+ +## Variants + +- **Encoder-only** (BERT): bidirectional self-attention, used for understanding tasks +- **Decoder-only** (GPT): causal self-attention, used for generation — now the dominant paradigm for LLMs +- **Encoder-decoder** (T5, BART): original layout, used for translation and summarization + +## Why It Works + +1. **Parallelism**: unlike RNNs, all positions are processed simultaneously +2. **Long-range dependencies**: every position can directly attend to every other position, regardless of distance +3. **Scalability**: performance scales reliably with model size, data, and compute (scaling laws) + +## Impact + +The Transformer enabled the modern era of large language models. GPT, BERT, T5, PaLM, Claude, and virtually every major LLM is based on it. The same architecture has been adapted for vision (ViT), protein structure (AlphaFold), and [[reinforcement-learning]] (Decision Transformer). + +## See Also + +- [[attention-mechanism]] +- [[reinforcement-learning]] diff --git a/examples/reading-list/.kb/config.toml b/examples/reading-list/.kb/config.toml new file mode 100644 index 0000000..0596805 --- /dev/null +++ b/examples/reading-list/.kb/config.toml @@ -0,0 +1,21 @@ +[provider] +default = "anthropic" +model = "claude-sonnet-4-20250514" +fast_model = "claude-haiku-4-5-20251001" + +[compile] +auto_index = true +auto_graph = true +max_sources_per_pass = 10 +categories = ["concepts", "topics", "references", "outputs"] +enrich_cross_refs = true + +[search] +engine = "builtin" +max_results = 20 + +[skills] +[skills.hooks] +post-compile = ["summarize"] +post-ingest = [] +post-lint = [] diff --git a/examples/reading-list/.kb/manifest.json b/examples/reading-list/.kb/manifest.json new file mode 100644 index 0000000..795e096 --- /dev/null +++ b/examples/reading-list/.kb/manifest.json @@ -0,0 +1,191 @@ +{ + "version": "1", + "vault": { + "name": "reading-list", + "created": "2026-01-05T10:00:00.000Z", + "lastCompiled": "2026-04-01T16:00:00.000Z", + "provider": 
"anthropic", + "model": "claude-sonnet-4-20250514" + }, + "sources": { + "src_tfas001": { + "hash": "c1d2e3f4a5b60001", + "ingestedAt": "2026-01-05T10:30:00.000Z", + "lastCompiled": "2026-04-01T16:00:00.000Z", + "sourceType": "file", + "producedArticles": ["cognitive-biases", "system-1-and-system-2", "daniel-kahneman"], + "metadata": { + "title": "Thinking, Fast and Slow — Book Notes", + "author": "Daniel Kahneman", + "date": "2011-10-25", + "wordCount": 8200 + } + }, + "src_doet002": { + "hash": "c1d2e3f4a5b60002", + "ingestedAt": "2026-01-20T14:00:00.000Z", + "lastCompiled": "2026-04-01T16:00:00.000Z", + "sourceType": "file", + "producedArticles": ["affordances", "design-principles", "don-norman"], + "metadata": { + "title": "The Design of Everyday Things — Book Notes", + "author": "Don Norman", + "date": "1988-01-01", + "wordCount": 6500 + } + }, + "src_mm003": { + "hash": "c1d2e3f4a5b60003", + "ingestedAt": "2026-02-10T09:00:00.000Z", + "lastCompiled": "2026-04-01T16:00:00.000Z", + "sourceType": "web", + "originalUrl": "https://fs.blog/mental-models/", + "producedArticles": ["mental-models", "decision-making"], + "metadata": { + "title": "Mental Models: The Best Way to Make Intelligent Decisions", + "wordCount": 5800 + } + }, + "src_ato004": { + "hash": "c1d2e3f4a5b60004", + "ingestedAt": "2026-03-15T11:00:00.000Z", + "lastCompiled": "2026-04-01T16:00:00.000Z", + "sourceType": "file", + "producedArticles": ["habit-formation", "atomic-habits-framework"], + "metadata": { + "title": "Atomic Habits — Book Notes", + "author": "James Clear", + "date": "2018-10-16", + "wordCount": 5400 + } + } + }, + "articles": { + "cognitive-biases": { + "hash": "wiki_hash_r001", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_tfas001"], + "backlinks": ["system-1-and-system-2", "decision-making"], + "forwardLinks": ["system-1-and-system-2", "mental-models", "decision-making"], + "tags": ["psychology", "thinking", 
"biases"], + "summary": "Systematic patterns of deviation from rational judgment, rooted in System 1 heuristics", + "wordCount": 420, + "category": "concept" + }, + "system-1-and-system-2": { + "hash": "wiki_hash_r002", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_tfas001"], + "backlinks": ["cognitive-biases"], + "forwardLinks": ["cognitive-biases", "decision-making"], + "tags": ["psychology", "thinking", "kahneman"], + "summary": "Kahneman's dual-process theory: fast intuitive System 1 vs slow deliberate System 2", + "wordCount": 390, + "category": "concept" + }, + "mental-models": { + "hash": "wiki_hash_r003", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_mm003"], + "backlinks": ["cognitive-biases", "decision-making", "affordances"], + "forwardLinks": ["decision-making", "cognitive-biases"], + "tags": ["thinking", "frameworks", "decision-making"], + "summary": "Simplified representations of how the world works, used as thinking tools for better decisions", + "wordCount": 360, + "category": "concept" + }, + "affordances": { + "hash": "wiki_hash_r004", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_doet002"], + "backlinks": ["design-principles"], + "forwardLinks": ["design-principles", "mental-models"], + "tags": ["design", "ux", "psychology"], + "summary": "Properties of an object that suggest how it can be used, bridging perception and action", + "wordCount": 310, + "category": "concept" + }, + "habit-formation": { + "hash": "wiki_hash_r005", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_ato004"], + "backlinks": ["atomic-habits-framework"], + "forwardLinks": ["atomic-habits-framework", "system-1-and-system-2"], + "tags": ["habits", "behavior", "psychology"], + "summary": "The neurological loop of cue, 
craving, response, and reward that creates automatic behaviors",
+      "wordCount": 350,
+      "category": "concept"
+    },
+    "decision-making": {
+      "hash": "wiki_hash_r006",
+      "createdAt": "2026-04-01T16:00:00.000Z",
+      "lastUpdated": "2026-04-01T16:00:00.000Z",
+      "derivedFrom": ["src_mm003"],
+      "backlinks": ["cognitive-biases", "system-1-and-system-2", "mental-models"],
+      "forwardLinks": ["mental-models", "cognitive-biases", "system-1-and-system-2"],
+      "tags": ["thinking", "decision-making", "rationality"],
+      "summary": "The process of choosing between options, improved by mental models and awareness of cognitive biases",
+      "wordCount": 440,
+      "category": "topic"
+    },
+    "design-principles": {
+      "hash": "wiki_hash_r007",
+      "createdAt": "2026-04-01T16:00:00.000Z",
+      "lastUpdated": "2026-04-01T16:00:00.000Z",
+      "derivedFrom": ["src_doet002"],
+      "backlinks": ["affordances", "don-norman"],
+      "forwardLinks": ["affordances"],
+      "tags": ["design", "ux", "usability"],
+      "summary": "Core principles from The Design of Everyday Things for creating intuitive, user-friendly products",
+      "wordCount": 380,
+      "category": "topic"
+    },
+    "atomic-habits-framework": {
+      "hash": "wiki_hash_r008",
+      "createdAt": "2026-04-01T16:00:00.000Z",
+      "lastUpdated": "2026-04-01T16:00:00.000Z",
+      "derivedFrom": ["src_ato004"],
+      "backlinks": ["habit-formation"],
+      "forwardLinks": ["habit-formation"],
+      "tags": ["habits", "behavior", "productivity"],
+      "summary": "James Clear's four laws for building good habits and breaking bad ones",
+      "wordCount": 370,
+      "category": "topic"
+    },
+    "daniel-kahneman": {
+      "hash": "wiki_hash_r009",
+      "createdAt": "2026-04-01T16:00:00.000Z",
+      "lastUpdated": "2026-04-01T16:00:00.000Z",
+      "derivedFrom": ["src_tfas001"],
+      "backlinks": [],
+      "forwardLinks": ["system-1-and-system-2", "cognitive-biases"],
+      "tags": ["people", "psychology", "nobel-prize"],
+      "summary": "Nobel Prize-winning psychologist who pioneered research on cognitive biases and dual-process theory",
+      "wordCount": 220,
+      
"category": "reference" + }, + "don-norman": { + "hash": "wiki_hash_r010", + "createdAt": "2026-04-01T16:00:00.000Z", + "lastUpdated": "2026-04-01T16:00:00.000Z", + "derivedFrom": ["src_doet002"], + "backlinks": [], + "forwardLinks": ["affordances", "design-principles"], + "tags": ["people", "design", "ux"], + "summary": "Cognitive scientist and usability engineer who coined the term 'user experience'", + "wordCount": 190, + "category": "reference" + } + }, + "stats": { + "totalSources": 4, + "totalArticles": 10, + "totalWords": 3430, + "lastLintAt": "2026-04-01T16:30:00.000Z" + } +} diff --git a/examples/reading-list/README.md b/examples/reading-list/README.md new file mode 100644 index 0000000..19aead8 --- /dev/null +++ b/examples/reading-list/README.md @@ -0,0 +1,37 @@ +# Reading List Vault + +Personal reading notes synthesized into an interconnected knowledge graph. Demonstrates kib as a tool for connecting ideas across books. + +## Sources + +| Source | Type | Articles Produced | +|--------|------|-------------------| +| Thinking, Fast and Slow (Kahneman) | file | cognitive-biases, system-1-and-system-2, daniel-kahneman | +| The Design of Everyday Things (Norman) | file | affordances, design-principles, don-norman | +| Mental Models (fs.blog) | web | mental-models, decision-making | +| Atomic Habits (Clear) | file | habit-formation, atomic-habits-framework | + +## Knowledge Graph + +``` +daniel-kahneman + │ +system-1-and-system-2 ─── cognitive-biases + │ + mental-models ─── decision-making + +don-norman + │ +affordances ─── design-principles + +habit-formation ─── atomic-habits-framework +``` + +## Try It + +```bash +kib search "cognitive" +kib query "How do System 1 and System 2 affect decision making?" 
+kib skill run connections +kib skill run find-contradictions +``` diff --git a/examples/reading-list/raw/articles/atomic-habits.md b/examples/reading-list/raw/articles/atomic-habits.md new file mode 100644 index 0000000..0b6b8c4 --- /dev/null +++ b/examples/reading-list/raw/articles/atomic-habits.md @@ -0,0 +1,59 @@ +--- +title: "Atomic Habits — Book Notes" +source_type: file +author: "James Clear" +date: "2018-10-16" +ingested: "2026-03-15" +word_count: 5400 +--- + +# Atomic Habits — Book Notes + +James Clear's practical framework for building good habits and breaking bad ones. The core insight: tiny changes compound over time. Getting 1% better every day makes you 37x better over a year. + +## The Habit Loop + +Every habit follows a four-step loop: + +1. **Cue** — triggers the brain to initiate a behavior +2. **Craving** — the motivational force behind every habit +3. **Response** — the actual habit you perform +4. **Reward** — the end goal that satisfies the craving and teaches the brain + +## The Four Laws of Behavior Change + +### 1st Law: Make It Obvious (Cue) +- **Implementation intention**: "I will [BEHAVIOR] at [TIME] in [LOCATION]" +- **Habit stacking**: "After I [CURRENT HABIT], I will [NEW HABIT]" +- **Environment design**: make cues for good habits visible, invisible for bad ones +- The most powerful trigger isn't motivation — it's environment + +### 2nd Law: Make It Attractive (Craving) +- **Temptation bundling**: pair a habit you need with one you want +- **Join a culture where your desired behavior is normal** +- **Reframe**: "I get to" instead of "I have to" +- Dopamine is released in anticipation of the reward, not the reward itself + +### 3rd Law: Make It Easy (Response) +- **Reduce friction**: decrease the number of steps between you and good habits +- **Two-minute rule**: downscale habits to two minutes. 
"Read 30 pages" → "Read one page" +- **Automate**: use technology to lock in behavior (auto-invest, app blockers) +- Master the habit of showing up before optimizing + +### 4th Law: Make It Satisfying (Reward) +- **Immediate reward**: add an immediate pleasure to habits with delayed benefits +- **Habit tracking**: "Don't break the chain" — visual measure of progress +- **Never miss twice**: missing once is an accident, twice is the start of a new habit +- The first three laws increase the odds of doing it this time; the fourth increases the odds of repeating it + +## Identity-Based Habits + +Don't focus on goals ("I want to lose weight"). Focus on identity ("I am someone who doesn't miss workouts"). Every action is a vote for the type of person you want to become. The real reason habits matter isn't the results — it's that they change your beliefs about yourself. + +## The Plateau of Latent Potential + +Progress isn't linear. You put in work and see no results for weeks or months — then breakthrough happens all at once. Like an ice cube that doesn't melt at 25°, 26°, 27°... but suddenly melts at 32°. The work wasn't wasted. It was stored. + +## Key Takeaway + +You don't rise to the level of your goals. You fall to the level of your systems. Build the system — the habits will follow. diff --git a/examples/reading-list/raw/articles/design-of-everyday-things.md b/examples/reading-list/raw/articles/design-of-everyday-things.md new file mode 100644 index 0000000..c50d95b --- /dev/null +++ b/examples/reading-list/raw/articles/design-of-everyday-things.md @@ -0,0 +1,57 @@ +--- +title: "The Design of Everyday Things — Book Notes" +source_type: file +author: "Don Norman" +date: "1988-01-01" +ingested: "2026-01-20" +word_count: 6500 +--- + +# The Design of Everyday Things — Book Notes + +Don Norman's foundational text on human-centered design. The core argument: when people have trouble using things, the problem is in the design, not the user. 
+ +## Chapter 1: The Psychopathology of Everyday Things + +### Affordances +An affordance is a relationship between an object's properties and an agent's capabilities. A chair affords sitting. A flat plate on a door affords pushing. A handle affords pulling. Good design makes affordances visible and obvious. + +### Signifiers +Signifiers communicate where the action should take place. A "Push" sign on a door is a signifier. Affordances determine what actions are possible; signifiers communicate where and how to act. Designers should focus on signifiers. + +### Mapping +The relationship between controls and their effects. Natural mapping exploits physical analogies and cultural standards. Moving a steering wheel left turns the car left — that's natural mapping. Light switches in a row controlling lights in a row should match their spatial layout. + +### Feedback +Information about the results of an action. Without feedback, people don't know if their action worked. Feedback must be immediate, informative, and not overwhelming. Too much feedback (beeping everything) is as bad as too little. + +### Conceptual Models +A simplified explanation of how something works. The user's mental model of a system determines how they use it. Good design communicates the correct conceptual model through affordances, signifiers, mapping, and feedback. + +## Chapter 2: The Psychology of Everyday Actions + +### The Gulf of Execution +The gap between what a user wants to do and what the system allows. How do I achieve my goal? Which controls do I use? Good design bridges this gulf with clear affordances and signifiers. + +### The Gulf of Evaluation +The gap between the system's state and the user's understanding of it. Did my action work? What state is the system in now? Good design bridges this gulf with clear feedback and a visible system state. + +### The Seven Stages of Action +1. Form the goal +2. Plan the action +3. Specify the action sequence +4. Perform the action +5. 
Perceive the state of the world +6. Interpret the perception +7. Compare with the goal + +## Chapter 5: Human Error? No, Bad Design + +Most "human errors" are really design errors. Slips (right intention, wrong action) and mistakes (wrong intention) have different causes and require different solutions. Design for error: make it hard to do irreversible things, easy to discover and correct errors. + +### Swiss Cheese Model +Accidents happen when holes in multiple layers of defense align. No single failure causes disaster — it's always a chain. Good design adds redundant layers with independent failure modes. + +## Key Takeaway + +Design for people as they are, not as you wish they were. Blame the design, not the user. Make important information visible, actions obvious, and errors recoverable. diff --git a/examples/reading-list/raw/articles/mental-models.md b/examples/reading-list/raw/articles/mental-models.md new file mode 100644 index 0000000..818aa0b --- /dev/null +++ b/examples/reading-list/raw/articles/mental-models.md @@ -0,0 +1,45 @@ +--- +title: "Mental Models: The Best Way to Make Intelligent Decisions" +source_type: web +url: "https://fs.blog/mental-models/" +ingested: "2026-02-10" +word_count: 5800 +--- + +# Mental Models: The Best Way to Make Intelligent Decisions + +A mental model is a simplified representation of how something works. You can't keep all the details of the world in your brain, so you use models to simplify the complex into understandable chunks. The quality of your thinking depends on the models in your head. + +## Why Mental Models Matter + +Charlie Munger argues that you need a "latticework of mental models" from multiple disciplines. If you only have a hammer, everything looks like a nail. But if you have a toolkit of models from psychology, physics, economics, biology, and engineering, you can see problems from multiple angles. 
+ +## Core Mental Models + +### First Principles Thinking +Break down complex problems into their fundamental truths and reason up from there. Don't reason by analogy ("this is how it's always been done"). Elon Musk used first principles to question why batteries cost $600/kWh — the raw materials cost $80/kWh, so the cost is in the process, not the physics. + +### Inversion +Instead of asking "How do I achieve X?", ask "What would guarantee failure?" Then avoid those things. Charlie Munger: "All I want to know is where I'm going to die, so I'll never go there." Inversion helps you see obstacles that forward thinking misses. + +### Second-Order Thinking +Consider the consequences of the consequences. First-order thinking asks "What happens next?" Second-order thinking asks "And then what?" Resistance to antibiotics is a second-order consequence of their widespread use. Price controls lead to shortages (second-order) even though they lower prices (first-order). + +### The Map Is Not the Territory +All models are simplifications. The danger is confusing the model with reality. A financial model is not the economy. An org chart is not the culture. Always remember what the model leaves out. + +### Circle of Competence +Know what you know and what you don't. Stay within your circle when making decisions. Expand it deliberately over time. Warren Buffett only invests in businesses he understands — he doesn't try to predict tech trends. + +### Probabilistic Thinking +The world is uncertain. Think in probabilities, not certainties. Base rates matter more than narratives. Bayesian updating: adjust your beliefs as new evidence arrives. A 70% chance is not a certainty — calibrate your confidence. + +### Occam's Razor +The simplest explanation that fits the evidence is usually correct. Don't add complexity unless the simple explanation fails. If your horse has hoof prints and eats hay, it's probably a horse — not a zebra in disguise. 
+ +### Hanlon's Razor +Never attribute to malice that which can be adequately explained by neglect, ignorance, or incompetence. Most screw-ups aren't conspiracies — they're honest mistakes. This model prevents paranoia and preserves relationships. + +## Building Your Latticework + +Read widely across disciplines. When you encounter a useful model, write it down and look for examples. Practice applying models to real situations. The goal isn't to memorize a list — it's to internalize a toolkit that shapes how you see the world. diff --git a/examples/reading-list/raw/articles/thinking-fast-and-slow.md b/examples/reading-list/raw/articles/thinking-fast-and-slow.md new file mode 100644 index 0000000..135ffb0 --- /dev/null +++ b/examples/reading-list/raw/articles/thinking-fast-and-slow.md @@ -0,0 +1,53 @@ +--- +title: "Thinking, Fast and Slow — Book Notes" +source_type: file +author: "Daniel Kahneman" +date: "2011-10-25" +ingested: "2026-01-05" +word_count: 8200 +--- + +# Thinking, Fast and Slow — Book Notes + +Daniel Kahneman's masterwork on how we think. The central thesis: our minds operate using two systems with very different characteristics. + +## Part I: Two Systems + +### System 1 — Fast Thinking +Operates automatically and quickly, with little or no effort and no sense of voluntary control. Examples: detecting hostility in a voice, reading words on a billboard, answering "2 + 2 = ?", driving on an empty road. + +System 1 generates impressions, feelings, and inclinations. When endorsed by System 2, these become beliefs, attitudes, and intentions. System 1 is always running — you can't turn it off. + +### System 2 — Slow Thinking +Allocates attention to effortful mental activities. Associated with the subjective experience of agency, choice, and concentration. Examples: filling out a tax form, checking the validity of a complex argument, counting occurrences of a letter on a page. + +System 2 has limited capacity. 
When engaged in one demanding task, its ability to handle others is degraded (ego depletion). + +## Part II: Heuristics and Biases + +### Anchoring +Exposure to a number influences subsequent numerical estimates. Even arbitrary anchors (spinning a wheel) affect judgment. Mechanism: System 1 adjusts from the anchor, but adjustment is typically insufficient. + +### Availability Heuristic +We judge the probability of events by how easily examples come to mind. Dramatic events (plane crashes) are overestimated; mundane risks (diabetes) are underestimated. Media coverage distorts perceived risk. + +### Representativeness +We judge probability by similarity to a prototype. "Linda is a bank teller who is active in the feminist movement" seems more likely than "Linda is a bank teller" — the conjunction fallacy. We neglect base rates in favor of stereotypical descriptions. + +### Loss Aversion +Losses loom larger than gains. Losing $100 feels roughly twice as bad as gaining $100 feels good. This explains risk aversion for gains but risk-seeking for losses (people gamble to avoid certain losses). + +## Part III: Overconfidence + +We are systematically overconfident in our predictions. The planning fallacy: projects almost always take longer and cost more than predicted. Experts are often no better than algorithms — and less aware of their limitations. + +### WYSIATI (What You See Is All There Is) +System 1 constructs the best possible story from available information and doesn't account for what it doesn't know. This produces overconfidence, framing effects, and base-rate neglect. + +## Part IV: Two Selves + +The experiencing self lives in the present. The remembering self constructs stories after the fact. The peak-end rule: memories of experiences are dominated by the most intense moment and the ending, not by duration. 
+ +## Key Takeaway + +We cannot eliminate cognitive biases, but we can learn to recognize situations where they're likely to occur and slow down — engaging System 2 before making important decisions. diff --git a/examples/reading-list/wiki/GRAPH.md b/examples/reading-list/wiki/GRAPH.md new file mode 100644 index 0000000..79d124d --- /dev/null +++ b/examples/reading-list/wiki/GRAPH.md @@ -0,0 +1,12 @@ +# Knowledge Graph + +affordances -> design-principles, mental-models +atomic-habits-framework -> habit-formation +cognitive-biases -> system-1-and-system-2, mental-models, decision-making +daniel-kahneman -> system-1-and-system-2, cognitive-biases +decision-making -> mental-models, cognitive-biases, system-1-and-system-2 +design-principles -> affordances +don-norman -> affordances, design-principles +habit-formation -> atomic-habits-framework, system-1-and-system-2 +mental-models -> decision-making, cognitive-biases +system-1-and-system-2 -> cognitive-biases, decision-making diff --git a/examples/reading-list/wiki/INDEX.md b/examples/reading-list/wiki/INDEX.md new file mode 100644 index 0000000..f820ddf --- /dev/null +++ b/examples/reading-list/wiki/INDEX.md @@ -0,0 +1,24 @@ +# Knowledge Base Index + +> 10 articles | 3430 words | Last compiled: 2026-04-01T16:00:00.000Z + +## Concepts + +- **[Affordances](concepts/affordances.md)** — Properties of an object that suggest how it can be used, bridging perception and action `#design` `#ux` `#psychology` +- **[Cognitive Biases](concepts/cognitive-biases.md)** — Systematic patterns of deviation from rational judgment, rooted in System 1 heuristics `#psychology` `#thinking` `#biases` +- **[Habit Formation](concepts/habit-formation.md)** — The neurological loop of cue, craving, response, and reward that creates automatic behaviors `#habits` `#behavior` `#psychology` +- **[Mental Models](concepts/mental-models.md)** — Simplified representations of how the world works, used as thinking tools for better decisions `#thinking` 
`#frameworks` `#decision-making` +- **[System 1 and System 2](concepts/system-1-and-system-2.md)** — Kahneman's dual-process theory: fast intuitive System 1 vs slow deliberate System 2 `#psychology` `#thinking` `#kahneman` + +## Topics + +- **[Atomic Habits Framework](topics/atomic-habits-framework.md)** — James Clear's four laws for building good habits and breaking bad ones `#habits` `#behavior` `#productivity` +- **[Decision Making](topics/decision-making.md)** — The process of choosing between options, improved by mental models and awareness of cognitive biases `#thinking` `#decision-making` `#rationality` +- **[Design Principles](topics/design-principles.md)** — Core principles from The Design of Everyday Things for creating intuitive, user-friendly products `#design` `#ux` `#usability` + +## References + +- **[Daniel Kahneman](references/daniel-kahneman.md)** — Nobel Prize-winning psychologist who pioneered research on cognitive biases and dual-process theory `#people` `#psychology` `#nobel-prize` +- **[Don Norman](references/don-norman.md)** — Cognitive scientist and usability engineer who coined the term 'user experience' `#people` `#design` `#ux` + +## Outputs diff --git a/examples/reading-list/wiki/concepts/affordances.md b/examples/reading-list/wiki/concepts/affordances.md new file mode 100644 index 0000000..d412dd2 --- /dev/null +++ b/examples/reading-list/wiki/concepts/affordances.md @@ -0,0 +1,43 @@ +--- +title: "Affordances" +slug: affordances +category: concept +tags: [design, ux, psychology] +sources: [src_doet002] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Properties of an object that suggest how it can be used, bridging perception and action" +--- + +# Affordances + +An affordance is a relationship between the properties of an object and the capabilities of an agent. A chair affords sitting (to a human). A flat plate on a door affords pushing. A handle affords pulling. 
[[don-norman]] popularized the concept in design, arguing that good design makes affordances discoverable. + +## Affordances vs Signifiers + +Norman later clarified an important distinction: + +- **Affordance**: what actions are actually possible (a button can be pressed) +- **Signifier**: what communicates where and how to act (the label on the button) + +A glass door affords both pushing and pulling — the affordance itself doesn't tell you which. You need a signifier (a handle shape, a push plate, a sign) to communicate the correct action. Designers should focus on signifiers. + +## Types of Affordances + +- **Visible**: the action possibility is obvious (a handle you can grab) +- **Hidden**: the action is possible but not obvious (a hidden button behind a panel) +- **False**: the perceived action is not actually possible (a placebo button at a crosswalk) +- **Negative**: actively preventing actions (a barrier that prevents entry) + +## In Digital Design + +Physical affordances don't translate directly to screens. A button on screen doesn't physically "afford" pressing — it uses visual signifiers (shadow, border, color) to suggest clickability. Flat design's rejection of these signifiers often creates usability problems. + +## Connection to [[mental-models]] + +Affordances shape users' mental models of how things work. When affordances match expectations, interaction is effortless. When they don't, users form incorrect [[mental-models]] and make errors — which are really [[design-principles]] failures. 
+ +## See Also + +- [[design-principles]] +- [[mental-models]] diff --git a/examples/reading-list/wiki/concepts/cognitive-biases.md b/examples/reading-list/wiki/concepts/cognitive-biases.md new file mode 100644 index 0000000..9637f9c --- /dev/null +++ b/examples/reading-list/wiki/concepts/cognitive-biases.md @@ -0,0 +1,51 @@ +--- +title: "Cognitive Biases" +slug: cognitive-biases +category: concept +tags: [psychology, thinking, biases] +sources: [src_tfas001] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Systematic patterns of deviation from rational judgment, rooted in System 1 heuristics" +--- + +# Cognitive Biases + +Cognitive biases are systematic errors in thinking that affect the decisions and judgments we make. They arise from [[system-1-and-system-2|System 1]]'s reliance on heuristics — mental shortcuts that are usually efficient but sometimes lead us astray. + +## Key Biases + +### Anchoring + +The first piece of information we encounter disproportionately influences subsequent judgments. If asked "Is the population of Turkey greater or less than 5 million?" people estimate lower than if anchored at 65 million. Even arbitrary numbers (spinning a wheel) create anchoring effects. + +### Availability Heuristic + +We estimate probability by how easily examples come to mind. Vivid, recent, or emotionally charged events are overweighted. This is why people overestimate the risk of plane crashes (dramatic, heavily covered) and underestimate the risk of heart disease (common, unremarkable). + +### Loss Aversion + +Losses hurt roughly twice as much as equivalent gains feel good. This explains why people hold losing investments too long, why sellers overvalue what they own (endowment effect), and why we prefer avoiding losses to acquiring gains. + +### Confirmation Bias + +We seek, interpret, and remember information that confirms what we already believe. We unconsciously avoid evidence that contradicts our views. 
This is perhaps the most pervasive and dangerous bias in everyday reasoning. + +### Sunk Cost Fallacy + +Continuing an endeavor because of previously invested resources (time, money, effort) rather than future value. "We've already spent $2M on this project" should be irrelevant — only future costs and benefits matter. + +## Debiasing + +You can't eliminate biases, but you can mitigate them: + +- Use [[mental-models]] as structured thinking tools +- Seek disconfirming evidence deliberately +- Use checklists and pre-mortems for important [[decision-making]] +- Slow down: engage System 2 when the stakes are high + +## See Also + +- [[system-1-and-system-2]] +- [[mental-models]] +- [[decision-making]] diff --git a/examples/reading-list/wiki/concepts/habit-formation.md b/examples/reading-list/wiki/concepts/habit-formation.md new file mode 100644 index 0000000..e19addc --- /dev/null +++ b/examples/reading-list/wiki/concepts/habit-formation.md @@ -0,0 +1,40 @@ +--- +title: "Habit Formation" +slug: habit-formation +category: concept +tags: [habits, behavior, psychology] +sources: [src_ato004] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "The neurological loop of cue, craving, response, and reward that creates automatic behaviors" +--- + +# Habit Formation + +Habits are behaviors that have become automatic through repetition. They follow a four-step neurological loop: **cue → craving → response → reward**. Once established, habits run on autopilot — transferring from deliberate [[system-1-and-system-2|System 2]] processing to automatic System 1. + +## The Habit Loop + +1. **Cue**: a trigger that initiates the behavior (time, location, emotional state, other people, preceding action) +2. **Craving**: the motivational force — not the habit itself, but the change in state it delivers (the smoker craves nicotine relief, not the cigarette) +3. **Response**: the actual behavior — which can be a thought or an action +4. 
**Reward**: the end goal that satisfies the craving and teaches the brain to repeat the loop + +## How Habits Form + +Repetition strengthens the neural pathways associated with a behavior. The brain gradually shifts processing from the prefrontal cortex (deliberate, effortful) to the basal ganglia (automatic, effortless). This is why habits are hard to break — the neural pathway remains even when the behavior stops. + +The [[atomic-habits-framework]] provides a practical system for leveraging each stage of the loop to build or break habits deliberately. + +## Environment > Motivation + +The most reliable way to change behavior is to change the environment. Cues trigger habits, and cues live in your environment. Want to eat healthier? Put fruit on the counter and hide the cookies. Want to read more? Put a book on your pillow. Motivation fluctuates; environment is constant. + +## Habit Stacking + +Link a new habit to an existing one: "After I pour my morning coffee, I will write in my journal for two minutes." The existing habit becomes the cue for the new one, piggybacking on an established neural pathway. + +## See Also + +- [[atomic-habits-framework]] +- [[system-1-and-system-2]] diff --git a/examples/reading-list/wiki/concepts/mental-models.md b/examples/reading-list/wiki/concepts/mental-models.md new file mode 100644 index 0000000..197b162 --- /dev/null +++ b/examples/reading-list/wiki/concepts/mental-models.md @@ -0,0 +1,46 @@ +--- +title: "Mental Models" +slug: mental-models +category: concept +tags: [thinking, frameworks, decision-making] +sources: [src_mm003] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Simplified representations of how the world works, used as thinking tools for better decisions" +--- + +# Mental Models + +A mental model is a compressed representation of how some aspect of the world works. 
We use mental models constantly — to predict what will happen, to make sense of complex situations, and to guide [[decision-making]]. + +## Why They Matter + +Charlie Munger's "latticework" insight: if you only know one discipline, you force every problem through that lens. A toolkit of models from multiple fields (psychology, physics, economics, biology) lets you triangulate reality from different angles. + +"To the man with only a hammer, every problem looks like a nail." + +## Essential Models + +### First Principles Thinking +Decompose problems to their fundamental truths, then rebuild from there. Resist reasoning by analogy ("how has this been done before?"). Question assumptions at every level. + +### Inversion +Instead of thinking forward, think backward. What would guarantee failure? Avoid those things. What must be true for this to work? Verify each assumption. Inversion reveals hidden obstacles. + +### Second-Order Thinking +Think past the immediate consequence to the consequences of consequences. First-order: "rent control lowers prices." Second-order: "rent control lowers prices, which reduces housing supply, which increases scarcity." Most people stop at first-order effects. + +### Circle of Competence +Know what you know. Know what you don't know. Stay inside your circle for high-stakes decisions. Deliberately expand it over time through study and experience. + +### The Map Is Not the Territory +All models are simplifications. The danger is forgetting what the model omits. A financial model is not the economy. An org chart is not the culture. + +## Relationship to Biases + +Mental models are a corrective to [[cognitive-biases]]. Where biases are automatic (System 1), applying mental models is deliberate (System 2). A well-stocked latticework helps you notice when intuition might be wrong. 
+ +## See Also + +- [[decision-making]] +- [[cognitive-biases]] diff --git a/examples/reading-list/wiki/concepts/system-1-and-system-2.md b/examples/reading-list/wiki/concepts/system-1-and-system-2.md new file mode 100644 index 0000000..d578c8c --- /dev/null +++ b/examples/reading-list/wiki/concepts/system-1-and-system-2.md @@ -0,0 +1,51 @@ +--- +title: "System 1 and System 2" +slug: system-1-and-system-2 +category: concept +tags: [psychology, thinking, kahneman] +sources: [src_tfas001] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Kahneman's dual-process theory: fast intuitive System 1 vs slow deliberate System 2" +--- + +# System 1 and System 2 + +[[daniel-kahneman]]'s framework for understanding how we think. The mind operates with two distinct modes of processing that interact continuously. + +## System 1 — Fast + +- **Automatic**: operates without effort or voluntary control +- **Always on**: you can't shut it off +- **Pattern-matching**: excels at recognizing faces, reading emotions, completing phrases +- **Associative**: generates impressions, feelings, and intuitions + +System 1 is the source of our gut feelings. It's usually right — but its heuristics produce systematic [[cognitive-biases]] in predictable situations. + +**Examples**: detecting anger in a voice, driving on an empty road, understanding simple sentences, answering "2 + 2 = ?" + +## System 2 — Slow + +- **Effortful**: requires attention and concentration +- **Limited capacity**: can only handle one demanding task at a time +- **Lazy**: defers to System 1 whenever possible +- **Monitoring**: can override System 1, but often doesn't bother + +System 2 is what we think of as "thinking." It's logical, deliberate, and careful — but slow and energy-expensive. 
+ +**Examples**: filling out a tax form, checking the validity of an argument, counting letters on a page, parallel parking in a tight spot + +## How They Interact + +System 1 runs continuously, generating suggestions (impressions, intuitions, intentions). System 2 monitors with low effort and usually endorses System 1's output. When System 1 encounters something surprising or difficult, it calls on System 2 for help. + +The problem: System 2 is lazy. It often rubber-stamps System 1's output even when it shouldn't. This is why [[cognitive-biases]] persist even when we know about them — knowing isn't enough; you need to engage System 2 deliberately. + +## Practical Implication + +For important [[decision-making]], force System 2 engagement: write down your reasoning, consider the opposite, use structured frameworks, and sleep on it. + +## See Also + +- [[cognitive-biases]] +- [[decision-making]] diff --git a/examples/reading-list/wiki/references/daniel-kahneman.md b/examples/reading-list/wiki/references/daniel-kahneman.md new file mode 100644 index 0000000..f604bff --- /dev/null +++ b/examples/reading-list/wiki/references/daniel-kahneman.md @@ -0,0 +1,35 @@ +--- +title: "Daniel Kahneman" +slug: daniel-kahneman +category: reference +tags: [people, psychology, nobel-prize] +sources: [src_tfas001] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Nobel Prize-winning psychologist who pioneered research on cognitive biases and dual-process theory" +--- + +# Daniel Kahneman + +Daniel Kahneman (1934–2024) was an Israeli-American psychologist who won the 2002 Nobel Prize in Economics for his work on judgment and decision-making under uncertainty, conducted with Amos Tversky. + +## Key Contributions + +- **Prospect Theory** (1979, with Tversky): people evaluate outcomes relative to a reference point, are loss-averse, and overweight small probabilities. Foundational to behavioral economics. 
+- **Heuristics and Biases** program (1970s-80s, with Tversky): identified systematic [[cognitive-biases]] including anchoring, availability, and representativeness. +- **[[system-1-and-system-2]]** framework: the dual-process model of fast/intuitive vs slow/deliberate thinking, popularized in *Thinking, Fast and Slow* (2011). + +## Major Works + +- *Thinking, Fast and Slow* (2011) — synthesized decades of research into an accessible narrative +- "Judgment Under Uncertainty: Heuristics and Biases" (1974, Science) +- "Prospect Theory: An Analysis of Decision under Risk" (1979, Econometrica) + +## Legacy + +Kahneman's work fundamentally changed how we understand human rationality. He showed that deviations from rational choice aren't random errors — they're systematic and predictable. This insight launched behavioral economics, influenced public policy (nudge theory), and transformed how organizations think about [[decision-making]]. + +## See Also + +- [[system-1-and-system-2]] +- [[cognitive-biases]] diff --git a/examples/reading-list/wiki/references/don-norman.md b/examples/reading-list/wiki/references/don-norman.md new file mode 100644 index 0000000..5482975 --- /dev/null +++ b/examples/reading-list/wiki/references/don-norman.md @@ -0,0 +1,39 @@ +--- +title: "Don Norman" +slug: don-norman +category: reference +tags: [people, design, ux] +sources: [src_doet002] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Cognitive scientist and usability engineer who coined the term 'user experience'" +--- + +# Don Norman + +Donald Arthur Norman (b. 1935) is an American cognitive scientist, usability engineer, and author. He is best known for his books on design, particularly *The Design of Everyday Things*, and for popularizing the concept of [[affordances]] in interaction design. 
+ +## Key Contributions + +- Coined the term **"user experience"** while at Apple in the 1990s +- Popularized [[affordances]] and signifiers as design concepts +- Developed the "Gulf of Execution" and "Gulf of Evaluation" framework +- Articulated the [[design-principles]] that underpin modern UX: visibility, feedback, constraints, mapping, consistency + +## Career + +- Professor of cognitive science at UC San Diego +- VP of Advanced Technology at Apple (1993-1998) +- Co-founded the Nielsen Norman Group with Jakob Nielsen +- Director of The Design Lab at UC San Diego + +## Major Works + +- *The Design of Everyday Things* (1988, originally "The Psychology of Everyday Things") +- *Emotional Design* (2004) — why attractive things work better +- *Living with Complexity* (2010) — complexity isn't the enemy, confusion is + +## See Also + +- [[affordances]] +- [[design-principles]] diff --git a/examples/reading-list/wiki/topics/atomic-habits-framework.md b/examples/reading-list/wiki/topics/atomic-habits-framework.md new file mode 100644 index 0000000..6e765e8 --- /dev/null +++ b/examples/reading-list/wiki/topics/atomic-habits-framework.md @@ -0,0 +1,64 @@ +--- +title: "Atomic Habits Framework" +slug: atomic-habits-framework +category: topic +tags: [habits, behavior, productivity] +sources: [src_ato004] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "James Clear's four laws for building good habits and breaking bad ones" +--- + +# Atomic Habits Framework + +James Clear's practical system for behavior change, built on the [[habit-formation]] loop. Each law targets one stage of the cue-craving-response-reward cycle. + +## The Four Laws + +### 1st Law: Make It Obvious (Cue) + +Design your environment so cues for good habits are visible and cues for bad habits are invisible. 
+ +- **Implementation intention**: "I will [BEHAVIOR] at [TIME] in [LOCATION]" +- **Habit stacking**: "After [CURRENT HABIT], I will [NEW HABIT]" +- **Environment design**: put the guitar in the living room, not the closet + +**To break a bad habit**: make the cue invisible. Remove the trigger from your environment entirely. + +### 2nd Law: Make It Attractive (Craving) + +The more attractive a behavior, the more likely it becomes habit. + +- **Temptation bundling**: pair something you need to do with something you want to do +- **Social pressure**: join a group where your desired behavior is the norm +- **Reframe the narrative**: "I get to exercise" not "I have to exercise" + +**To break a bad habit**: reframe it to highlight the costs rather than the benefits. + +### 3rd Law: Make It Easy (Response) + +Reduce friction for good habits, increase friction for bad ones. + +- **Two-minute rule**: scale down to "just show up" — "Run 3 miles" becomes "Put on running shoes" +- **Reduce steps**: the fewer steps between you and the behavior, the more likely you'll do it +- **Automate**: auto-invest, app blockers, meal prep + +**To break a bad habit**: add friction. Unplug the TV after each use. Delete social media from your phone. + +### 4th Law: Make It Satisfying (Reward) + +We repeat behaviors that feel good. The first three laws get you to do it *this time*; the fourth gets you to do it *next time*. + +- **Immediate reward**: attach a small pleasure to habits with delayed benefits +- **Habit tracking**: visual progress (a calendar, a streak counter) is inherently satisfying +- **Never miss twice**: one slip is an accident; two is a new habit + +**To break a bad habit**: make it immediately unsatisfying. Accountability partners, public commitment, financial penalties. + +## Identity-Based Change + +The deepest level of habit change is identity change. Don't set goals ("I want to lose weight"). Adopt an identity ("I am someone who moves every day"). 
Every completed habit is a vote for your new identity. + +## See Also + +- [[habit-formation]] diff --git a/examples/reading-list/wiki/topics/decision-making.md b/examples/reading-list/wiki/topics/decision-making.md new file mode 100644 index 0000000..4022bc2 --- /dev/null +++ b/examples/reading-list/wiki/topics/decision-making.md @@ -0,0 +1,50 @@ +--- +title: "Decision Making" +slug: decision-making +category: topic +tags: [thinking, decision-making, rationality] +sources: [src_mm003] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "The process of choosing between options, improved by mental models and awareness of cognitive biases" +--- + +# Decision Making + +Decision making is the cognitive process of selecting a course of action from multiple alternatives. The quality of our decisions depends on how well we gather information, evaluate options, and account for uncertainty — all of which are affected by [[cognitive-biases]] and improved by [[mental-models]]. + +## Two Modes of Decision Making + +Kahneman's [[system-1-and-system-2]] framework explains why we make both brilliant snap judgments and predictable errors: + +- **Intuitive decisions** (System 1): fast, effortless, good for familiar situations with clear patterns. An experienced firefighter "just knows" when a floor is about to collapse. +- **Analytical decisions** (System 2): slow, deliberate, necessary for novel, complex, or high-stakes situations. Choosing which house to buy requires careful analysis. + +The challenge is knowing which mode to trust. System 1 is reliable when you have significant experience in a regular environment with fast feedback. In irregular, low-feedback environments (stock picking, long-term planning), System 1 intuitions are often wrong. + +## Decision Frameworks + +### Pre-Mortem +Before committing, imagine the decision failed spectacularly. Ask: "What went wrong?" 
This surfaces risks that optimism blinds you to and counteracts overconfidence. + +### Weighted Pros and Cons +List factors, assign weights by importance, score each option. Crude but effective for forcing explicit trade-offs rather than relying on vague feelings. + +### Reversibility Test +Distinguish one-way doors (irreversible) from two-way doors (easily reversible). For two-way doors, decide fast and iterate. For one-way doors, slow down and analyze. + +### Base Rate Check +Before trusting your assessment, check the base rate: "How often does this type of thing succeed in general?" Your specific optimism should be anchored to the general success rate. + +## Common Traps + +- **Analysis paralysis**: gathering more data when you already have enough to decide +- **Sunk cost fallacy**: continuing because of past investment, not future value +- **Status quo bias**: defaulting to the current option because change feels risky +- **Narrative fallacy**: constructing a compelling story that oversimplifies reality + +## See Also + +- [[mental-models]] +- [[cognitive-biases]] +- [[system-1-and-system-2]] diff --git a/examples/reading-list/wiki/topics/design-principles.md b/examples/reading-list/wiki/topics/design-principles.md new file mode 100644 index 0000000..b654ee9 --- /dev/null +++ b/examples/reading-list/wiki/topics/design-principles.md @@ -0,0 +1,47 @@ +--- +title: "Design Principles" +slug: design-principles +category: topic +tags: [design, ux, usability] +sources: [src_doet002] +created: "2026-04-01T16:00:00.000Z" +updated: "2026-04-01T16:00:00.000Z" +summary: "Core principles from The Design of Everyday Things for creating intuitive, user-friendly products" +--- + +# Design Principles + +[[don-norman]]'s principles for human-centered design, drawn from *The Design of Everyday Things*. The fundamental insight: when users struggle, blame the design, not the user. + +## The Six Principles + +### 1. 
Visibility +The more visible the functions, the more likely users will know what to do. Hidden controls create confusion. The state of the system should be clearly visible at all times. + +### 2. Feedback +Every action should produce an immediate, informative response. Did my click register? Is the system processing? What changed? Without feedback, users repeat actions, get confused, or give up. + +### 3. Constraints +Limit the possible actions to prevent errors. Physical constraints (a USB plug only fits one way), logical constraints (grayed-out menu items), and cultural constraints (red means stop) all reduce the space of possible mistakes. + +### 4. Mapping +The relationship between controls and their effects should be natural and intuitive. Spatial correspondence (a stove burner layout that matches the knob layout) is the strongest form of mapping. + +### 5. Consistency +Similar things should look similar and work the same way. Internal consistency (within a product) and external consistency (with platform conventions) both reduce learning effort. + +### 6. [[Affordances]] +Design objects so their possible actions are perceivable. The shape, size, and material of a control should suggest how to interact with it. + +## Design for Error + +Errors are inevitable. Good design makes errors: +- **Hard to make**: constraints prevent incorrect actions +- **Easy to discover**: feedback reveals when something went wrong +- **Easy to correct**: undo, confirmation dialogs, non-destructive defaults + +The Swiss Cheese Model: accidents happen when holes in multiple layers of defense align. Add independent layers of protection. 
+ +## See Also + +- [[affordances]] diff --git a/examples/software-docs/.kb/config.toml b/examples/software-docs/.kb/config.toml new file mode 100644 index 0000000..e9fae56 --- /dev/null +++ b/examples/software-docs/.kb/config.toml @@ -0,0 +1,21 @@ +[provider] +default = "openai" +model = "gpt-4o" +fast_model = "gpt-4o-mini" + +[compile] +auto_index = true +auto_graph = true +max_sources_per_pass = 10 +categories = ["concepts", "topics", "references", "outputs"] +enrich_cross_refs = true + +[search] +engine = "builtin" +max_results = 20 + +[skills] +[skills.hooks] +post-compile = [] +post-ingest = [] +post-lint = [] diff --git a/examples/software-docs/.kb/manifest.json b/examples/software-docs/.kb/manifest.json new file mode 100644 index 0000000..ebbfb9c --- /dev/null +++ b/examples/software-docs/.kb/manifest.json @@ -0,0 +1,167 @@ +{ + "version": "1", + "vault": { + "name": "software-docs", + "created": "2026-02-10T08:00:00.000Z", + "lastCompiled": "2026-03-20T10:00:00.000Z", + "provider": "openai", + "model": "gpt-4o" + }, + "sources": { + "src_rest001": { + "hash": "b1c2d3e4f5a60001", + "ingestedAt": "2026-02-10T08:15:00.000Z", + "lastCompiled": "2026-03-20T10:00:00.000Z", + "sourceType": "web", + "originalUrl": "https://restfulapi.net/rest-api-design-tutorial-with-example/", + "producedArticles": ["rest-api", "http-methods", "api-design-patterns"], + "metadata": { + "title": "REST API Tutorial", + "wordCount": 6800 + } + }, + "src_git002": { + "hash": "b1c2d3e4f5a60002", + "ingestedAt": "2026-02-15T09:00:00.000Z", + "lastCompiled": "2026-03-20T10:00:00.000Z", + "sourceType": "github", + "originalUrl": "https://github.com/git/git", + "producedArticles": ["version-control", "git"], + "metadata": { + "title": "Git — distributed version control", + "wordCount": 5200 + } + }, + "src_cicd003": { + "hash": "b1c2d3e4f5a60003", + "ingestedAt": "2026-02-20T14:00:00.000Z", + "lastCompiled": "2026-03-20T10:00:00.000Z", + "sourceType": "web", + "originalUrl": 
"https://martinfowler.com/articles/continuousIntegration.html", + "producedArticles": ["continuous-integration", "continuous-deployment"], + "metadata": { + "title": "Continuous Integration", + "author": "Martin Fowler", + "date": "2006-05-01", + "wordCount": 9500 + } + }, + "src_12f004": { + "hash": "b1c2d3e4f5a60004", + "ingestedAt": "2026-03-01T11:00:00.000Z", + "lastCompiled": "2026-03-20T10:00:00.000Z", + "sourceType": "web", + "originalUrl": "https://12factor.net/", + "producedArticles": ["twelve-factor-app"], + "metadata": { + "title": "The Twelve-Factor App", + "author": "Adam Wiggins", + "wordCount": 7000 + } + } + }, + "articles": { + "rest-api": { + "hash": "wiki_hash_s001", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_rest001"], + "backlinks": ["api-design-patterns"], + "forwardLinks": ["http-methods", "api-design-patterns"], + "tags": ["api", "web", "http"], + "summary": "Architectural style for designing networked applications using stateless HTTP operations on resources", + "wordCount": 410, + "category": "concept" + }, + "http-methods": { + "hash": "wiki_hash_s002", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_rest001"], + "backlinks": ["rest-api"], + "forwardLinks": ["rest-api"], + "tags": ["api", "http"], + "summary": "Standard HTTP verbs (GET, POST, PUT, DELETE) and their semantics in RESTful APIs", + "wordCount": 320, + "category": "concept" + }, + "version-control": { + "hash": "wiki_hash_s003", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_git002"], + "backlinks": ["git", "continuous-integration"], + "forwardLinks": ["git"], + "tags": ["git", "collaboration", "workflow"], + "summary": "System for tracking changes to files over time, enabling collaboration and history", + "wordCount": 280, + "category": "concept" + }, + "git": { + "hash": 
"wiki_hash_s004", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_git002"], + "backlinks": ["version-control"], + "forwardLinks": ["version-control", "continuous-integration"], + "tags": ["git", "tools"], + "summary": "Distributed version control system created by Linus Torvalds for Linux kernel development", + "wordCount": 350, + "category": "reference" + }, + "continuous-integration": { + "hash": "wiki_hash_s005", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_cicd003"], + "backlinks": ["git", "continuous-deployment"], + "forwardLinks": ["version-control", "continuous-deployment", "twelve-factor-app"], + "tags": ["ci-cd", "devops", "automation"], + "summary": "Practice of frequently merging code changes into a shared repository with automated build and test verification", + "wordCount": 450, + "category": "topic" + }, + "continuous-deployment": { + "hash": "wiki_hash_s006", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_cicd003"], + "backlinks": ["continuous-integration"], + "forwardLinks": ["continuous-integration", "twelve-factor-app"], + "tags": ["ci-cd", "devops", "automation"], + "summary": "Practice of automatically releasing every change that passes CI to production", + "wordCount": 330, + "category": "topic" + }, + "api-design-patterns": { + "hash": "wiki_hash_s007", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": "2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_rest001"], + "backlinks": ["rest-api"], + "forwardLinks": ["rest-api", "http-methods"], + "tags": ["api", "patterns", "design"], + "summary": "Common patterns for building well-structured, consistent, and developer-friendly APIs", + "wordCount": 380, + "category": "topic" + }, + "twelve-factor-app": { + "hash": "wiki_hash_s008", + "createdAt": "2026-03-20T10:00:00.000Z", + "lastUpdated": 
"2026-03-20T10:00:00.000Z", + "derivedFrom": ["src_12f004"], + "backlinks": ["continuous-integration", "continuous-deployment"], + "forwardLinks": ["continuous-integration"], + "tags": ["architecture", "cloud", "best-practices"], + "summary": "Methodology for building modern, portable, cloud-native applications", + "wordCount": 390, + "category": "topic" + } + }, + "stats": { + "totalSources": 4, + "totalArticles": 8, + "totalWords": 2910, + "lastLintAt": "2026-03-20T10:30:00.000Z" + } +} diff --git a/examples/software-docs/README.md b/examples/software-docs/README.md new file mode 100644 index 0000000..a718179 --- /dev/null +++ b/examples/software-docs/README.md @@ -0,0 +1,34 @@ +# Software Docs Vault + +A software engineering knowledge base covering APIs, version control, CI/CD, and cloud-native patterns. + +## Sources + +| Source | Type | Articles Produced | +|--------|------|-------------------| +| REST API Tutorial | web | rest-api, http-methods, api-design-patterns | +| Git (github.com/git/git) | github | version-control, git | +| Continuous Integration (Martin Fowler) | web | continuous-integration, continuous-deployment | +| The Twelve-Factor App | web | twelve-factor-app | + +## Knowledge Graph + +``` +rest-api ─── http-methods + │ +api-design-patterns + +version-control ─── git + │ +continuous-integration ─── continuous-deployment + │ + twelve-factor-app +``` + +## Try It + +```bash +kib search "deployment" +kib query "What's the difference between CI and CD?" 
+kib skill run compare --args '{"a": "continuous-integration", "b": "continuous-deployment"}' +``` diff --git a/examples/software-docs/raw/articles/continuous-integration.md b/examples/software-docs/raw/articles/continuous-integration.md new file mode 100644 index 0000000..7b3049d --- /dev/null +++ b/examples/software-docs/raw/articles/continuous-integration.md @@ -0,0 +1,57 @@ +--- +title: "Continuous Integration" +source_type: web +url: "https://martinfowler.com/articles/continuousIntegration.html" +author: "Martin Fowler" +date: "2006-05-01" +ingested: "2026-02-20" +word_count: 9500 +--- + +# Continuous Integration + +Continuous Integration (CI) is a software development practice where team members integrate their work frequently — each person integrates at least daily, leading to multiple integrations per day. Each integration is verified by an automated build (including tests) to detect integration errors as quickly as possible. + +## The Problem + +Without CI, developers work in isolation on their branches for days or weeks. When they finally merge, the integration is painful — merge conflicts, broken tests, incompatible changes. This "integration hell" gets exponentially worse with team size and branch lifetime. + +## Practices + +### Maintain a Single Source Repository + +Use version control (Git) for everything: source code, tests, scripts, configuration. Everyone commits to the mainline (or merges short-lived feature branches frequently). + +### Automate the Build + +A single command should build the entire system from source. Use build tools (Make, Gradle, Webpack, etc.) and avoid manual steps. If a new developer can't build the system in one step on a clean machine, the build is broken. + +### Make Your Build Self-Testing + +Include automated tests in the build. Unit tests, integration tests, and a subset of end-to-end tests should all run on every build. A build that compiles but fails tests is a broken build. 
+ +### Everyone Commits to the Mainline Every Day + +The key practice. Short-lived branches (< 1 day) prevent drift. The longer a branch lives, the harder the merge. Trunk-based development takes this to the extreme: everyone commits directly to main. + +### Every Commit Should Build the Mainline on an Integration Machine + +Don't trust "works on my machine." CI servers (Jenkins, GitHub Actions, CircleCI) build and test every commit in a clean environment. If the build breaks, fixing it is the team's top priority. + +### Keep the Build Fast + +A 10-minute build is a good target. If the build is slow, developers won't integrate frequently. Stage the build: fast unit tests first, slower integration tests second. + +## Continuous Delivery vs Continuous Deployment + +- **Continuous Delivery**: every commit is potentially releasable. Deployment requires a manual approval step. +- **Continuous Deployment**: every commit that passes CI is automatically deployed to production. No manual gates. + +Both extend CI by automating the release pipeline: build → test → stage → deploy. + +## Benefits + +1. **Reduced risk**: small, frequent integrations are easier to debug than big-bang merges +2. **Faster feedback**: broken code is caught in minutes, not days +3. **Always deployable**: the mainline should always be in a releasable state +4. 
**Team confidence**: comprehensive test suites let you refactor and add features fearlessly diff --git a/examples/software-docs/raw/articles/rest-api-tutorial.md b/examples/software-docs/raw/articles/rest-api-tutorial.md new file mode 100644 index 0000000..1d1c712 --- /dev/null +++ b/examples/software-docs/raw/articles/rest-api-tutorial.md @@ -0,0 +1,61 @@ +--- +title: "REST API Tutorial" +source_type: web +url: "https://restfulapi.net/rest-api-design-tutorial-with-example/" +ingested: "2026-02-10" +word_count: 6800 +--- + +# REST API Tutorial + +REST (Representational State Transfer) is an architectural style for designing networked applications. It relies on a stateless, client-server, cacheable communications protocol — in practice, almost always HTTP. + +## REST Principles + +### Statelessness + +Each request from client to server must contain all the information needed to understand and process the request. The server does not store session state between requests. This simplifies the server, improves scalability, and makes requests independently cacheable. + +### Resource-Based + +REST models everything as resources identified by URIs. A resource can be a user, a document, an image — anything that can be named. Each resource has one or more representations (JSON, XML, HTML). + +### Uniform Interface + +The constraint that distinguishes REST from other architectures. Four sub-constraints: +1. **Resource identification**: resources are identified by URIs +2. **Resource manipulation through representations**: clients hold enough information to modify or delete the resource +3. **Self-descriptive messages**: each message includes enough information to describe how to process it +4. 
**Hypermedia as the engine of application state (HATEOAS)**: clients discover actions through hypermedia links + +## HTTP Methods + +| Method | Action | Idempotent | Safe | +|--------|--------|------------|------| +| GET | Retrieve a resource | Yes | Yes | +| POST | Create a resource | No | No | +| PUT | Replace a resource entirely | Yes | No | +| PATCH | Partially update a resource | No | No | +| DELETE | Remove a resource | Yes | No | + +## API Design Best Practices + +### URL Design + +Use nouns, not verbs: `/users/123` not `/getUser?id=123`. Use plural nouns: `/users` not `/user`. Nest related resources: `/users/123/posts`. + +### Status Codes + +Use standard HTTP status codes: 200 OK, 201 Created, 204 No Content, 400 Bad Request, 401 Unauthorized, 404 Not Found, 500 Internal Server Error. + +### Versioning + +Version your API to avoid breaking clients. Common approaches: URL path (`/v1/users`), header (`Accept: application/vnd.api.v1+json`), or query parameter (`?version=1`). + +### Pagination + +For collections, support pagination with `limit` and `offset` (or cursor-based). Include total count and next/previous links in the response. + +### Error Handling + +Return structured error responses with a machine-readable error code, human-readable message, and optional details. Always use appropriate HTTP status codes. diff --git a/examples/software-docs/raw/articles/twelve-factor-app.md b/examples/software-docs/raw/articles/twelve-factor-app.md new file mode 100644 index 0000000..9f97f8d --- /dev/null +++ b/examples/software-docs/raw/articles/twelve-factor-app.md @@ -0,0 +1,50 @@ +--- +title: "The Twelve-Factor App" +source_type: web +url: "https://12factor.net/" +author: "Adam Wiggins" +ingested: "2026-03-01" +word_count: 7000 +--- + +# The Twelve-Factor App + +The twelve-factor app is a methodology for building software-as-a-service apps. These best practices enable applications to be built with portability and resilience when deployed to the web. 
+ +## The Twelve Factors + +### I. Codebase +One codebase tracked in version control, many deploys. A one-to-one correlation between the codebase and the app. Multiple apps sharing code is a violation — factor shared code into libraries. + +### II. Dependencies +Explicitly declare and isolate dependencies. Never rely on system-wide packages. Use a dependency declaration manifest (package.json, Gemfile, requirements.txt) and a dependency isolation tool (node_modules, bundler, virtualenv). + +### III. Config +Store config in the environment. Config varies between deploys (staging, production), code does not. Use environment variables, not config files checked into the repo. A litmus test: could the codebase be made open source without compromising credentials? + +### IV. Backing Services +Treat backing services as attached resources. A database, message queue, or SMTP service should be accessible via a URL in config. The app should make no distinction between local and third-party services. + +### V. Build, Release, Run +Strictly separate build and run stages. Build converts code into an executable bundle. Release combines the build with config. Run launches the app. Every release should have a unique ID (timestamp or incrementing number). + +### VI. Processes +Execute the app as one or more stateless processes. Any data that needs to persist must be stored in a stateful backing service (database, object store). Session data goes in a session store (Redis, Memcached), not the filesystem. + +### VII. Port Binding +Export services via port binding. The app is completely self-contained and doesn't rely on injection of a webserver. It exports HTTP as a service by binding to a port and listening for requests. + +### VIII. Concurrency +Scale out via the process model. Rather than running one giant process, run multiple small processes of different types (web, worker, clock). The OS process manager handles restarts and distribution. + +### IX. 
Disposability +Maximize robustness with fast startup and graceful shutdown. Processes should start in seconds and shut down gracefully on SIGTERM. This enables rapid elastic scaling and fast deploys. + +### X. Dev/Prod Parity +Keep development, staging, and production as similar as possible. The gap between dev and prod causes bugs that are hard to reproduce. Use the same backing services, the same OS, the same versions. + +### XI. Logs +Treat logs as event streams. The app should not concern itself with routing or storage of its output stream. Write to stdout, and let the execution environment collect and route logs. + +### XII. Admin Processes +Run admin/management tasks as one-off processes. Database migrations, console sessions, and one-time scripts should run in the same environment as the app's regular processes, using the same codebase and config. diff --git a/examples/software-docs/raw/repos/git-repo.md b/examples/software-docs/raw/repos/git-repo.md new file mode 100644 index 0000000..4069b2c --- /dev/null +++ b/examples/software-docs/raw/repos/git-repo.md @@ -0,0 +1,68 @@ +--- +title: "Git — distributed version control" +source_type: github +url: "https://github.com/git/git" +ingested: "2026-02-15" +word_count: 5200 +--- + +# Git + +Git is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency. Created by Linus Torvalds in 2005 for Linux kernel development. + +## Architecture + +Git is a content-addressable filesystem. At its core, it stores snapshots (not diffs) of the project at each commit. Every object (blob, tree, commit, tag) is identified by its SHA-1 hash. 
+ +### Object Types + +- **Blob**: stores file contents (no filename or metadata) +- **Tree**: maps filenames to blobs and other trees (represents a directory) +- **Commit**: points to a tree (project snapshot) plus metadata (author, message, parent commits) +- **Tag**: named pointer to a commit, optionally signed + +### References + +Branches and tags are simply pointers (refs) to commits. `HEAD` points to the current branch. The reflog records where refs have pointed over time, enabling recovery of "lost" commits. + +## Key Commands + +### Everyday Workflow + +```bash +git init # create new repository +git clone # copy remote repository +git add # stage changes +git commit -m "message" # record staged snapshot +git push origin main # upload to remote +git pull origin main # download and merge +``` + +### Branching + +```bash +git branch feature # create branch +git checkout feature # switch to branch +git merge feature # merge branch into current +git rebase main # replay commits onto main +``` + +### History + +```bash +git log --oneline # compact commit history +git diff # unstaged changes +git blame # line-by-line authorship +git bisect # binary search for bug-introducing commit +``` + +## Distributed Model + +Every clone is a full copy of the repository with complete history. Work happens locally — commit, branch, merge all without network access. Collaboration happens through push/pull to shared remotes (GitHub, GitLab, etc.). + +## Why Git Won + +1. **Speed**: local operations are nearly instant +2. **Branching**: lightweight branches make parallel development easy +3. **Integrity**: content-addressed storage means corruption is detectable +4. 
**Distributed**: no single point of failure, offline work is natural diff --git a/examples/software-docs/wiki/GRAPH.md b/examples/software-docs/wiki/GRAPH.md new file mode 100644 index 0000000..3182eec --- /dev/null +++ b/examples/software-docs/wiki/GRAPH.md @@ -0,0 +1,10 @@ +# Knowledge Graph + +api-design-patterns -> rest-api, http-methods +continuous-deployment -> continuous-integration, twelve-factor-app +continuous-integration -> version-control, git, continuous-deployment, twelve-factor-app +git -> version-control, continuous-integration +http-methods -> rest-api +rest-api -> http-methods, api-design-patterns +twelve-factor-app -> continuous-integration +version-control -> git diff --git a/examples/software-docs/wiki/INDEX.md b/examples/software-docs/wiki/INDEX.md new file mode 100644 index 0000000..eb9da5a --- /dev/null +++ b/examples/software-docs/wiki/INDEX.md @@ -0,0 +1,22 @@ +# Knowledge Base Index + +> 8 articles | 2910 words | Last compiled: 2026-03-20T10:00:00.000Z + +## Concepts + +- **[HTTP Methods](concepts/http-methods.md)** — Standard HTTP verbs (GET, POST, PUT, DELETE) and their semantics in RESTful APIs `#api` `#http` +- **[REST API](concepts/rest-api.md)** — Architectural style for designing networked applications using stateless HTTP operations on resources `#api` `#web` `#http` +- **[Version Control](concepts/version-control.md)** — System for tracking changes to files over time, enabling collaboration and history `#git` `#collaboration` `#workflow` + +## Topics + +- **[API Design Patterns](topics/api-design-patterns.md)** — Common patterns for building well-structured, consistent, and developer-friendly APIs `#api` `#patterns` `#design` +- **[Continuous Deployment](topics/continuous-deployment.md)** — Practice of automatically releasing every change that passes CI to production `#ci-cd` `#devops` `#automation` +- **[Continuous Integration](topics/continuous-integration.md)** — Practice of frequently merging code changes into a shared 
repository with automated build and test verification `#ci-cd` `#devops` `#automation` +- **[Twelve-Factor App](topics/twelve-factor-app.md)** — Methodology for building modern, portable, cloud-native applications `#architecture` `#cloud` `#best-practices` + +## References + +- **[Git](references/git.md)** — Distributed version control system created by Linus Torvalds for Linux kernel development `#git` `#tools` + +## Outputs diff --git a/examples/software-docs/wiki/concepts/http-methods.md b/examples/software-docs/wiki/concepts/http-methods.md new file mode 100644 index 0000000..4e96aeb --- /dev/null +++ b/examples/software-docs/wiki/concepts/http-methods.md @@ -0,0 +1,52 @@ +--- +title: "HTTP Methods" +slug: http-methods +category: concept +tags: [api, http] +sources: [src_rest001] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Standard HTTP verbs (GET, POST, PUT, DELETE) and their semantics in RESTful APIs" +--- + +# HTTP Methods + +HTTP methods (also called verbs) define the action to perform on a resource in a [[rest-api]]. Each method has specific semantic guarantees around safety and idempotency. + +## Methods + +### GET + +Retrieve a resource. **Safe** (no side effects) and **idempotent** (repeated calls return the same result). Should never modify data. Responses are cacheable. + +### POST + +Create a new resource or trigger a process. **Not safe**, **not idempotent** — calling POST twice may create two resources. Returns 201 Created with a `Location` header pointing to the new resource. + +### PUT + +Replace a resource entirely. **Idempotent** — putting the same data twice has the same effect as putting it once. The client sends the complete resource representation. + +### PATCH + +Partially update a resource. **Not idempotent** in the general case (though specific implementations may be). The client sends only the fields to change. + +### DELETE + +Remove a resource. 
**Idempotent** — repeating a DELETE leaves the server in the same state; a second call on an already-deleted resource typically returns 404 (or 204) because the work was already done. The resource should no longer be retrievable after deletion. + +## Safety and Idempotency + +| Method | Safe | Idempotent | +|--------|------|------------| +| GET | Yes | Yes | +| POST | No | No | +| PUT | No | Yes | +| PATCH | No | No | +| DELETE | No | Yes | + +**Safe** means the method doesn't modify server state. **Idempotent** means calling it N times has the same effect as calling it once. These guarantees let clients, caches, and proxies make smart decisions about retries and caching. + +## See Also + +- [[rest-api]] diff --git a/examples/software-docs/wiki/concepts/rest-api.md b/examples/software-docs/wiki/concepts/rest-api.md new file mode 100644 index 0000000..fafd5c3 --- /dev/null +++ b/examples/software-docs/wiki/concepts/rest-api.md @@ -0,0 +1,47 @@ +--- +title: "REST API" +slug: rest-api +category: concept +tags: [api, web, http] +sources: [src_rest001] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Architectural style for designing networked applications using stateless HTTP operations on resources" +--- + +# REST API + +REST (Representational State Transfer) is an architectural style for building web APIs. It uses standard [[http-methods]] to perform stateless operations on resources identified by URIs. REST has become the dominant style for public-facing web APIs due to its simplicity and alignment with HTTP. + +## Core Constraints + +1. **Client-Server**: separation of concerns between UI and data storage +2. **Stateless**: each request contains all context needed to process it — no server-side sessions +3. **Cacheable**: responses must declare themselves cacheable or non-cacheable +4. **Uniform Interface**: resources are identified by URIs, manipulated through representations, with self-descriptive messages +5. 
**Layered System**: intermediaries (load balancers, caches, gateways) can be inserted transparently + +## Resource Design + +Everything in REST is a resource with a unique URI: + +``` +GET /users → list all users +GET /users/123 → get user 123 +POST /users → create a new user +PUT /users/123 → replace user 123 +DELETE /users/123 → delete user 123 +``` + +Use nouns (not verbs) in URLs. Nest related resources: `/users/123/posts` for posts belonging to user 123. + +## When to Use REST + +REST works well for CRUD-oriented APIs with clear resource boundaries. For real-time data, consider WebSockets. For complex queries with nested data, consider GraphQL. For internal service-to-service communication, consider gRPC. + +See [[api-design-patterns]] for common patterns used when building REST APIs. + +## See Also + +- [[http-methods]] +- [[api-design-patterns]] diff --git a/examples/software-docs/wiki/concepts/version-control.md b/examples/software-docs/wiki/concepts/version-control.md new file mode 100644 index 0000000..4611deb --- /dev/null +++ b/examples/software-docs/wiki/concepts/version-control.md @@ -0,0 +1,40 @@ +--- +title: "Version Control" +slug: version-control +category: concept +tags: [git, collaboration, workflow] +sources: [src_git002] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "System for tracking changes to files over time, enabling collaboration and history" +--- + +# Version Control + +Version control (also called source control or revision control) is a system that records changes to files over time so you can recall specific versions later. It's the foundation of modern software development, enabling collaboration, experimentation, and auditability. 
+ +## Why Version Control + +- **History**: every change is recorded with who made it, when, and why +- **Collaboration**: multiple people can work on the same codebase without overwriting each other +- **Branching**: experiment in isolation without affecting the main codebase +- **Recovery**: revert to any previous state if something breaks +- **Auditability**: trace any line of code back to the commit that introduced it + +## Centralized vs Distributed + +**Centralized** systems (SVN, Perforce) have a single server that holds the complete history. Clients check out working copies and must be online to commit. + +**Distributed** systems ([[git]], Mercurial) give every developer a full copy of the repository. You can commit, branch, and merge entirely offline. This makes operations fast and eliminates the single point of failure. + +## Branching Strategies + +- **Trunk-based development**: everyone commits to main, short-lived feature branches (< 1 day) +- **Git Flow**: long-lived develop and main branches, feature branches, release branches +- **GitHub Flow**: simplified — main is always deployable, feature branches + pull requests + +The trend is toward trunk-based development, which minimizes merge conflicts and aligns with [[continuous-integration]]. + +## See Also + +- [[git]] diff --git a/examples/software-docs/wiki/references/git.md b/examples/software-docs/wiki/references/git.md new file mode 100644 index 0000000..0451e27 --- /dev/null +++ b/examples/software-docs/wiki/references/git.md @@ -0,0 +1,49 @@ +--- +title: "Git" +slug: git +category: reference +tags: [git, tools] +sources: [src_git002] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Distributed version control system created by Linus Torvalds for Linux kernel development" +--- + +# Git + +Git is a distributed [[version-control]] system created by Linus Torvalds in 2005 for managing the Linux kernel source code. 
It is now the de facto standard for version control in software development. + +## Key Concepts + +### Content-Addressable Storage + +Git stores everything as objects identified by their SHA-1 hash. This means identical content always has the same hash — deduplication is automatic and corruption is detectable. + +### Snapshots, Not Diffs + +Each commit records a complete snapshot of the project (as a tree of blobs). Git computes diffs on the fly when needed, rather than storing them. This makes operations like switching branches and comparing arbitrary commits fast. + +### Branches Are Cheap + +A branch is just a pointer to a commit — a tiny file containing the commit's 40-character SHA-1 hash. Creating, switching, and deleting branches is nearly instant. This encourages experimental branches and supports workflows like [[continuous-integration]]. + +## Common Workflows + +```bash +# Start a feature +git checkout -b feat/new-widget +# ... make changes ... +git add -p # stage selectively +git commit -m "feat: add widget component" +git push -u origin feat/new-widget +# Open PR, get review, merge +``` + +## Ecosystem + +Git's dominance comes partly from its hosting ecosystem. GitHub, GitLab, and Bitbucket provide pull requests, code review, CI/CD integration, issue tracking, and collaboration features on top of Git's core. 
+ +## See Also + +- [[version-control]] +- [[continuous-integration]] diff --git a/examples/software-docs/wiki/topics/api-design-patterns.md b/examples/software-docs/wiki/topics/api-design-patterns.md new file mode 100644 index 0000000..acd3ac4 --- /dev/null +++ b/examples/software-docs/wiki/topics/api-design-patterns.md @@ -0,0 +1,68 @@ +--- +title: "API Design Patterns" +slug: api-design-patterns +category: topic +tags: [api, patterns, design] +sources: [src_rest001] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Common patterns for building well-structured, consistent, and developer-friendly APIs" +--- + +# API Design Patterns + +These patterns appear repeatedly in well-designed [[rest-api]] services. They address common challenges around pagination, filtering, error handling, and versioning. + +## Pagination + +For endpoints returning collections, support pagination to avoid returning thousands of records: + +- **Offset-based**: `GET /users?offset=20&limit=10` — simple but slow for large offsets +- **Cursor-based**: `GET /users?after=abc123&limit=10` — efficient for large datasets, using an opaque cursor + +Always include pagination metadata in the response: total count (if feasible), next/previous links. + +## Filtering and Sorting + +Allow clients to narrow results without custom endpoints: + +``` +GET /users?role=admin&created_after=2026-01-01&sort=-created_at +``` + +Prefix sort fields with `-` for descending order. Use query parameters for simple filters, reserved keywords for operators (`_gt`, `_lt`, `_contains`). + +## Error Responses + +Return consistent, structured errors with [[http-methods]] status codes: + +```json +{ + "error": { + "code": "VALIDATION_ERROR", + "message": "Email is required", + "details": [ + { "field": "email", "reason": "must not be empty" } + ] + } +} +``` + +Use 4xx for client errors, 5xx for server errors. Never return 200 with an error body. 
+ +## Versioning + +Strategies for evolving an API without breaking existing clients: + +- **URL path**: `/v1/users` — most common, easiest to understand +- **Header**: `Accept: application/vnd.myapp.v2+json` — cleaner URLs, harder to test +- **Query param**: `/users?version=2` — easy to use, not RESTful + +## Rate Limiting + +Protect your API with rate limits. Return `429 Too Many Requests` when exceeded. Include `Retry-After`, `X-RateLimit-Limit`, and `X-RateLimit-Remaining` headers so clients can adapt. + +## See Also + +- [[rest-api]] +- [[http-methods]] diff --git a/examples/software-docs/wiki/topics/continuous-deployment.md b/examples/software-docs/wiki/topics/continuous-deployment.md new file mode 100644 index 0000000..afedc1a --- /dev/null +++ b/examples/software-docs/wiki/topics/continuous-deployment.md @@ -0,0 +1,46 @@ +--- +title: "Continuous Deployment" +slug: continuous-deployment +category: topic +tags: [ci-cd, devops, automation] +sources: [src_cicd003] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Practice of automatically releasing every change that passes CI to production" +--- + +# Continuous Deployment + +Continuous Deployment extends [[continuous-integration]] by automatically deploying every commit that passes the test suite to production. There are no manual gates — if the build is green, the code ships. + +## How It Works + +``` +commit → build → unit tests → integration tests → staging deploy → smoke tests → production deploy +``` + +Each stage is a gate. If any step fails, the pipeline stops and the team is notified. If everything passes, the change is live in production within minutes of being pushed. + +## Prerequisites + +Continuous deployment requires: + +- **Comprehensive test suite**: you're trusting automated tests to catch everything. Coverage and test quality must be high. +- **Feature flags**: decouple deployment from release. Ship code behind a flag, enable it gradually. 
+- **Monitoring and alerting**: detect problems in production quickly. Automated rollback when error rates spike. +- **Infrastructure as code**: reproducible environments so deploys are predictable. + +## Continuous Delivery vs Continuous Deployment + +| | Continuous Delivery | Continuous Deployment | +|---|---|---| +| Auto-deploy to staging | Yes | Yes | +| Auto-deploy to production | No (manual gate) | Yes | +| Requires manual approval | Yes | No | + +Continuous Delivery means the code is *always deployable*. Continuous Deployment means it *is always deployed*. The [[twelve-factor-app]] methodology's emphasis on dev/prod parity and disposability supports both approaches. + +## See Also + +- [[continuous-integration]] +- [[twelve-factor-app]] diff --git a/examples/software-docs/wiki/topics/continuous-integration.md b/examples/software-docs/wiki/topics/continuous-integration.md new file mode 100644 index 0000000..d2c9670 --- /dev/null +++ b/examples/software-docs/wiki/topics/continuous-integration.md @@ -0,0 +1,45 @@ +--- +title: "Continuous Integration" +slug: continuous-integration +category: topic +tags: [ci-cd, devops, automation] +sources: [src_cicd003] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Practice of frequently merging code changes into a shared repository with automated build and test verification" +--- + +# Continuous Integration + +Continuous Integration (CI) is the practice of merging all developers' working copies into a shared mainline frequently — ideally multiple times per day. Each merge triggers an automated build and test suite that verifies the integration didn't break anything. + +## Core Practices + +1. **Maintain a single source repository** — use [[version-control]] ([[git]]) for everything +2. **Automate the build** — one command, from clean checkout to running artifact +3. **Make the build self-testing** — automated tests run on every build +4. 
**Everyone commits daily** — short-lived branches prevent integration drift +5. **Fix broken builds immediately** — a broken build is the team's top priority +6. **Keep the build fast** — 10 minutes is a good target + +## Why CI Matters + +Without CI, developers work in isolation for days or weeks. The eventual merge produces "integration hell" — merge conflicts, incompatible changes, subtle bugs. CI replaces one painful big-bang integration with many small, easy ones. + +## CI Services + +Popular CI platforms: GitHub Actions, GitLab CI, CircleCI, Jenkins, Buildkite. All follow the same pattern: watch for commits, run a pipeline (build → test → report), notify on failure. + +## CI vs CD + +- **CI**: automatically build and test on every commit +- **[[continuous-deployment]]**: automatically deploy to production when CI passes +- **Continuous Delivery**: like continuous deployment, but with a manual approval gate before production + +The [[twelve-factor-app]] methodology recommends strict separation of build and run stages, which aligns naturally with CI/CD pipelines. + +## See Also + +- [[version-control]] +- [[continuous-deployment]] +- [[twelve-factor-app]] diff --git a/examples/software-docs/wiki/topics/twelve-factor-app.md b/examples/software-docs/wiki/topics/twelve-factor-app.md new file mode 100644 index 0000000..1868c8c --- /dev/null +++ b/examples/software-docs/wiki/topics/twelve-factor-app.md @@ -0,0 +1,47 @@ +--- +title: "Twelve-Factor App" +slug: twelve-factor-app +category: topic +tags: [architecture, cloud, best-practices] +sources: [src_12f004] +created: "2026-03-20T10:00:00.000Z" +updated: "2026-03-20T10:00:00.000Z" +summary: "Methodology for building modern, portable, cloud-native applications" +--- + +# Twelve-Factor App + +The Twelve-Factor App is a methodology for building software-as-a-service applications that are portable across execution environments, suitable for deployment on cloud platforms, and scalable without significant changes to tooling or architecture. 
+ +## The Factors (Summary) + +| # | Factor | Key Idea | +|---|--------|----------| +| I | Codebase | One repo per app, many deploys | +| II | Dependencies | Explicitly declare and isolate | +| III | Config | Store in environment variables | +| IV | Backing Services | Treat as attached resources via URL | +| V | Build, Release, Run | Strict separation of stages | +| VI | Processes | Stateless, share-nothing | +| VII | Port Binding | Self-contained, export via port | +| VIII | Concurrency | Scale out via process model | +| IX | Disposability | Fast startup, graceful shutdown | +| X | Dev/Prod Parity | Keep environments identical | +| XI | Logs | Treat as event streams to stdout | +| XII | Admin Processes | Run as one-off processes | + +## Why It Matters + +The methodology codifies patterns that experienced teams arrive at independently. It's particularly relevant for: + +- **Cloud-native deployment** (Heroku, AWS, GCP, Kubernetes) — these platforms assume twelve-factor behavior +- **[[continuous-integration]]** / CD pipelines — the build/release/run separation maps directly to CI/CD stages +- **Microservices** — stateless processes, port binding, and backing services are prerequisites + +## Modern Relevance + +Written in 2011, the twelve factors remain relevant. Containerization (Docker) and orchestration (Kubernetes) have made factors like disposability, port binding, and process-based concurrency even more natural. The main update needed is acknowledging service meshes and distributed tracing, which extend factor IV (backing services) to service-to-service communication. + +## See Also + +- [[continuous-integration]]