Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 101 additions & 1 deletion ant/ant.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package ant

import (
"context"
"encoding/json"
"os"
"path/filepath"
"sync"
"time"

"github.com/tamnd/any-cli/kit"
Expand All @@ -32,6 +34,13 @@ type Engine struct {
host *kit.Host
root string // the data tree root ($HOME/data, ANT_DATA-overridable)
now func() time.Time // the fetch clock, injectable so tests are deterministic

// llMu guards llCache, the in-memory index of materialized URIs keyed by the
// listing prefix. A directory walk runs once per prefix; every cache-write
// folds the new URI into the matching listings, so repeat reads (the web
// console's dashboard and browse pages) never re-walk the tree. See LL.
llMu sync.RWMutex
llCache map[string][]string
}

// Option customizes an Engine at New.
Expand All @@ -49,7 +58,7 @@ func New(opts ...Option) (*Engine, error) {
if err != nil {
return nil, err
}
e := &Engine{host: h, now: time.Now}
e := &Engine{host: h, now: time.Now, llCache: map[string][]string{}}
for _, o := range opts {
o(e)
}
Expand All @@ -62,6 +71,17 @@ func New(opts ...Option) (*Engine, error) {
// Root reports the root directory of the data tree that this Engine writes
// its records under.
func (e *Engine) Root() string {
	return e.root
}

// WarmIndex primes the in-memory LL index by listing the "<scheme>://" prefix of
// every domain the host reports, so the first browse or dashboard request can be
// answered from memory instead of paying for a cold filesystem walk. Only the
// per-domain prefixes are listed, never the whole shared data root. It is meant
// to be called once, in the background, by a long-lived process (ant serve) at
// startup; calling it again is a no-op.
func (e *Engine) WarmIndex() {
	for _, s := range e.host.Domains() {
		prefix := s + "://"
		// Warming is best-effort: both the listing and any error are
		// deliberately discarded; only LL's side effect on the index matters.
		_, _ = e.LL(prefix)
	}
}

// Domains returns the registered domains the Engine can address, sorted by
// scheme. It is the analogue of sql.Drivers and backs `ant domains`.
func (e *Engine) Domains() []DomainInfo {
Expand All @@ -82,6 +102,24 @@ func (e *Engine) Domains() []DomainInfo {
return out
}

// Domain looks up one registered domain by scheme or alias and returns its
// descriptor. The boolean is false, and the descriptor is the zero value, when
// the host does not know the name. The web console uses this lookup to render a
// single domain's detail page.
func (e *Engine) Domain(scheme string) (DomainInfo, bool) {
	d, found := e.host.Domain(scheme)
	if !found {
		return DomainInfo{}, false
	}

	// Flatten the host's descriptor (addressing fields plus its nested
	// Identity) into the DomainInfo shape.
	var out DomainInfo
	out.Scheme = d.Scheme
	out.Aliases = d.Aliases
	out.Hosts = d.Hosts
	out.Binary = d.Identity.Binary
	out.Short = d.Identity.Short
	out.Site = d.Identity.Site
	out.Repo = d.Identity.Repo
	return out, true
}

// DomainInfo is one registered domain, as `ant domains` prints it.
type DomainInfo struct {
Scheme string `json:"scheme"`
Expand Down Expand Up @@ -134,6 +172,68 @@ func (e *Engine) List(ctx context.Context, u kit.URI, limit int) ([]kit.Envelope
return out, nil
}

// Searchable tells the caller whether the domain named by scheme (or by one of
// its aliases) offers free-text search. The answer comes straight from the host;
// the web console uses it to decide whether to show a search box.
func (e *Engine) Searchable(scheme string) bool {
	return e.host.Searchable(scheme)
}

// Search runs the free-text search of the domain named by scheme and returns one
// envelope per hit, in the order the host returned them.
//
// Each hit is wrapped with the Engine's clock. A hit that cannot be wrapped still
// surfaces: it is returned with the scheme as its @type and no @id. When an
// envelope ends up without an @id, uriFromHit is given a chance to recover a
// canonical URI from the hit; on success the envelope takes that URI as @id and
// "<scheme>/<authority>" as @type, so the result links straight to get.
//
// limit caps the result and is passed through to the host unchanged (0 means the
// op's own default). Search itself writes nothing to the data tree; hits are
// previews, and dereferencing one is what caches it.
//
// A non-nil error is returned only when the host's search itself fails.
func (e *Engine) Search(ctx context.Context, scheme, query string, limit int) ([]kit.Envelope, error) {
	hits, err := e.host.Search(ctx, scheme, query, limit)
	if err != nil {
		return nil, err
	}

	results := make([]kit.Envelope, 0, len(hits))
	for _, hit := range hits {
		wrapped, wrapErr := e.host.Wrap(hit, e.now())
		if wrapErr != nil {
			// Not wrappable: fall back to a bare envelope so the hit is not lost.
			wrapped = kit.Envelope{Type: scheme, Data: hit}
		}

		// A search hit is often a preview shape rather than the record type, so
		// it can come back without an @id. If the hit carries a site URL,
		// resolve that back to the canonical URI to keep the result one click
		// away from its record.
		if wrapped.ID == "" {
			canon, ok := e.uriFromHit(scheme, hit)
			if ok {
				wrapped.ID = canon.String()
				wrapped.Type = canon.Scheme + "/" + canon.Authority
			}
		}

		results = append(results, wrapped)
	}
	return results, nil
}

// uriFromHit tries to recover a canonical URI for a search hit that did not mint
// one. The hit is round-tripped through JSON into a generic field map, and the
// first non-empty string found under "url", "link", "href" or "permalink" (tried
// in that order) that resolves is returned. Each candidate is resolved first with
// no scheme hint and then with the searched scheme as the hint.
//
// The boolean is false when the hit cannot be encoded as a JSON object or when
// no candidate field resolves. This is how a preview-shaped result becomes
// dereferenceable.
func (e *Engine) uriFromHit(scheme string, rec any) (kit.URI, bool) {
	var none kit.URI

	encoded, err := json.Marshal(rec)
	if err != nil {
		return none, false
	}
	var fields map[string]any
	if err := json.Unmarshal(encoded, &fields); err != nil {
		return none, false
	}

	// Resolution attempts per candidate, in order: unhinted, then scheme-hinted.
	hints := [2]string{"", scheme}
	candidates := [...]string{"url", "link", "href", "permalink"}

	for _, key := range candidates {
		link, isString := fields[key].(string)
		if !isString || link == "" {
			continue
		}
		for _, hint := range hints {
			if u, err := e.Resolve(link, hint); err == nil {
				return u, true
			}
		}
	}
	return none, false
}

// Links fetches a URI's record and returns its outbound graph edges as URIs.
func (e *Engine) Links(ctx context.Context, u kit.URI) ([]kit.URI, error) {
rec, err := e.host.Get(ctx, u)
Expand Down
100 changes: 100 additions & 0 deletions ant/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package ant

import (
"context"
"encoding/json"
"os"
"strings"

"github.com/tamnd/any-cli/kit"
)

// Fetched is a dereferenced record with the provenance the web console needs:
// whether it came from the on-disk cache or a live fetch, its long-text body
// when it has one, and the canonical envelope JSON, so a renderer can show the
// record's fields in their declared order (a map would lose it).
//
// Dereference fills it on a live fetch and readCache fills it on a cache hit;
// the zero value is what both return alongside a miss or an error.
type Fetched struct {
	// Env is the decoded envelope of the record.
	Env kit.Envelope
	// Raw is the envelope as JSON bytes: freshly marshalled with indentation
	// on a live fetch, or the cached file's bytes verbatim on a cache hit.
	Raw json.RawMessage // the indented envelope JSON, as written to disk
	// Body is the record's long-text body; it is meaningful only when HasBody
	// is true.
	Body string
	// HasBody reports whether the record has a body (live fetch) or a readable
	// Markdown body file exists next to the cached JSON (cache hit).
	HasBody bool
	// FromCache is true when the record was read from the data tree and false
	// when it was fetched live.
	FromCache bool
}

// Dereference resolves a URI cache-first. Unless refresh is set, a record that is
// already materialized under the data tree is returned as-is with FromCache true.
// On a cache miss, or when refresh forces it, the record is fetched through Get,
// written back to the tree so the next read can be served offline, and returned
// with FromCache false.
//
// This is the read path the web console drives: browsing does not re-fetch what
// ant already holds, and refresh is the explicit way to pull a fresh copy.
//
// An error is returned when the fetch fails or the fetched envelope cannot be
// marshalled; a failed write-back is not an error.
func (e *Engine) Dereference(ctx context.Context, u kit.URI, refresh bool) (Fetched, error) {
	if !refresh {
		cached, hit := e.readCache(u)
		if hit {
			return cached, nil
		}
	}

	env, err := e.Get(ctx, u)
	if err != nil {
		return Fetched{}, err
	}
	text, withBody := e.host.Body(env.Data)

	// Persist the record so the next read is a cache hit. The result is
	// deliberately ignored: the record is already in hand, and a read-only data
	// dir should still be able to serve it.
	_, _ = e.writeEnvelope(u, env, withBody)

	indented, err := json.MarshalIndent(env, "", " ")
	if err != nil {
		return Fetched{}, err
	}

	var out Fetched
	out.Env = env
	out.Raw = indented
	out.Body = text
	out.HasBody = withBody
	out.FromCache = false
	return out, nil
}

// Cached reports whether the JSON file for a URI's record can be stat'ed in the
// data tree. It never opens the file, so a caller can cheaply decide whether to
// show a cache badge or a refresh affordance. Any stat failure, not only a
// missing file, reads as "not cached".
func (e *Engine) Cached(u kit.URI) bool {
	if _, err := os.Stat(e.dataFile(u, "json")); err != nil {
		return false
	}
	return true
}

// Lookup serves a record strictly from the on-disk cache; it never fetches and
// never writes. The boolean is false on a miss, meaning the record is absent or
// unreadable. It is the read-only half of Dereference (same cache read, nothing
// else), so the web console can render a cached page at once and hand only a
// miss to a background fetch.
func (e *Engine) Lookup(u kit.URI) (Fetched, bool) {
	rec, hit := e.readCache(u)
	return rec, hit
}

// readCache loads a materialized record from the data tree. The record's JSON
// file must be readable and must decode into an envelope; otherwise the result
// is a miss (false) and the caller is expected to fall through to a live fetch.
// The Markdown body file is optional: when it can be read, its text (without the
// front-matter block) becomes the body.
func (e *Engine) readCache(u kit.URI) (Fetched, bool) {
	raw, err := os.ReadFile(e.dataFile(u, "json"))
	if err != nil {
		return Fetched{}, false
	}

	var out Fetched
	if json.Unmarshal(raw, &out.Env) != nil {
		return Fetched{}, false
	}
	// Keep the file's bytes verbatim so field order survives for renderers.
	out.Raw = raw
	out.FromCache = true

	text, hasBody := readBodyFile(e.dataFile(u, "md"))
	if hasBody {
		out.Body = text
		out.HasBody = true
	}
	return out, true
}

// readBodyFile reads an exported Markdown file and returns its body text. The
// boolean is false only when the file cannot be read.
//
// When the file opens with a "---" fence line, everything up to and including
// the first closing "---" line (the JSON front-matter block writeEnvelope
// writes) is dropped. If no closing fence line is found the content is left
// untouched. Leading newlines are trimmed from whatever remains.
func readBodyFile(path string) (string, bool) {
	data, err := os.ReadFile(path)
	if err != nil {
		return "", false
	}

	const fence = "---\n"
	text := string(data)
	if strings.HasPrefix(text, fence) {
		// Cut at the first closing fence after the opening one; the part
		// before it is front matter, the part after it is the body.
		if _, after, closed := strings.Cut(text[len(fence):], "\n"+fence); closed {
			text = after
		}
	}
	return strings.TrimLeft(text, "\n"), true
}
84 changes: 84 additions & 0 deletions ant/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package ant_test

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/tamnd/ant/ant"
"github.com/tamnd/any-cli/kit"
)

// TestLLIndexIsCachedAndWriteThrough proves the in-memory listing index: a repeat
// LL is served from memory (so a file written behind the Engine's back is not
// seen), while a record written through the Engine appears at once (write-through
// keeps the index warm without a re-walk). This is what keeps the web console's
// dashboard and browse pages fast as the data tree grows.
//
// Every LL error is checked. A failing LL returns an empty listing, which would
// otherwise let the negative "stray not seen" assertion pass vacuously.
func TestLLIndexIsCachedAndWriteThrough(t *testing.T) {
	e, root := newEngine(t)
	ctx := context.Background()

	// Seed the cache entry for the prefix with an initial walk (empty tree).
	if got, err := e.LL("fake://"); err != nil || len(got) != 0 {
		t.Fatalf("initial LL = %v, %v; want empty", got, err)
	}

	// Export a record through the Engine: write-through must fold it into the
	// already-cached listing.
	// NOTE(review): ParseURI's second result is still discarded; its type is not
	// visible from this file.
	u, _ := kit.ParseURI("fake://book/b1")
	if _, err := e.Export(ctx, u, 0, false); err != nil {
		t.Fatal(err)
	}
	if got, err := e.LL("fake://"); err != nil || !has(got, "fake://book/b1") {
		t.Fatalf("after Export, LL = %v, %v; want it to contain b1", got, err)
	}

	// Write a record file directly to disk, behind the Engine's back. The cache is
	// authoritative for the session, so LL must NOT pick it up.
	stray := filepath.Join(root, "fake", "book", "stray.json")
	if err := os.WriteFile(stray, []byte(`{"@id":"fake://book/stray"}`), 0o644); err != nil {
		t.Fatal(err)
	}
	got, err := e.LL("fake://")
	if err != nil {
		t.Fatalf("LL after stray write: %v", err)
	}
	// The listing must still be the live one (b1 present); otherwise the absence
	// of the stray proves nothing.
	if !has(got, "fake://book/b1") {
		t.Fatalf("after stray write, LL = %v; want it to still contain b1", got)
	}
	if has(got, "fake://book/stray") {
		t.Errorf("LL saw a file written behind the Engine's back: %v", got)
	}

	// A fresh Engine on the same root walks from scratch and does see the stray,
	// proving the file was really on disk and the cache (not a missing write) hid it.
	e2, err := ant.New(ant.WithRoot(root))
	if err != nil {
		t.Fatal(err)
	}
	got, err = e2.LL("fake://")
	if err != nil {
		t.Fatalf("fresh Engine LL: %v", err)
	}
	if !has(got, "fake://book/stray") {
		t.Errorf("fresh Engine missed the on-disk stray: %v", got)
	}
}

// TestDereferenceWriteBackIndexes proves a cache-first Dereference miss writes the
// record back and folds it into the listing index, so it shows up in browse with
// no re-walk.
//
// Both LL errors are checked. A failing LL returns an empty listing, which would
// satisfy the "empty start" precondition vacuously and turn the final assertion
// into a misleading "not indexed" failure.
func TestDereferenceWriteBackIndexes(t *testing.T) {
	e, _ := newEngine(t)
	ctx := context.Background()

	if got, err := e.LL("fake://"); err != nil || len(got) != 0 {
		t.Fatalf("expected empty start, got %v, %v", got, err)
	}

	// NOTE(review): ParseURI's second result is still discarded; its type is not
	// visible from this file.
	u, _ := kit.ParseURI("fake://book/b9")
	if _, err := e.Dereference(ctx, u, false); err != nil {
		t.Fatal(err)
	}

	got, err := e.LL("fake://")
	if err != nil {
		t.Fatalf("LL after Dereference: %v", err)
	}
	if !has(got, "fake://book/b9") {
		t.Errorf("Dereference did not index the written record: %v", got)
	}
}

// has reports whether want occurs anywhere in ss. A nil or empty slice never
// contains anything.
func has(ss []string, want string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == want {
			return true
		}
	}
	return false
}
Loading
Loading