From 86d418417c45518c4bac52d0bdadc8d896e86a81 Mon Sep 17 00:00:00 2001 From: Raphael Sourty Date: Sat, 10 Jan 2026 12:52:07 +0100 Subject: [PATCH 1/3] Improve design --- docs/index.html | 33 +--- docs/style.css | 500 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 403 insertions(+), 130 deletions(-) diff --git a/docs/index.html b/docs/index.html index 0b1a0e93..294e67c8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -60,35 +60,6 @@ } - @@ -219,7 +190,7 @@ graphRootRef.current.render( Date: Sat, 10 Jan 2026 13:33:43 +0100 Subject: [PATCH 2/3] Update design --- Makefile | 8 + api/api.py | 9 + docs/index.html | 211 ++++++++++++++++++++++-- docs/style.css | 110 ++++++++++-- knowledge_database/graph/graph.py | 87 ++++++++-- knowledge_database/pipeline/pipeline.py | 7 + 6 files changed, 390 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index 7be0676b..ec34bed4 100644 --- a/Makefile +++ b/Makefile @@ -5,3 +5,11 @@ launch: local-dev-api: uvicorn api.api:app --reload + +# Start local dev server using uv +dev: + uv run uvicorn api.api:app --reload --port 8000 + +# Install dependencies with uv +install: + uv pip install -r requirements.txt diff --git a/api/api.py b/api/api.py index cc08c987..356612f0 100644 --- a/api/api.py +++ b/api/api.py @@ -17,6 +17,8 @@ origins = [ "https://raphaelsty.github.io", "http://127.0.0.1:8000", + "http://127.0.0.1:3000", + "http://localhost:3000", ] @@ -106,6 +108,13 @@ def plot(k_tags: int, q: str): return knowledge.plot(q=q, k_tags=k_tags) +@app.get("/expand/{node_id}", response_class=ORJSONResponse) +def expand_node(node_id: str): + """Get neighbors of a specific node for progressive graph expansion.""" + nodes, links = knowledge.pipeline.graph.expand(node_id=node_id) + return {"nodes": nodes, "links": links} + + @app.on_event("startup") def start(): """Intialiaze the pipeline.""" diff --git a/docs/index.html b/docs/index.html index 294e67c8..e5ec69dc 100644 --- a/docs/index.html +++ b/docs/index.html @@ -69,6 +69,20 @@
+
+
+ + From documents +
+
+ + From query +
+
+ Click to zoom | Double-click to expand +
+
+ @@ -103,7 +117,12 @@ const [modelStatus, setModelStatus] = useState("Loading Script..."); const [documents, setDocuments] = useState([]); const [isSortedByDate, setIsSortedByDate] = useState(false); - const [resultsReranked, setResultsReranked] = useState(false); // New: Tracks re-ranking status + const [resultsReranked, setResultsReranked] = useState(false); + + // --- Graph Interaction State --- + const [highlightNodes, setHighlightNodes] = useState(new Set()); + const [highlightLinks, setHighlightLinks] = useState(new Set()); + const [graphData, setGraphData] = useState({ nodes: [], links: [] }); // --- Refs --- const searchTimerRef = useRef(null); @@ -111,7 +130,9 @@ const workerRef = useRef(null); const latestQueryIdRef = useRef(0); const graphRootRef = useRef(null); - const rerankTimerRef = useRef(null); // New: Timer for inactivity-based re-ranking + const rerankTimerRef = useRef(null); + const fgRef = useRef(null); + const lastClickTimeRef = useRef(0); // --- Function Declarations (in dependency order) --- @@ -160,7 +181,34 @@ }, [selectedNode, fetchLatest]); // modelStatus dependency removed /** - * Handles hovering over a node in the 3D graph. + * Preprocesses graph data to build neighbor and link references for highlighting. + */ + const preprocessGraphData = useCallback((data) => { + const nodeById = {}; + data.nodes.forEach(node => { + nodeById[node.id] = node; + node.neighbors = []; + node.links = []; + }); + + data.links.forEach(link => { + const sourceId = typeof link.source === 'object' ? link.source.id : link.source; + const targetId = typeof link.target === 'object' ? link.target.id : link.target; + const a = nodeById[sourceId]; + const b = nodeById[targetId]; + if (a && b) { + a.neighbors.push(b); + b.neighbors.push(a); + a.links.push(link); + b.links.push(link); + } + }); + + return data; + }, []); + + /** + * Handles hovering over a node in the 3D graph - triggers search. */ const handleHoverNode = useCallback((hoveredNode) => { if (hoveredNode) { @@ -171,7 +219,101 @@ }, [query, search, documentsToFetch]); /** - * Fetches data and renders the 3D force-directed graph. + * Handles node hover for path highlighting. + */ + const handleGraphNodeHover = useCallback((node) => { + const newHighlightNodes = new Set(); + const newHighlightLinks = new Set(); + + if (node) { + newHighlightNodes.add(node); + if (node.neighbors) { + node.neighbors.forEach(neighbor => newHighlightNodes.add(neighbor)); + } + if (node.links) { + node.links.forEach(link => newHighlightLinks.add(link)); + } + } + + setHighlightNodes(newHighlightNodes); + setHighlightLinks(newHighlightLinks); + + if (node) { + handleHoverNode(node); + } + }, [handleHoverNode]); + + /** + * Handles link hover for highlighting. + */ + const handleGraphLinkHover = useCallback((link) => { + const newHighlightNodes = new Set(); + const newHighlightLinks = new Set(); + + if (link) { + newHighlightLinks.add(link); + newHighlightNodes.add(link.source); + newHighlightNodes.add(link.target); + } + + setHighlightNodes(newHighlightNodes); + setHighlightLinks(newHighlightLinks); + }, []); + + /** + * Handles click on a node - zooms camera to it. + */ + const handleGraphNodeClick = useCallback((node) => { + const now = Date.now(); + const isDoubleClick = now - lastClickTimeRef.current < 300; + lastClickTimeRef.current = now; + + if (isDoubleClick) { + // Double-click: expand neighbors + fetch(`${API_BASE_URL}/expand/${encodeURIComponent(node.id)}`) + .then(res => res.json()) + .then(newData => { + setGraphData(prev => { + const existingNodeIds = new Set(prev.nodes.map(n => n.id)); + const existingLinkKeys = new Set(prev.links.map(l => { + const sId = typeof l.source === 'object' ? l.source.id : l.source; + const tId = typeof l.target === 'object' ? l.target.id : l.target; + return `${sId}-${tId}`; + })); + + const newNodes = newData.nodes.filter(n => !existingNodeIds.has(n.id)); + const newLinks = newData.links.filter(l => { + const key = `${l.source}-${l.target}`; + const reverseKey = `${l.target}-${l.source}`; + return !existingLinkKeys.has(key) && !existingLinkKeys.has(reverseKey); + }); + + const merged = { + nodes: [...prev.nodes, ...newNodes], + links: [...prev.links, ...newLinks] + }; + + return preprocessGraphData(merged); + }); + }) + .catch(error => console.error("[APP] Failed to expand node:", error)); + } else { + // Single click: zoom to node + if (fgRef.current) { + const distance = 120; + const distRatio = 1 + distance / Math.hypot(node.x, node.y, node.z); + fgRef.current.cameraPosition( + { x: node.x * distRatio, y: node.y * distRatio, z: node.z * distRatio }, + node, + 2000 + ); + } + handleHoverNode(node); + } + }, [handleHoverNode, preprocessGraphData]); + + /** + * Fetches data and renders the 3D force-directed graph with enhanced features. */ const plot = useCallback((plotQuery, count) => { const graphContainer = document.getElementById('graph'); @@ -179,37 +321,78 @@ unmountGraph(); return; } - const endpoint = `${API_BASE_URL}/plot/25/${plotQuery.replace("/", "")}`; + const endpoint = `${API_BASE_URL}/plot/12/${plotQuery.replace("/", "")}`; fetch(endpoint) .then(res => res.json()) .then(data => { if (!graphContainer) return; + + const processedData = preprocessGraphData(data); + setGraphData(processedData); + if (!graphRootRef.current) { graphRootRef.current = createRoot(graphContainer); } graphRootRef.current.render( highlightLinks.has(link) ? 0.15 : 0.02} + linkColor={link => highlightLinks.has(link) ? '#ffffff' : 'rgba(255, 255, 255, 0.2)'} + + // Subtle single particles + linkDirectionalParticles={1} + linkDirectionalParticleSpeed={0.003} + linkDirectionalParticleWidth={link => highlightLinks.has(link) ? 1 : 0.5} + linkDirectionalParticleColor={link => highlightLinks.has(link) ? '#ffffff' : 'rgba(255, 255, 255, 0.4)'} + + // Clean text-only nodes - bigger text nodeThreeObject={node => { const sprite = new SpriteText(node.id); - sprite.color = node.color; + const isHighlighted = highlightNodes.size === 0 || highlightNodes.has(node); + + sprite.color = isHighlighted ? node.color : 'rgba(255, 255, 255, 0.2)'; sprite.textHeight = 4; - sprite.fontSize = 50; + sprite.fontSize = 120; sprite.fontFace = "Inter"; + sprite.fontWeight = isHighlighted ? "500" : "400"; + return sprite; }} - onNodeHover={handleHoverNode} + + // Minimal tooltip + nodeLabel={node => ` +
+ ${node.id} +
+ `} + + // Interactions + onNodeClick={handleGraphNodeClick} + onNodeHover={handleGraphNodeHover} + onLinkHover={handleGraphLinkHover} + + // Physics + cooldownTicks={100} + d3AlphaDecay={0.02} + d3VelocityDecay={0.3} + + // Camera - closer zoom + cameraPosition={{ x: 0, y: 0, z: 140 }} /> ); }).catch(error => console.error("[APP] Failed to plot graph:", error)); - }, [isMobile, handleHoverNode, unmountGraph]); + }, [isMobile, unmountGraph, preprocessGraphData, handleGraphNodeClick, handleGraphNodeHover, handleGraphLinkHover, highlightNodes, highlightLinks]); /** * A utility to run search and plot immediately, clearing any pending debounced calls. diff --git a/docs/style.css b/docs/style.css index 1b43479d..78c1c183 100644 --- a/docs/style.css +++ b/docs/style.css @@ -53,10 +53,7 @@ html { } body { - background: var(--bg-primary); - background-image: - radial-gradient(ellipse 80% 50% at 50% -20%, rgba(16, 185, 129, 0.08) 0%, transparent 50%), - radial-gradient(ellipse 60% 40% at 100% 100%, rgba(6, 214, 160, 0.05) 0%, transparent 50%); + background: #080808; min-height: 100vh; margin: 0; padding: 0; @@ -104,8 +101,8 @@ body { right: 0; width: 50%; height: 100vh; - background: var(--bg-primary); - border-left: 1px solid var(--border-subtle); + background: #080808; + border-left: none; } /* Search content container */ @@ -114,13 +111,13 @@ body { float: left; min-height: 100vh; overflow-y: auto; - padding: 24px 32px; + padding: 24px 48px; box-sizing: border-box; } #backsearch { - max-width: 680px; - margin: 0 auto; + max-width: 100%; + margin: 0; } /* ============================================ @@ -129,9 +126,8 @@ body { #search-container { position: fixed; - width: calc(50% - 80px); - max-width: 600px; - left: 40px; + width: calc(50% - 96px); + left: 48px; top: 24px; height: 52px; z-index: 100; @@ -213,8 +209,8 @@ body { #documents { width: 100%; - max-width: 680px; - margin: 90px auto 40px; + max-width: 100%; + margin: 90px 0 40px 0; padding: 0; } @@ -511,3 +507,89 @@ body { .document:nth-child(8) { animation-delay: 0.16s; } .document:nth-child(9) { animation-delay: 0.18s; } .document:nth-child(10) { animation-delay: 0.20s; } + +/* ============================================ + Graph Tooltips & Interactions + ============================================ */ + +/* Override ForceGraph3D tooltip styles */ +.scene-tooltip { + background: rgba(10, 10, 15, 0.95) !important; + backdrop-filter: blur(10px); + -webkit-backdrop-filter: blur(10px); + border: 1px solid var(--border-subtle) !important; + border-radius: var(--radius-md) !important; + padding: 0 !important; + font-family: 'Inter', sans-serif !important; + color: var(--text-primary) !important; + box-shadow: var(--shadow-lg) !important; + max-width: 250px; + pointer-events: none; +} + +/* Graph container enhancements */ +#graph canvas { + cursor: grab; +} + +#graph canvas:active { + cursor: grabbing; +} + +/* Graph Legend */ +.graph-legend { + position: fixed; + bottom: 16px; + right: 16px; + background: rgba(0, 0, 0, 0.6); + backdrop-filter: blur(8px); + -webkit-backdrop-filter: blur(8px); + border: 1px solid rgba(255, 255, 255, 0.06); + border-radius: 6px; + padding: 10px 12px; + z-index: 50; + font-size: 10px; +} + +.graph-legend-item { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 6px; + color: rgba(255, 255, 255, 0.5); + font-weight: 400; +} + +.graph-legend-item:last-of-type { + margin-bottom: 0; +} + +.graph-legend-dot { + width: 6px; + height: 6px; + border-radius: 50%; + flex-shrink: 0; +} + +.graph-legend-dot.query { + background-color: #86E5FF; +} + +.graph-legend-dot.retrieved { + background-color: #10b981; +} + +.graph-legend-hint { + margin-top: 8px; + padding-top: 8px; + border-top: 1px solid rgba(255, 255, 255, 0.06); + color: rgba(255, 255, 255, 0.3); + font-size: 9px; +} + +/* Hide legend on mobile */ +@media (max-width: 768px) { + .graph-legend { + display: none; + } +} diff --git a/knowledge_database/graph/graph.py b/knowledge_database/graph/graph.py index 3fee3bb4..b1ae1ae3 100644 --- a/knowledge_database/graph/graph.py +++ b/knowledge_database/graph/graph.py @@ -26,6 +26,7 @@ class Graph: def __init__(self, triples): self.graph = nx.Graph() + self._document_counts = {} nodes = { **{node["head"]: True for node in triples}, @@ -41,6 +42,15 @@ def __init__(self, triples): self.graph.add_edge(self.node_to_idx[head], self.node_to_idx[tail]) self.graph.add_edge(self.node_to_idx[tail], self.node_to_idx[head]) + @property + def document_counts(self) -> typing.Dict[str, int]: + """Get document counts, returning empty dict if not set (for backwards compatibility).""" + return getattr(self, '_document_counts', {}) + + def set_document_counts(self, document_counts: typing.Dict[str, int]): + """Set document counts for each tag.""" + self._document_counts = document_counts + def __call__( self, tags: typing.List, @@ -57,9 +67,16 @@ def __call__( idx = self.node_to_idx.get(tag, None) if idx is None: lonely.append(tag) + degree = 0 else: nodes.append(idx) - output_nodes[tag] = {"id": tag, "color": color} + degree = self.graph.degree(idx) + output_nodes[tag] = { + "id": tag, + "color": color, + "degree": degree, + "documentCount": self.document_counts.get(tag, 0), + } paths = [] @@ -81,12 +98,14 @@ def __call__( paths.append(self.walk(start=start, k=k_walk)) for path in paths: - for node in path: - node = self.idx_to_node[node] - if node not in output_nodes: - output_nodes[node] = { - "id": node, + for node_idx in path: + node_name = self.idx_to_node[node_idx] + if node_name not in output_nodes: + output_nodes[node_name] = { + "id": node_name, "color": "#FFFFFF", + "degree": self.graph.degree(node_idx), + "documentCount": self.document_counts.get(node_name, 0), } return list(output_nodes.values()), self.format_triples(paths=paths) @@ -122,25 +141,65 @@ def walk(self, start: int, k): return neighbours def format_triples(self, paths: typing.List[typing.List[str]]): - """Convert nodes as triples.""" + """Convert nodes as triples with edge weights.""" triples = {} for path in paths: for start, end in zip(path[:-1], path[1:]): - if start != end and f"{end}_{start}" not in triples: - triples[f"{start}_{end}"] = True + key = tuple(sorted([start, end])) + if key not in triples: + triples[key] = 1 + else: + triples[key] += 1 + + max_weight = max(triples.values()) if triples else 1 links = [] - for triple in triples: - head, tail = tuple(triple.split("_")) - head = self.idx_to_node[int(head)] - tail = self.idx_to_node[int(tail)] + for (start, end), count in triples.items(): + head = self.idx_to_node[start] + tail = self.idx_to_node[end] links.append( { "source": head, "relation": "link", "target": tail, - "value": 1, + "value": count, + "weight": count / max_weight, } ) return links + + def expand(self, node_id: str, k: int = 10): + """Get neighbors of a specific node for progressive graph expansion.""" + idx = self.node_to_idx.get(node_id) + if idx is None: + return [], [] + + output_nodes = { + node_id: { + "id": node_id, + "color": "#86E5FF", + "degree": self.graph.degree(idx), + "documentCount": self.document_counts.get(node_id, 0), + } + } + + links = [] + for i, neighbor_idx in enumerate(nx.all_neighbors(self.graph, idx)): + if i >= k: + break + neighbor_name = self.idx_to_node[neighbor_idx] + output_nodes[neighbor_name] = { + "id": neighbor_name, + "color": "#FFFFFF", + "degree": self.graph.degree(neighbor_idx), + "documentCount": self.document_counts.get(neighbor_name, 0), + } + links.append({ + "source": node_id, + "target": neighbor_name, + "value": 1, + "weight": 0.5, + }) + + return list(output_nodes.values()), links diff --git a/knowledge_database/pipeline/pipeline.py b/knowledge_database/pipeline/pipeline.py index 3f24198c..1646de8d 100644 --- a/knowledge_database/pipeline/pipeline.py +++ b/knowledge_database/pipeline/pipeline.py @@ -37,6 +37,13 @@ def __init__( self.graph = Graph(triples=triples) self.excluded_tags = {} if excluded_tags is None else excluded_tags + # Calculate document counts per tag + document_counts = {} + for url, document in documents.items(): + for tag in document.get("tags", []) + document.get("extra-tags", []): + document_counts[tag] = document_counts.get(tag, 0) + 1 + self.graph.set_document_counts(document_counts) + self.latest_documents = sorted( [{"url": url, **document} for url, document in documents.items()], key=lambda doc: datetime.datetime.strptime(doc["date"], "%Y-%m-%d"), From 05b295bf4883f0d4c3d877455b75ef3cc15c6fab Mon Sep 17 00:00:00 2001 From: Raphael Sourty Date: Sat, 10 Jan 2026 15:38:50 +0100 Subject: [PATCH 3/3] Update project --- .dockerignore | 2 +- .github/workflows/database.yml | 13 +- .github/workflows/lint.yml | 44 + .gitignore | 2 +- .pre-commit-config.yaml | 22 + Dockerfile | 13 +- LICENSE | 2 +- Makefile | 77 +- api/api.py | 260 ++- database/database.json | 2 +- database/triples.json | 2 +- docs/favicon_io/site.webmanifest | 2 +- docs/index.html | 22 +- docs/pkg/.gitignore | 2 +- docs/pkg/package.json | 2 +- docs/style.css | 29 +- knowledge_database/__init__.py | 53 +- knowledge_database/github/github.py | 130 +- knowledge_database/graph/graph.py | 241 +- knowledge_database/hackernews/hackernews.py | 99 +- knowledge_database/huggingface/huggingface.py | 149 +- knowledge_database/pipeline/pipeline.py | 183 +- knowledge_database/retriever/retriever.py | 132 +- knowledge_database/semanlink/semanlink.py | 102 +- knowledge_database/tags/tags.py | 124 +- knowledge_database/twitter/twitter.py | 157 +- knowledge_database/zotero/zotero.py | 90 +- pyproject.toml | 92 + pytest.ini | 2 +- readme.md | 22 +- requirements.txt | 22 - run.py | 195 +- setup.py | 20 - uv.lock | 1959 +++++++++++++++++ 34 files changed, 3640 insertions(+), 628 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py create mode 100644 uv.lock diff --git a/.dockerignore b/.dockerignore index c5ebba77..3874860a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -119,4 +119,4 @@ vite.config.js *.tfstate.* .terraform/ .aws/ -.gcloud/ \ No newline at end of file +.gcloud/ diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml index 33361463..90c53f2e 100644 --- a/.github/workflows/database.yml +++ b/.github/workflows/database.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: checkout repo content - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: install git lfs run: | @@ -20,15 +20,14 @@ jobs: sudo apt-get install git-lfs git lfs install + - name: Install uv + uses: astral-sh/setup-uv@v4 + - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.10" + run: uv python install 3.10 - name: install python packages - run: | - python -m pip install --upgrade pip - pip install . + run: uv pip install --system . - name: track large files run: git lfs track "database/pipeline.pkl" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..caf5eb38 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,44 @@ +name: Lint and Test + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + run: uv python install 3.11 + + - name: Run Ruff linter + run: uvx ruff check . + + - name: Run Ruff formatter check + run: uvx ruff format --check . + + - name: Run mypy + run: uvx mypy . --ignore-missing-imports + + pre-commit: + name: Pre-commit hooks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Set up Python + run: uv python install 3.11 + + - name: Run pre-commit + run: uvx pre-commit run --all-files diff --git a/.gitignore b/.gitignore index 834f8692..dc32b2ce 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,4 @@ dmypy.json # Pyre type checker .pyre/ -*DS_Store \ No newline at end of file +*DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..1cff9f64 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + additional_dependencies: [] + args: [--ignore-missing-imports] diff --git a/Dockerfile b/Dockerfile index 16f3e0c6..b4d8c250 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,19 @@ FROM python:3.10-slim +# Install uv +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + # Clone the repository WORKDIR /code -# Copy the necessary files (you may skip this if already in the repository) +# Copy the necessary files COPY database/pipeline.pkl /code/database/pipeline.pkl -COPY requirements.txt /code/requirements.txt -COPY setup.py /code/setup.py +COPY pyproject.toml /code/pyproject.toml COPY knowledge_database /code/knowledge_database COPY api /code/api -# Install Python dependencies -RUN pip install pip --upgrade -RUN pip install --no-cache-dir -r requirements.txt +# Install Python dependencies using uv +RUN uv pip install --system . # Set up the secret environment variable for OpenAI API Key RUN --mount=type=secret,id=OPENAI_API_KEY sh -c 'echo "export OPENAI_API_KEY=$(cat /run/secrets/OPENAI_API_KEY)" >> /etc/profile.d/openai.sh' diff --git a/LICENSE b/LICENSE index 77cc3e0c..5bd621a9 100644 --- a/LICENSE +++ b/LICENSE @@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -. \ No newline at end of file +. diff --git a/Makefile b/Makefile index ec34bed4..879e49da 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,74 @@ -launch: +.PHONY: install install-dev sync dev api run lint lint-fix check pre-commit pre-commit-install docker-build docker-run launch clean + +# Install dependencies +install: + uv sync --no-dev + +# Install with dev dependencies +install-dev: + uv sync --all-extras + +# Sync dependencies (alias for install-dev) +sync: + uv sync --all-extras + +# Start local dev server +dev: + uv run uvicorn api.api:app --reload --port 8000 + +# Start API server (production-like) +api: + uv run uvicorn api.api:app --host 0.0.0.0 --port 8080 + +# Run the data extraction pipeline +run: + uv run python run.py + +# Linting and formatting check +lint: + uv run ruff check . + uv run ruff format --check . + uv run mypy . --ignore-missing-imports + +# Auto-fix linting issues +lint-fix: + uv run ruff check --fix . + uv run ruff format . + +# Run all checks (lint + type check) +check: lint + +# Run pre-commit hooks on all files +pre-commit: + uv run pre-commit run --all-files + +# Install pre-commit hooks +pre-commit-install: + uv run pre-commit install + +# Build Docker image +docker-build: echo ${OPENAI_API_KEY} > mysecret.txt docker build --secret id=OPENAI_API_KEY,src=mysecret.txt -t knowledge . + rm -f mysecret.txt + +# Run Docker container +docker-run: docker run -d --add-host host.docker.internal:host-gateway --name run_knowledge -p 8080:8080 knowledge -local-dev-api: - uvicorn api.api:app --reload +# Build and run Docker (legacy command) +launch: docker-build docker-run -# Start local dev server using uv -dev: - uv run uvicorn api.api:app --reload --port 8000 +# Stop and remove Docker container +docker-stop: + docker stop run_knowledge || true + docker rm run_knowledge || true -# Install dependencies with uv -install: - uv pip install -r requirements.txt +# Clean up +clean: + rm -rf .venv + rm -rf __pycache__ + rm -rf .mypy_cache + rm -rf .ruff_cache + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + find . -type f -name "*.pyc" -delete 2>/dev/null || true diff --git a/api/api.py b/api/api.py index 356612f0..336e7557 100644 --- a/api/api.py +++ b/api/api.py @@ -1,30 +1,36 @@ +""" +FastAPI server for the Knowledge Search Engine. + +This module provides REST API endpoints for searching documents, visualizing +the knowledge graph, and interacting with an LLM for document recommendations. +""" + import datetime -import json import pickle -import typing from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse, StreamingResponse from openai import OpenAI +# Initialize FastAPI application app = FastAPI( - description="Personnal knowledge graph.", - title="FactGPT", - version="0.0.1", + title="Knowledge Search Engine", + description="Personal knowledge graph with neural search and visualization.", + version="1.0.0", ) -origins = [ +# Configure CORS for frontend access +ALLOWED_ORIGINS = [ "https://raphaelsty.github.io", "http://127.0.0.1:8000", "http://127.0.0.1:3000", "http://localhost:3000", ] - app.add_middleware( CORSMiddleware, - allow_origins=origins, + allow_origins=ALLOWED_ORIGINS, allow_credentials=True, allow_methods=["GET", "POST", "OPTIONS"], allow_headers=["*"], @@ -32,28 +38,66 @@ class Knowledge: - """This class is a wrapper around the pipeline.""" + """ + Wrapper class for the knowledge pipeline. + + Manages the lifecycle of the search pipeline, providing methods for + searching documents and generating graph visualizations. + + Attributes + ---------- + pipeline : Pipeline | None + The loaded knowledge pipeline, or None before initialization. + """ def __init__(self) -> None: self.pipeline = None - def start(self): - """Load the pipeline and the documents.""" + def start(self) -> "Knowledge": + """ + Load the serialized pipeline from disk. + + Returns + ------- + Knowledge + Self reference for method chaining. + """ with open("database/pipeline.pkl", "rb") as f: self.pipeline = pickle.load(f) - return self - def get_latest_documents(self, count: int): - """Returns the most recently added documents.""" + def get_latest_documents(self, count: int) -> list[dict]: + """ + Get the most recently added documents. + + Parameters + ---------- + count : int + Number of documents to return. + + Returns + ------- + list[dict] + List of recent documents sorted by date. + """ return self.pipeline.get_latest_documents(count=count) - def search( - self, - q: str, - tags: str, - ) -> typing.Dict: - """Returns the documents.""" + def search(self, q: str, tags: str) -> list[dict]: + """ + Search for documents matching a query. + + Parameters + ---------- + q : str + Search query string. + tags : str + Whether to filter by tags ("true" or other). + + Returns + ------- + list[dict] + List of matching documents ranked by relevance. + """ return self.pipeline.search(q=q, tags=tags) def plot( @@ -62,8 +106,26 @@ def plot( k_tags: int, k_yens: int = 1, k_walk: int = 3, - ) -> typing.Dict: - """Returns the graph.""" + ) -> dict: + """ + Generate knowledge graph data for visualization. + + Parameters + ---------- + q : str + Search query to build the graph around. + k_tags : int + Maximum number of tags to include. + k_yens : int, default=1 + Number of shortest paths between tags. + k_walk : int, default=3 + Number of neighbors for random walks. + + Returns + ------- + dict + Dictionary with 'nodes' and 'links' for graph visualization. + """ nodes, links = self.pipeline.plot( q=q, k_tags=k_tags, @@ -73,56 +135,139 @@ def plot( return {"nodes": nodes, "links": links} +# Global knowledge instance knowledge = Knowledge() @app.get("/latest/{count}") -def get_latest(count: int): - """Returns the most recently added documents.""" +def get_latest(count: int) -> dict: + """ + Get the most recently added documents. + + Parameters + ---------- + count : int + Number of documents to return. + + Returns + ------- + dict + Dictionary containing 'documents' list. + """ documents = knowledge.get_latest_documents(count=count) return {"documents": documents} @app.get("/search/{sort}/{tags}/{q}") -def search(tags: str, sort: bool, q: str): - """Search for documents.""" +def search(tags: str, sort: bool, q: str) -> dict: + """ + Search for documents with optional sorting and tag filtering. + + Parameters + ---------- + tags : str + Tag filter mode ("null" for no filter, any other value to enable). + sort : bool + Whether to sort results by date (newest first). + q : str + Search query string. + + Returns + ------- + dict + Dictionary containing 'documents' list. + """ tags = tags != "null" documents = knowledge.search(q=q, tags=tags) + + # Sort by date if requested if bool(sort): documents = [ document for _, document in sorted( [(document["date"], document) for document in documents], - key=lambda document: datetime.datetime.strptime( - document[0], "%Y-%m-%d" - ), + key=lambda doc: datetime.datetime.strptime(doc[0], "%Y-%m-%d"), reverse=True, ) ] + return {"documents": documents} @app.get("/plot/{k_tags}/{q}", response_class=ORJSONResponse) -def plot(k_tags: int, q: str): - """Plot tags.""" +def plot(k_tags: int, q: str) -> dict: + """ + Generate knowledge graph visualization data. + + Parameters + ---------- + k_tags : int + Maximum number of tags to include in the graph. + q : str + Search query to build the graph around. + + Returns + ------- + dict + Dictionary with 'nodes' and 'links' for D3/ForceGraph visualization. + """ return knowledge.plot(q=q, k_tags=k_tags) @app.get("/expand/{node_id}", response_class=ORJSONResponse) -def expand_node(node_id: str): - """Get neighbors of a specific node for progressive graph expansion.""" +def expand_node(node_id: str) -> dict: + """ + Expand a node to show its neighbors. + + Used for progressive graph exploration where users click nodes + to reveal their connections. + + Parameters + ---------- + node_id : str + The tag name of the node to expand. + + Returns + ------- + dict + Dictionary with 'nodes' and 'links' for the expanded subgraph. + """ nodes, links = knowledge.pipeline.graph.expand(node_id=node_id) return {"nodes": nodes, "links": links} @app.on_event("startup") -def start(): - """Intialiaze the pipeline.""" +def start() -> Knowledge: + """ + Initialize the knowledge pipeline on server startup. + + Returns + ------- + Knowledge + The initialized knowledge instance. + """ return knowledge.start() async def async_chat(query: str, content: str): - """Re-rank the documents using ChatGPT.""" + """ + Stream LLM responses for document recommendations. + + Uses GPT-4 to analyze retrieved documents and provide natural language + recommendations based on the user's query. + + Parameters + ---------- + query : str + The user's search query. + content : str + Formatted string of retrieved document metadata. + + Yields + ------ + str + Incrementally built response text as tokens arrive. + """ client = OpenAI() response = client.chat.completions.create( @@ -130,13 +275,15 @@ async def async_chat(query: str, content: str): messages=[ { "role": "system", - "content": """ - You are a helpful assistant designed to output JSON. - """, + "content": "You are a helpful assistant designed to output JSON.", }, { "role": "user", - "content": f"Hi, answer in comprehensible english, do not reply with json, among the set of documents retrieved, which documents are related to my query: {query}, set of documents: {content}.", + "content": ( + f"Hi, answer in comprehensible english, do not reply with json, " + f"among the set of documents retrieved, which documents are related " + f"to my query: {query}, set of documents: {content}." + ), }, ], max_tokens=200, @@ -157,18 +304,39 @@ async def async_chat(query: str, content: str): @app.get("/chat/{k_tags}/{q}") -async def chat(k_tags: int, q: str): - """LLM recommendation.""" +async def chat(k_tags: int, q: str) -> StreamingResponse: + """ + Get LLM-powered document recommendations. + + Searches for relevant documents and streams GPT-4's analysis + of which documents best match the query. + + Parameters + ---------- + k_tags : int + Not used (kept for API compatibility). + q : str + Search query for finding relevant documents. + + Returns + ------- + StreamingResponse + Streaming text response from the LLM. + """ documents = knowledge.search(q=q, tags=False) + + # Format documents for LLM context content = "" for document in documents: content += "title: " + document["title"] + "\n" content += "summary: " + document["summary"][:30] + "\n" - content += "targs: " + ( - ", ".join(document["tags"] + document["extra-tags"]) + "\n" - ) + content += "tags: " + (", ".join(document["tags"] + document["extra-tags"]) + "\n") content += "url: " + document["url"] + "\n\n" + + # Truncate to fit context window content = "title: ".join(content[:3000].split("title:")[:-1]) + return StreamingResponse( - async_chat(query=q, content=content), media_type="text/plain" + async_chat(query=q, content=content), + media_type="text/plain", ) diff --git a/database/database.json b/database/database.json index a3b3d42b..ebaca642 100644 --- a/database/database.json +++ b/database/database.json @@ -51600,4 +51600,4 @@ "robotics" ] } -} \ No newline at end of file +} diff --git a/database/triples.json b/database/triples.json index d6a63cf7..27b1647b 100644 --- a/database/triples.json +++ b/database/triples.json @@ -237703,4 +237703,4 @@ "head": "memoryvla", "tail": "pretrained models" } -] \ No newline at end of file +] diff --git a/docs/favicon_io/site.webmanifest b/docs/favicon_io/site.webmanifest index 45dc8a20..1dd91123 100644 --- a/docs/favicon_io/site.webmanifest +++ b/docs/favicon_io/site.webmanifest @@ -1 +1 @@ -{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} \ No newline at end of file +{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} diff --git a/docs/index.html b/docs/index.html index e5ec69dc..1b13dc35 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,9 +10,9 @@ - + - + @@ -26,13 +26,13 @@ - - + + - +