Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,18 @@ body { margin: 0; padding: 0; font-family: var(--vscode-font-family); color: var
}
}
function fmtAge(ts) {
const days = Math.floor((Date.now() / 1000 - parseFloat(ts)) / 86400);
if (days === 0) return 'today';
const secs = Math.floor(Date.now() / 1000 - parseFloat(ts));
if (secs < 0) return 'just now';
const days = Math.floor(secs / 86400);
if (days === 0) {
if (secs < 60) return 'just now';
if (secs < 3600) {
const m = Math.floor(secs / 60);
return m + ' minute' + (m !== 1 ? 's' : '') + ' ago';
}
const h = Math.floor(secs / 3600);
return h + ' hour' + (h !== 1 ? 's' : '') + ' ago';
}
if (days === 1) return 'yesterday';
if (days < 30) return days + ' days ago';
if (days < 365) return Math.floor(days / 30) + ' months ago';
Expand Down Expand Up @@ -641,6 +651,8 @@ body { margin: 0; padding: 0; font-family: var(--vscode-font-family); color: var
const by = project.last_modified_by != null ? project.last_modified_by : null;
metaParts.push('last modified ' + age + (by ? ' by ' + by : ''));
}
if (project.scanned_at != null)
metaParts.push('scanned ' + fmtAge(project.scanned_at));
if (metaParts.length > 0) {
const meta = document.createElement('div');
meta.className = 'meta';
Expand Down Expand Up @@ -914,15 +926,60 @@ body { margin: 0; padding: 0; font-family: var(--vscode-font-family); color: var
body.innerHTML = sanitizeHtml(html);
w.appendChild(body);
} else {
// Datasets (and other content) may carry rich previews in
// metadata.html_repr (an HTML fragment) and metadata.thumbnail
// (a data: image URL). Embed those rather than dumping their
// (often huge) raw strings into the YAML tree.
const meta = (kind === 'content' && data && typeof data === 'object'
&& data.metadata && typeof data.metadata === 'object') ? data.metadata : null;
const htmlRepr = meta && typeof meta.html_repr === 'string' ? meta.html_repr : null;
const thumb = meta && typeof meta.thumbnail === 'string' ? meta.thumbnail : null;

const tree = document.createElement('div');
tree.className = 'tree yaml';
tree.appendChild(renderYaml(stripKlass(data)));
tree.appendChild(renderYaml(stripPreview(stripKlass(data))));
w.appendChild(tree);

if (thumb) w.appendChild(thumbnailImg(thumb));
if (htmlRepr) {
const body = document.createElement('div');
body.className = 'widget-html';
body.innerHTML = sanitizeHtml(htmlRepr);
w.appendChild(body);
}
}

return w;
}

// Return `obj` with the preview-only entries ('html_repr', 'thumbnail')
// removed from its `metadata` mapping. The input is never mutated: when
// something has to be dropped a shallow copy (with a fresh `metadata`
// object) is returned, otherwise the very same `obj` is handed back.
// Non-objects, arrays, and objects without a plain-object `metadata`
// are returned untouched.
function stripPreview(obj) {
  const isPlainObject = (value) =>
    Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainObject(obj) || !isPlainObject(obj.metadata)) return obj;

  const previewKeys = ['html_repr', 'thumbnail'];
  const metaKeys = Object.keys(obj.metadata);
  const keptKeys = metaKeys.filter((key) => previewKeys.indexOf(key) === -1);
  // Nothing was filtered out, so there is no reason to allocate a copy.
  if (keptKeys.length === metaKeys.length) return obj;

  const trimmedMeta = {};
  keptKeys.forEach((key) => { trimmedMeta[key] = obj.metadata[key]; });

  const copy = {};
  Object.keys(obj).forEach((key) => { copy[key] = obj[key]; });
  copy.metadata = trimmedMeta;
  return copy;
}

// Build a 'widget-html' <div> for a thumbnail preview.
// An <img> is only added when `src` starts with "data:image/"
// (case-insensitive); for any other value the container is returned empty.
function thumbnailImg(src) {
  const container = document.createElement('div');
  container.className = 'widget-html';

  const isInlineImage = /^data:image\//i.test(src);
  if (!isInlineImage) return container;

  const picture = document.createElement('img');
  picture.src = src;
  picture.alt = 'thumbnail';
  container.appendChild(picture);
  return container;
}

function sanitizeHtml(html) {
const tpl = document.createElement('template');
tpl.innerHTML = String(html);
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ dependencies = [
"fsspec",
"click",
"jinja2",
"intake==2.1.0a2"
]

[project.optional-dependencies]
test = ["pytest", "pytest-cov", "django", "streamlit", "copier", "jinja2-time", "flask",
"maturin", "uv", "briefcase"]
"maturin", "uv", "briefcase", "textual"]
qt = ["pyqt>5,<6", "pyqtwebengin>5,<6"]
textual = ["textual>=0.80"]
ipywidget = ["anywidget>=0.9", "ipywidgets>=8", "ipython"]
Expand Down
9 changes: 0 additions & 9 deletions src/projspec/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,6 @@ def version():
default=False,
help="JSON output, for projects only",
)
@click.option(
"--html-out",
is_flag=True,
default=False,
help="HTML output, for projects only",
)
@click.option(
"--walk", is_flag=True, help="Descend into child directories of each match"
)
Expand All @@ -112,7 +106,6 @@ def scan(
types,
xtypes,
json_out,
html_out,
walk,
summary,
library,
Expand Down Expand Up @@ -146,8 +139,6 @@ def scan(
else:
if json_out:
print(json.dumps(proj.to_dict(compact=False)))
elif html_out:
print(proj._repr_html_())
else:
print(proj)

Expand Down
42 changes: 41 additions & 1 deletion src/projspec/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from typing import Any

conf: dict[str, dict[str, Any]] = {}
conf: dict[str, Any] = {}
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/projspec")


Expand Down Expand Up @@ -33,12 +33,19 @@ def coerce(template, val):
def defaults():
return {
"library_path": f"{conf_dir()}/library.json",
"auto_rescan": 7 * 24 * 60 * 60, # one week, in seconds
"scan_types": [".py", ".yaml", ".yml", ".toml", ".json", ".md"],
"scan_max_files": 100,
"scan_max_size": 5 * 2**10,
"remote_artifact_status": False,
"capture_artifact_output": True,
"preferred_install_methods": ["conda", "pip"],
"data_min_fraction": 0.5,
"data_min_file_size": 1024 * 1024,
"data_min_total_size": 10 * 1024 * 1024,
"data_min_play_size": 1, # 64 * 1024,
"data_consolidate_min_group": 3,
"data_inspect_max_datasets": 50,
"excludes": [
"bld",
"build",
Expand All @@ -56,6 +63,11 @@ def defaults():

config_doc = {
"library_path": "location of persisted project objects",
"auto_rescan": (
"maximum age (seconds) of a project loaded from the library before it "
"is automatically rescanned and re-saved. Set to 0 to disable "
"automatic rescanning. Default is one week."
),
"scan_types": "files extensions automatically read for scanning",
"scan_max_files": "don't scan files if more than this number in the project",
"scan_max_size": "don't scan files bigger than this (in bytes)",
Expand All @@ -68,6 +80,34 @@ def defaults():
"ordered list of preferred installer names for install_tool(), "
"e.g. ['uv', 'conda', 'pip']. Empty list uses the platform default."
),
"data_min_fraction": (
"fraction (0-1) of a project's total bytes that must be data files "
"before a code/other project is also reported as a DataProject. Data "
"below this fraction is only scanned if the project matches no other "
"type, or individual files exceed data_min_file_size."
),
"data_min_file_size": (
"a single data file at or above this size (bytes) is considered "
"significant enough to scan even in an otherwise code project."
),
"data_min_total_size": (
"minimum total size (bytes) of candidate data before a directory that "
"also matches another project type is additionally reported as a "
"DataProject (used together with data_min_fraction)."
),
"data_min_play_size": (
"floor (bytes) below which even a directory that matches no other "
"project type is dismissed as toy/play data and not reported as a "
"DataProject."
),
"data_consolidate_min_group": (
"minimum number of numbered/related files (e.g. 001.csv, 002.csv) that "
"are consolidated into a single dataset."
),
"data_inspect_max_datasets": (
"do not run intake inspection if more than this many distinct datasets "
"are found in a directory (avoids huge scans)."
),
"excludes": (
"directory names to skip when walking a project tree for child projects "
"and file statistics. Directories whose names start with '.' or '_' are "
Expand Down
14 changes: 11 additions & 3 deletions src/projspec/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@
PipelineStage,
ServiceDependency,
)
from projspec.content.data import TabularData, IntakeSource
from projspec.content.data import (
Dataset,
FrictionlessData,
IntakeSource,
TabularData,
)
from projspec.content.env_var import EnvironmentVariables
from projspec.content.environment import Environment, Stack, Precision
from projspec.content.executable import Command
from projspec.content.metadata import DescriptiveMetadata, License
from projspec.content.metadata import Citation, DescriptiveMetadata, License
from projspec.content.package import PythonPackage
from projspec.content.vcs import VCSInfo

Expand All @@ -22,10 +27,13 @@
"GithubAction",
"PipelineStage",
"ServiceDependency",
"TabularData",
"Dataset",
"FrictionlessData",
"IntakeSource",
"TabularData",
"EnvironmentVariables",
"Command",
"Citation",
"License",
"DescriptiveMetadata",
"PythonPackage",
Expand Down
Loading
Loading