Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## [0.6.9] - 2026-04-27

### Fixed
- `Manifest.scan` no longer crashes when `Errno::EPERM` or `Errno::EACCES` is raised during the corpus walk (common on macOS for TCC-protected paths such as `~/Library/Accounts`). Unreadable subdirectories are skipped with a debug log and the scan continues. `Find.find` was replaced with a recursive walker that rescues errors per entry; the walker also tolerates `Errno::ELOOP` and `Errno::ENOENT` (for example, files that disappear mid-scan).

> **Version note**: `0.6.8` is reserved for the companion `fix/content-hash-md5-match-apollo-schema` PR (chunker SHA-256 → MD5 hash fix). Both branches target `0.6.7` as their merge base; this PR claims `0.6.9` to avoid intra-batch collision.

## [0.6.7] - 2026-04-15

### Fixed
Expand Down
24 changes: 16 additions & 8 deletions lib/legion/extensions/knowledge/helpers/manifest.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# frozen_string_literal: true

require 'digest'
require 'find'

module Legion
module Extensions
Expand All @@ -12,19 +11,28 @@ module Manifest

# Builds the list of manifest entries found under +path+.
#
# The traversal itself is delegated to +walk+, which appends to the array
# handed to it; this method only owns the accumulator and returns it.
#
# @param path [String] file or directory to start scanning from
# @param extensions [Array<String>] extensions (with leading dot) to accept
# @return [Array] whatever +walk+ appended, in the order it appended it
def scan(path:, extensions: %w[.md .txt .docx .pdf])
  [].tap { |found| walk(path, extensions, found) }
end

Find.find(path) do |entry|
basename = ::File.basename(entry)
Find.prune if basename.start_with?('.')

next unless ::File.file?(entry)
next unless extensions.include?(::File.extname(entry).downcase)
# Recursively visits +entry+, appending a manifest entry to +results+ for
# every regular file whose (downcased) extension is listed in +extensions+.
#
# Entries whose basename starts with a dot are skipped, and dot-directories
# are not descended into. Permission errors, symlink-resolution errors and
# entries that vanish mid-walk are logged at debug level and skipped, so one
# unreadable entry never aborts the whole scan.
#
# Symlinked directories are still followed, but each real directory is
# listed at most once per scan. Without that guard a symlink pointing back
# at an ancestor is re-walked until the OS symlink limit is hit, emitting the
# same files many times.
#
# @param entry [String] path of the file or directory to visit
# @param extensions [Array<String>] accepted extensions, with leading dot
# @param results [Array] accumulator that matching entries are appended to
# @param visited [Hash{String=>true}] real paths of directories already
#   listed during this scan; callers normally omit it
# @return [void]
def walk(entry, extensions, results, visited = {})
  return if ::File.basename(entry).start_with?('.')

  if ::File.directory?(entry)
    # Resolve symlinks so aliases of one directory share a single key.
    real_dir = ::File.realpath(entry)
    return if visited.key?(real_dir)

    visited[real_dir] = true
    # Bytewise sort keeps the result order independent of filesystem listing
    # order and cannot raise on names with incompatible encodings.
    ::Dir.children(entry).sort_by(&:b).each do |child|
      walk(::File.join(entry, child), extensions, results, visited)
    end
  elsif ::File.file?(entry) && extensions.include?(::File.extname(entry).downcase)
    results << build_entry(entry)
  end
rescue Errno::EPERM, Errno::EACCES, Errno::ELOOP, Errno::ENOENT => e
  log.debug("[manifest] skipping unreadable #{entry}: #{e.class}: #{e.message}")
end
private_class_method :walk

results
# Logger used by the manifest helpers.
#
# Looks up the Legion::Logging constant on every call rather than caching it.
#
# @return [Object] the Legion::Logging constant
def log
  logger = Legion::Logging
  logger
end
private_class_method :log

def diff(current:, previous:)
current_map = current.to_h { |e| [e[:path], e[:sha256]] }
Expand Down
2 changes: 1 addition & 1 deletion lib/legion/extensions/knowledge/version.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Version constant for the Legion::Extensions::Knowledge namespace.
# NOTE(review): two VERSION assignments appear because this diff hunk shows
# both the removed and the added line (1 addition & 1 deletion); presumably
# only '0.6.9' remains in the merged file — confirm against the repository.
module Legion
module Extensions
module Knowledge
VERSION = '0.6.7'
VERSION = '0.6.9'
end
end
end
113 changes: 113 additions & 0 deletions spec/legion/extensions/knowledge/helpers/manifest_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,119 @@
expect(result.first[:path]).to include('nested.md')
end
end

it 'treats extension filter as case-insensitive' do
  Dir.mktmpdir do |root|
    # Upper- and mixed-case suffixes must match the lower-case default list.
    { 'UPPER.MD' => '# upper', 'mixed.TxT' => 'mixed' }.each do |name, body|
      File.write(File.join(root, name), body)
    end

    found = manifest.scan(path: root).map { |entry| File.basename(entry[:path]) }

    expect(found).to contain_exactly('UPPER.MD', 'mixed.TxT')
  end
end

it 'skips dot-directories and does not recurse into them' do
  Dir.mktmpdir do |root|
    dot_dir = File.join(root, '.hidden_dir')
    FileUtils.mkdir_p(dot_dir)
    File.write(File.join(dot_dir, 'inside.md'), 'secret')
    File.write(File.join(root, 'visible.md'), 'visible')

    found_paths = manifest.scan(path: root).map { |entry| entry[:path] }

    # Nothing below the dot-directory may leak into the manifest.
    expect(found_paths).not_to include(a_string_matching(%r{/\.hidden_dir/}))
    expect(found_paths.map { |found| File.basename(found) }).to eq(['visible.md'])
  end
end

it 'skips unreadable directories and continues scanning siblings' do
  Dir.mktmpdir do |root|
    open_dir, denied_dir = %w[readable locked].map { |name| File.join(root, name) }
    FileUtils.mkdir_p([open_dir, denied_dir])
    File.write(File.join(open_dir, 'a.md'), 'hello')
    File.write(File.join(denied_dir, 'b.md'), 'nope')

    # Simulate a permission failure when listing only the locked directory.
    allow(Dir).to receive(:children).and_call_original
    allow(Dir).to receive(:children).with(denied_dir).and_raise(Errno::EPERM)

    scanned = manifest.scan(path: root).map { |entry| entry[:path] }

    expect(scanned).to include(end_with('/readable/a.md'))
    expect(scanned).not_to include(end_with('/locked/b.md'))
  end
end

it 'skips unreadable directories raising Errno::EACCES' do
  Dir.mktmpdir do |root|
    open_dir, denied_dir = %w[readable locked].map { |name| File.join(root, name) }
    FileUtils.mkdir_p([open_dir, denied_dir])
    File.write(File.join(open_dir, 'a.md'), 'hello')

    # Listing the locked directory fails; every other listing is real.
    allow(Dir).to receive(:children).and_call_original
    allow(Dir).to receive(:children).with(denied_dir).and_raise(Errno::EACCES)

    names = manifest.scan(path: root).map { |entry| File.basename(entry[:path]) }

    expect(names).to eq(['a.md'])
  end
end

it 'skips multiple unreadable subdirs at different depths without failing' do
  Dir.mktmpdir do |root|
    # Layout under the temp root:
    #   top.md        readable
    #   locked1/      listing raises EPERM
    #   ok/
    #     mid.md      readable
    #     locked2/    listing raises EACCES
    denied_top = File.join(root, 'locked1')
    open_dir = File.join(root, 'ok')
    denied_nested = File.join(open_dir, 'locked2')

    File.write(File.join(root, 'top.md'), 'top')
    FileUtils.mkdir_p(denied_top)
    FileUtils.mkdir_p(open_dir)
    File.write(File.join(open_dir, 'mid.md'), 'mid')
    FileUtils.mkdir_p(denied_nested)

    allow(Dir).to receive(:children).and_call_original
    { denied_top => Errno::EPERM, denied_nested => Errno::EACCES }.each do |dir, error|
      allow(Dir).to receive(:children).with(dir).and_raise(error)
    end

    names = manifest.scan(path: root).map { |entry| File.basename(entry[:path]) }

    expect(names.sort).to eq(%w[mid.md top.md])
  end
end

it 'skips files that disappear between listing and read (ENOENT)' do
  Dir.mktmpdir do |root|
    kept = File.join(root, 'good.md')
    vanished = File.join(root, 'gone.md')
    { kept => 'keep me', vanished => 'disappear' }.each do |file, body|
      File.write(file, body)
    end

    # The file is listed, but sizing it fails as if it had been removed.
    allow(File).to receive(:size).and_call_original
    allow(File).to receive(:size).with(vanished).and_raise(Errno::ENOENT)

    names = manifest.scan(path: root).map { |entry| File.basename(entry[:path]) }

    expect(names).to eq(['good.md'])
  end
end

it 'does not crash when the scan root itself is unreadable' do
  Dir.mktmpdir do |root|
    # Only the root's directory check raises; other paths use the real call.
    allow(File).to receive(:directory?).and_call_original
    allow(File).to receive(:directory?).with(root).and_raise(Errno::EPERM)

    expect { manifest.scan(path: root) }.not_to raise_error

    outcome = manifest.scan(path: root)
    expect(outcome).to eq([])
  end
end
end

describe '.diff' do
Expand Down
Loading