diff --git a/gitoxide-core/src/pack/explode.rs b/gitoxide-core/src/pack/explode.rs index eaa248b73af..ab02627ea57 100644 --- a/gitoxide-core/src/pack/explode.rs +++ b/gitoxide-core/src/pack/explode.rs @@ -105,6 +105,18 @@ impl gix::objs::Write for OutputWriter { } } + fn write_buf_with_known_id( + &self, + kind: object::Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + match self { + OutputWriter::Loose(db) => db.write_buf_with_known_id(kind, from, id), + OutputWriter::Sink(db) => db.write_buf_with_known_id(kind, from, id), + } + } + fn write_stream( &self, kind: object::Kind, @@ -116,6 +128,19 @@ impl gix::objs::Write for OutputWriter { OutputWriter::Sink(db) => db.write_stream(kind, size, from), } } + + fn write_stream_with_known_id( + &self, + kind: object::Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + match self { + OutputWriter::Loose(db) => db.write_stream_with_known_id(kind, size, from, id), + OutputWriter::Sink(db) => db.write_stream_with_known_id(kind, size, from, id), + } + } } impl OutputWriter { diff --git a/gix-object/src/traits/_impls.rs b/gix-object/src/traits/_impls.rs index 385e62363ea..e887b4e8bd1 100644 --- a/gix-object/src/traits/_impls.rs +++ b/gix-object/src/traits/_impls.rs @@ -16,9 +16,28 @@ where (*self).write_buf(object, from) } + fn write_buf_with_known_id( + &self, + object: Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + (*self).write_buf_with_known_id(object, from, id) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { (*self).write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + (*self).write_stream_with_known_id(kind, size, from, id) + } } impl crate::Write for Arc @@ -33,9 +52,28 @@ where self.deref().write_buf(object, from) } + fn write_buf_with_known_id( + &self, + object: Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + self.deref().write_buf_with_known_id(object, from, id) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + self.deref().write_stream_with_known_id(kind, size, from, id) + } } impl crate::Write for Rc @@ -50,9 +88,28 @@ where self.deref().write_buf(object, from) } + fn write_buf_with_known_id( + &self, + object: Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + self.deref().write_buf_with_known_id(object, from, id) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + self.deref().write_stream_with_known_id(kind, size, from, id) + } } impl WriteTo for &T diff --git a/gix-object/src/traits/mod.rs b/gix-object/src/traits/mod.rs index f0078fd9345..66aa0bc44fa 100644 --- a/gix-object/src/traits/mod.rs +++ b/gix-object/src/traits/mod.rs @@ -15,6 +15,16 @@ pub trait Write { fn write_buf(&self, object: crate::Kind, mut from: &[u8]) -> Result { self.write_stream(object, from.len() as u64, &mut from) } + /// As [`write_buf`](Write::write_buf), but the object `id` has already been computed by the caller. + /// + /// Implementations may trust the given `id` and avoid computing it again. Callers must make sure `id` matches + /// the provided `object` and `from` bytes. + fn write_buf_with_known_id( + &self, + object: crate::Kind, + from: &[u8], + id: gix_hash::ObjectId, + ) -> Result; /// As [`write`](Write::write), but takes an input stream. /// This is commonly used for writing blobs directly without reading them to memory first. fn write_stream( @@ -23,6 +33,17 @@ pub trait Write { size: u64, from: &mut dyn io::Read, ) -> Result; + /// As [`write_stream`](Write::write_stream), but the object `id` has already been computed by the caller. + /// + /// Implementations may trust the given `id` and avoid computing it again. Callers must make sure `id` matches + /// the provided `kind`, `size` and stream contents. + fn write_stream_with_known_id( + &self, + kind: crate::Kind, + size: u64, + from: &mut dyn io::Read, + id: gix_hash::ObjectId, + ) -> Result; } /// Writing of objects to a `Write` implementation diff --git a/gix-odb/src/cache.rs b/gix-odb/src/cache.rs index c116c0e6e2f..e85127c5e77 100644 --- a/gix-odb/src/cache.rs +++ b/gix-odb/src/cache.rs @@ -153,6 +153,25 @@ mod impls { ) -> Result { self.inner.write_stream(kind, size, from) } + + fn write_buf_with_known_id( + &self, + kind: Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + self.inner.write_buf_with_known_id(kind, from, id) + } + + fn write_stream_with_known_id( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + self.inner.write_stream_with_known_id(kind, size, from, id) + } } impl gix_object::Find for Cache diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs index 5412f8b97b1..cff4b3d4ccb 100644 --- a/gix-odb/src/memory.rs +++ b/gix-odb/src/memory.rs @@ -227,6 +227,38 @@ where map.borrow_mut().insert(id, (kind, buf)); Ok(id) } + + fn write_buf_with_known_id( + &self, + kind: gix_object::Kind, + from: &[u8], + id: gix_hash::ObjectId, + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_buf_with_known_id(kind, from, id); + }; + + map.borrow_mut().insert(id, (kind, from.to_owned())); + Ok(id) + } + + fn write_stream_with_known_id( + &self, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + id: gix_hash::ObjectId, + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_stream_with_known_id(kind, size, from, id); + }; + + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + + map.borrow_mut().insert(id, (kind, buf)); + Ok(id) + } } impl Deref for Proxy { diff --git a/gix-odb/src/sink.rs b/gix-odb/src/sink.rs index c06df83631c..ae22efa16bc 100644 --- a/gix-odb/src/sink.rs +++ b/gix-odb/src/sink.rs @@ -20,6 +20,15 @@ impl Sink { } impl gix_object::Write for Sink { + fn write_buf_with_known_id( + &self, + kind: gix_object::Kind, + mut from: &[u8], + id: gix_hash::ObjectId, + ) -> Result { + self.write_stream_with_known_id(kind, from.len() as u64, &mut from, id) + } + fn write_stream( &self, kind: gix_object::Kind, @@ -55,4 +64,38 @@ impl gix_object::Write for Sink { Ok(hasher.try_finalize()?) } + + fn write_stream_with_known_id( + &self, + kind: gix_object::Kind, + mut size: u64, + from: &mut dyn io::Read, + id: gix_hash::ObjectId, + ) -> Result { + let mut buf = [0u8; u16::MAX as usize]; + let header = gix_object::encode::loose_header(kind, size); + + let possibly_compress = |buf: &[u8]| -> io::Result<()> { + if let Some(compressor) = self.compressor.as_ref() { + compressor.try_borrow_mut().expect("no recursion").write_all(buf)?; + } + Ok(()) + }; + + possibly_compress(&header).map_err(Box::new)?; + + while size != 0 { + let bytes = (size as usize).min(buf.len()); + from.read_exact(&mut buf[..bytes]).map_err(Box::new)?; + possibly_compress(&buf[..bytes]).map_err(Box::new)?; + size -= bytes as u64; + } + if let Some(compressor) = self.compressor.as_ref() { + let mut c = compressor.borrow_mut(); + c.flush().map_err(Box::new)?; + c.reset(); + } + + Ok(id) + } } diff --git a/gix-odb/src/store_impls/dynamic/write.rs b/gix-odb/src/store_impls/dynamic/write.rs index d0b8735121f..c7c82ab22f7 100644 --- a/gix-odb/src/store_impls/dynamic/write.rs +++ b/gix-odb/src/store_impls/dynamic/write.rs @@ -43,4 +43,47 @@ where } }) } + + fn write_buf_with_known_id( + &self, + kind: Kind, + from: &[u8], + id: ObjectId, + ) -> Result { + let mut snapshot = self.snapshot.borrow_mut(); + Ok(match snapshot.loose_dbs.first() { + Some(ldb) => ldb.write_buf_with_known_id(kind, from, id)?, + None => { + let new_snapshot = self + .store + .load_one_index(self.refresh, snapshot.marker) + .map_err(Box::new)? + .expect("there is always at least one ODB, and this code runs only once for initialization"); + *snapshot = new_snapshot; + snapshot.loose_dbs[0].write_buf_with_known_id(kind, from, id)? + } + }) + } + + fn write_stream_with_known_id( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + id: ObjectId, + ) -> Result { + let mut snapshot = self.snapshot.borrow_mut(); + Ok(match snapshot.loose_dbs.first() { + Some(ldb) => ldb.write_stream_with_known_id(kind, size, from, id)?, + None => { + let new_snapshot = self + .store + .load_one_index(self.refresh, snapshot.marker) + .map_err(Box::new)? + .expect("there is always at least one ODB, and this code runs only once for initialization"); + *snapshot = new_snapshot; + snapshot.loose_dbs[0].write_stream_with_known_id(kind, size, from, id)? + } + }) + } } diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index d94c16556bd..cfc64d024fe 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -64,6 +64,29 @@ impl gix_object::Write for Store { Ok(self.finalize_object(to)?) } + fn write_buf_with_known_id( + &self, + kind: gix_object::Kind, + from: &[u8], + id: gix_hash::ObjectId, + ) -> Result { + let mut to = self.compressed_tempfile().map_err(Box::new)?; + to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) + .map_err(|err| Error::Io { + source: err.into(), + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + to.write_all(from).map_err(|err| Error::Io { + source: err.into(), + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + Ok(self.finalize_object_at(id, to)?) + } + /// Write the given stream in `from` to disk with at least one syscall. /// /// This will cost at least 4 IO operations. @@ -91,6 +114,32 @@ impl gix_object::Write for Store { to.flush().map_err(Box::new)?; Ok(self.finalize_object(to)?) } + + fn write_stream_with_known_id( + &self, + kind: gix_object::Kind, + size: u64, + mut from: &mut dyn io::Read, + id: gix_hash::ObjectId, + ) -> Result { + let mut to = self.compressed_tempfile().map_err(Box::new)?; + to.write_all(&gix_object::encode::loose_header(kind, size)) + .map_err(|err| Error::Io { + source: err.into(), + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + io::copy(&mut from, &mut to) + .map_err(|err| Error::Io { + source: err.into(), + message: "stream all data into tempfile in", + path: self.path.to_owned(), + }) + .map_err(Box::new)?; + to.flush().map_err(Box::new)?; + Ok(self.finalize_object_at(id, to)?) + } } type CompressedTempfile = deflate::Write; @@ -106,7 +155,13 @@ impl Store { } impl Store { + /// A compressed tempfile, with auto-hashing. fn dest(&self) -> Result, Error> { + Ok(gix_hash::io::Write::new(self.compressed_tempfile()?, self.object_hash)) + } + + /// A compressed tempfile, without hasher. + fn compressed_tempfile(&self) -> Result { #[cfg_attr(not(unix), allow(unused_mut))] let mut builder = tempfile::Builder::new(); #[cfg(unix)] @@ -115,14 +170,13 @@ impl Store { let perms = std::fs::Permissions::from_mode(0o444); builder.permissions(perms); } - Ok(gix_hash::io::Write::new( - deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| Error::Io { + Ok(deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| { + Error::Io { source: err.into(), message: "create named temp file in", path: self.path.to_owned(), - })?), - self.object_hash, - )) + } + })?)) } fn finalize_object( @@ -134,6 +188,14 @@ impl Store { message: "hash tempfile in", path: self.path.to_owned(), })?; + self.finalize_object_at(id, file) + } + + fn finalize_object_at( + &self, + id: gix_hash::ObjectId, + file: CompressedTempfile, + ) -> Result { let object_path = loose::hash_path(&id, self.path.clone()); let object_dir = object_path .parent() diff --git a/gix-odb/tests/odb/memory.rs b/gix-odb/tests/odb/memory.rs index 8abf3f33f03..d0d97d8cace 100644 --- a/gix-odb/tests/odb/memory.rs +++ b/gix-odb/tests/odb/memory.rs @@ -101,6 +101,66 @@ fn with_memory() -> crate::Result { Ok(()) } +#[test] +fn with_memory_trusts_known_id() -> crate::Result { + let odb = db()?; + let kind = gix_object::Kind::Blob; + let bytes = b"content"; + let id = gix_hash::Kind::Sha1.null(); + + assert_eq!(odb.write_buf_with_known_id(kind, bytes, id)?, id); + assert_eq!(odb.num_objects_in_memory(), 1); + + let mut buf = Vec::new(); + let object = odb.find(&id, &mut buf)?; + assert_eq!(object.kind, kind); + assert_eq!(object.data, bytes); + + let stream_bytes = b"streamed content"; + let stream_id = gix_object::compute_hash(gix_hash::Kind::Sha1, kind, stream_bytes)?; + assert_eq!( + odb.write_stream_with_known_id(kind, stream_bytes.len() as u64, &mut stream_bytes.as_slice(), stream_id)?, + stream_id + ); + assert_eq!(odb.num_objects_in_memory(), 2); + + let object = odb.find(&stream_id, &mut buf)?; + assert_eq!(object.kind, kind); + assert_eq!(object.data, stream_bytes); + + Ok(()) +} + +#[test] +fn without_memory_forwards_known_id_writes() -> crate::Result { + let (mut odb, _tmp) = db_rw()?; + odb.take_object_memory().expect("it starts out with memory set"); + + let kind = gix_object::Kind::Blob; + let bytes = b"content"; + let id = gix_hash::Kind::Sha1.null(); + + assert_eq!(odb.write_buf_with_known_id(kind, bytes, id)?, id); + + let mut buf = Vec::new(); + let object = odb.find(&id, &mut buf)?; + assert_eq!(object.kind, kind); + assert_eq!(object.data, bytes); + + let stream_bytes = b"streamed content"; + let stream_id = gix_object::compute_hash(gix_hash::Kind::Sha1, kind, stream_bytes)?; + assert_eq!( + odb.write_stream_with_known_id(kind, stream_bytes.len() as u64, &mut stream_bytes.as_slice(), stream_id)?, + stream_id + ); + + let object = odb.find(&stream_id, &mut buf)?; + assert_eq!(object.kind, kind); + assert_eq!(object.data, stream_bytes); + + Ok(()) +} + fn db() -> crate::Result> { let odb = gix_odb::at(crate::scripted_fixture_read_only("repo_with_loose_objects.sh")?.join(".git/objects"))?; Ok(gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1)) diff --git a/gix-odb/tests/odb/store/loose.rs b/gix-odb/tests/odb/store/loose.rs index e6b19f46bc1..79cfc19a51e 100644 --- a/gix-odb/tests/odb/store/loose.rs +++ b/gix-odb/tests/odb/store/loose.rs @@ -73,6 +73,19 @@ mod write { db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, obj.decode()? ); + let actual = db.write_buf_with_known_id(obj.kind, obj.data, oid)?; + assert_eq!(actual, oid); + assert_eq!( + db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, + obj.decode()? + ); + let mut from = obj.data; + let actual = db.write_stream_with_known_id(obj.kind, obj.data.len() as u64, &mut from, oid)?; + assert_eq!(actual, oid); + assert_eq!( + db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, + obj.decode()? + ); } Ok(()) } diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 57a01bd58c1..2b55fb8a0de 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -111,7 +111,7 @@ impl gix_object::Write for crate::Repository { if self.objects.exists(&oid) { return Ok(oid); } - self.objects.write_buf(object, from) + self.objects.write_buf_with_known_id(object, from, oid) } fn write_stream( @@ -127,6 +127,33 @@ impl gix_object::Write for crate::Repository { } self.write_buf(kind, &buf) } + + fn write_buf_with_known_id( + &self, + object: gix_object::Kind, + from: &[u8], + id: gix_hash::ObjectId, + ) -> Result { + if self.objects.exists(&id) { + return Ok(id); + } + self.objects.write_buf_with_known_id(object, from, id) + } + + fn write_stream_with_known_id( + &self, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + id: gix_hash::ObjectId, + ) -> Result { + let mut buf = self.empty_reusable_buffer(); + let bytes = std::io::copy(from, buf.deref_mut())?; + if size != bytes { + return Err(format!("Found {bytes} bytes in stream, but had {size} bytes declared").into()); + } + self.write_buf_with_known_id(kind, &buf, id) + } } impl gix_object::FindHeader for crate::Repository { diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index 7b2c0d8b98d..131ca3810bc 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -265,7 +265,7 @@ impl crate::Repository { } self.objects - .write_buf(kind, buf) + .write_buf_with_known_id(kind, buf, oid) .map(|oid| oid.attach(self)) .map_err(Into::into) } @@ -295,7 +295,7 @@ impl crate::Repository { return Ok(oid.attach(self)); } self.objects - .write_buf(gix_object::Kind::Blob, bytes) + .write_buf_with_known_id(gix_object::Kind::Blob, bytes, oid) .map_err(Into::into) .map(|oid| oid.attach(self)) } @@ -322,7 +322,7 @@ impl crate::Repository { } self.objects - .write_buf(gix_object::Kind::Blob, buf) + .write_buf_with_known_id(gix_object::Kind::Blob, buf, oid) .map_err(Into::into) .map(|oid| oid.attach(self)) }