summary refs log tree commit diff
path: root/third_party/cachunker/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/cachunker/src/lib.rs')
-rw-r--r-- third_party/cachunker/src/lib.rs 81
1 files changed, 81 insertions, 0 deletions
diff --git a/third_party/cachunker/src/lib.rs b/third_party/cachunker/src/lib.rs
new file mode 100644
index 0000000..10c7f00
--- /dev/null
+++ b/third_party/cachunker/src/lib.rs
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+use std::mem;
+
+pub struct Chunker<'a> { // iterator that hands out consecutive sub-slices of a borrowed byte buffer
+	state: sys::CaChunker, // scanner state; `next` calls `scan` on it for every chunk
+	buffer: &'a [u8], // portion of the input that has not been yielded yet
+}
+
+impl<'a> Chunker<'a> {
+	pub fn from(buffer: &'a [u8]) -> Self { // builds a chunker over `buffer` with a freshly created scanner state
+		Self {
+			state: sys::CaChunker::new(),
+			buffer,
+		}
+	}
+	/// Splits off and returns the first `idx` bytes of `self.buffer`; the remainder stays in `self.buffer`.
+	/// SAFETY: `idx` must be an in-bounds index for `self.buffer`; both slicing calls below skip bounds checks
+	unsafe fn cut(&mut self, idx: usize) -> &'a [u8] {
+		let ret;
+		(ret, self.buffer) = ( // destructuring assignment: head goes to `ret`, tail replaces `self.buffer`
+			self.buffer.get_unchecked(..idx), // bytes before `idx`
+			self.buffer.get_unchecked(idx..), // bytes from `idx` to the end
+		);
+		ret
+	}
+}
+
+impl<'a> Iterator for Chunker<'a> {
+	type Item = &'a [u8]; // every item borrows from the original input, not from the iterator
+	fn next(&mut self) -> Option<Self::Item> { // yields the next chunk, or `None` once the buffer is empty
+		if self.buffer.is_empty() {
+			return None;
+		}
+		// Ask the scanner for a cut position within the bytes that are still pending.
+		Some(match self.state.scan(self.buffer) {
+			Some(n) => unsafe { self.cut(n) }, // NOTE(review): assumes `scan` returns an offset within `self.buffer`; nothing here verifies it
+			None => mem::take(&mut self.buffer), // no cut reported: yield everything that is left and leave the buffer empty
+		})
+	}
+}
+
+mod sys { // FFI declarations for the external chunker, plus safe wrapper methods around them
+	#[repr(C)] // C field layout: a pointer to this struct is passed to `ca_chunker_scan`
+	#[derive(Clone)]
+	pub struct CaChunker {
+		h: u32,
+		// NOTE(review): field order and types presumably mirror the C-side struct — confirm before editing
+		window_size: usize,
+		chunk_size: usize,
+
+		chunk_size_min: usize,
+		chunk_size_max: usize,
+		chunk_size_avg: usize,
+
+		discriminator: usize,
+
+		window: [u8; 48],
+	}
+
+	impl CaChunker {
+		pub fn new() -> CaChunker { // fresh state: a clone of the extern static `ca_chunker_init`
+			unsafe { &ca_chunker_init }.clone() // referencing an extern static requires `unsafe`
+		}
+
+		pub fn scan(&mut self, data: &[u8]) -> Option<usize> { // runs `ca_chunker_scan` on `data`, passing `self` as a mutable pointer
+			unsafe {
+				match ca_chunker_scan(self, data.as_ptr(), data.len()) {
+					usize::MAX => None, // sentinel return value, mapped to "no result"
+					n => Some(n), // any other return value is passed through unchanged
+				}
+			}
+		}
+	}
+	// The symbols declared below are not defined in this file; they must be supplied at link time.
+	extern "C" {
+		static ca_chunker_init: CaChunker;
+		fn ca_chunker_scan(c: *mut CaChunker, p: *const u8, n: usize) -> usize;
+	}
+}