summary refs log tree commit diff
path: root/third_party/cachunker/src/lib.rs
blob: 10c7f00c345ad51b5b466aa38ee0e745a6faab20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
// SPDX-License-Identifier: LGPL-2.1-or-later

use std::mem;

/// Splits a borrowed byte buffer into consecutive sub-slices.
///
/// The type implements [`Iterator`]; every item is a sub-slice borrowed from the
/// buffer handed to [`Chunker::from`], so no data is copied.
pub struct Chunker<'a> {
	/// Scanner state that is passed to the C chunker on every scan.
	state: sys::CaChunker,
	/// The part of the input that has not been yielded yet.
	buffer: &'a [u8],
}

impl<'a> Chunker<'a> {
	/// Creates a chunker over `buffer`, paired with a freshly initialised scanner state.
	pub fn from(buffer: &'a [u8]) -> Self {
		let state = sys::CaChunker::new();
		Self { state, buffer }
	}

	/// Detaches the first `idx` bytes from the remaining buffer and returns them;
	/// the remaining buffer then starts at `idx`.
	///
	/// SAFETY: `idx` must be in bounds for `self.buffer`, i.e. `idx <= self.buffer.len()`.
	/// Both halves are sliced without bounds checks.
	unsafe fn cut(&mut self, idx: usize) -> &'a [u8] {
		// `&'a [u8]` is `Copy`, so take the whole slice first and carve both parts from it.
		let whole = self.buffer;
		self.buffer = whole.get_unchecked(idx..);
		whole.get_unchecked(..idx)
	}
}

impl<'a> Iterator for Chunker<'a> {
	type Item = &'a [u8];

	/// Yields the next chunk, or `None` once the whole buffer has been handed out.
	///
	/// When the scanner reports no cut point in the remaining data, everything that
	/// is left is returned as one final chunk.
	fn next(&mut self) -> Option<Self::Item> {
		if self.buffer.is_empty() {
			return None;
		}

		let chunk = if let Some(len) = self.state.scan(self.buffer) {
			// SAFETY: relies on the C scanner reporting an offset no larger than the
			// length of the slice it was given. NOTE(review): that guarantee lives in
			// the C implementation and is not checked here.
			unsafe { self.cut(len) }
		} else {
			// No cut point: hand out the rest and leave an empty buffer behind.
			mem::take(&mut self.buffer)
		};

		Some(chunk)
	}
}

/// Raw bindings to the C chunker.
mod sys {
	/// Rust-side mirror of the state struct that `ca_chunker_scan` operates on.
	///
	/// The value is handed to C by pointer, so the `#[repr(C)]` layout (field order
	/// and field types) must stay in sync with the C definition. Do not reorder or
	/// retype fields without changing the C side as well.
	#[repr(C)]
	#[derive(Clone)]
	pub struct CaChunker {
		h: u32,

		window_size: usize,
		chunk_size: usize,

		chunk_size_min: usize,
		chunk_size_max: usize,
		chunk_size_avg: usize,

		discriminator: usize,

		window: [u8; 48],
	}

	impl CaChunker {
		/// Returns a fresh state by cloning the `ca_chunker_init` template exported from C.
		pub fn new() -> CaChunker {
			// SAFETY: reads an immutable extern static. NOTE(review): assumes the C
			// side defines it with a layout matching `CaChunker` — confirm against
			// the C header.
			unsafe { ca_chunker_init.clone() }
		}

		/// Feeds `data` to the C scanner, updating this state in place.
		///
		/// Returns `Some(offset)` with the value reported by `ca_chunker_scan`, or
		/// `None` when it reports `usize::MAX`.
		pub fn scan(&mut self, data: &[u8]) -> Option<usize> {
			// SAFETY: the pointer and length come from the same live slice, and `self`
			// is an exclusive reference to the state for the duration of the call.
			let offset = unsafe { ca_chunker_scan(self, data.as_ptr(), data.len()) };

			// `usize::MAX` is the sentinel mapped to "nothing found".
			if offset == usize::MAX {
				None
			} else {
				Some(offset)
			}
		}
	}

	extern "C" {
		static ca_chunker_init: CaChunker;
		fn ca_chunker_scan(c: *mut CaChunker, p: *const u8, n: usize) -> usize;
	}
}