summary refs log tree commit diff
path: root/ripple/fossil
diff options
context:
space:
mode:
author edef <edef@unfathomable.blue> 2021-08-14 21:28:14 +0000
committer edef <edef@unfathomable.blue> 2021-08-14 21:28:14 +0000
commit db7c54f92f386a94db8af7a12626d2657b4dd640 (patch)
tree 4baba57bac54c68823a834c0f8aa97b24cfec7a2 /ripple/fossil
parent dcae0f9c8a94f05bf55cf9b6fbc773502ab5784f (diff)
ripple/fossil: a basic content-addressable store
Fossil stores content-addressed blobs of file contents and
Protobuf-encoded directory listings, backed by Sled.

Change-Id: I8b49de6342218ca00755cec980b1d0cfb18878a7
Diffstat (limited to 'ripple/fossil')
-rw-r--r-- ripple/fossil/.gitignore 4
-rw-r--r-- ripple/fossil/Cargo.toml 17
-rw-r--r-- ripple/fossil/build.rs 9
-rw-r--r-- ripple/fossil/src/bin/add.rs 31
-rw-r--r-- ripple/fossil/src/bin/extract.rs 57
-rw-r--r-- ripple/fossil/src/lib.rs 206
-rw-r--r-- ripple/fossil/src/store.proto 28
7 files changed, 352 insertions, 0 deletions
diff --git a/ripple/fossil/.gitignore b/ripple/fossil/.gitignore
new file mode 100644
index 0000000..be75022
--- /dev/null
+++ b/ripple/fossil/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+# SPDX-License-Identifier: OSL-3.0
+
+/target
diff --git a/ripple/fossil/Cargo.toml b/ripple/fossil/Cargo.toml
new file mode 100644
index 0000000..a88a5f8
--- /dev/null
+++ b/ripple/fossil/Cargo.toml
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+# SPDX-License-Identifier: OSL-3.0
+
+[package]
+name = "fossil"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
+prost = "0.8.0"
+bytes = "1.0.1"
+blake3 = { version = "0.3.8", features = ["rayon"] }
+sled = "0.34.6"
+byteorder = "1.4.3"
+
+[build-dependencies]
+prost-build = "0.8.0"
diff --git a/ripple/fossil/build.rs b/ripple/fossil/build.rs
new file mode 100644
index 0000000..412c2d2
--- /dev/null
+++ b/ripple/fossil/build.rs
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: OSL-3.0
+
+use std::io::Result;
+
+/// Build script entry point: generates Rust code from `src/store.proto`
+/// with `prost_build`, using `src/` as the include directory.
+fn main() -> Result<()> {
+	let protos = ["src/store.proto"];
+	let includes = ["src/"];
+	prost_build::compile_protos(&protos, &includes)
+}
diff --git a/ripple/fossil/src/bin/add.rs b/ripple/fossil/src/bin/add.rs
new file mode 100644
index 0000000..114f893
--- /dev/null
+++ b/ripple/fossil/src/bin/add.rs
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: OSL-3.0
+
+use {
+	fossil::Directory,
+	prost::Message,
+	std::{
+		env,
+		io::{self, Write},
+		path::Path,
+	},
+};
+
+/// Adds every path given on the command line to the store at `fossil.db`
+/// and writes the Protobuf-encoded root directory listing to stdout.
+///
+/// Each argument is recorded in the root listing under its final path
+/// component. Panics if the store cannot be opened, if an argument has no
+/// valid UTF-8 file name, or if stdout cannot be written.
+fn main() {
+	let store = fossil::Store::open("fossil.db").unwrap();
+	let mut root = Directory::new();
+
+	for arg in env::args().skip(1) {
+		let path = Path::new(&arg);
+		let name = match path.file_name().and_then(|s| s.to_str()) {
+			Some(name) => name.to_owned(),
+			None => panic!("invalid path"),
+		};
+
+		let node = store.add_path(path);
+		root.children.insert(name, node);
+	}
+
+	let encoded = root.into_pb().encode_to_vec();
+	io::stdout().write_all(&encoded).unwrap();
+}
diff --git a/ripple/fossil/src/bin/extract.rs b/ripple/fossil/src/bin/extract.rs
new file mode 100644
index 0000000..f83ce0e
--- /dev/null
+++ b/ripple/fossil/src/bin/extract.rs
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: OSL-3.0
+
+use {
+	fossil::{store, Directory},
+	prost::Message,
+	std::{
+		fs,
+		io::{self, Read, Write},
+		os::unix::{fs::symlink, prelude::OpenOptionsExt},
+		path::Path,
+	},
+};
+
+/// Reads a Protobuf-encoded root directory listing from stdin and extracts
+/// the tree it describes from the store at `fossil.db` into the current
+/// directory.
+///
+/// Panics if the store cannot be opened, stdin cannot be read, or the
+/// input does not decode as a directory listing.
+fn main() {
+	let store = fossil::Store::open("fossil.db").unwrap();
+
+	let mut encoded = Vec::new();
+	io::stdin().read_to_end(&mut encoded).unwrap();
+
+	let listing = store::Directory::decode(encoded.as_slice()).unwrap();
+	let root = Directory::from_pb(listing);
+
+	extract(&store, Path::new("."), &root);
+}
+
+/// Recursively materialises `dir` on disk underneath `path`.
+///
+/// Subdirectory listings and file contents are fetched from `store`.
+/// Files are created with mode 0o755 when marked executable and 0o644
+/// otherwise; links are recreated as symlinks.
+///
+/// # Panics
+///
+/// Panics if an entry name is not a single plain path component, if a
+/// subdirectory listing fails to decode, or if any filesystem operation
+/// fails (including when the destination already exists).
+fn extract(store: &fossil::Store, path: &Path, dir: &Directory) {
+	for (name, node) in &dir.children {
+		// The listing is read from stdin and is not trusted: a name such as
+		// "../x" or "/etc/x" would make `join` escape the extraction root.
+		if name.is_empty() || name == "." || name == ".." || name.contains('/') {
+			panic!("invalid entry name: {:?}", name);
+		}
+
+		let path = path.join(name);
+		match node {
+			fossil::Node::Directory { r#ref } => {
+				let blob = store.read_blob(*r#ref);
+				let pb = store::Directory::decode(&*blob).unwrap();
+				fs::create_dir(&path).unwrap();
+				extract(store, &path, &Directory::from_pb(pb));
+			}
+			fossil::Node::File { r#ref, executable } => {
+				let mode = if *executable { 0o755 } else { 0o644 };
+				// `create_new` refuses to overwrite an existing file.
+				let mut f = fs::OpenOptions::new()
+					.write(true)
+					.create_new(true)
+					.mode(mode)
+					.open(&path)
+					.unwrap();
+				let blob = store.read_blob(*r#ref);
+				f.write_all(&blob).unwrap();
+			}
+			fossil::Node::Link { target } => {
+				symlink(target, &path).unwrap();
+			}
+		}
+	}
+}
diff --git a/ripple/fossil/src/lib.rs b/ripple/fossil/src/lib.rs
new file mode 100644
index 0000000..6fb5269
--- /dev/null
+++ b/ripple/fossil/src/lib.rs
@@ -0,0 +1,206 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: OSL-3.0
+
+use {
+	byteorder::{BigEndian, ByteOrder},
+	prost::Message,
+	std::{collections::BTreeMap, fs, io, os::unix::fs::PermissionsExt, path::Path},
+};
+
+/// Protobuf message types, included from the `fossil.store.rs` file that
+/// the build script generates into `OUT_DIR`.
+pub mod store {
+	include!(concat!(env!("OUT_DIR"), "/fossil.store.rs"));
+}
+
+/// Length in bytes of a BLAKE3 digest.
+const DIGEST_BYTES: usize = blake3::OUT_LEN;
+/// Length in bytes of the big-endian chunk index that follows the digest in a chunk key.
+const OFFSET_BYTES: usize = 4;
+
+/// A content-addressed blob store backed by a sled database.
+pub struct Store {
+	/// Handle to the underlying sled database that holds the blob chunks.
+	db: sled::Db,
+}
+
+impl Store {
+	/// Opens the sled database at `path` and wraps it in a `Store`.
+	///
+	/// # Errors
+	///
+	/// Returns the error reported by `sled::open`, converted to `io::Error`.
+	pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Store> {
+		let db = sled::open(path)?;
+		Ok(Store { db })
+	}
+
+	/// Recursively adds the filesystem object at `path` to the store and
+	/// returns the [`Node`] describing it.
+	///
+	/// Directories are stored as Protobuf-encoded listings of their
+	/// children, and regular files as blobs of their contents together
+	/// with whether the owner-execute bit is set. Symlinks are not
+	/// followed: only their target is recorded, and nothing is written to
+	/// the store for them.
+	///
+	/// # Panics
+	///
+	/// Panics on any I/O error, on entry names or symlink targets that are
+	/// not valid UTF-8, and on any other kind of file.
+	pub fn add_path<P: AsRef<Path>>(&self, path: P) -> Node {
+		let path = path.as_ref();
+		// `symlink_metadata` inspects the link itself rather than its target.
+		let meta = fs::symlink_metadata(path).unwrap();
+
+		match meta.file_type() {
+			ty if ty.is_dir() => {
+				let mut d = Directory::new();
+
+				for entry in path.read_dir().unwrap() {
+					let entry = entry.unwrap();
+					let name = entry.file_name().into_string().unwrap();
+					d.children.insert(name, self.add_path(entry.path()));
+				}
+
+				let blob = d.into_pb().encode_to_vec();
+
+				Node::Directory {
+					r#ref: self.write_blob(&blob),
+				}
+			}
+			ty if ty.is_file() => {
+				let executable = (meta.permissions().mode() & 0o100) != 0;
+
+				let blob = fs::read(path).unwrap();
+				Node::File {
+					executable,
+					r#ref: self.write_blob(&blob),
+				}
+			}
+			ty if ty.is_symlink() => {
+				let target = path
+					.read_link()
+					.unwrap()
+					.to_str()
+					.expect("symlink target is invalid UTF-8")
+					.to_owned();
+
+				Node::Link { target }
+			}
+			_ => panic!("not a directory, regular file, or symlink"),
+		}
+	}
+
+	/// Stores `data` under its BLAKE3 digest and returns that digest.
+	///
+	/// The blob is split into 4096-byte chunks, each inserted under the
+	/// key `digest || big-endian u32 chunk index`, all within a single
+	/// transaction. An empty blob inserts no entries at all; `read_blob`
+	/// recognises it by its digest instead.
+	///
+	/// # Panics
+	///
+	/// Panics if the transaction fails.
+	fn write_blob(&self, data: &[u8]) -> Digest {
+		let digest = {
+			let mut h = blake3::Hasher::new();
+			h.update_with_join::<blake3::join::RayonJoin>(data);
+			*h.finalize().as_bytes()
+		};
+
+		// TODO(edef): maybe don't use the default tree?
+		// we should probably have a "blob" tree,
+		// and reserve the default tree for DB metadata
+
+		self.db
+			.transaction::<_, _, sled::Error>(|db| {
+				for (n, chunk) in data.chunks(4096).enumerate() {
+					let mut key = [0u8; DIGEST_BYTES + OFFSET_BYTES];
+					key[..DIGEST_BYTES].copy_from_slice(&digest);
+					// NOTE: the index is truncated to 32 bits, so blobs of
+					// 2^32 or more chunks (16 TiB) are not supported.
+					BigEndian::write_u32(&mut key[DIGEST_BYTES..], n as u32);
+					db.insert(&key[..], chunk)?;
+				}
+				Ok(())
+			})
+			.unwrap();
+
+		digest.into()
+	}
+
+	/// Reads back the blob whose digest is `ref`, verifying its contents.
+	///
+	/// All entries whose key starts with the digest are concatenated in
+	/// the order the prefix scan yields them, and the result is re-hashed
+	/// and compared against `ref`.
+	///
+	/// # Panics
+	///
+	/// Panics with "blob not found" if no data is stored for a non-empty
+	/// blob, with "hash mismatch" if the stored data does not hash to
+	/// `ref`, and if the database scan returns an error.
+	pub fn read_blob(&self, r#ref: Digest) -> Vec<u8> {
+		let mut buffer = Vec::new();
+		let mut h = blake3::Hasher::new();
+		for element in self.db.scan_prefix(r#ref.as_bytes()) {
+			let (_, chunk) = element.unwrap();
+			h.update(&chunk);
+			buffer.extend_from_slice(&chunk);
+		}
+
+		if h.finalize() != r#ref {
+			// `write_blob` stores no entries for an empty blob, so finding
+			// nothing only means "missing" when `ref` is not the digest of
+			// the empty input; that case passes the comparison above.
+			if buffer.is_empty() {
+				panic!("blob not found");
+			}
+
+			panic!("hash mismatch");
+		}
+
+		buffer
+	}
+}
+
+/// A BLAKE3 hash, used to identify a blob in the store.
+pub type Digest = blake3::Hash;
+
+/// An in-memory directory listing.
+pub struct Directory {
+	/// Entries of this directory, keyed (and therefore ordered) by name.
+	pub children: BTreeMap<String, Node>,
+}
+
+/// A single entry in a [`Directory`].
+#[derive(Clone)]
+pub enum Node {
+	/// A subdirectory; `ref` is the digest of its Protobuf-encoded listing.
+	Directory { r#ref: Digest },
+	/// A regular file; `ref` is the digest of its contents.
+	File { r#ref: Digest, executable: bool },
+	/// A symbolic link pointing at `target`.
+	Link { target: String },
+}
+
+impl Directory {
+	/// Creates a directory listing with no entries.
+	pub fn new() -> Directory {
+		let children = BTreeMap::new();
+		Directory { children }
+	}
+
+	/// Converts this listing into its Protobuf message form.
+	///
+	/// Entries are visited in name order and appended to the message's
+	/// `directories`, `files` or `links` list according to their kind.
+	pub fn into_pb(self) -> store::Directory {
+		let mut pb = store::Directory::default();
+
+		for (name, node) in self.children {
+			match node {
+				Node::Directory { r#ref: digest } => {
+					let entry = store::DirectoryNode {
+						name,
+						r#ref: digest.as_bytes().to_vec(),
+					};
+					pb.directories.push(entry);
+				}
+				Node::File {
+					r#ref: digest,
+					executable,
+				} => {
+					let entry = store::FileNode {
+						name,
+						r#ref: digest.as_bytes().to_vec(),
+						executable,
+					};
+					pb.files.push(entry);
+				}
+				Node::Link { target } => {
+					pb.links.push(store::LinkNode { name, target });
+				}
+			}
+		}
+
+		pb
+	}
+
+	/// Builds a listing from its Protobuf message form.
+	///
+	/// Directories are inserted first, then files, then links; when a name
+	/// occurs more than once, the entry inserted last replaces the others.
+	///
+	/// # Panics
+	///
+	/// Panics if a directory or file reference is not a digest-sized byte
+	/// string.
+	pub fn from_pb(pb: store::Directory) -> Directory {
+		let mut children = BTreeMap::new();
+
+		children.extend(pb.directories.into_iter().map(|child| {
+			let node = Node::Directory {
+				r#ref: digest_from_bytes(&child.r#ref),
+			};
+			(child.name, node)
+		}));
+
+		children.extend(pb.files.into_iter().map(|child| {
+			let node = Node::File {
+				r#ref: digest_from_bytes(&child.r#ref),
+				executable: child.executable,
+			};
+			(child.name, node)
+		}));
+
+		children.extend(pb.links.into_iter().map(|child| {
+			let node = Node::Link {
+				target: child.target,
+			};
+			(child.name, node)
+		}));
+
+		Directory { children }
+	}
+}
+
+/// Converts a byte slice into a [`Digest`].
+///
+/// # Panics
+///
+/// Panics, attributing the failure to the caller's location, if `bytes`
+/// is not exactly `DIGEST_BYTES` long.
+#[track_caller]
+fn digest_from_bytes(bytes: &[u8]) -> Digest {
+	let len = bytes.len();
+	if len != DIGEST_BYTES {
+		panic!("digest is {} bytes, expecting {} bytes", len, DIGEST_BYTES);
+	}
+
+	let mut raw = [0u8; DIGEST_BYTES];
+	raw.copy_from_slice(bytes);
+	Digest::from(raw)
+}
diff --git a/ripple/fossil/src/store.proto b/ripple/fossil/src/store.proto
new file mode 100644
index 0000000..58832f0
--- /dev/null
+++ b/ripple/fossil/src/store.proto
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
+// SPDX-License-Identifier: OSL-3.0
+
+syntax = "proto3";
+
+package fossil.store;
+
+// A directory listing, with its entries grouped by kind.
+message Directory {
+    // Subdirectories of this directory.
+    repeated DirectoryNode directories = 1;
+    // Regular files in this directory.
+    repeated FileNode files = 2;
+    // Symbolic links in this directory.
+    repeated LinkNode links = 3;
+}
+
+// A subdirectory entry.
+message DirectoryNode {
+    // Name of the entry within its parent directory.
+    string name = 1;
+    // Digest of the blob holding the subdirectory's encoded Directory message.
+    bytes ref = 2;
+}
+
+// A regular file entry.
+message FileNode {
+    // Name of the entry within its parent directory.
+    string name = 1;
+    // Digest of the blob holding the file's contents.
+    bytes ref = 2;
+    // Whether the file is marked executable.
+    bool executable = 3;
+}
+
+// A symbolic link entry.
+message LinkNode {
+    // Name of the entry within its parent directory.
+    string name = 1;
+    // Path the symbolic link points at.
+    string target = 2;
+}