subcoin_utxo_snapshot/
lib.rs

1mod compressor;
2mod script;
3mod serialize;
4#[cfg(test)]
5mod tests;
6
7use self::compressor::ScriptCompression;
8use self::serialize::write_compact_size;
9use bitcoin::BlockHash;
10use bitcoin::consensus::encode::Encodable;
11use bitcoin::hashes::Hash;
12use compressor::compress_amount;
13use std::collections::{BTreeMap, HashSet};
14use std::fs::File;
15use std::io::Write;
16use std::path::{Path, PathBuf};
17use subcoin_primitives::runtime::Coin;
18use txoutset::var_int::VarInt;
19
20const SNAPSHOT_MAGIC_BYTES: [u8; 5] = [b'u', b't', b'x', b'o', 0xff];
21
22/// Groups UTXOs by `txid` into a lexicographically ordered `BTreeMap` (same as the order stored by
23/// Bitcoin Core in leveldb).
24///
25/// NOTE: this requires substantial RAM.
26pub fn group_utxos_by_txid(
27    utxos: impl IntoIterator<Item = Utxo>,
28) -> BTreeMap<bitcoin::Txid, Vec<OutputEntry>> {
29    let mut map: BTreeMap<bitcoin::Txid, Vec<OutputEntry>> = BTreeMap::new();
30
31    for utxo in utxos {
32        map.entry(utxo.txid).or_default().push(OutputEntry {
33            vout: utxo.vout,
34            coin: utxo.coin,
35        });
36    }
37
38    map
39}
40
41// Equivalent function in Rust for serializing an OutPoint and Coin
42//
43// https://github.com/bitcoin/bitcoin/blob/6f9db1ebcab4064065ccd787161bf2b87e03cc1f/src/kernel/coinstats.cpp#L51
44pub fn tx_out_ser(outpoint: bitcoin::OutPoint, coin: &Coin) -> bitcoin::io::Result<Vec<u8>> {
45    let mut data = Vec::new();
46
47    // Serialize the OutPoint (txid and vout)
48    outpoint.consensus_encode(&mut data)?;
49
50    // Serialize the coin's height and coinbase flag
51    let height_and_coinbase = (coin.height << 1) | (coin.is_coinbase as u32);
52    height_and_coinbase.consensus_encode(&mut data)?;
53
54    let txout = bitcoin::TxOut {
55        value: bitcoin::Amount::from_sat(coin.amount),
56        script_pubkey: bitcoin::ScriptBuf::from_bytes(coin.script_pubkey.clone()),
57    };
58
59    // Serialize the actual UTXO (value and script)
60    txout.consensus_encode(&mut data)?;
61
62    Ok(data)
63}
64
65/// Represents a UTXO output in the snapshot format.
66///
67/// A combination of the output index (vout) and associated coin data.
68#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
69pub struct OutputEntry {
70    /// The output index within the transaction.
71    pub vout: u32,
72    /// The coin data associated with this output.
73    pub coin: Coin,
74}
75
76/// Represents a single UTXO (Unspent Transaction Output) in Bitcoin.
77#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
78pub struct Utxo {
79    /// The transaction ID that contains this UTXO.
80    pub txid: bitcoin::Txid,
81    /// The output index within the transaction.
82    pub vout: u32,
83    /// The coin data associated with this UTXO (e.g., amount and any relevant metadata).
84    pub coin: Coin,
85}
86
87impl From<(bitcoin::Txid, u32, Coin)> for Utxo {
88    fn from((txid, vout, coin): (bitcoin::Txid, u32, Coin)) -> Self {
89        Self { txid, vout, coin }
90    }
91}
92
93#[derive(Debug, Clone, PartialEq, Eq)]
94struct SnapshotMetadata {
95    version: u16,
96    supported_versions: HashSet<u16>,
97    network_magic: [u8; 4],
98    base_blockhash: [u8; 32],
99    coins_count: u64,
100}
101
102impl SnapshotMetadata {
103    const VERSION: u16 = 2;
104
105    pub fn new(network_magic: [u8; 4], base_blockhash: [u8; 32], coins_count: u64) -> Self {
106        let supported_versions = HashSet::from([Self::VERSION]);
107        Self {
108            version: Self::VERSION,
109            supported_versions,
110            network_magic,
111            base_blockhash,
112            coins_count,
113        }
114    }
115
116    pub fn serialize<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
117        writer.write_all(&SNAPSHOT_MAGIC_BYTES)?;
118        writer.write_all(&self.version.to_le_bytes())?;
119        writer.write_all(&self.network_magic)?;
120        writer.write_all(&self.base_blockhash)?;
121        writer.write_all(&self.coins_count.to_le_bytes())?;
122        Ok(())
123    }
124
125    #[allow(unused)]
126    pub fn deserialize<R: std::io::Read>(
127        reader: &mut R,
128        expected_network_magic: &[u8],
129    ) -> std::io::Result<Self> {
130        use std::io::{Error, ErrorKind};
131
132        let mut magic_bytes = [0; SNAPSHOT_MAGIC_BYTES.len()];
133        reader.read_exact(&mut magic_bytes)?;
134        if magic_bytes != SNAPSHOT_MAGIC_BYTES {
135            return Err(Error::new(
136                ErrorKind::InvalidData,
137                format!(
138                    "Invalid UTXO snapshot magic bytes (expected: {SNAPSHOT_MAGIC_BYTES:?}, got: {magic_bytes:?})"
139                ),
140            ));
141        }
142
143        let mut version_bytes = [0; 2];
144        reader.read_exact(&mut version_bytes)?;
145        let version = u16::from_le_bytes(version_bytes);
146
147        let supported_versions = HashSet::from([Self::VERSION]);
148        if !supported_versions.contains(&version) {
149            return Err(Error::new(
150                ErrorKind::InvalidData,
151                format!("Unsupported snapshot version: {version}"),
152            ));
153        }
154
155        let mut network_magic = [0u8; 4];
156        reader.read_exact(&mut network_magic)?;
157        if network_magic != expected_network_magic {
158            return Err(Error::new(ErrorKind::InvalidData, "Network magic mismatch"));
159        }
160
161        let mut base_blockhash = [0; 32];
162        reader.read_exact(&mut base_blockhash)?;
163
164        let mut coins_count_bytes = [0; 8];
165        reader.read_exact(&mut coins_count_bytes)?;
166        let coins_count = u64::from_le_bytes(coins_count_bytes);
167
168        Ok(Self {
169            version,
170            supported_versions,
171            network_magic,
172            base_blockhash,
173            coins_count,
174        })
175    }
176}
177
178/// Responsible for dumping the UTXO set snapshot compatible with Bitcoin Core.
179///
180/// The format of generated snapshot is compatible with Bitcoin Core 28.0.
181pub struct UtxoSnapshotGenerator {
182    output_filepath: PathBuf,
183    output_file: File,
184    network: bitcoin::Network,
185}
186
187impl UtxoSnapshotGenerator {
188    /// Constructs a new instance of [`UtxoSnapshotGenerator`].
189    pub fn new(output_filepath: PathBuf, output_file: File, network: bitcoin::Network) -> Self {
190        Self {
191            output_filepath,
192            output_file,
193            network,
194        }
195    }
196
197    /// Returns the path of generated snapshot file.
198    pub fn path(&self) -> &Path {
199        &self.output_filepath
200    }
201
202    /// Writes a single entry of UTXO.
203    pub fn write_utxo_entry(
204        &mut self,
205        txid: bitcoin::Txid,
206        vout: u32,
207        coin: Coin,
208    ) -> std::io::Result<()> {
209        let Coin {
210            is_coinbase,
211            amount,
212            height,
213            script_pubkey,
214        } = coin;
215
216        let outpoint = bitcoin::OutPoint { txid, vout };
217
218        let mut data = Vec::new();
219
220        let amount = txoutset::Amount::new(amount);
221
222        let code = txoutset::Code {
223            height,
224            is_coinbase,
225        };
226        let script = txoutset::Script::from_bytes(script_pubkey);
227
228        outpoint.consensus_encode(&mut data)?;
229        code.consensus_encode(&mut data)?;
230        amount.consensus_encode(&mut data)?;
231        script.consensus_encode(&mut data)?;
232
233        let _ = self.output_file.write(data.as_slice())?;
234
235        Ok(())
236    }
237
238    /// Writes the metadata of snapshot.
239    pub fn write_metadata(
240        &mut self,
241        bitcoin_block_hash: BlockHash,
242        coins_count: u64,
243    ) -> std::io::Result<()> {
244        write_snapshot_metadata(
245            &mut self.output_file,
246            self.network,
247            bitcoin_block_hash,
248            coins_count,
249        )
250    }
251
252    /// Write the UTXO snapshot at the specified block to a file.
253    ///
254    /// NOTE: Do not use it in production.
255    pub fn generate_snapshot_in_mem(
256        &mut self,
257        bitcoin_block_hash: BlockHash,
258        utxos_count: u64,
259        utxos: impl IntoIterator<Item = Utxo>,
260    ) -> std::io::Result<()> {
261        generate_snapshot_in_mem_inner(
262            &mut self.output_file,
263            self.network,
264            bitcoin_block_hash,
265            utxos_count,
266            utxos,
267        )
268    }
269
270    /// Writes UTXO entries for a given transaction.
271    pub fn write_coins(
272        &mut self,
273        txid: bitcoin::Txid,
274        coins: Vec<OutputEntry>,
275    ) -> std::io::Result<()> {
276        write_coins(&mut self.output_file, txid, coins)
277    }
278}
279
280fn write_snapshot_metadata<W: std::io::Write>(
281    writer: &mut W,
282    network: bitcoin::Network,
283    bitcoin_block_hash: BlockHash,
284    coins_count: u64,
285) -> std::io::Result<()> {
286    let snapshot_metadata = SnapshotMetadata::new(
287        network.magic().to_bytes(),
288        bitcoin_block_hash.to_byte_array(),
289        coins_count,
290    );
291
292    snapshot_metadata.serialize(writer)?;
293
294    Ok(())
295}
296
297/// Write the UTXO snapshot at the specified block using the given writer.
298///
299/// NOTE: Do not use it in production.
300fn generate_snapshot_in_mem_inner<W: std::io::Write>(
301    writer: &mut W,
302    network: bitcoin::Network,
303    bitcoin_block_hash: BlockHash,
304    utxos_count: u64,
305    utxos: impl IntoIterator<Item = Utxo>,
306) -> std::io::Result<()> {
307    write_snapshot_metadata(writer, network, bitcoin_block_hash, utxos_count)?;
308
309    for (txid, coins) in group_utxos_by_txid(utxos) {
310        write_coins(writer, txid, coins)?;
311    }
312
313    Ok(())
314}
315
316pub fn write_coins<W: std::io::Write>(
317    writer: &mut W,
318    txid: bitcoin::Txid,
319    mut coins: Vec<OutputEntry>,
320) -> std::io::Result<()> {
321    coins.sort_by_key(|output_entry| output_entry.vout);
322
323    let mut data = Vec::new();
324    txid.consensus_encode(&mut data)?;
325    writer.write_all(&data)?;
326
327    write_compact_size(writer, coins.len() as u64)?;
328
329    for OutputEntry { vout, coin } in coins {
330        write_compact_size(writer, vout as u64)?;
331        serialize_coin(writer, coin)?;
332    }
333
334    Ok(())
335}
336
337fn serialize_coin<W: std::io::Write>(writer: &mut W, coin: Coin) -> std::io::Result<()> {
338    let Coin {
339        is_coinbase,
340        amount,
341        height,
342        script_pubkey,
343    } = coin;
344
345    // https://github.com/bitcoin/bitcoin/blob/0903ce8dbc25d3823b03d52f6e6bff74d19e801e/src/coins.h#L62
346    let code = (height << 1) | is_coinbase as u32;
347
348    let mut data = Vec::new();
349    VarInt::new(code as u64).consensus_encode(&mut data)?;
350    VarInt::new(compress_amount(amount)).consensus_encode(&mut data)?;
351    writer.write_all(&data)?;
352
353    ScriptCompression(script_pubkey).serialize(writer)?;
354
355    Ok(())
356}