編集距離k以内の塩基配列("ATGC"のみ許可)を深さ優先探索で列挙するRustコードを書いたので備忘録として残しておきます。もうちょっとコードをキレイにしたかったのとテストコードは宿題にします。
use bio::alphabets; #[derive(Clone, Copy)] enum Operations { Match, Subst(u8), Del, Ins(u8), } struct NeighborSeq { origin_seq: Vec<u8>, stack: Vec<StateNS>, curr: Vec<Operations>, alphabet: alphabets::Alphabet, allow_outside_insersion: bool, } struct StateNS { is_preorder: bool, // preorder(true) or postorder(false) is_root: bool, // length n: usize, // n used character k: usize, // rest of distance ops: Operations, } impl NeighborSeq { fn new(origin_seq: &[u8], distance: usize, allow_outside_insersion: bool) -> Self { let stack = vec![StateNS{ is_preorder: true, is_root: true, n: origin_seq.len(), k: distance, ops: Operations::Del, // ops is dummy }]; NeighborSeq { origin_seq: origin_seq.to_vec(), stack: stack, curr: Vec::new(), alphabet: alphabets::Alphabet::new(b"ATGC"), allow_outside_insersion: allow_outside_insersion, } } fn push_state(&mut self, n: usize, k: usize, ops: Operations) { let mut push = |nn, kk, ops| { self.stack.push(StateNS { is_preorder: false, is_root: false, n: nn, k: kk, ops: ops }); self.stack.push(StateNS { is_preorder: true, is_root: false, n: nn, k: kk, ops: ops }); }; match ops { Operations::Match => { if n > 0 { push(n-1, k, ops) } }, Operations::Subst(c) => { let l = self.origin_seq.len(); let seq = &self.origin_seq; if n > 0 && k > 0 && c != seq[l - n] { push(n-1, k-1, ops) } } Operations::Del => { if n > 0 && k > 0 { push(n-1, k-1, ops) } }, Operations::Ins(_) => { if k > 0 { push(n, k-1, ops) } }, }; } fn apply_ops(&self) -> Vec<u8> { let mut seq = Vec::new(); let mut i = 0; for &ops in self.curr.iter() { match ops { Operations::Match => { seq.push(self.origin_seq[i]); i += 1; }, Operations::Subst(c) => { seq.push(c); i += 1; }, Operations::Del => { i += 1; }, Operations::Ins(c) => { seq.push(c); } } } seq } } impl Iterator for NeighborSeq { type Item = Vec<u8>; fn next(&mut self) -> Option<Self::Item> { while !self.stack.is_empty() { let state = self.stack.pop().unwrap(); if !state.is_root { if state.is_preorder { self.curr.push(state.ops); } else { self.curr.pop(); continue; } } // match self.push_state(state.n, state.k, Operations::Match); // del self.push_state(state.n, state.k, Operations::Del); let symbols = self.alphabet.symbols.clone(); // subst for c in symbols.iter() { let c = c as u8; self.push_state(state.n, state.k, Operations::Subst(c)); } // ins let is_outside = state.is_root || state.n == 0; if self.allow_outside_insersion || !is_outside { for c in symbols.iter() { let c = c as u8; self.push_state(state.n, state.k, Operations::Ins(c)); } } if state.n == 0 { break; } } if self.stack.is_empty() { None } else { Some(self.apply_ops()) } } } fn main() { for (i, x) in NeighborSeq::new(b"AA", 2, true).enumerate() { println!("{}: {}", i, std::str::from_utf8(&x).unwrap()); } println!("---"); for (i, x) in NeighborSeq::new(b"AA", 2, false).enumerate() { println!("{}: {}", i, std::str::from_utf8(&x).unwrap()); } }