Rustで編集距離k以内の塩基配列を全列挙する

編集距離k以内の塩基配列("ATGC"のみ許可)を深さ優先探索で列挙するRustコードを書いたので備忘録として残しておきます。もうちょっとコードをキレイにしたかったのとテストコードは宿題にします。

use bio::alphabets;

#[derive(Clone, Copy)]
enum Operations {
    Match,
    Subst(u8),
    Del,
    Ins(u8),
}

struct NeighborSeq {
    origin_seq: Vec<u8>,
    stack: Vec<StateNS>,
    curr: Vec<Operations>,
    alphabet: alphabets::Alphabet,
    allow_outside_insersion: bool,
}

struct StateNS {
    is_preorder: bool,  // preorder(true) or postorder(false)
    is_root: bool, // length
    n: usize, // n used character
    k: usize,  // rest of distance
    ops: Operations,
}

impl NeighborSeq {
    fn new(origin_seq: &[u8], distance: usize, allow_outside_insersion: bool) -> Self {
        let stack = vec![StateNS{
            is_preorder: true,
            is_root: true,
            n: origin_seq.len(),
            k: distance,
            ops: Operations::Del, // ops is dummy
        }];
        NeighborSeq { 
            origin_seq: origin_seq.to_vec(),
            stack: stack,
            curr: Vec::new(),
            alphabet: alphabets::Alphabet::new(b"ATGC"),
            allow_outside_insersion: allow_outside_insersion,
        }
    }

    fn push_state(&mut self, n: usize, k: usize, ops: Operations) {
        let mut push = |nn, kk, ops| {
            self.stack.push(StateNS {
                is_preorder: false, is_root: false, n: nn, k: kk, ops: ops
            });
            self.stack.push(StateNS {
                is_preorder: true, is_root: false, n: nn, k: kk, ops: ops
            });
        };
        match ops {
            Operations::Match => {
                if n > 0 { push(n-1, k, ops) }
            },
            Operations::Subst(c) => {
                let l = self.origin_seq.len();
                let seq = &self.origin_seq;
                if n > 0 && k > 0 && c != seq[l - n] { push(n-1, k-1, ops) }
            }
            Operations::Del => {
                if n > 0 && k > 0 { push(n-1, k-1, ops) }
            },
            Operations::Ins(_) => {
                if k > 0 { push(n, k-1, ops) }
            },
        };
    }

    fn apply_ops(&self) -> Vec<u8> {
        let mut seq = Vec::new();
        let mut i = 0;
        for &ops in self.curr.iter() {
            match ops {
                Operations::Match => {
                    seq.push(self.origin_seq[i]);
                    i += 1;
                },
                Operations::Subst(c) => {
                    seq.push(c);
                    i += 1;
                },
                Operations::Del => {
                    i += 1;
                },
                Operations::Ins(c) => {
                    seq.push(c);
                }
            }
        }
        seq
    }
}

impl Iterator for NeighborSeq {
    type Item = Vec<u8>;

    fn next(&mut self) -> Option<Self::Item> {
        while !self.stack.is_empty() {
            let state = self.stack.pop().unwrap();
            if !state.is_root {
                if state.is_preorder {
                    self.curr.push(state.ops);
                } else {
                    self.curr.pop();
                    continue;
                }
            }
            // match
            self.push_state(state.n, state.k, Operations::Match);
            // del
            self.push_state(state.n, state.k, Operations::Del);

            let symbols = self.alphabet.symbols.clone();

            // subst
            for c in symbols.iter() {
                let c = c as u8;
                self.push_state(state.n, state.k, Operations::Subst(c));
            }
            // ins
            let is_outside = state.is_root || state.n == 0;
            if self.allow_outside_insersion || !is_outside {
                for c in symbols.iter() {
                    let c = c as u8;
                    self.push_state(state.n, state.k, Operations::Ins(c));
                }
            }

            if state.n == 0 {
                break;
            }
        }

        if self.stack.is_empty() {
            None
        } else {
            Some(self.apply_ops())
        }
    }
}

fn main() {
    for (i, x) in NeighborSeq::new(b"AA", 2, true).enumerate() {
        println!("{}: {}", i, std::str::from_utf8(&x).unwrap());
    }
    println!("---");
    for (i, x) in NeighborSeq::new(b"AA", 2, false).enumerate() {
        println!("{}: {}", i, std::str::from_utf8(&x).unwrap());
    }
}