Source code for krakenplot

import csv
import re

from pathlib import Path
from typing import Dict, Mapping

import ete3
import toytree
from toytree.Toytree import ToyTree

# Names exported by ``from krakenplot import *``.
__all__ = ['KrakenSummary']

# Number of leading whitespace characters that separate one nesting level from
# the next in the taxon-name column; subtracted from an entry's indent width to
# look up the node that should become its parent.
INDENT_UNIT: int = 2


class KrakenSummary(object):
    """Visualize and interrogate the summary report from the taxonomic classifier Kraken.

    The report is read as tab-separated rows. The last column of each row holds
    the taxon name, prefixed with whitespace whose width encodes the depth of
    the taxon in the hierarchy (``INDENT_UNIT`` characters per level). Only the
    last column is used, so rows carrying a different number of leading columns
    are accepted as well.

    The resulting :mod:`ete3` tree has an unnamed top node with a ``root`` child
    under which all classified taxa are nested. Unless *ignore_unclassified* is
    set, an ``unclassified`` node is added as a sibling of ``root``.

    Args:
        infile: The path to the Kraken summary metric text file.
        ignore_unclassified: Whether to ignore *unclassified* assignments.

    Raises:
        ValueError: If an entry is indented in a way that leaves it without a
            previously seen parent one level above it.

    """

    def __init__(self, infile: Path, ignore_unclassified: bool = False) -> None:
        self.infile = Path(infile).expanduser().resolve()
        self.ignore_unclassified = ignore_unclassified

        tree: ete3.coretype.tree.TreeNode = ete3.Tree()
        root = tree.add_child(name='root')

        # Most recently created node at each indent width. The top of the tree
        # is registered one level "above" zero so that any zero-indent entry
        # other than root/unclassified still finds a parent.
        nodes: Dict[int, ete3.coretype.tree.TreeNode] = {
            -INDENT_UNIT: tree,
            0: root,
        }

        # newline='' lets the csv module do its own line-ending handling.
        with self.infile.open(newline='') as handle:
            for line_number, row in enumerate(csv.reader(handle, delimiter='\t'), 1):
                if not row:
                    # Blank line (e.g. trailing newline at end of file).
                    continue

                taxa_entry = row[-1]
                taxa_name = taxa_entry.lstrip()
                indent_size = len(taxa_entry) - len(taxa_name)

                if taxa_name == 'root':
                    # The root node already exists; re-anchor it so that the
                    # entries that follow attach to it.
                    nodes[indent_size] = root
                    continue

                if taxa_name == 'unclassified':
                    # Kept outside of ``nodes`` on purpose: nothing may nest
                    # beneath it, and it must not displace ``root``.
                    if not ignore_unclassified:
                        tree.add_child(name='unclassified')
                    continue

                # The parent is always the last node seen exactly one level
                # shallower, whether this entry is deeper than, level with, or
                # shallower than the previous entry.
                try:
                    parent = nodes[indent_size - INDENT_UNIT]
                except KeyError:
                    raise ValueError(
                        f'{self.infile}: line {line_number}: entry {taxa_name!r} '
                        f'has an indent of {indent_size} with no parent entry '
                        f'at indent {indent_size - INDENT_UNIT}'
                    ) from None

                nodes[indent_size] = parent.add_child(name=taxa_name)

        self.tree = tree

    @property
    def newick(self) -> str:
        """Return a Newick string of this phylogeny."""
        newick: str = self.tree.write()
        return newick

    @property
    def toytree(self) -> ToyTree:
        """Return a ToyTree object of this phylogeny."""
        tree: ToyTree = toytree.tree(self.newick)
        return tree

    def __repr__(self) -> str:
        return (
            f'{self.__class__.__name__}('
            f'"{self.infile}", '
            f'ignore_unclassified={self.ignore_unclassified})'
        )