Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,13 @@ Use `--summary` to get summary statistics (output to stdout on completion)

```json
{
"total_taxon_count": 2,
"taxons_identified": [
0,
1
],
"missing_taxon_ids": [
999999999
]
],
"reads_extracted_per_taxon": {
"0": 745591,
"1": 1646
Expand All @@ -174,6 +177,20 @@ Use `--summary` to get summary statistics (output to stdout on completion)
}
```

Fields:

- `taxons_identified`: Taxon IDs found in the Kraken report/output based on the requested taxids (includes
parents/children if used).
- `missing_taxon_ids`: Requested taxon IDs that were not found in the Kraken report.
- `reads_extracted_per_taxon`: Number of reads extracted per identified taxon ID (a count of 0 means no reads were
directly assigned to that taxon; it is still listed because it was included via children/parents).
- `total_reads_in`: Total reads parsed from the input file(s).
- `total_reads_out`: Total reads written to the output file(s).
- `proportion_extracted`: `total_reads_out / total_reads_in`.
- `input_format`: `single` or `paired` input mode.
- `output_format`: `fastq` or `fasta`, depending on `--output-fasta`.
- `kractor_version`: Version of kractor that produced the summary.

### Arguments:

### Required:
Expand Down
48 changes: 43 additions & 5 deletions src/kractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Summary {
total_taxon_count: usize,
taxons_identified: Vec<i32>,
reads_extracted_per_taxon: FxHashMap<i32, usize>,
total_reads_in: usize,
total_reads_out: usize,
Expand Down Expand Up @@ -88,6 +88,7 @@ impl Kractor {
fn process_reads(&mut self) -> Result<()> {
let paired = self.args.input.len() == 2;
let input_format = if paired { "paired" } else { "single" };
let reads_extracted_per_taxon = self.get_reads_extracted_per_taxon();

if paired {
let ((reads_parsed1, reads_output1), (reads_parsed2, reads_output2)) =
Expand All @@ -105,8 +106,8 @@ impl Kractor {
let reads_out = reads_output1 + reads_output2;

self.summary = Some(Summary {
total_taxon_count: self.taxon_ids.len(),
reads_extracted_per_taxon: self.reads_per_taxon.clone(),
taxons_identified: self.taxon_ids.clone(),
reads_extracted_per_taxon: reads_extracted_per_taxon.clone(),
total_reads_in: reads_in,
total_reads_out: reads_out,
proportion_extracted: reads_out as f64 / reads_in as f64,
Expand All @@ -133,8 +134,8 @@ impl Kractor {
let reads_out = reads_output1;

self.summary = Some(Summary {
total_taxon_count: self.taxon_ids.len(),
reads_extracted_per_taxon: self.reads_per_taxon.clone(),
taxons_identified: self.taxon_ids.clone(),
reads_extracted_per_taxon,
missing_taxon_ids: self.missing_taxon_ids.clone(),
total_reads_in: reads_in,
total_reads_out: reads_out,
Expand Down Expand Up @@ -162,6 +163,14 @@ impl Kractor {
Ok(())
}

/// Builds the per-taxon read counts reported in the summary.
///
/// Starts from a copy of `self.reads_per_taxon` and makes sure every ID in
/// `self.taxon_ids` has an entry, inserting a count of `0` for any ID that
/// has no recorded reads. Existing counts are left untouched.
fn get_reads_extracted_per_taxon(&self) -> FxHashMap<i32, usize> {
    let mut counts = self.reads_per_taxon.clone();
    self.taxon_ids.iter().copied().for_each(|id| {
        // Only fills the gap; a count that is already present is preserved.
        counts.entry(id).or_insert(0);
    });
    counts
}

pub fn run(&mut self) -> Result<()> {
info!(
"Starting kractor at {}",
Expand Down Expand Up @@ -236,4 +245,33 @@ mod tests {
let kractor = Kractor::new(args);
assert!(kractor.validate_outputs().is_err());
}

/// An identified taxon with no recorded reads must appear with a count of 0,
/// while a taxon that has recorded reads keeps its count.
#[test]
fn test_get_reads_extracted_per_taxon() {
    let cli = Cli {
        input: vec![PathBuf::from("input.fastq")],
        output: vec![PathBuf::from("output.fastq")],
        kraken: PathBuf::from("kraken_output.txt"),
        report: None,
        taxid: vec![2901879, 227984],
        output_type: None,
        compression_level: niffler::Level::One,
        parents: false,
        children: false,
        exclude: false,
        output_fasta: false,
        summary: false,
        no_report_header_detect: false,
        verbose: false,
    };

    let mut extractor = Kractor::new(cli);
    // Two identified taxa, but reads were recorded for only one of them.
    extractor.taxon_ids = vec![2901879, 227984];
    extractor.reads_per_taxon.insert(227984, 257);

    let counts = extractor.get_reads_extracted_per_taxon();

    // The taxon without recorded reads is filled in with zero.
    assert_eq!(counts.get(&2901879).copied(), Some(0));
    // The recorded count is carried over unchanged.
    assert_eq!(counts.get(&227984).copied(), Some(257));
}
}