diff --git a/README.md b/README.md index 62ba924..f6b9adf 100755 --- a/README.md +++ b/README.md @@ -157,10 +157,13 @@ Use `--summary` to get summary statistics (output to stdout on completion) ```json { - "total_taxon_count": 2, + "taxons_identified": [ + 0, + 1 + ], "missing_taxon_ids": [ 999999999 - ] + ], "reads_extracted_per_taxon": { "0": 745591, "1": 1646 @@ -174,6 +177,20 @@ Use `--summary` to get summary statistics (output to stdout on completion) } ``` +Fields: + +- `taxons_identified`: Taxon IDs found in the Kraken report/output based on the requested taxids (includes + parents/children if used). +- `missing_taxon_ids`: Requested taxon IDs that were not found in the Kraken report. +- `reads_extracted_per_taxon`: Number of reads extracted per identified taxon ID (a count of 0 means the taxon was identified but no reads were + assigned to it directly, e.g. a taxon included only as a parent/child). +- `total_reads_in`: Total reads parsed from the input file(s). +- `total_reads_out`: Total reads written to the output file(s). +- `proportion_extracted`: `total_reads_out / total_reads_in`. +- `input_format`: `single` or `paired` input mode. +- `output_format`: `fastq` or `fasta`, depending on `--output-fasta`. +- `kractor_version`: Version of kractor that produced the summary. 
+ ### Arguments: ### Required: diff --git a/src/kractor.rs b/src/kractor.rs index 1f9cd2f..3ee9795 100644 --- a/src/kractor.rs +++ b/src/kractor.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize)] struct Summary { - total_taxon_count: usize, + taxons_identified: Vec, reads_extracted_per_taxon: FxHashMap, total_reads_in: usize, total_reads_out: usize, @@ -88,6 +88,7 @@ impl Kractor { fn process_reads(&mut self) -> Result<()> { let paired = self.args.input.len() == 2; let input_format = if paired { "paired" } else { "single" }; + let reads_extracted_per_taxon = self.get_reads_extracted_per_taxon(); if paired { let ((reads_parsed1, reads_output1), (reads_parsed2, reads_output2)) = @@ -105,8 +106,8 @@ impl Kractor { let reads_out = reads_output1 + reads_output2; self.summary = Some(Summary { - total_taxon_count: self.taxon_ids.len(), - reads_extracted_per_taxon: self.reads_per_taxon.clone(), + taxons_identified: self.taxon_ids.clone(), + reads_extracted_per_taxon: reads_extracted_per_taxon.clone(), total_reads_in: reads_in, total_reads_out: reads_out, proportion_extracted: reads_out as f64 / reads_in as f64, @@ -133,8 +134,8 @@ impl Kractor { let reads_out = reads_output1; self.summary = Some(Summary { - total_taxon_count: self.taxon_ids.len(), - reads_extracted_per_taxon: self.reads_per_taxon.clone(), + taxons_identified: self.taxon_ids.clone(), + reads_extracted_per_taxon, missing_taxon_ids: self.missing_taxon_ids.clone(), total_reads_in: reads_in, total_reads_out: reads_out, @@ -162,6 +163,14 @@ impl Kractor { Ok(()) } + fn get_reads_extracted_per_taxon(&self) -> FxHashMap { + let mut reads_extracted_per_taxon = self.reads_per_taxon.clone(); + for taxon_id in &self.taxon_ids { + reads_extracted_per_taxon.entry(*taxon_id).or_insert(0); + } + reads_extracted_per_taxon + } + pub fn run(&mut self) -> Result<()> { info!( "Starting kractor at {}", @@ -236,4 +245,33 @@ mod tests { let kractor = Kractor::new(args); 
assert!(kractor.validate_outputs().is_err()); } + + #[test] + fn test_get_reads_extracted_per_taxon() { + let input_files = vec![PathBuf::from("input.fastq")]; + let args = Cli { + input: input_files, + output: vec![PathBuf::from("output.fastq")], + kraken: PathBuf::from("kraken_output.txt"), + report: None, + taxid: vec![2901879, 227984], + output_type: None, + compression_level: niffler::Level::One, + parents: false, + children: false, + exclude: false, + output_fasta: false, + summary: false, + no_report_header_detect: false, + verbose: false, + }; + let mut kractor = Kractor::new(args); + kractor.taxon_ids = vec![2901879, 227984]; + kractor.reads_per_taxon.insert(227984, 257); + + let reads_extracted_per_taxon = kractor.get_reads_extracted_per_taxon(); + + assert_eq!(reads_extracted_per_taxon.get(&2901879), Some(&0)); + assert_eq!(reads_extracted_per_taxon.get(&227984), Some(&257)); + } }