From 0768a50b018df3c213df5ac90d05c625aff07e1a Mon Sep 17 00:00:00 2001
From: Hernan Morales
Date: Tue, 28 Apr 2026 00:49:51 -0300
Subject: [PATCH] Add BioSummarizedExperiment with tests.

---
 .../BaselineOfBioSmalltalk.class.st           |   4 +-
 ...BioRangedSummarizedExperimentTest.class.st | 128 ++++++
 .../BioSummarizedExperimentTest.class.st      | 168 ++++++++
 .../BioSummarizedExperiment-Tests/package.st  |   1 +
 .../BioRangedSummarizedExperiment.class.st    | 236 +++++++++++
 .../BioSummarizedExperiment.class.st          | 393 ++++++++++++++++++
 repository/BioSummarizedExperiment/package.st |   1 +
 repository/BioTools/BioGenomicRanges.class.st |   2 +-
 8 files changed, 931 insertions(+), 2 deletions(-)
 create mode 100644 repository/BioSummarizedExperiment-Tests/BioRangedSummarizedExperimentTest.class.st
 create mode 100644 repository/BioSummarizedExperiment-Tests/BioSummarizedExperimentTest.class.st
 create mode 100644 repository/BioSummarizedExperiment-Tests/package.st
 create mode 100644 repository/BioSummarizedExperiment/BioRangedSummarizedExperiment.class.st
 create mode 100644 repository/BioSummarizedExperiment/BioSummarizedExperiment.class.st
 create mode 100644 repository/BioSummarizedExperiment/package.st

diff --git a/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st b/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st
index 0af2294d..b068c74a 100644
--- a/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st
+++ b/repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st
@@ -115,7 +115,9 @@ BaselineOfBioSmalltalk >> baselineCommonPackages: spec [
 		package: 'BioTools-Tests' with: [ spec requires: #('BioTools' ). ];
 		package: 'BioWrapperTests' with: [ spec requires: #('BioTools-Tests' ) ];
 		package: 'BioPhysics' with: [ spec requires: #('BioTools') ];
-		package: 'BioWrappers' with: [ spec requires: #('BioTools' ) ]
+		package: 'BioWrappers' with: [ spec requires: #('BioTools' ) ];
+		package: 'BioSummarizedExperiment' with: [ spec requires: #('BioTools' 'DataFrame') ];
+		package: 'BioSummarizedExperiment-Tests' with: [ spec requires: #('BioSummarizedExperiment') ]
 
 ]
 
diff --git a/repository/BioSummarizedExperiment-Tests/BioRangedSummarizedExperimentTest.class.st b/repository/BioSummarizedExperiment-Tests/BioRangedSummarizedExperimentTest.class.st
new file mode 100644
index 00000000..4b75262b
--- /dev/null
+++ b/repository/BioSummarizedExperiment-Tests/BioRangedSummarizedExperimentTest.class.st
@@ -0,0 +1,128 @@
+Class {
+	#name : 'BioRangedSummarizedExperimentTest',
+	#superclass : 'TestCase',
+	#category : 'BioSummarizedExperiment-Tests',
+	#package : 'BioSummarizedExperiment-Tests'
+}
+
+{ #category : 'instance creation' }
+BioRangedSummarizedExperimentTest >> newTestRSE [
+	"Answer a fixture: 3 features x 2 samples, DataFrame row/column data and one genomic range per feature (chr1, chr2, chr3)."
+
+	| matrix rowData colData ranges |
+	matrix := Array2D rows: 3 columns: 2.
+	matrix at: 1 at: 1 put: 100.
+	matrix at: 1 at: 2 put: 150.
+	matrix at: 2 at: 1 put: 200.
+	matrix at: 2 at: 2 put: 250.
+	matrix at: 3 at: 1 put: 50.
+	matrix at: 3 at: 2 put: 75.
+	rowData := DataFrame
+		withRows: #( #( 1 'BRCA1' ) #( 2 'TP53' ) #( 3 'EGFR' ) )
+		columnNames: #( 'id' 'gene' ).
+	colData := DataFrame
+		withRows: #( #( 1 'S1' ) #( 2 'S2' ) )
+		columnNames: #( 'id' 'sample' ).
+	ranges := BioGenomicRanges new
+		ranges: (BioIRanges starts: #( 1000 3000 5000 ) ends: #( 2000 4000 6000 ));
+		seqnames: { 'chr1' . 'chr2' . 'chr3' };
+		strands: #( $+ $- $+ ).
+	^ BioRangedSummarizedExperiment new
+		assay: matrix;
+		rowRanges: ranges;
+		rowData: rowData;
+		colData: colData;
+		yourself
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testByChromosome [
+	"byChromosome: answers a non-nil subset for a chromosome present in rowRanges."
+	| rse chr1 |
+	rse := self newTestRSE.
+	chr1 := rse byChromosome: 'chr1'.
+	self assert: chr1 notNil
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testCreation [
+	"A fresh instance has an assays container and no rowRanges."
+	| rse |
+	rse := BioRangedSummarizedExperiment new.
+	self assert: rse assays notNil.
+	self assert: rse rowRanges isNil
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testGranges [
+	"granges answers the stored ranges, one per feature."
+	| rse gr |
+	rse := self newTestRSE.
+	gr := rse granges.
+	self assert: gr notNil.
+	self assert: gr size equals: 3
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testInheritsFromSummarizedExperiment [
+	"The ranged experiment is a kind of BioSummarizedExperiment."
+	| rse |
+	rse := BioRangedSummarizedExperiment new.
+	self assert: (rse isKindOf: BioSummarizedExperiment)
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testOverlaps [
+	"overlaps:from:to: answers a non-nil result for a query on chr1."
+	| rse overlaps |
+	rse := self newTestRSE.
+	overlaps := rse overlaps: 'chr1' from: 1500 to: 2500.
+	self assert: overlaps notNil
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testPythonExampleSize [
+	"A fresh, empty instance has dimension 0 -> 0."
+	| rse |
+	rse := BioRangedSummarizedExperiment new.
+	self assert: rse dim key equals: 0.
+	self assert: rse dim value equals: 0
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testRowRanges [
+	"rowRanges answers the ranges given at construction."
+	| rse ranges |
+	rse := self newTestRSE.
+	ranges := rse rowRanges.
+	self assert: ranges notNil.
+	self assert: ranges size equals: 3
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testSeqnames [
+	"seqnames answers one sequence name per feature."
+	| rse seqs |
+	rse := self newTestRSE.
+	seqs := rse seqnames.
+	self assert: seqs size equals: 3
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testStartEndWidth [
+	"start, end and width each answer one value per feature."
+	| rse |
+	rse := self newTestRSE.
+	self assert: rse start size equals: 3.
+	self assert: rse end size equals: 3.
+	self assert: rse width size equals: 3
+]
+
+{ #category : 'tests' }
+BioRangedSummarizedExperimentTest >> testStrand [
+	"strand answers one strand per feature."
+	| rse strands |
+	rse := self newTestRSE.
+	strands := rse strand.
+	self assert: strands size equals: 3
+]
diff --git a/repository/BioSummarizedExperiment-Tests/BioSummarizedExperimentTest.class.st b/repository/BioSummarizedExperiment-Tests/BioSummarizedExperimentTest.class.st
new file mode 100644
index 00000000..bead2650
--- /dev/null
+++ b/repository/BioSummarizedExperiment-Tests/BioSummarizedExperimentTest.class.st
@@ -0,0 +1,168 @@
+Class {
+	#name : 'BioSummarizedExperimentTest',
+	#superclass : 'TestCase',
+	#category : 'BioSummarizedExperiment-Tests',
+	#package : 'BioSummarizedExperiment-Tests'
+}
+
+{ #category : 'instance creation' }
+BioSummarizedExperimentTest >> newTestSE [
+	"Answer a fixture SummarizedExperiment with 4 genes x 3 samples and DataFrame row/column data."
+
+	| se matrix rowData colData |
+	matrix := Array2D rows: 4 columns: 3.
+	matrix at: 1 at: 1 put: 10.
+	matrix at: 1 at: 2 put: 20.
+	matrix at: 1 at: 3 put: 30.
+	matrix at: 2 at: 1 put: 15.
+	matrix at: 2 at: 2 put: 25.
+	matrix at: 2 at: 3 put: 35.
+	matrix at: 3 at: 1 put: 5.
+	matrix at: 3 at: 2 put: 10.
+	matrix at: 3 at: 3 put: 15.
+	matrix at: 4 at: 1 put: 8.
+	matrix at: 4 at: 2 put: 16.
+	matrix at: 4 at: 3 put: 24.
+	rowData := DataFrame
+		withRows: #( #( 1 'BRCA1' ) #( 2 'TP53' ) #( 3 'EGFR' ) #( 4 'MYC' ) )
+		columnNames: #( 'id' 'gene' ).
+	colData := DataFrame
+		withRows: #( #( 1 'S1' 'control' ) #( 2 'S2' 'treated' ) #( 3 'S3' 'treated' ) )
+		columnNames: #( 'id' 'sample' 'condition' ).
+	se := BioSummarizedExperiment
+		assay: matrix
+		rowData: rowData
+		colData: colData.
+	^ se
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testAssay [
+	"assay: stores the matrix as the primary assay."
+	| se matrix |
+	se := BioSummarizedExperiment new.
+	matrix := Array2D rows: 2 columns: 2.
+	matrix atRow: 1 put: #( 1 2 ).
+	matrix atRow: 2 put: #( 3 4 ).
+	se assay: matrix.
+	self assert: se assay notNil.
+	self deny: se assays isEmpty
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testAssayAtPut [
+	"assayAt:put: registers each named assay exactly once."
+	| se matrix |
+	se := BioSummarizedExperiment new.
+	matrix := Array2D rows: 2 columns: 2.
+	se assayAt: 'counts' put: matrix.
+	se assayAt: 'logcounts' put: matrix.
+	self assert: (se assayAt: 'counts') notNil.
+	self assert: (se assayAt: 'logcounts') notNil.
+	self assert: se assayNames size equals: 2
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testColData [
+	"colCount and the first colData row reflect the fixture's sample table."
+	| se |
+	se := self newTestSE.
+	self assert: se colCount equals: 3.
+	self assert: (se colData first at: 'condition') equals: 'control'
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testColNames [
+	"colNames answers one name per sample."
+	| se names |
+	se := self newTestSE.
+	names := se colNames.
+	self assert: names size equals: 3
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testCreation [
+	"A fresh instance has an empty assays container and dimension 0 x 0."
+	| se |
+	se := BioSummarizedExperiment new.
+	self assert: se assays notNil.
+	self assertEmpty: se assays.
+	self assert: se rowCount equals: 0.
+	self assert: se colCount equals: 0
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testDim [
+	"dim answers rows -> columns."
+	| se |
+	se := self newTestSE.
+	self assert: se dim key equals: 4. "rows"
+	self assert: se dim value equals: 3 "cols"
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testFactoryMethod [
+	"assay:rowData:colData: answers an instance sized by its row/column data."
+	| se matrix rowData colData |
+	matrix := Array2D rows: 2 columns: 2.
+	rowData := DataFrame
+		withRows: #( #( 1 'gene1' ) #( 2 'gene2' ) )
+		columnNames: #( 'id' 'name' ).
+	colData := DataFrame
+		withRows: #( #( 1 'sample1' ) #( 2 'sample2' ) )
+		columnNames: #( 'id' 'name' ).
+	se := BioSummarizedExperiment
+		assay: matrix
+		rowData: rowData
+		colData: colData.
+	self assert: se rowCount equals: 2.
+	self assert: se colCount equals: 2
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testMetadata [
+	"metadata is a mutable dictionary of experiment-level annotations."
+	| se |
+	se := BioSummarizedExperiment new.
+	se metadata at: 'experiment' put: 'RNA-seq'.
+	se metadata at: 'date' put: '2024-01-15'.
+	self assert: (se metadata at: 'experiment') equals: 'RNA-seq'
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testRowData [
+	"rowCount and the first rowData row reflect the fixture's gene table."
+	| se |
+	se := self newTestSE.
+	self assert: se rowCount equals: 4.
+	self assert: (se rowData first at: 'gene') equals: 'BRCA1'
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testRowNames [
+	"rowNames answers one name per feature."
+	| se names |
+	se := self newTestSE.
+	names := se rowNames.
+	self assert: names size equals: 4
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testSubsetByRowIndex [
+	"atRowIndex:to: keeps the requested rows and every column."
+	| se subset |
+	se := self newTestSE.
+	subset := se atRowIndex: 1 to: 2.
+	self assert: subset rowCount equals: 2.
+	self assert: subset colCount equals: 3
+]
+
+{ #category : 'tests' }
+BioSummarizedExperimentTest >> testSubsetByColIndex [
+	"atColIndex:to: keeps the requested columns and every row."
+	| se subset |
+	se := self newTestSE.
+	subset := se atColIndex: 2 to: 3.
+	self assert: subset rowCount equals: 4.
+	self assert: subset colCount equals: 2
+]
diff --git a/repository/BioSummarizedExperiment-Tests/package.st b/repository/BioSummarizedExperiment-Tests/package.st
new file mode 100644
index 00000000..4a89cfb6
--- /dev/null
+++ b/repository/BioSummarizedExperiment-Tests/package.st
@@ -0,0 +1 @@
+Package { #name : 'BioSummarizedExperiment-Tests' }
diff --git a/repository/BioSummarizedExperiment/BioRangedSummarizedExperiment.class.st b/repository/BioSummarizedExperiment/BioRangedSummarizedExperiment.class.st
new file mode 100644
index 00000000..27935377
--- /dev/null
+++ b/repository/BioSummarizedExperiment/BioRangedSummarizedExperiment.class.st
@@ -0,0 +1,236 @@
+Class {
+	#name : 'BioRangedSummarizedExperiment',
+	#superclass : 'BioSummarizedExperiment',
+	#instVars : [
+		'rowRanges'
+	],
+	#category : 'BioSummarizedExperiment',
+	#package : 'BioSummarizedExperiment'
+}
+
+{ #category : 'instance creation' }
+BioRangedSummarizedExperiment class >> fromRanges: seqnames start: starts end: ends strand: strands assay: matrix colData: colInfo [
+	"Answer a new instance whose rowRanges are built from parallel collections of sequence names, starts, ends and strands. rowData is left as initialized."
+
+	| rse ir ranges |
+	ir := BioIRanges new
+		starts: starts;
+		ends: ends.
+	ranges := BioGenomicRanges new
+		ranges: ir;
+		seqnames: seqnames;
+		strands: strands.
+	rse := self new
+		assay: matrix;
+		rowRanges: ranges;
+		colData: colInfo;
+		yourself.
+	^ rse
+]
+
+{ #category : 'instance creation' }
+BioRangedSummarizedExperiment class >> pythonExample [
+	"Answer a 200 feature x 6 sample example with random assay values, matching the Python example.
+	Uses DataFrame for rowData and colData. Values are random, so two calls answer different assays."
+
+	| nrows ncols rse seqnames strands matrix ir ranges colData rowData treatments c1 c2 c3 |
+	nrows := 200.
+	ncols := 6.
+	c1 := String withAll: #( $c $h $r $1 ).
+	c2 := String withAll: #( $c $h $r $2 ).
+	c3 := String withAll: #( $c $h $r $3 ).
+	seqnames := Array new: nrows.
+	1 to: nrows do: [ :i |
+		| patternIdx chr |
+		patternIdx := i - 1 \\ 10 + 1.
+		chr := patternIdx <= 2
+			ifTrue: [ c1 ]
+			ifFalse: [
+				patternIdx <= 6
+					ifTrue: [ c2 ]
+					ifFalse: [ c3 ] ].
+		seqnames at: i put: chr ].
+
+	strands := Array new: nrows.
+	#( $- $+ $+ $* $* $+ $+ $+ $- $- ) withIndexDo: [ :char :idx |
+		1 to: 20 do: [ :rep | strands at: idx + (rep - 1 * 10) put: char ] ].
+
+	matrix := Array2D rows: nrows columns: ncols.
+	1 to: nrows do: [ :i |
+		1 to: ncols do: [ :j |
+			matrix at: i at: j put: 100 atRandom asFloat / 100.0 ] ].
+
+	ir := BioIRanges new.
+	ir starts: ((1 to: nrows) collect: [ :i | 99 + i ]).
+	ir ends: ((1 to: nrows) collect: [ :i | 109 + i ]).
+	ranges := BioGenomicRanges new
+		ranges: ir;
+		seqnames: seqnames;
+		strands: strands.
+
+	colData := DataFrame
+		withRows:
+		((1 to: ncols) collect: [ :i | { (i \\ 2 = 1) } ])
+		columnNames: #( 'treatment' ).
+	treatments := (colData column: 'treatment') asArray collect: [ :v |
+		v
+			ifTrue: [ 'ChIP' ]
+			ifFalse: [ 'Input' ] ].
+	colData column: 'treatment' put: treatments.
+
+	rowData := DataFrame
+		withRows: ((1 to: nrows) collect: [ :i |
+			{
+				(i - 1).
+				(100 atRandom / 100.0) } ])
+		columnNames: #( 'score' 'GC' ).
+
+	rse := self new
+		assay: matrix;
+		rowRanges: ranges;
+		colData: colData;
+		rowData: rowData.
+	^ rse
+]
+
+{ #category : 'instance creation' }
+BioRangedSummarizedExperiment class >> randomWithRows: nrows cols: ncols seqnames: seqnames start: startOffset end: width strand: strands [
+	"Answer an instance with a random nrows x ncols assay; row i spans startOffset + i - 1 to startOffset + width + i - 2. colData is an Array of empty Dictionaries and rowData stays as initialized."
+
+	| matrix ir ranges colData rse |
+	matrix := Array2D rows: nrows columns: ncols.
+	1 to: nrows do: [ :i |
+		1 to: ncols do: [ :j |
+			matrix at: i at: j put: 100 atRandom asFloat / 100.0 ] ].
+	ir := BioIRanges new.
+	ir starts: ((1 to: nrows) collect: [ :i | startOffset + i - 1 ]).
+	ir ends:
+		((1 to: nrows) collect: [ :i | startOffset + width + i - 2 ]).
+	ranges := BioGenomicRanges new
+		ranges: ir;
+		seqnames: seqnames;
+		strands: strands.
+	colData := (1 to: ncols) collect: [ :i | Dictionary new ].
+	rse := self new
+		assay: matrix;
+		rowRanges: ranges;
+		colData: colData;
+		yourself.
+	^ rse
+]
+
+{ #category : 'converting' }
+BioRangedSummarizedExperiment >> asBioGenomicRanges [
+	"Answer rowRanges, or nil when no ranges have been set."
+
+	^ rowRanges
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> byChromosome: seqname [
+	"Answer a new experiment for seqname, or nil when rowRanges is nil or nothing matches. The subset is the contiguous span from the first to the last matching row, so interleaved chromosomes also pick up the rows in between; it is built by atRowIndex:to:, which does not copy rowRanges."
+
+	| indices |
+	rowRanges ifNil: [ ^ nil ].
+	indices := (1 to: rowRanges size) select: [ :i |
+		(rowRanges seqnames at: i) = seqname ].
+	indices ifEmpty: [ ^ nil ].
+	^ self atRowIndex: indices first to: indices last
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> end [
+	"Answer end positions from rowRanges, or an empty Array when rowRanges is nil."
+
+	^ rowRanges ifNil: [ #( ) ] ifNotNil: [ rowRanges end ]
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> granges [
+	"Answer rowRanges (alias of #rowRanges); nil when no ranges have been set."
+
+	^ rowRanges
+]
+
+{ #category : 'querying' }
+BioRangedSummarizedExperiment >> overlaps: seqname from: startPos to: endPos [
+	"Find features overlapping a genomic range.
+	Answer what rowRanges findOverlaps: answers for a one-range query on
+	seqname spanning startPos to endPos, or an empty Array when rowRanges is
+	nil. The query is placed on the $+ strand.
+	NOTE(review): whether findOverlaps: is strand-aware is not visible here;
+	if it is, features on other strands are not reported by this method."
+
+	^ self
+		overlaps: seqname
+		from: startPos
+		to: endPos
+		strand: $+
+]
+
+{ #category : 'querying' }
+BioRangedSummarizedExperiment >> overlaps: seqname from: startPos to: endPos strand: strandSymbol [
+	"Find features overlapping a genomic range on a specific strand; answer an empty Array when rowRanges is nil.
+	The query is a one-element BioGenomicRanges, the same kind of object #overlaps:from:to: hands to findOverlaps:."
+
+	| queryRange |
+	rowRanges ifNil: [ ^ #( ) ].
+	queryRange := BioGenomicRanges new
+		ranges: (BioIRanges starts: (Array with: startPos) ends: (Array with: endPos));
+		seqnames: (Array with: seqname);
+		strands: (Array with: strandSymbol);
+		yourself.
+	^ rowRanges findOverlaps: queryRange
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> rowRanges [
+	"Answer the BioGenomicRanges for rows, or nil when none have been set."
+
+	^ rowRanges
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> rowRanges: genomicRanges [
+	"Set the genomic ranges for rows. No check is made against the assay or rowData sizes."
+
+	rowRanges := genomicRanges
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> seqnames [
+	"Answer sequence names from rowRanges, or an empty Array when rowRanges is nil."
+
+	^ rowRanges ifNil: [ #( ) ] ifNotNil: [ rowRanges seqnames ]
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> start [
+	"Answer start positions from rowRanges, or an empty Array when rowRanges is nil."
+
+	^ rowRanges ifNil: [ #( ) ] ifNotNil: [ rowRanges start ]
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> strand [
+	"Answer strands from rowRanges, or an empty Array when rowRanges is nil."
+
+	^ rowRanges ifNil: [ #( ) ] ifNotNil: [ rowRanges strands ]
+]
+
+{ #category : 'querying' }
+BioRangedSummarizedExperiment >> subsetByOverlaps: seqname from: startPos to: endPos [
+	"Answer a new experiment spanning the first to the last overlapping feature, or nil when nothing overlaps. Assumes #overlaps:from:to: answers row indices in ascending order - TODO confirm against findOverlaps:. The subset does not carry rowRanges."
+
+	| indices |
+	indices := self overlaps: seqname from: startPos to: endPos.
+	indices ifEmpty: [ ^ nil ].
+	^ self atRowIndex: indices first to: indices last
+]
+
+{ #category : 'accessing' }
+BioRangedSummarizedExperiment >> width [
+	"Answer widths from rowRanges, or an empty Array when rowRanges is nil."
+
+	^ rowRanges ifNil: [ #( ) ] ifNotNil: [ rowRanges width ]
+]
diff --git a/repository/BioSummarizedExperiment/BioSummarizedExperiment.class.st b/repository/BioSummarizedExperiment/BioSummarizedExperiment.class.st
new file mode 100644
index 00000000..b3c14b88
--- /dev/null
+++ b/repository/BioSummarizedExperiment/BioSummarizedExperiment.class.st
@@ -0,0 +1,393 @@
+Class {
+	#name : 'BioSummarizedExperiment',
+	#superclass : 'Object',
+	#instVars : [
+		'assays',
+		'rowData',
+		'colData',
+		'metadata',
+		'assayNames'
+	],
+	#category : 'BioSummarizedExperiment',
+	#package : 'BioSummarizedExperiment'
+}
+
+{ #category : 'instance creation' }
+BioSummarizedExperiment class >> assay: aMatrix rowData: aDf colData: anotherDf [
+	"Answer a new instance with aMatrix as primary assay, aDf describing rows (features) and anotherDf describing columns (samples)."
+
+	| se |
+	se := self new.
+	se assay: aMatrix.
+	se rowData: aDf.
+	se colData: anotherDf.
+	^ se
+]
+
+{ #category : 'instance creation' }
+BioSummarizedExperiment class >> fromDataFrame: rowDataDf colDataFrame: colDataDf assay: aMatrix [
+	"Create a SummarizedExperiment from DataFrames for row and column metadata.
+	Same result as #assay:rowData:colData:, with the arguments in a different
+	order; it delegates so that the two constructors cannot drift apart."
+
+	^ self
+		assay: aMatrix
+		rowData: rowDataDf
+		colData: colDataDf
+]
+
+{ #category : 'instance creation' }
+BioSummarizedExperiment class >> new [
+	"Answer an initialized instance. Uses basicNew because Behavior>>new already sends #initialize, so sending it to super new would initialize twice."
+	^ self basicNew initialize
+]
+
+{ #category : 'instance creation' }
+BioSummarizedExperiment class >> randomWithRows: nrows cols: ncols [
+	"Answer an instance whose primary assay is nrows x ncols of random values in (0, 1]. rowData and colData stay empty, so rowCount and colCount answer 0 until they are set."
+
+	| se matrix |
+	matrix := Array2D rows: nrows columns: ncols.
+	1 to: nrows do: [ :i |
+		1 to: ncols do: [ :j |
+			matrix at: i at: j put: 100 atRandom asFloat / 100.0 ] ].
+	se := self new.
+	se assay: matrix.
+	^ se
+]
+
+{ #category : 'adding' }
+BioSummarizedExperiment >> addRowMetadata: key at: index put: value [
+	"Store value under key in the rowData entry at index, creating a Dictionary entry when absent. NOTE(review): requires rowData to understand #at:ifAbsentPut: - confirm this holds for the default DataFrame rowData."
+
+	| row |
+	row := rowData at: index ifAbsentPut: [ Dictionary new ].
+	row at: key put: value
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assay [
+	"Answer the primary assay (the first name in assayNames), or nil when there is no assay."
+	self assayNames ifEmpty: [ ^ nil ].
+	^ self assayAt: self assayNames first
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assay: matrix [
+	"Store matrix as the primary assay; the first assay ever stored this way is named 'counts'."
+	assayNames ifEmpty: [ assayNames add: 'counts' ].
+	^ self assayAt: assayNames first put: matrix
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assayAt: name [
+	"Answer the assay stored under name; the lookup is a plain Dictionary #at:."
+	^ assays at: name
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assayAt: name put: matrix [
+	"Store matrix under name, recording name in assayNames the first time it is seen. Answer matrix."
+	(assayNames includes: name) ifFalse: [ assayNames add: name ].
+	^ assays at: name put: matrix
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assayNames [
+	"Answer the assay names in insertion order; when none were recorded (assays installed through #assays:) answer the dictionary keys instead."
+	^ (assayNames isNil or: [ assayNames isEmpty ]) ifTrue: [ assays keys ] ifFalse: [ assayNames ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assays [
+	"Answer the Dictionary mapping assay name to matrix."
+	^ assays
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> assays: aDictionary [
+	"Replace the assay dictionary. assayNames is not touched; prefer #assayAt:put: to keep both in step."
+	assays := aDictionary
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> atColIndex: index [
+	"Answer a new experiment holding only the column (sample) at index.
+	Delegates to #atColIndex:to: so that single-index and range subsetting
+	behave the same way: every assay stays an Array2D, assay names are
+	kept and colData is subsetted like any other range. Sending #collect:
+	to an Array2D, as an earlier version did, visits cells and not rows."
+
+	^ self
+		atColIndex: index
+		to: index
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> atColIndex: start to: end [
+	"Answer a new experiment with columns start..end (inclusive) of every assay and the matching colData entries; rowData and metadata are shallow copies."
+
+	| newSE width |
+	newSE := self class new.
+	width := end - start + 1.
+	"Install each assay through #assayAt:put:, in name order, so that the
+	subset keeps its assay names and the same primary assay."
+	self assayNames do: [ :key |
+		| matrix subMatrix |
+		matrix := assays at: key.
+		subMatrix := Array2D
+			rows: matrix numberOfRows
+			columns: width.
+		1 to: matrix numberOfRows do: [ :i |
+			1 to: width do: [ :j |
+				subMatrix at: i at: j put: (matrix at: i at: start + j - 1) ] ].
+		newSE assayAt: key put: subMatrix ].
+	newSE rowData: rowData copy.
+	newSE colData: ((colData isKindOf: DataFrame) ifTrue: [ colData rowsFrom: start to: end ] ifFalse: [ colData copyFrom: start to: end ]).
+	newSE metadata: metadata copy.
+	^ newSE
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> atRowIndex: index [
+	"Answer a new experiment holding only the row (feature) at index.
+	Delegates to #atRowIndex:to: so that single-index and range subsetting
+	behave the same way: every assay stays an Array2D, assay names are
+	kept and rowData is subsetted like any other range. The result is
+	always an instance of the receiver's class with one row."
+
+	^ self
+		atRowIndex: index
+		to: index
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> atRowIndex: start to: end [
+	"Answer a new experiment with rows start..end (inclusive) of every assay and the matching rowData entries; colData and metadata are shallow copies."
+
+	| newSE height |
+	newSE := self class new.
+	height := end - start + 1.
+	"Install each assay through #assayAt:put:, in name order, so that the
+	subset keeps its assay names and the same primary assay."
+	self assayNames do: [ :key |
+		| matrix subMatrix |
+		matrix := assays at: key.
+		subMatrix := Array2D
+			rows: height
+			columns: matrix numberOfColumns.
+		1 to: height do: [ :i |
+			1 to: matrix numberOfColumns do: [ :j |
+				subMatrix at: i at: j put: (matrix at: start + i - 1 at: j) ] ].
+		newSE assayAt: key put: subMatrix ].
+	newSE rowData: ((rowData isKindOf: DataFrame) ifTrue: [ rowData rowsFrom: start to: end ] ifFalse: [ rowData copyFrom: start to: end ]).
+	newSE colData: colData copy.
+	newSE metadata: metadata copy.
+	^ newSE
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colColumn: columnName [
+	"Answer a column from colData. Handles both DataFrame and collection of Dictionary; for the latter, entries lacking columnName contribute nil."
+
+	(colData isKindOf: DataFrame)
+		ifTrue: [ ^ colData column: columnName ].
+	^ colData collect: [ :row | row at: columnName ifAbsent: [ nil ] ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colCount [
+	"Answer the number of columns (samples), taken from colData: its row count for a DataFrame, its size for any other collection, 0 when nil."
+
+	^ colData ifNil: [ 0 ] ifNotNil: [ :data | (data isKindOf: DataFrame) ifTrue: [ data numberOfRows ] ifFalse: [ data size ] ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colData [
+	"Answer the per-column (sample) annotations."
+	^ colData
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colData: aCollection [
+	"Set the per-column (sample) annotations: a DataFrame or a collection with one entry per column."
+	colData := aCollection
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colMeans [
+	"Answer an Array of column means of the primary assay (column sum divided by rowCount as a Float)."
+
+	| sums |
+	sums := self colSums.
+	^ sums collect: [ :s | s / self rowCount asFloat ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colNames [
+	"Answer what colData answers to #rowNames (colData holds one row per column of the assay)."
+
+	^ colData rowNames
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> colSums [
+	"Answer an Array of column sums of the primary assay, sized and iterated by colCount and rowCount."
+
+	| matrix sums |
+	matrix := self assay.
+	sums := Array new: self colCount.
+	1 to: self colCount do: [ :j |
+		| sum |
+		sum := 0.
+		1 to: self rowCount do: [ :i | sum := sum + (matrix at: i at: j) ].
+		sums at: j put: sum ].
+	^ sums
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> dim [
+	"Answer the dimensions as an Association rowCount -> colCount."
+	^ self rowCount -> self colCount
+]
+
+{ #category : 'initialization' }
+BioSummarizedExperiment >> initialize [
+	"Start with no assays, empty DataFrames for row and column data and empty metadata."
+	super initialize.
+	assays := Dictionary new.
+	assayNames := OrderedCollection new.
+	rowData := DataFrame new.
+	colData := DataFrame new.
+	metadata := Dictionary new
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> metadata [
+	"Answer the experiment-level metadata Dictionary."
+	^ metadata
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> metadata: aDictionary [
+	"Replace the experiment-level metadata."
+	metadata := aDictionary
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowColumn: columnName [
+	"Answer a column from rowData. Handles both DataFrame and collection of Dictionary; for the latter, entries lacking columnName contribute nil."
+
+	(rowData isKindOf: DataFrame)
+		ifTrue: [ ^ rowData column: columnName ].
+	^ rowData collect: [ :row | row at: columnName ifAbsent: [ nil ] ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowCount [
+	"Answer the number of rows (features), taken from rowData: its row count for a DataFrame, its size for any other collection, 0 when nil."
+
+	^ rowData ifNil: [ 0 ] ifNotNil: [ :data | (data isKindOf: DataFrame) ifTrue: [ data numberOfRows ] ifFalse: [ data size ] ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowData [
+	"Answer the per-row (feature) annotations."
+	^ rowData
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowData: aCollection [
+	"Set the per-row (feature) annotations: a DataFrame or a collection with one entry per row."
+	rowData := aCollection
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowMeans [
+	"Answer an Array of row means of the primary assay (row sum divided by colCount as a Float)."
+
+	| sums |
+	sums := self rowSums.
+	^ sums collect: [ :s | s / self colCount asFloat ]
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowNames [
+	"Answer what rowData answers to #rowNames."
+
+	^ rowData rowNames
+]
+
+{ #category : 'accessing' }
+BioSummarizedExperiment >> rowSums [
+	"Answer an Array of row sums of the primary assay, sized and iterated by rowCount and colCount."
+
+	| matrix sums |
+	matrix := self assay.
+	sums := Array new: self rowCount.
+	1 to: self rowCount do: [ :i |
+		| sum |
+		sum := 0.
+		1 to: self colCount do: [ :j | sum := sum + (matrix at: i at: j) ].
+		sums at: i put: sum ].
+	^ sums
+]
+
+{ #category : 'initialization' }
+BioSummarizedExperiment >> setColMetadata: key to: values [
+	"Set a metadata column for all columns. A DataFrame gets a new column named key; otherwise values are stored one by one, under key, in the colData entries."
+
+	(colData isKindOf: DataFrame) ifTrue: [
+		colData addColumn: values named: key.
+		^ self ].
+	values withIndexDo: [ :val :i | (colData at: i) at: key put: val ]
+]
+
+{ #category : 'initialization' }
+BioSummarizedExperiment >> setRowMetadata: key to: values [
+	"Set a metadata column for all rows. A DataFrame gets a new column named key; otherwise values are stored one by one, under key, in the rowData entries."
+
+	(rowData isKindOf: DataFrame) ifTrue: [
+		rowData addColumn: values named: key.
+		^ self ].
+	values withIndexDo: [ :val :i | (rowData at: i) at: key put: val ]
+]
+
+{ #category : 'printing' }
+BioSummarizedExperiment >> summary [
+	"Answer a multi-line String with the class, dim, assay names, and either the column names (DataFrame) or the size of rowData and colData."
+
+	^ String streamContents: [ :s |
+		s
+			nextPutAll: 'class: ';
+			print: self class name;
+			cr.
+		s
+			nextPutAll: 'dim: ';
+			print: self dim;
+			cr.
+		s
+			nextPutAll: 'assays: ';
+			print: self assayNames asArray;
+			cr.
+		(rowData isKindOf: DataFrame)
+			ifTrue: [
+				s
+					nextPutAll: 'rowData columns: ';
+					print: rowData columnNames asArray;
+					cr ]
+			ifFalse: [
+				s
+					nextPutAll: 'rowData size: ';
+					print: rowData size;
+					cr ].
+		(colData isKindOf: DataFrame)
+			ifTrue: [
+				s
+					nextPutAll: 'colData columns: ';
+					print: colData columnNames asArray ]
+			ifFalse: [
+				s
+					nextPutAll: 'colData size: ';
+					print: colData size ] ]
+]
diff --git a/repository/BioSummarizedExperiment/package.st b/repository/BioSummarizedExperiment/package.st
new file mode 100644
index 00000000..1c388fd2
--- /dev/null
+++ b/repository/BioSummarizedExperiment/package.st
@@ -0,0 +1 @@
+Package { #name : 'BioSummarizedExperiment' }
diff --git a/repository/BioTools/BioGenomicRanges.class.st b/repository/BioTools/BioGenomicRanges.class.st
index 1cc635d9..2b0aa7d1 100644
--- a/repository/BioTools/BioGenomicRanges.class.st
+++ b/repository/BioTools/BioGenomicRanges.class.st
@@ -287,7 +287,7 @@ BioGenomicRanges >> at: anIndex [
 		      seqname: (self seqnames at: anIndex)
 		      start: (ranges startAt: anIndex)
 		      end: (ranges endAt: anIndex)
-		      strand: (self strand at: anIndex) symbol asString.
+		      strand: (self strand at: anIndex) asString.
 	gr metadata: md.
 	^ gr
 ]