diff --git a/repository/BioParsers-Tests/BioPhylipParserTest.class.st b/repository/BioParsers-Tests/BioPhylipParserTest.class.st
index 2c2e750d..55a66c27 100644
--- a/repository/BioParsers-Tests/BioPhylipParserTest.class.st
+++ b/repository/BioParsers-Tests/BioPhylipParserTest.class.st
@@ -5,250 +5,366 @@ Class {
 	#package : 'BioParsers-Tests'
 }
 
-{ #category : 'testing' }
-BioPhylipParserTest >> firstLineTokenizer [
-
-	^ BioPhylipParser new firstLineTokenizer 
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> phylipInterleavedDNA [
-
-	^ '  6   13    
-Archaeopt CGATGCTTAC CGCCGATGCT
-HesperorniCGTTACTCGT TGTCGTTACT
-BaluchitheTAATGTTAAT TGTTAATGTT
-B. virginiTAATGTTCGT TGTTAATGTT
-BrontosaurCAAAACCCAT CATCAAAACC
-B.subtilisGGCAGCCAAT CACGGCAGCC
-
-TACCGCCGAT GCTTACCGC
-CGTTGTCGTT ACTCGTTGT
-AATTGTTAAT GTTAATTGT
-CGTTGTTAAT GTTCGTTGT
-CATCATCAAA ACCCATCAT
-AATCACGGCA GCCAATCAC
-
-CCCCGCCCCC GCTTACCGC
-CCCCGTCCCC ACTCGTTGT
-CCCCGTCCCC GTTAATTGT
-CCCCGTCCCC GTTCGTTGT
-CCCCATCCCC ACCCATCAT
-CCCCACCCCC GCCAATCAC
-'
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> phylipInterleavedProtein [
-
-	^ ' 5 176
-cox2_leitaMAFILSFWMI FLLDSVIVLL SFVCFVCVWI CALLFSTVLL VSKLNNIYCT
-cox2_crifaMAFILSFWMI FLIDAVIVLL SFVCFVCIWI CSLFFSSFLL VSKINNVYCT
-cox2_bsaltMSFIISFWML FLIDSLIVLL SGAIFVCIWI CSLFFLCILF ICKLDYIFCS
-cox2_trybbMSFILTFWMI FLMDSIIVLI SFSIFLSVWI CALIIATVLT VTKINNIYCT
-cox2_tborrMLFFINQLLL LLVDTFVILE IFSLFVCVFI IVMYILFINY NIFLKNINVY
-
-WDFTASKFID VYWFTIGGMF SLGLLLRLCL LLYFGHLNFV SFDLCKVVGF
-WDFTASKFID AYWFTIGGMF VLCLLLRLCL LLYFGCLNFV SFDLCKVVGF
-WDFISAKFID LYWFTLGCLF IVCLLIRLCL LLYFSCLNFV CFDLCKCIGF
-WDFISSKFID TYWFVLGMMF ILCLLLRLCL LLYFSCINFV SFDLCKVIGF
-LDFIGSKYLD LYWFLIGIFF VIVLLIRLCL LLYYSWISLL IFDLCKIMGF
-
-QWYWVYFIFG ETTIFSNLIL ESDYMIGDLR LLQCNHVLTL LSLVIYKLWL
-QWYWVYFIFG ETTIFSNLIL ESDYLIGDLR LLQCNHVLTL LSLVIYKLWL
-QWYWVYFIFG ETTIFSNLIL ESDYLIGDLR LLQCNHVLTL LSLVIYKVWL
-QWYWVYFLFG ETTIFSNLIL ESDYLIGDLR ILQCNHVLTL LSLVIYKLWV
-QWYWIFFVFK ENVIFSNLLI ESDYWIGDLR LLQCNNTFNL ICLVVYKIWV
-
-SAVDVIHSFA ISSLGVKVEN LVAVMK
-SAVDVIHSFA VSSLGIKVDC IPGRCN
-SAIDVIHSFT LANLGIKVD? ?PGRCN
-SAVDVIHSFT ISSLGIKVEN PGRCNE
-TSIDVIHSFT ISTLGIKIDC IPGRCN
-'
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> speciesDNALineTokenizer [
-
-	^ BioPhylipParser new speciesDNALineTokenizer 
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> speciesDNANamedBlockTokenizer [
-
-	^ BioPhylipParser new speciesDNANamedBlockTokenizer 
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeDNASpeciesBlock01 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| speciesBlock expectedResult firstRecord |
-	
-	speciesBlock := 'Archaeopt CGATGCTTAC CGC
-HesperorniCGTTACTCGT TGT
-BaluchitheTAATGTTAAT TGT
-B. virginiTAATGTTCGT TGT
-BrontosaurCAAAACCCAT CAT
-B.subtilisGGCAGCCAAT CAC'.
-	expectedResult :=  #(#('Archaeopt ' 'CGATGCTTAC CGC' nil) #('Hesperorni' 'CGTTACTCGT TGT' nil) #('Baluchithe' 'TAATGTTAAT TGT' nil) #('B. virgini' 'TAATGTTCGT TGT' nil) #('Brontosaur' 'CAAAACCCAT CAT' nil)).
-	
-	parseResult := self speciesDNANamedBlockTokenizer parse: speciesBlock.
-	firstRecord := parseResult first.
-	
-	self assert: firstRecord first equals: 'Archaeopt '.
-	self assert: firstRecord second equals: 'CGATGCTTAC CGC'.
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeDNASpeciesBlock02 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| speciesBlock expectedResult firstRecord |
-	
-	speciesBlock := 'Archaeopt CGATGCTTAC CGC
-Hes       CGTTACTCGT TGT
-BaluchitheTAATGTTAAT TGT
-B. virginiTAATGTTCGT TGT
-BrontosaurCAAAACCCAT CAT
-B.subtilisGGCAGCCAAT CAC'.
-	expectedResult :=  #(
-		#('Archaeopt ' 'CGATGCTTAC CGC') 
-		#('Hesperorni' 'CGTTACTCGT TGT') 
-		#('Baluchithe' 'TAATGTTAAT TGT') 
-		#('B. virgini' 'TAATGTTCGT TGT') 
-		#('Brontosaur' 'CAAAACCCAT CAT')).
-
-	parseResult := self speciesDNANamedBlockTokenizer parse: speciesBlock.
-	firstRecord := parseResult first.
-	
-	self assert: firstRecord first equals: 'Archaeopt '.
-	self assert: firstRecord second equals: 'CGATGCTTAC CGC'.
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeDNASpeciesLine01 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| speciesLineBlock expectedResult |
-	
-	speciesLineBlock := 'Archaeopt CGATGCTTAC CGC'.
-	expectedResult := #('Archaeopt ' 'CGATGCTTACCGC').
-	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
-
-	self assert: (parseResult bioHasEqualElements: expectedResult ).
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeDNASpeciesLine02 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| speciesLineBlock expectedResult |
-	
-	speciesLineBlock := 'Archaeopt CGATGCTTACCGC'.	
-	expectedResult := #('Archaeopt ' 'CGATGCTTACCGC').
-	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
-		
-	self assert: (parseResult bioHasEqualElements: expectedResult).
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeDNASpeciesLine03 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| speciesLineBlock expectedResult |
-	
-	speciesLineBlock := 'B. virginiTAATGTTCGT TGT'.
-	expectedResult := #('B. virgini' 'TAATGTTCGTTGT').
-	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
-
-	self assert: (parseResult bioHasEqualElements: expectedResult).
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeFirstLine01 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| firstLine |
-	
-	firstLine := '6   13
-'.
-	parseResult := self firstLineTokenizer parse: firstLine.
-	self assert: (parseResult bioHasEqualElements: #('6' '13') ).
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeFirstLine02 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| firstLine |
-	
-	firstLine := '      6   13
-'.
-	parseResult := self firstLineTokenizer parse: firstLine.
-	self assert: (parseResult bioHasEqualElements: #('6' '13') ).
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeFirstLine03 [
-	" Private - Answer a <String> with a sample phylip DNA "
-	
-	| firstLine |
-	
-	firstLine := '6   13    
-'.
-	parseResult := self firstLineTokenizer parse: firstLine.
-	self assert: (parseResult bioHasEqualElements: #('6' '13') ).
-
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeInterleavedDNA [
-	" Private - Answer a <String> with a sample phylip DNA "
-
-	| phylipString |
-	phylipString := self phylipInterleavedDNA.
-
-	parseResult := BioParser tokenizePhylipInterleavedDNA: phylipString.
-	self assert: parseResult size equals: 4.
-	self assert: parseResult first equals: 6.
-	self assert: parseResult second equals: 13.
-	self assert: (parseResult third bioHasEqualElements:
-			 #( 'Archaeopt ' 'Hesperorni' 'Baluchithe' 'B. virgini'
-			    'Brontosaur' 'B.subtilis' )).
-	self assert: (parseResult fourth bioHasEqualElements:
-			 #( 'CGATGCTTACCGCCGATGCTTACCGCCGATGCTTACCGCCCCCGCCCCCGCTTACCGC'
-			    'CGTTACTCGTTGTCGTTACTCGTTGTCGTTACTCGTTGTCCCCGTCCCCACTCGTTGT'
-			    'TAATGTTAATTGTTAATGTTAATTGTTAATGTTAATTGTCCCCGTCCCCGTTAATTGT'
-			    'TAATGTTCGTTGTTAATGTTCGTTGTTAATGTTCGTTGTCCCCGTCCCCGTTCGTTGT'
-			    'CAAAACCCATCATCAAAACCCATCATCAAAACCCATCATCCCCATCCCCACCCATCAT'
-			    'GGCAGCCAATCACGGCAGCCAATCACGGCAGCCAATCACCCCCACCCCCGCCAATCAC' ))
-]
-
-{ #category : 'testing' }
-BioPhylipParserTest >> testTokenizeInterleavedProtein [
-
-	| phylipString |
-	phylipString := self phylipInterleavedProtein.
-	parseResult := BioParser tokenizePhylipInterleavedProtein: phylipString.
+{ #category : 'tests' }
+BioPhylipParserTest >> testAmbiguousDNAAlphabetDetection [
+	"Parse a two-taxon alignment whose sequences contain the ambiguity code N.
+	 Note kept from the original author: codes such as N, R, Y are also amino acid
+	 letters and may be detected as protein, hence no alphabet is asserted here."
+	| phylip aln seq |
+	phylip := '2 10
+Seq1 AACGTGGNNA
+Seq2 CCGTATGGNN
+'.
+	aln := BioPhylipParser parseString: phylip.
+	seq := aln sequences first.
+	"Only length and content are checked, independent of the detected alphabet"
+	self assert: seq size equals: 10.
+	self assert: (seq asString includesSubstring: 'NN')
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testAsPhylipStringRelaxed [
+	"Parse a relaxed-format alignment, render it back with asPhylipStringRelaxed: and
+	 look for the header, one taxon name and one sequence in the rendered text."
+	| input parsed rendered |
+	input := '3 10
+Homo_sapiens AACGTGGCCA
+Pan_troglodytes CCGTATGGCC
+Gorilla GGCTTTGACC
+'.
+	parsed := BioPhylipParser parseString: input.
+	rendered := BioPhylipParser new asPhylipStringRelaxed: parsed.
+	#( '3 10' 'Homo_sapiens' 'AACGTGGCCA' ) do: [ :fragment |
+		self assert: (rendered includesSubstring: fragment) ]
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testAutoDetectsInterleaved [
+	"Header announces 20 characters; each taxon has 10 on its named line and 10 more on an unnamed line in the next block."
+	| input parsed |
+	input := '2 20
+S1 ATGCTAGCTA
+S2 CCGCTAGCTA
+GCTAGCTAGC
+GCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 2;
+		assert: parsed numberOfBases equals: 20
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testAutoDetectsSequential [
+	"Sequential layout: each taxon starts on a named line and continues on the
+	 unnamed line right after it (4 + 16 = 20 characters per taxon)."
+	| input parsed |
+	input := '2 20
+S1 ATGC
+GCTAGCTAGCTAGCTA
+S2 CCGC
+TAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 2;
+		assert: parsed numberOfBases equals: 20
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testClassicStrict5Taxa [
+	"Five taxa, names padded to a 10-column field, 13 characters each, one line per taxon."
+	| input parsed |
+	input := '5 13
+Alpha     AACGTGGCCACAT
+Beta      AAGGTCGCCACAC
+Gamma     CAGTTCGCCACAA
+Delta     GAGATTTCCGCCT
+Epsilon   GAGATCTCCGCCC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 5;
+		assert: parsed numberOfBases equals: 13;
+		assert: (parsed sequenceNames includes: 'Alpha');
+		assert: (parsed sequenceNames includes: 'Epsilon');
+		assert: parsed sequences first asString equals: 'AACGTGGCCACAT';
+		assert: parsed sequences last asString equals: 'GAGATCTCCGCCC'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testDNAAlphabetDetection [
+	"Sequences made only of A, C, G and T must be given the unambiguous DNA alphabet."
+	| phylip aln |
+	phylip := '2 10
+Seq1 AACGTGGCCA
+Seq2 CCGTATGGCA
+'.
+	aln := BioPhylipParser parseString: phylip.
+	self assert: aln sequences first alphabet class name equals: #BioIUPACUnambiguousDNA
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testGapsAndAmbiguityCodes [
+	"Input mixes gaps (-), unknowns (?) and ambiguity letters; the first sequence must come back verbatim."
+	| input parsed |
+	input := '3 10
+Seq1 AAC-GG??TN
+Seq2 CCN-AT???K
+Seq3 GGRYY??-KM
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 10;
+		assert: parsed sequences first asString equals: 'AAC-GG??TN'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testInterleavedWithBlankLines [
+	"Two interleaved blocks of 20 characters separated by an empty line must add up to 40 per taxon."
+	| input parsed |
+	input := '3 40
+Taxon1    ATGCTAGCTAGCTAGCTAGC
+Taxon2    CCGCTAGCTAGCTAGCTAGC
+Taxon3    GGGCTAGCTAGCTAGCTAGC
+
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 40
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testMinimalFile [
+	"Smallest input exercised here: two taxa with one-letter names and a single character each."
+	| input parsed |
+	input := '2 1
+A T
+B G
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 2;
+		assert: parsed numberOfBases equals: 1;
+		assert: parsed sequences first asString equals: 'T';
+		assert: parsed sequences last asString equals: 'G'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testParserProperties [
+	"Check the taxon count, the alignment width and the presence of the first and last names."
+	| input parsed |
+	input := '3 10
+Taxon1 AACGTGGCCA
+Taxon2 CCGTATGGCC
+Taxon3 GGCTTTGACC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 10;
+		assert: (parsed sequenceNames includes: 'Taxon1');
+		assert: (parsed sequenceNames includes: 'Taxon3')
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testProteinAlphabetDetection [
+	"Sequences written with amino acid letters must be given the protein alphabet."
+	| phylip aln |
+	phylip := '2 15
+Human   MVKQLEARKRPEQQE
+Mouse   MVKQLEARHRPEQQK
+'.
+	aln := BioPhylipParser parseString: phylip.
+	self assert: aln sequences first alphabet class name equals: #BioIUPACProtein
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testProteinSequences [
 
-	self assert: parseResult size equals: 4.
-	self assert: parseResult first equals: 5.
-	self assert: parseResult second equals: 176.
-	self assert: (parseResult third bioHasEqualElements:
-			 #( 'cox2_leita' 'cox2_crifa' 'cox2_bsalt' 'cox2_trybb'
-			    'cox2_tborr' )).
-	self assert: (parseResult fourth bioHasEqualElements:
-			 #( 'MAFILSFWMIFLLDSVIVLLSFVCFVCVWICALLFSTVLLVSKLNNIYCTWDFTASKFIDVYWFTIGGMFSLGLLLRLCLLLYFGHLNFVSFDLCKVVGFQWYWVYFIFGETTIFSNLILESDYMIGDLRLLQCNHVLTLLSLVIYKLWLSAVDVIHSFAISSLGVKVENLVAVMK'
-			    'MAFILSFWMIFLIDAVIVLLSFVCFVCIWICSLFFSSFLLVSKINNVYCTWDFTASKFIDAYWFTIGGMFVLCLLLRLCLLLYFGCLNFVSFDLCKVVGFQWYWVYFIFGETTIFSNLILESDYLIGDLRLLQCNHVLTLLSLVIYKLWLSAVDVIHSFAVSSLGIKVDCIPGRCN'
-			    'MSFIISFWMLFLIDSLIVLLSGAIFVCIWICSLFFLCILFICKLDYIFCSWDFISAKFIDLYWFTLGCLFIVCLLIRLCLLLYFSCLNFVCFDLCKCIGFQWYWVYFIFGETTIFSNLILESDYLIGDLRLLQCNHVLTLLSLVIYKVWLSAIDVIHSFTLANLGIKVD??PGRCN'
-			    'MSFILTFWMIFLMDSIIVLISFSIFLSVWICALIIATVLTVTKINNIYCTWDFISSKFIDTYWFVLGMMFILCLLLRLCLLLYFSCINFVSFDLCKVIGFQWYWVYFLFGETTIFSNLILESDYLIGDLRILQCNHVLTLLSLVIYKLWVSAVDVIHSFTISSLGIKVENPGRCNE'
-			    'MLFFINQLLLLLVDTFVILEIFSLFVCVFIIVMYILFINYNIFLKNINVYLDFIGSKYLDLYWFLIGIFFVIVLLIRLCLLLYYSWISLLIFDLCKIMGFQWYWIFFVFKENVIFSNLLIESDYWIGDLRLLQCNNTFNLICLVVYKIWVTSIDVIHSFTISTLGIKIDCIPGRCN' ))
+	| phylip aln |
+	"Protein sequences with amino acid codes"
+	phylip := '3 10
+Human MVKQLEARKR
+Mouse MVKQLEARHR
+Chicken GGCTTTGACC
+'.
+	aln := BioPhylipParser parseString: phylip.
+	self assert: aln size equals: 3.
+	self assert: aln numberOfBases equals: 10.
+	self assert: (aln sequenceNames includes: 'Human')
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testRNAAlphabetDetection [
+	"Sequences made only of A, C, G and U must be given the unambiguous RNA alphabet."
+	| phylip aln |
+	phylip := '2 10
+Seq1 AACGUGGUUU
+Seq2 CCGUAUGGAU
+'.
+	aln := BioPhylipParser parseString: phylip.
+	self assert: aln sequences first alphabet class name equals: #BioIUPACUnambiguousRNA
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testRelaxedInterleaved [
+	"Relaxed names (longer than 10 characters, separated by one space) with a second interleaved block of 20."
+	| input parsed |
+	input := '3 40
+Homo_sapiens ATGCTAGCTAGCTAGCTAGC
+Pan_troglodytes CCGCTAGCTAGCTAGCTAGC
+Gorilla_gorilla GGGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 40;
+		assert: (parsed sequenceNames includes: 'Homo_sapiens');
+		assert: (parsed sequenceNames includes: 'Gorilla_gorilla')
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testRelaxedSequential [
+	"Four relaxed-name taxa, each with its whole 20-character sequence on a single line."
+	| input parsed |
+	input := '4 20
+Homo_sapiens AACGTGGCCACATACGTGGC
+Pan_troglodytes AAGGTCGCCACACAAGGTCC
+Gorilla_gorilla CAGTTCGCCACAACAGTTCC
+Pongo_abelii GAGATTTCCGCCTGAGATTT
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 4;
+		assert: parsed numberOfBases equals: 20;
+		assert: (parsed sequenceNames includes: 'Homo_sapiens');
+		assert: (parsed sequenceNames includes: 'Pongo_abelii')
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testRoundTripStrict [
+	"Parse, render with asPhylipStringStrict: and look for header, name and sequence fragments.
+	 NOTE(review): the rendered text is never parsed again, so this is not a full round trip."
+	| input parsed rendered |
+	input := '3 10
+Taxon1   AACGTGGCCA
+Taxon2   CCGTATGGCC
+Taxon3   GGCTTTGACC
+'.
+	parsed := BioPhylipParser parseString: input.
+	rendered := BioPhylipParser new asPhylipStringStrict: parsed.
+	#( '3 10' 'Taxon1' 'AACGTGGCCA' ) do: [ :fragment |
+		self assert: (rendered includesSubstring: fragment) ]
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testSequentialWithWrapping [
+	"Sequential layout where every taxon wraps onto one unnamed continuation line (20 + 20 characters)."
+	| input parsed |
+	input := '3 40
+Taxon1    ATGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+Taxon2    CCGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+Taxon3    GGGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 40;
+		assert: parsed sequences first asString size equals: 40
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testSpacesInSequences [
+	"Baseline two-taxon parse. NOTE(review): despite the selector, no sequence in this input contains a space; confirm whether embedded blanks were meant to be exercised."
+	| input parsed |
+	input := '2 10
+Seq1 AACGTGGCCA
+Seq2 CCGTATGGCC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 2;
+		assert: parsed numberOfBases equals: 10;
+		assert: parsed sequences first asString equals: 'AACGTGGCCA'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testStrictInterleaved [
+	"Strict (10-column) names followed by a second unnamed block of 20 characters.
+	 Checks the exact name order and the full concatenated first sequence."
+	| input parsed |
+	input := '3 40
+Taxon1    ATGCTAGCTAGCTAGCTAGC
+Taxon2    CCGCTAGCTAGCTAGCTAGC
+Taxon3    GGGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+TAGCTAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 3;
+		assert: parsed numberOfBases equals: 40.
+	self
+		assert: parsed sequenceNames asArray
+		equals: #( 'Taxon1' 'Taxon2' 'Taxon3' );
+		assert: parsed sequences first asString
+		equals: 'ATGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testStrictSequential [
+	"Strict-name, one-line-per-taxon input (the same text testClassicStrict5Taxa parses); checks counts, two names and the first sequence."
+	| input parsed |
+	input := '5 13
+Alpha     AACGTGGCCACAT
+Beta      AAGGTCGCCACAC
+Gamma     CAGTTCGCCACAA
+Delta     GAGATTTCCGCCT
+Epsilon   GAGATCTCCGCCC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 5;
+		assert: parsed numberOfBases equals: 13;
+		assert: (parsed sequenceNames includes: 'Alpha');
+		assert: (parsed sequenceNames includes: 'Epsilon');
+		assert: parsed sequences first asString equals: 'AACGTGGCCACAT'
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testThreeBlockInterleaved [
+	"Two taxa, 60 characters each, given as interleaved blocks of 30. NOTE(review): the selector says three blocks but only two are present."
+	| input parsed |
+	input := '2 60
+S1 ATGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+S2 ATGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+GCTAGCTAGCTAGCTAGCTAGCTAGCTAGC
+GCTAGCTAGCTAGCTAGCTAGCTAGCTAGC
+'.
+	parsed := BioPhylipParser parseString: input.
+	self assert: parsed size equals: 2;
+		assert: parsed numberOfBases equals: 60;
+		assert: parsed sequences first asString size equals: 60
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testValidatorRejectsWrongSeqLength [
+	"The header promises 10 characters but each taxon supplies 6, so parsing must signal an Error."
+	| truncated |
+	truncated := '2 10
+Seq1 AACGTG
+Seq2 CCGTAT
+'.
+	self should: [ BioPhylipParser parseString: truncated ] raise: Error
+]
+
+{ #category : 'tests' }
+BioPhylipParserTest >> testValidatorRejectsWrongTaxaCount [
+	"The header promises 3 taxa but only 2 are supplied, so parsing must signal an Error."
+	| incomplete |
+	incomplete := '3 10
+Seq1 AACGTGGCCA
+Seq2 CCGTATGGCC
+'.
+	self should: [ BioPhylipParser parseString: incomplete ] raise: Error
 ]
diff --git a/repository/BioParsers-Tests/BioPhylipPetitParserTest.class.st b/repository/BioParsers-Tests/BioPhylipPetitParserTest.class.st
new file mode 100644
index 00000000..9975e9b2
--- /dev/null
+++ b/repository/BioParsers-Tests/BioPhylipPetitParserTest.class.st
@@ -0,0 +1,254 @@
+Class {
+	#name : 'BioPhylipPetitParserTest',
+	#superclass : 'BioAbstractParserTest',
+	#category : 'BioParsers-Tests',
+	#package : 'BioParsers-Tests'
+}
+
+{ #category : 'testing' }
+BioPhylipPetitParserTest >> firstLineTokenizer [
+	"Answer the header-line tokenizer of a fresh BioPhylipParser. NOTE(review): this change set reparents BioPhylipParser to BioObject; confirm it still understands firstLineTokenizer."
+	^ BioPhylipParser new firstLineTokenizer 
+]
+
+{ #category : 'testing' }
+BioPhylipPetitParserTest >> phylipInterleavedDNA [
+	"Answer an interleaved DNA sample: six taxa with 10-column names, in three blocks. The header reads 13 although each taxon carries 58 characters."
+	^ '  6   13    
+Archaeopt CGATGCTTAC CGCCGATGCT
+HesperorniCGTTACTCGT TGTCGTTACT
+BaluchitheTAATGTTAAT TGTTAATGTT
+B. virginiTAATGTTCGT TGTTAATGTT
+BrontosaurCAAAACCCAT CATCAAAACC
+B.subtilisGGCAGCCAAT CACGGCAGCC
+
+TACCGCCGAT GCTTACCGC
+CGTTGTCGTT ACTCGTTGT
+AATTGTTAAT GTTAATTGT
+CGTTGTTAAT GTTCGTTGT
+CATCATCAAA ACCCATCAT
+AATCACGGCA GCCAATCAC
+
+CCCCGCCCCC GCTTACCGC
+CCCCGTCCCC ACTCGTTGT
+CCCCGTCCCC GTTAATTGT
+CCCCGTCCCC GTTCGTTGT
+CCCCATCCCC ACCCATCAT
+CCCCACCCCC GCCAATCAC
+'
+]
+
+{ #category : 'testing' }
+BioPhylipPetitParserTest >> phylipInterleavedProtein [
+	"Answer an interleaved protein sample: five taxa with 10-column names, 176 residues each, in four blocks (50 + 50 + 50 + 26)."
+	^ ' 5 176
+cox2_leitaMAFILSFWMI FLLDSVIVLL SFVCFVCVWI CALLFSTVLL VSKLNNIYCT
+cox2_crifaMAFILSFWMI FLIDAVIVLL SFVCFVCIWI CSLFFSSFLL VSKINNVYCT
+cox2_bsaltMSFIISFWML FLIDSLIVLL SGAIFVCIWI CSLFFLCILF ICKLDYIFCS
+cox2_trybbMSFILTFWMI FLMDSIIVLI SFSIFLSVWI CALIIATVLT VTKINNIYCT
+cox2_tborrMLFFINQLLL LLVDTFVILE IFSLFVCVFI IVMYILFINY NIFLKNINVY
+
+WDFTASKFID VYWFTIGGMF SLGLLLRLCL LLYFGHLNFV SFDLCKVVGF
+WDFTASKFID AYWFTIGGMF VLCLLLRLCL LLYFGCLNFV SFDLCKVVGF
+WDFISAKFID LYWFTLGCLF IVCLLIRLCL LLYFSCLNFV CFDLCKCIGF
+WDFISSKFID TYWFVLGMMF ILCLLLRLCL LLYFSCINFV SFDLCKVIGF
+LDFIGSKYLD LYWFLIGIFF VIVLLIRLCL LLYYSWISLL IFDLCKIMGF
+
+QWYWVYFIFG ETTIFSNLIL ESDYMIGDLR LLQCNHVLTL LSLVIYKLWL
+QWYWVYFIFG ETTIFSNLIL ESDYLIGDLR LLQCNHVLTL LSLVIYKLWL
+QWYWVYFIFG ETTIFSNLIL ESDYLIGDLR LLQCNHVLTL LSLVIYKVWL
+QWYWVYFLFG ETTIFSNLIL ESDYLIGDLR ILQCNHVLTL LSLVIYKLWV
+QWYWIFFVFK ENVIFSNLLI ESDYWIGDLR LLQCNNTFNL ICLVVYKIWV
+
+SAVDVIHSFA ISSLGVKVEN LVAVMK
+SAVDVIHSFA VSSLGIKVDC IPGRCN
+SAIDVIHSFT LANLGIKVD? ?PGRCN
+SAVDVIHSFT ISSLGIKVEN PGRCNE
+TSIDVIHSFT ISTLGIKIDC IPGRCN
+'
+]
+
+{ #category : 'testing' }
+BioPhylipPetitParserTest >> speciesDNALineTokenizer [
+	"Answer the single-species-line tokenizer of a fresh BioPhylipParser. NOTE(review): this change set reparents BioPhylipParser to BioObject; confirm it still understands speciesDNALineTokenizer."
+	^ BioPhylipParser new speciesDNALineTokenizer 
+]
+
+{ #category : 'testing' }
+BioPhylipPetitParserTest >> speciesDNANamedBlockTokenizer [
+	"Answer the named-block tokenizer of a fresh BioPhylipParser. NOTE(review): this change set reparents BioPhylipParser to BioObject; confirm it still understands speciesDNANamedBlockTokenizer."
+	^ BioPhylipParser new speciesDNANamedBlockTokenizer 
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeDNASpeciesBlock01 [
+	"Tokenize a block of six named species lines and check the first record:
+	 its first element is the 10-character name field (padding kept) and its
+	 second element is the sequence text with the inner space kept."
+
+	| speciesBlock firstRecord |
+	speciesBlock := 'Archaeopt CGATGCTTAC CGC
+HesperorniCGTTACTCGT TGT
+BaluchitheTAATGTTAAT TGT
+B. virginiTAATGTTCGT TGT
+BrontosaurCAAAACCCAT CAT
+B.subtilisGGCAGCCAAT CAC'.
+
+	"Only the first record is asserted; the remaining five are not checked."
+	parseResult := self speciesDNANamedBlockTokenizer parse: speciesBlock.
+	firstRecord := parseResult first.
+
+	self assert: firstRecord first equals: 'Archaeopt '.
+	self assert: firstRecord second equals: 'CGATGCTTAC CGC'
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeDNASpeciesBlock02 [
+	"Variant of testTokenizeDNASpeciesBlock01 in which the second species
+	 name (Hes) is shorter than the 10-character name field and is padded
+	 with spaces. Only the first record is asserted."
+
+	| speciesBlock firstRecord |
+
+	speciesBlock := 'Archaeopt CGATGCTTAC CGC
+Hes       CGTTACTCGT TGT
+BaluchitheTAATGTTAAT TGT
+B. virginiTAATGTTCGT TGT
+BrontosaurCAAAACCCAT CAT
+B.subtilisGGCAGCCAAT CAC'.
+
+	"NOTE(review): the short-name record, which is what sets this variant
+	 apart, is never inspected. Decide what the tokenizer should answer
+	 for a space-padded name and add an assertion on the second record."
+
+	parseResult := self speciesDNANamedBlockTokenizer parse: speciesBlock.
+	firstRecord := parseResult first.
+
+	self assert: firstRecord first equals: 'Archaeopt '.
+	self assert: firstRecord second equals: 'CGATGCTTAC CGC'
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeDNASpeciesLine01 [
+	"A species line whose sequence contains a space: the name keeps its padding, the sequence comes back without the space."
+
+	| speciesLineBlock expectedResult |
+
+	speciesLineBlock := 'Archaeopt CGATGCTTAC CGC'.
+	expectedResult := #('Archaeopt ' 'CGATGCTTACCGC').
+	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
+
+	self assert: (parseResult bioHasEqualElements: expectedResult)
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeDNASpeciesLine02 [
+	"A species line whose sequence has no inner space: answers the padded name and the sequence unchanged."
+
+	| speciesLineBlock expectedResult |
+
+	speciesLineBlock := 'Archaeopt CGATGCTTACCGC'.
+	expectedResult := #('Archaeopt ' 'CGATGCTTACCGC').
+	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
+
+	self assert: (parseResult bioHasEqualElements: expectedResult)
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeDNASpeciesLine03 [
+	"A name that itself contains a dot and a space and fills all 10 columns, directly followed by the sequence."
+
+	| speciesLineBlock expectedResult |
+
+	speciesLineBlock := 'B. virginiTAATGTTCGT TGT'.
+	expectedResult := #('B. virgini' 'TAATGTTCGTTGT').
+	parseResult := self speciesDNALineTokenizer parse: speciesLineBlock.
+
+	self assert: (parseResult bioHasEqualElements: expectedResult)
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeFirstLine01 [
+	"Header line with no surrounding blanks: answers the two counts as strings."
+
+	| firstLine |
+
+	firstLine := '6   13
+'.
+	parseResult := self firstLineTokenizer parse: firstLine.
+	self assert: (parseResult bioHasEqualElements: #('6' '13') )
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeFirstLine02 [
+	"Header line with leading blanks: the blanks are ignored and the two counts are answered as strings."
+
+	| firstLine |
+
+	firstLine := '      6   13
+'.
+	parseResult := self firstLineTokenizer parse: firstLine.
+	self assert: (parseResult bioHasEqualElements: #('6' '13') )
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeFirstLine03 [
+	"Header line with trailing blanks: the blanks are ignored and the two counts are answered as strings."
+
+	| firstLine |
+
+	firstLine := '6   13    
+'.
+	parseResult := self firstLineTokenizer parse: firstLine.
+	self assert: (parseResult bioHasEqualElements: #('6' '13') )
+
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeInterleavedDNA [
+	"Tokenize the interleaved DNA sample: expects four elements, namely both header counts as integers, the padded names, and one joined blank-free sequence per taxon."
+
+	| phylipString |
+	phylipString := self phylipInterleavedDNA.
+
+	parseResult := BioParser tokenizePhylipInterleavedDNA: phylipString.
+	self assert: parseResult size equals: 4.
+	self assert: parseResult first equals: 6.
+	self assert: parseResult second equals: 13.
+	self assert: (parseResult third bioHasEqualElements:
+			 #( 'Archaeopt ' 'Hesperorni' 'Baluchithe' 'B. virgini'
+			    'Brontosaur' 'B.subtilis' )).
+	self assert: (parseResult fourth bioHasEqualElements:
+			 #( 'CGATGCTTACCGCCGATGCTTACCGCCGATGCTTACCGCCCCCGCCCCCGCTTACCGC'
+			    'CGTTACTCGTTGTCGTTACTCGTTGTCGTTACTCGTTGTCCCCGTCCCCACTCGTTGT'
+			    'TAATGTTAATTGTTAATGTTAATTGTTAATGTTAATTGTCCCCGTCCCCGTTAATTGT'
+			    'TAATGTTCGTTGTTAATGTTCGTTGTTAATGTTCGTTGTCCCCGTCCCCGTTCGTTGT'
+			    'CAAAACCCATCATCAAAACCCATCATCAAAACCCATCATCCCCATCCCCACCCATCAT'
+			    'GGCAGCCAATCACGGCAGCCAATCACGGCAGCCAATCACCCCCACCCCCGCCAATCAC' ))
+]
+
+{ #category : 'tests' }
+BioPhylipPetitParserTest >> testTokenizeInterleavedProtein [
+	"Tokenize the interleaved protein sample: expects four elements, namely both header counts as integers, the names, and one joined blank-free sequence per taxon."
+	| phylipString |
+	phylipString := self phylipInterleavedProtein.
+	parseResult := BioParser tokenizePhylipInterleavedProtein: phylipString.
+
+	self assert: parseResult size equals: 4.
+	self assert: parseResult first equals: 5.
+	self assert: parseResult second equals: 176.
+	self assert: (parseResult third bioHasEqualElements:
+			 #( 'cox2_leita' 'cox2_crifa' 'cox2_bsalt' 'cox2_trybb'
+			    'cox2_tborr' )).
+	self assert: (parseResult fourth bioHasEqualElements:
+			 #( 'MAFILSFWMIFLLDSVIVLLSFVCFVCVWICALLFSTVLLVSKLNNIYCTWDFTASKFIDVYWFTIGGMFSLGLLLRLCLLLYFGHLNFVSFDLCKVVGFQWYWVYFIFGETTIFSNLILESDYMIGDLRLLQCNHVLTLLSLVIYKLWLSAVDVIHSFAISSLGVKVENLVAVMK'
+			    'MAFILSFWMIFLIDAVIVLLSFVCFVCIWICSLFFSSFLLVSKINNVYCTWDFTASKFIDAYWFTIGGMFVLCLLLRLCLLLYFGCLNFVSFDLCKVVGFQWYWVYFIFGETTIFSNLILESDYLIGDLRLLQCNHVLTLLSLVIYKLWLSAVDVIHSFAVSSLGIKVDCIPGRCN'
+			    'MSFIISFWMLFLIDSLIVLLSGAIFVCIWICSLFFLCILFICKLDYIFCSWDFISAKFIDLYWFTLGCLFIVCLLIRLCLLLYFSCLNFVCFDLCKCIGFQWYWVYFIFGETTIFSNLILESDYLIGDLRLLQCNHVLTLLSLVIYKVWLSAIDVIHSFTLANLGIKVD??PGRCN'
+			    'MSFILTFWMIFLMDSIIVLISFSIFLSVWICALIIATVLTVTKINNIYCTWDFISSKFIDTYWFVLGMMFILCLLLRLCLLLYFSCINFVSFDLCKVIGFQWYWVYFLFGETTIFSNLILESDYLIGDLRILQCNHVLTLLSLVIYKLWVSAVDVIHSFTISSLGIKVENPGRCNE'
+			    'MLFFINQLLLLLVDTFVILEIFSLFVCVFIIVMYILFINYNIFLKNINVYLDFIGSKYLDLYWFLIGIFFVIVLLIRLCLLLYYSWISLLIFDLCKIMGFQWYWIFFVFKENVIFSNLLIESDYWIGDLRLLQCNNTFNLICLVVYKIWVTSIDVIHSFTISTLGIKIDCIPGRCN' ))
+]
diff --git a/repository/BioParsers/BioPhylipParser.class.st b/repository/BioParsers/BioPhylipParser.class.st
index 46e0ce8a..bea56d9f 100644
--- a/repository/BioParsers/BioPhylipParser.class.st
+++ b/repository/BioParsers/BioPhylipParser.class.st
@@ -1,218 +1,618 @@
-"
-Documentation taken from http://bioweb2.pasteur.fr/docs/phylip/doc/main.html#inputfiles
-
-"
 Class {
 	#name : 'BioPhylipParser',
-	#superclass : 'BioAbstractTextParser',
-	#category : 'BioParsers-Core',
+	#superclass : 'BioObject',
+	#instVars : [
+		'numTaxa',
+		'numChars',
+		'taxaNames',
+		'sequences',
+		'isInterleaved',
+		'isStrict',
+		'currentLine',
+		'lines',
+		'errorLine'
+	],
+	#category : 'BioParsers-PHYLIP',
 	#package : 'BioParsers',
-	#tag : 'Core'
+	#tag : 'PHYLIP'
 }
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> buildDNAResults: aCollection [
-	" Answer an identified object for the receiver's parsing output "
+{ #category : 'parsing' }
+BioPhylipParser class >> parseFile: aFileReference [
+	"Convenience entry point: answer what a fresh parser answers to parseFile: aFileReference." ^ self new parseFile: aFileReference
+]
 
-	| tokenized sequences |
-	
-	tokenized := self buildTokens: aCollection.
-	sequences := tokenized third 
-			with: tokenized fourth 
-			do: [: first : snd | BioSequence newAmbiguousDNA: snd named: first ].
-	^ BioPhylip new
-		numberOfTaxa: tokenized first;
-		numberOfCharacters: tokenized second;
-		sequences: sequences.
+{ #category : 'parsing' }
+BioPhylipParser class >> parseString: aString [
+	"Convenience entry point: answer what a fresh parser answers to parseString: aString." ^ self new parseString: aString
 ]
 
-{ #category : 'accessing-protein' }
-BioPhylipParser >> buildProteinResults: aCollection [
-	" Answer an identified object for the receiver's parsing output "
+{ #category : 'converting' }
+BioPhylipParser >> asAlignment [
+	"Answer a new BioAlignment holding one BioSequence per parsed taxon, in input order.
+	 Residues are upper-cased before being handed to BioSequence class>>newNamed:sequence:."
+	| result |
+	result := BioAlignment new.
+	(1 to: numTaxa) do: [ :index |
+		result addFromSequence: (BioSequence
+			newNamed: (taxaNames at: index)
+			sequence: (sequences at: index) asUppercase) ].
+	^ result
+]
 
-	| tokenized sequences |
-	
-	tokenized := self buildTokens: aCollection.
-	sequences := tokenized third 
-			with: tokenized fourth 
-			do: [: first : snd | BioSequence newProtein: snd named: first ].
-	^ BioPhylip new
-		numberOfTaxa: tokenized first;
-		numberOfCharacters: tokenized second;
-		sequences: sequences.
+{ #category : 'converting' }
+BioPhylipParser >> asPhylipStringRelaxed: anAlignment [
+	"Answer anAlignment serialized as sequential PHYLIP with whitespace-delimited names:
+	 a '<size> <numberOfBases>' header, then one '<name> <residues>' line per sequence (nil name => 'Unnamed')."
+	^ String streamContents: [ :out |
+		out nextPutAll: anAlignment size asString.
+		out space.
+		out nextPutAll: anAlignment numberOfBases asString.
+		out cr.
+		anAlignment sequences do: [ :each |
+			| label |
+			label := each name ifNil: [ 'Unnamed' ].
+			out nextPutAll: label.
+			out space.
+			out nextPutAll: each asString.
+			out cr ] ]
+]
+
+{ #category : 'converting' }
+BioPhylipParser >> asPhylipStringStrict: anAlignment [
+	"Answer anAlignment serialized as sequential PHYLIP with fixed-width names.
+	 The header line is '<size> <numberOfBases>'. Each following line is a name
+	 field of exactly 10 characters immediately followed by the residues:
+	 longer names are cut to their first 10 characters, shorter ones are
+	 right-padded with spaces, and a nil name is written as 'Unnamed'."
+
+	| blanks |
+	blanks := String new: 10 withAll: Character space.
+	^ String streamContents: [ :out |
+		out nextPutAll: anAlignment size asString.
+		out space.
+		out nextPutAll: anAlignment numberOfBases asString.
+		out cr.
+		anAlignment sequences do: [ :each |
+			| label |
+			label := each name ifNil: [ 'Unnamed' ].
+			"Appending ten blanks and keeping the first ten characters both
+			 truncates long names and pads short ones."
+			out nextPutAll: ((label , blanks) first: 10).
+			out nextPutAll: each asString.
+			out cr ] ]
+]
+
+{ #category : 'private' }
+BioPhylipParser >> cleanSequence: aString [
+	"Answer a copy of aString with every separator character (Character>>isSeparator) dropped."
+
+	^ aString select: [ :each | each isSeparator not ]
 ]
 
 { #category : 'accessing' }
-BioPhylipParser >> buildTokens: aCollection [
-	" Answer a tokenized <Collection> parsing aCollection "
-
-	^ Array 
-		with: (self taxaNumberFrom: aCollection)
-		with: (aCollection first second asNumber)
-		with: (aCollection second collect: #first)
-		with: (self buildTokensFrom: aCollection).
+BioPhylipParser >> currentLine [
 	
+	^ currentLine
+]
+
+{ #category : 'accessing' }
+BioPhylipParser >> currentLine: anInt [ 
+
+	currentLine := anInt
+]
 
+{ #category : 'private' }
+BioPhylipParser >> detectFormat [
+	"Set isStrict and isInterleaved by inspecting the line at currentLine.
+	 Signals an Error when currentLine is past the last line."
+
+	| dataLine candidate |
+	currentLine > lines size ifTrue: [
+		Error signal: 'No data lines after header' ].
+	dataLine := lines at: currentLine.
+	candidate := self extractNameFromLine: dataLine.
+	"Strict only when the name fits in 10 characters, the line is at least 10
+	 long, and its first 10 characters (trimmed) are exactly that name."
+	isStrict := (candidate size <= 10 and: [ dataLine size >= 10 ])
+		            and: [
+		            (dataLine copyFrom: 1 to: 10) trimBoth = candidate ].
+	isInterleaved := self detectInterleaved
 ]
 
-{ #category : 'accessing-private' }
-BioPhylipParser >> buildTokensBlock [ 
+{ #category : 'private' }
+BioPhylipParser >> detectInterleaved [
+	"Answer true when the input looks interleaved, false when it looks sequential.
+	 If the first data line already carries numChars residues, answer false (sequential or single-block).
+	 Otherwise skip the first numTaxa non-blank lines and inspect the next numTaxa non-blank lines:
+	 all continuation-like => true; a new-taxon-like line, or too few lines, => false."
+
+	| i firstSeqLen firstDataLineIdx consecutiveContLines |
+	firstDataLineIdx := currentLine.
+	[ "advance to the first non-blank line at or after currentLine"
+		firstDataLineIdx <= lines size and: [
+			(lines at: firstDataLineIdx) trimBoth isEmpty ] ] whileTrue: [
+		firstDataLineIdx := firstDataLineIdx + 1 ].
+	firstDataLineIdx > lines size ifTrue: [ ^ false ]. "no data at all"
+	firstSeqLen := (self extractSequenceFromLine:
+		                (lines at: firstDataLineIdx)) size.
+	firstSeqLen >= numChars ifTrue: [ ^ false ]. "whole sequence on the first line"
+	i := firstDataLineIdx.
+	numTaxa timesRepeat: [ "step over numTaxa non-blank lines (the presumed first block)"
+			[ i <= lines size and: [ (lines at: i) trimBoth isEmpty ] ]
+				whileTrue: [ i := i + 1 ].
+			i := i + 1 ].
+	[ i <= lines size and: [ (lines at: i) trimBoth isEmpty ] ]
+		whileTrue: [ i := i + 1 ].
+	i > lines size ifTrue: [ ^ false ]. "nothing follows the first block"
+	consecutiveContLines := 0.
+	[ i <= lines size and: [ consecutiveContLines < numTaxa ] ]
+		whileTrue: [
+				| trimmed |
+				trimmed := (lines at: i) trimBoth.
+				trimmed isEmpty
+					ifTrue: [ i := i + 1 ]
+					ifFalse: [
+							(self nextLineLooksLikeNewTaxon: (lines at: i)) ifTrue: [
+								^ false ]. "a named line right after the first block => sequential"
+							consecutiveContLines := consecutiveContLines + 1.
+							i := i + 1 ] ].
+	^ consecutiveContLines >= numTaxa
+]
+
+{ #category : 'private' }
+BioPhylipParser >> detectSequenceClass [
+	"Answer the class used to create sequences; always BioSequence.
+	 NOTE(review): presumably BioSequence class>>newNamed:sequence: picks the alphabet itself - confirm there."
+	^ BioSequence
+]
 
-	^ [: node | 
-		OrderedCollection
-			with: node first
-			with: (node second collect: #allButLast) 
-			with: (((node third reject: [: line | line first isEmpty ]) collect: #first) collect: #withoutBlanks ) ]
+{ #category : 'private' }
+BioPhylipParser >> detectStrictOrRelaxed [
+	"Set isStrict from the line at currentLine; false when there is no such line.
+	 Strict means: the relaxed-style name has at most 10 characters, the line has at
+	 least 10, and the first 10 characters of the line, trimmed, equal that name."
+
+	| dataLine candidate |
+	currentLine > lines size ifTrue: [
+		isStrict := false.
+		^ self ].
+	dataLine := lines at: currentLine.
+	candidate := self extractNameFromLineRelaxed: dataLine.
+	"The size tests come first so the 10-character copy is never out of bounds."
+	isStrict := (candidate size <= 10 and: [ dataLine size >= 10 ])
+		            and: [
+		            (dataLine copyFrom: 1 to: 10) trimBoth = candidate ]
 ]
 
 { #category : 'accessing' }
-BioPhylipParser >> buildTokensFrom: aCollection [
+BioPhylipParser >> errorAt: lineNum message: aString [
+	"Signal a parse error with line number context."
 
-	| taxaNumber collection seqIndex seqBlock |
+	self error:
+		'Phylip parse error at line ' , lineNum asString , ': ' , aString
+]
 
-	taxaNumber := self taxaNumberFrom: aCollection.
-	collection := self buildTokensFromFirstBlock: aCollection.
-	seqIndex := 1.
-	(seqBlock := aCollection third) doWithIndex: [:seq :index | 
-			seqIndex = (taxaNumber + 1)
-				ifTrue: [seqIndex := 1].
-			index <= seqBlock size
-				ifFalse: [ ^ collection ].
-			collection
-				at: seqIndex
-				put: (String
-						streamContents: [:str | str
-								nextPutAll: (collection at: seqIndex);
-								nextPutAll: (seqBlock at: index)]).
-			seqIndex := seqIndex + 1].
-	^ collection
+{ #category : 'private' }
+BioPhylipParser >> extractNameFromLine: aLine [
+	"Answer the taxon name found at the start of aLine.
+	 When isStrict is true the name is the first 10 characters of aLine, trimmed
+	 (or all of aLine, trimmed, if it is shorter than 10). When isStrict is false
+	 or still nil, aLine is trimmed and the name runs up to its first separator."
+
+	| stripped cut |
+	(isStrict notNil and: [ isStrict ]) ifTrue: [
+		^ aLine size < 10
+			  ifTrue: [ aLine trimBoth ]
+			  ifFalse: [ (aLine copyFrom: 1 to: 10) trimBoth ] ].
+	stripped := aLine trimBoth.
+	cut := self indexOfFirstWhitespaceIn: stripped.
+	^ cut = 0
+		  ifTrue: [ stripped ]
+		  ifFalse: [ stripped copyFrom: 1 to: cut - 1 ]
+]
 
+{ #category : 'private' }
+BioPhylipParser >> extractNameFromLineRelaxed: aLine [
+	"Answer the text of aLine, trimmed, up to (not including) its first separator;
+	 the whole trimmed line when it contains no separator."
+	| stripped cut |
+	stripped := aLine trimBoth.
+	cut := self indexOfFirstWhitespaceIn: stripped.
+	^ cut = 0
+		  ifTrue: [ stripped ] ifFalse: [ stripped copyFrom: 1 to: cut - 1 ]
+]
+
+{ #category : 'private' }
+BioPhylipParser >> extractSequenceFromLine: aLine [
+	"Answer the residues on aLine that follow the taxon name, with separators removed.
+	 Strict: everything after column 10 ('' if the line has at most 10 characters). Relaxed: see below."
+	| nameSeq nameEnd |
+	isStrict == true ifTrue: [ "a nil isStrict (format not detected yet) falls through to relaxed, as in extractNameFromLine:"
+			aLine size <= 10 ifTrue: [ ^ '' ].
+			^ self cleanSequence: (aLine copyFrom: 11 to: aLine size) ].
+	"Relaxed: everything after the first separator of the trimmed line; '' when there is none"
+	nameSeq := aLine trimBoth.
+	nameEnd := self indexOfFirstWhitespaceIn: nameSeq.
+	nameEnd = 0 ifTrue: [ ^ '' ].
+	^ self cleanSequence:
+		  (nameSeq copyFrom: nameEnd + 1 to: nameSeq size)
 ]
 
 { #category : 'accessing' }
-BioPhylipParser >> buildTokensFromFirstBlock: aCollection [
+BioPhylipParser >> indexOfFirstWhitespaceIn: aString [
+	"Answer the 1-based index of the first character of aString that answers true to
+	 isSeparator (space and tab among them), or 0 if there is none."
 
-	^ aCollection second collect: [: seq | seq second withoutBlanks ]
+	1 to: aString size do: [ :i |
+	(aString at: i) isSeparator ifTrue: [ ^ i ] ].
+	^ 0
 ]
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> dnaInterleaveLineTokenizer [
+{ #category : 'initialization' }
+BioPhylipParser >> initialize [
 
-	^ (self dnaInterleaveSequenceTokenizer , #newline asPParser) star
+	super initialize.
+	taxaNames := OrderedCollection new.
+	sequences := OrderedCollection new.
+	currentLine := 0.
+	lines := #(  ).
+	errorLine := 0
 ]
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> dnaInterleaveSequenceTokenizer [
+{ #category : 'accessing' }
+BioPhylipParser >> isInterleaved [ "Answer the interleaved flag; nil until a parse or the setter assigns it." ^ isInterleaved
+]
 
-	^ #dnaLetter asPParser trimBlanks star flatten
+{ #category : 'accessing' }
+BioPhylipParser >> isInterleaved: aBool [ "Record whether the input is treated as interleaved (true) or sequential (false)." isInterleaved := aBool
 ]
 
-{ #category : 'accessing-private' }
-BioPhylipParser >> firstLineTokenizer [
-	" Answer a Parser for parsing the first line of the format "
-	
-	^ (#number asPParser / self parserForAnyButNumber) ,
-		(self parserForAnyButNumber) ,
-		#blank asPParser plus optional flatten ,
-		#newline asPParser ==> [ : node | 
-			node asOrderedCollection 
-				removeAllSuchThat: [ : elem  | elem allSatisfy: [ : e | e = Character space ] ];
-				copyWithoutAll: {
-				Character lf asString .
-				Character cr asString } ]
-]
-
-{ #category : 'accessing-dna' }
-BioPhylipParser >> parseInterleavedDNA: aString [
-	" Answer an object with the result of parsing aString with the receiver's parser "
-	
-	| parseResults |
-	
-	parseResults := self parseString: aString.
-	^ self isSuccess
-		ifTrue: [ results := self buildDNAResults: parseResults ]
-		ifFalse: [ self signalInvalidObject: parseResults ].
+{ #category : 'testing' }
+BioPhylipParser >> isSequenceChar: c [
+	"Answer whether c is one of the IUPAC nucleotide codes (either case) or one of the symbols - ? .
+	 NOTE(review): amino-acid-only letters such as E, F, I, L, P, Q are not in the set - confirm that is intended for protein input."
+	^ 'ACGTURYNWSMKHDVBacgturynwsmkhbdvb-?.' includes: c
 ]
 
-{ #category : 'accessing-protein' }
-BioPhylipParser >> parseInterleavedProtein: aString [
-	" Answer an object with the result of parsing aString with the receiver's parser "
-	
-	| parseResults |
-	
-	parseResults := self parseString: aString.
-	^ self isSuccess
-		ifTrue: [ results := self buildProteinResults: parseResults ]
-		ifFalse: [ self signalInvalidObject: parseResults ].
+{ #category : 'accessing' }
+BioPhylipParser >> isStrict [ "Answer the strict-format flag; nil until detection or the setter assigns it." ^ isStrict
 ]
 
-{ #category : 'accessing-private' }
-BioPhylipParser >> parserForAnyButNumber [
+{ #category : 'accessing' }
+BioPhylipParser >> isStrict: aBool [ "Record whether names are read as fixed 10-character fields (true) or up to the first separator (false)." isStrict := aBool
+]
 
-	^ #digit asPParser negate plus , #number asPParser ==> [: n | n second ] 
+{ #category : 'private' }
+BioPhylipParser >> lineStartsWithName: aLine [
+	"Answer whether aLine appears to open a new taxon rather than continue a sequence.
+	 Blank lines answer false. Strict: a trimmed line shorter than 10 characters
+	 answers true; otherwise the answer is whether its first 10 characters (trimmed)
+	 fail looksLikeSequence:. Relaxed: the text before the first separator must
+	 fail looksLikeSequence:, and that separator must sit at index 50 or earlier."
+
+	| stripped cut |
+	stripped := aLine trimBoth.
+	stripped isEmpty ifTrue: [ ^ false ].
+	isStrict ifTrue: [
+		"Short lines in strict mode are taken to be names"
+		^ stripped size < 10 or: [
+			  (self looksLikeSequence: (stripped copyFrom: 1 to: 10) trimBoth) not ] ].
+	"Relaxed: name, then a separator, then sequence data"
+	cut := self indexOfFirstWhitespaceIn: stripped.
+	"No separator at all, or one beyond index 50, is treated as sequence text."
+	(cut = 0 or: [ cut > 50 ]) ifTrue: [ ^ false ].
+	^ (self looksLikeSequence: (stripped copyFrom: 1 to: cut - 1)) not
 ]
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> speciesDNALineTokenizer [
-	" Answer a Parser for parsing the species names line "
-	
-	^ ((PP2PredicateObjectNode noneOf: self speciesFobiddenNames) times: 10) flatten ,
-		self dnaInterleaveSequenceTokenizer
+{ #category : 'accessing' }
+BioPhylipParser >> lines: anArray [ lines := anArray
 ]
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> speciesDNANamedBlockTokenizer [
-	" Answer a Parser for parsing the sequence blocks "
-	
-	^ (self speciesDNALineTokenizer , #newline asPParser flatten) star
+{ #category : 'private' }
+BioPhylipParser >> looksLikeSequence: aString [
+	"Answer whether aString reads as sequence data, i.e. whether more than 90% of
+	 its characters satisfy isSequenceChar:.
+	 An empty string answers false."
+
+	| hits |
+	aString isEmpty ifTrue: [ ^ false ].
+	hits := aString count: [ :each | self isSequenceChar: each ].
+	"Integer division answers an exact Fraction (or Integer), which is then
+	 compared with the 0.9 threshold; exactly 90% does not qualify."
+	^ hits / aString size > 0.9
 ]
 
-{ #category : 'accessing-private' }
-BioPhylipParser >> speciesFobiddenNames [
-	" Private - Answer a <Collection> with receiver's not allowed Characters in a species name "
-	
-	^ OrderedCollection new
-		add: Character cr;
-		add: Character lf;
-		add: $[;
-		add: $];
-		add: $(;
-		add: $);
-		add: $:;
-		add: $;;
-		add: $,;
-		yourself
+{ #category : 'private' }
+BioPhylipParser >> nextLineLooksLikeContinuation: aLine [
+	"Answer whether aLine reads as more sequence data rather than a named line.
+	 Blank lines answer true. Strict: a trimmed line shorter than 10 characters
+	 answers true, otherwise its first 10 characters (trimmed) must pass
+	 looksLikeSequence:. Relaxed: true when there is no separator or the first one
+	 lies beyond index 30, otherwise the text before it must pass looksLikeSequence:."
+
+	| stripped cut lead |
+	stripped := aLine trimBoth.
+	stripped isEmpty ifTrue: [ ^ true ].
+	isStrict ifTrue: [
+		stripped size < 10 ifTrue: [ ^ true ].
+		lead := (stripped copyFrom: 1 to: 10) trimBoth.
+		^ lead isEmpty or: [ self looksLikeSequence: lead ] ].
+	"Relaxed: decide from the text in front of the first separator"
+	cut := self indexOfFirstWhitespaceIn: stripped.
+	(cut = 0 or: [ cut > 30 ]) ifTrue: [ ^ true ].
+	^ self looksLikeSequence: (stripped copyFrom: 1 to: cut - 1)
+]
+
+{ #category : 'private' }
+BioPhylipParser >> nextLineLooksLikeNewTaxon: aLine [
+	"Answer whether aLine looks like a taxon name followed by sequence data (true)
+	 or like a pure sequence continuation (false). Blank lines answer false.
+	 detectInterleaved reads true as evidence of sequential input and false as evidence of interleaved input."
+
+	| trimmed wsIdx potentialName first10 |
+	trimmed := aLine trimBoth.
+	trimmed isEmpty ifTrue: [ ^ false ].
+	isStrict ifTrue: [ "Strict: first 10 chars should be a name (not sequence chars)"
+			trimmed size < 10 ifTrue: [ ^ (self looksLikeSequence: trimmed) not ]. "parenthesized: a bare trailing 'not' would be sent to the String argument"
+			first10 := (trimmed copyFrom: 1 to: 10) trimBoth.
+			first10 isEmpty ifTrue: [ ^ false ].
+			^ (self looksLikeSequence: first10) not ].
+	"Relaxed: name ends at first whitespace, check if it looks like a name"
+	wsIdx := self indexOfFirstWhitespaceIn: trimmed.
+	wsIdx = 0 ifTrue: [ ^ false ]. "No whitespace = pure sequence, not a name"
+	wsIdx > 30 ifTrue: [ ^ false ]. "Whitespace very far in = probably sequence not name"
+	potentialName := trimmed copyFrom: 1 to: wsIdx - 1.
+	^ (self looksLikeSequence: potentialName) not
 ]
 
 { #category : 'accessing' }
-BioPhylipParser >> taxaNumberFrom: aCollection [
+BioPhylipParser >> numChars [ ^ numChars
+]
 
-	^ aCollection first first asNumber
+{ #category : 'accessing' }
+BioPhylipParser >> numTaxa [ ^ numTaxa
 ]
 
-{ #category : 'accessing-dna' }
-BioPhylipParser >> tokenizeInterleavedDNA [
-	" Private - Tokenize the receiver's epression as DNA data "
+{ #category : 'parsing' }
+BioPhylipParser >> parseAsInterleaved: aString [
+	"Reset the receiver, parse aString as interleaved PHYLIP, validate, and answer the resulting alignment."
+	self initialize.
+	lines := (aString lines reject: [ :each | each isEmpty ]) asArray.
+	self
+		parseHeader;
+		isInterleaved: true;
+		detectStrictOrRelaxed;
+		parseBodyInterleaved;
+		validate.
+	^ self asAlignment
+]
 
-	parser := 
-		( self firstLineTokenizer ,
-		self speciesDNANamedBlockTokenizer ,
-		self dnaInterleaveLineTokenizer ) ==> self buildTokensBlock.
-	^ self tokenize.
-	
-	
+{ #category : 'parsing' }
+BioPhylipParser >> parseAsSequential: aString [
+	"Reset the receiver, parse aString as sequential PHYLIP, validate, and answer the resulting alignment."
+	self initialize.
+	lines := (aString lines reject: [ :each | each isEmpty ]) asArray.
+	self
+		parseHeader;
+		isInterleaved: false;
+		detectStrictOrRelaxed;
+		parseBodySequential;
+		validate.
+	^ self asAlignment
 ]
 
-{ #category : 'accessing-protein' }
-BioPhylipParser >> tokenizeInterleavedProtein [
-	" Private - Tokenize the receiver's epression as Protein data "
+{ #category : 'parsing' }
+BioPhylipParser >> parseBody [
+	"Parse the data body after the header. Dispatches to sequential or interleaved parser."
 
-	parser := 
-		self firstLineTokenizer ,
-		(((PP2PredicateObjectNode noneOf: self speciesFobiddenNames) times: 10) flatten ,
-		#proteinLetterGapped asPParser trimBlanks star flatten , 
-		#newline asPParser) star ,
-			(#proteinLetterGapped asPParser trimBlanks star flatten , #newline asPParser) star ==> self buildTokensBlock.
-	^ self tokenize.
-	
+	isInterleaved
+		ifTrue: [ self parseBodyInterleaved ]
+		ifFalse: [ self parseBodySequential ]
+]
+
+{ #category : 'parsing' }
+BioPhylipParser >> parseBodyInterleaved [
+	"Read the body as interleaved PHYLIP.
+	 First block: the next numTaxa non-blank lines each give a taxon name and the
+	 start of its sequence. Afterwards every non-blank line is appended, separators
+	 removed, to the taxa in turn (1..numTaxa, then wrapping). Signals an Error when
+	 the input ends inside the first block."
+
+	| appended |
+	numTaxa timesRepeat: [
+		| opener |
+		"Skip blank lines in front of the next name line"
+		[ currentLine <= lines size and: [
+			(lines at: currentLine) trimBoth isEmpty ] ] whileTrue: [
+			currentLine := currentLine + 1 ].
+		currentLine > lines size ifTrue: [
+			Error signal: 'Unexpected end of input in interleaved block' ].
+		opener := lines at: currentLine.
+		taxaNames add: (self extractNameFromLine: opener).
+		sequences add: (self extractSequenceFromLine: opener).
+		currentLine := currentLine + 1 ].
+	appended := 0.
+	[ currentLine <= lines size ] whileTrue: [
+		| chunk |
+		chunk := (lines at: currentLine) trimBoth.
+		chunk isEmpty ifFalse: [
+			| slot |
+			"The n-th continuation line belongs to taxon ((n - 1) \\ numTaxa) + 1"
+			slot := appended \\ numTaxa + 1.
+			sequences at: slot put: (sequences at: slot) , (self cleanSequence: chunk).
+			appended := appended + 1 ].
+		currentLine := currentLine + 1 ]
+]
+
+{ #category : 'parsing' }
+BioPhylipParser >> parseBodySequential [
+	"Read the body as sequential PHYLIP, filling taxaNames and sequences in parallel.
+	 While fewer than numTaxa taxa have been opened, a non-blank line accepted by
+	 lineStartsWithName: opens a new taxon; every other non-blank line is appended,
+	 separators removed, to the taxon currently being read.
+	 Blank lines are skipped."
+
+	| taxonIndex currentSeq |
+	taxonIndex := 0.
+	currentSeq := ''.
+	[ currentLine <= lines size ] whileTrue: [
+			| line trimmed |
+			line := lines at: currentLine.
+			trimmed := line trimBoth.
+			trimmed isEmpty ifFalse: [
+					(taxonIndex < numTaxa and: [ self lineStartsWithName: line ])
+						ifTrue: [
+								"Close the previous taxon before opening the next one."
+								taxonIndex > 0 ifTrue: [ sequences add: currentSeq ].
+								taxaNames add: (self extractNameFromLine: line).
+								currentSeq := self extractSequenceFromLine: line.
+								taxonIndex := taxonIndex + 1 ]
+						ifFalse: [
+								"Continuation of the taxon being read"
+								currentSeq := currentSeq
+								              , (self cleanSequence: trimmed) ] ].
+			currentLine := currentLine + 1 ].
+	"Close the last taxon even when its sequence is empty, so that sequences stays
+	 parallel to taxaNames and validate can report the offending taxon by name.
+	 Residues read before any taxon was opened are still recorded, so validate
+	 keeps rejecting such input."
+	(taxonIndex > 0 or: [ currentSeq notEmpty ]) ifTrue: [
+		sequences add: currentSeq ]
+]
+
+{ #category : 'parsing' }
+BioPhylipParser >> parseFile: aFileReference [
+	"Answer the result of parseString: applied to the contents of aFileReference.
+	 The argument is sent asFileReference first, so anything answering that message is accepted."
+	^ self parseString: aFileReference asFileReference contents
+]
+
+{ #category : 'parsing' }
+BioPhylipParser >> parseHeader [
+	"Read '<taxa> <characters>' from the first line into numTaxa and numChars and set currentLine to 2. Signals an Error on empty input, unreadable counts, fewer than 2 taxa or fewer than 1 character."
+	| header firstNum secondNum |
+	"Test the input itself rather than currentLine, so the check holds wherever the cursor currently is."
+	lines isEmpty ifTrue: [ Error signal: 'Empty PHYLIP input' ].
+	header := lines first trimBoth.
+	firstNum := self readFirstNumber: header.
+	secondNum := self readSecondNumber: header.
+	firstNum ifNil: [ Error signal: 'Cannot read taxa count' ].
+	secondNum ifNil: [ Error signal: 'Cannot read character count' ].
+	firstNum < 2 ifTrue: [ Error signal: 'Need at least 2 taxa' ].
+	secondNum < 1 ifTrue: [ Error signal: 'Need at least 1 character' ].
+	numTaxa := firstNum.
+	numChars := secondNum.
+	currentLine := 2
+]
+
+{ #category : 'parsing' }
+BioPhylipParser >> parseString: aString [
+	"Answer the alignment parsed from the PHYLIP text aString.
+	 The sequential reading is attempted first; if it signals any Error, that error
+	 is discarded and the interleaved reading is used instead."
+	| sequentialResult |
+	sequentialResult := [ self parseAsSequential: aString ]
+		                    on: Error
+		                    do: [ nil ].
+	"The interleaved attempt runs outside the handler, so its errors reach the caller."
+	^ sequentialResult ifNil: [ self parseAsInterleaved: aString ]
+]
+
+{ #category : 'private' }
+BioPhylipParser >> readFirstNumber: aString [
+	"Answer the integer that precedes the first separator of the header line aString,
+	 or nil when aString has no separator or that prefix cannot be read as an integer."
+	| idx |
+	"Use the shared separator scan so space- and tab-delimited headers behave alike,
+	 whichever of the two characters comes first."
+	idx := self indexOfFirstWhitespaceIn: aString.
+	idx = 0 ifTrue: [ ^ nil ].
+	^ [ (aString copyFrom: 1 to: idx - 1) asInteger ]
+		  on: Error
+		  do: [ nil ]
+]
+
+{ #category : 'private' }
+BioPhylipParser >> readSecondNumber: aString [
+	"Answer the integer read from what follows the first separator of the header line
+	 aString, or nil when there is no separator or no integer can be read there."
+	| idx rest |
+	"Use the shared separator scan so space- and tab-delimited headers behave alike,
+	 whichever of the two characters comes first."
+	idx := self indexOfFirstWhitespaceIn: aString.
+	idx = 0 ifTrue: [ ^ nil ].
+	rest := (aString copyFrom: idx + 1 to: aString size) trimBoth.
+	^ [ rest asInteger ]
+		  on: Error
+		  do: [ nil ]
+]
+
+{ #category : 'initialization' }
+BioPhylipParser >> resetForParse: aString [
+	"Reset the receiver, load the non-empty lines of aString, and read the header."
+
+	self initialize.
+	lines := (aString lines select: [ :l | l notEmpty ]) asArray.
+	"currentLine is left at its initial value here: parseHeader sets it to 2 once the header has been read."
+	self parseHeader
+]
+
+{ #category : 'accessing' }
+BioPhylipParser >> sequences [ ^ sequences
+]
+
+{ #category : 'accessing' }
+BioPhylipParser >> taxaNames [ ^ taxaNames
+]
+
+{ #category : 'private' }
+BioPhylipParser >> tryParseInterleaved: aString [
+	"Parse aString as interleaved PHYLIP and answer the alignment. On any Error the receiver's previous state is put back and the error is passed on."
+
+	| savedState |
+	savedState := self deepCopy.
+	[
+		self resetForParse: aString.
+		isInterleaved := true.
+		self detectStrictOrRelaxed.
+		self parseBodyInterleaved.
+		self validate.
+		^ self asAlignment ]
+		on: Error
+		do: [ :ex | "Copy the saved instance variables back (Object>>copyFrom:), then re-raise"
+				self copyFrom: savedState.
+				ex pass ]
+]
+
+{ #category : 'private' }
+BioPhylipParser >> tryParseSequential: aString [
+	"Reset the receiver, parse aString as sequential PHYLIP, validate, and answer the resulting alignment."
+	self initialize.
+	lines := (aString lines reject: [ :each | each isEmpty ]) asArray.
+	self
+		parseHeader;
+		isInterleaved: false;
+		detectStrictOrRelaxed;
+		parseBodySequential;
+		validate.
+	^ self asAlignment
+]
+
+{ #category : 'accessing' }
+BioPhylipParser >> validate [
+	"Check the parsed data against the header and signal an Error on the first mismatch:
+	 number of taxon names, number of sequences, then the length of each sequence
+	 (which must equal numChars)."
+
+	taxaNames size ~= numTaxa ifTrue: [
+		Error signal: 'Expected ' , numTaxa asString , ' taxa but found '
+			, taxaNames size asString ].
+	sequences size ~= numTaxa ifTrue: [
+		Error signal: 'Expected ' , numTaxa asString , ' sequences but found '
+			, sequences size asString ].
+	taxaNames with: sequences do: [ :label :residues |
+		residues size = numChars ifFalse: [
+			Error signal: 'Sequence for ' , label , ' has ' , residues size asString
+				, ' chars but header specifies ' , numChars asString ] ]
 ]
diff --git a/repository/BioParsers/BioPhylipPetitParser.class.st b/repository/BioParsers/BioPhylipPetitParser.class.st
new file mode 100644
index 00000000..140b5dd8
--- /dev/null
+++ b/repository/BioParsers/BioPhylipPetitParser.class.st
@@ -0,0 +1,218 @@
+"
+Documentation taken from http://bioweb2.pasteur.fr/docs/phylip/doc/main.html#inputfiles
+
+"
+Class {
+	#name : 'BioPhylipPetitParser',
+	#superclass : 'BioAbstractTextParser',
+	#category : 'BioParsers-Core',
+	#package : 'BioParsers',
+	#tag : 'Core'
+}
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> buildDNAResults: aCollection [
+	" Answer a BioPhylip built from the parse output aCollection: taxa count, character count and one ambiguous-DNA BioSequence per name/sequence pair "
+
+	| tokenized sequences |
+	" with:collect: keeps the created sequences; with:do: would answer the receiver (the names) instead "
+	tokenized := self buildTokens: aCollection.
+	sequences := tokenized third 
+			with: tokenized fourth 
+			collect: [: first : snd | BioSequence newAmbiguousDNA: snd named: first ].
+	^ BioPhylip new
+		numberOfTaxa: tokenized first;
+		numberOfCharacters: tokenized second;
+		sequences: sequences; yourself
+]
+
+{ #category : 'accessing-protein' }
+BioPhylipPetitParser >> buildProteinResults: aCollection [
+	" Answer a BioPhylip built from the parse output aCollection: taxa count, character count and one protein BioSequence per name/sequence pair "
+
+	| tokenized sequences |
+	" with:collect: keeps the created sequences; with:do: would answer the receiver (the names) instead "
+	tokenized := self buildTokens: aCollection.
+	sequences := tokenized third 
+			with: tokenized fourth 
+			collect: [: first : snd | BioSequence newProtein: snd named: first ].
+	^ BioPhylip new
+		numberOfTaxa: tokenized first;
+		numberOfCharacters: tokenized second;
+		sequences: sequences; yourself
+]
+
+{ #category : 'accessing' }
+BioPhylipPetitParser >> buildTokens: aCollection [
+	" Answer a four-element Array built from the parse output aCollection: taxa count, second header number, first item of each entry of the first block, and the per-taxon sequences from buildTokensFrom: "
+
+	^ Array 
+		with: (self taxaNumberFrom: aCollection)
+		with: (aCollection first second asNumber)
+		with: (aCollection second collect: #first)
+		with: (self buildTokensFrom: aCollection).
+	
+
+]
+
+{ #category : 'accessing-private' }
+BioPhylipPetitParser >> buildTokensBlock [ 
+	" Answer the parse action block: keep the node's first element, drop the last item of each entry of its second element, and from its third element keep the first item (blanks removed) of every entry whose first item is not empty "
+	^ [: node | 
+		OrderedCollection
+			with: node first
+			with: (node second collect: #allButLast) 
+			with: (((node third reject: [: line | line first isEmpty ]) collect: #first) collect: #withoutBlanks ) ]
+]
+
+{ #category : 'accessing' }
+BioPhylipPetitParser >> buildTokensFrom: aCollection [
+	" Answer one sequence string per taxon: the first-block sequences with every later
+	interleaved line appended to its taxon, the lines being dealt out in taxon order "
+
+	| taxaNumber collection seqIndex |
+
+	taxaNumber := self taxaNumberFrom: aCollection.
+	collection := self buildTokensFromFirstBlock: aCollection.
+	seqIndex := 1.
+	aCollection third do: [:seq | 
+			" Wrap around once every taxon has received a line of the current block "
+			seqIndex = (taxaNumber + 1)
+				ifTrue: [seqIndex := 1].
+			collection
+				at: seqIndex
+				put: (String
+						streamContents: [:str | str
+								nextPutAll: (collection at: seqIndex);
+								nextPutAll: seq]).
+			seqIndex := seqIndex + 1].
+	^ collection
+]
+
+{ #category : 'accessing' }
+BioPhylipPetitParser >> buildTokensFromFirstBlock: aCollection [
+	" Answer, for each entry of the second element of aCollection, its second item with blanks removed "
+	^ aCollection second collect: [: seq | seq second withoutBlanks ]
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> dnaInterleaveLineTokenizer [
+	" Answer a parser for zero or more name-less lines: a DNA sequence run followed by a newline "
+	^ (self dnaInterleaveSequenceTokenizer , #newline asPParser) star
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> dnaInterleaveSequenceTokenizer [
+	" Answer a parser for zero or more DNA letters, each with surrounding blanks trimmed, flattened into the matched text "
+	^ #dnaLetter asPParser trimBlanks star flatten
+]
+
+{ #category : 'accessing-private' }
+BioPhylipPetitParser >> firstLineTokenizer [
+	" Answer a Parser for the first line of the format: a number (possibly after leading non-digits), a second number after non-digits, optional blanks, then a newline "
+	" The action removes the elements made only of spaces and answers a copy without the lf / cr strings "
+	^ (#number asPParser / self parserForAnyButNumber) ,
+		(self parserForAnyButNumber) ,
+		#blank asPParser plus optional flatten ,
+		#newline asPParser ==> [ : node | 
+			node asOrderedCollection 
+				removeAllSuchThat: [ : elem  | elem allSatisfy: [ : e | e = Character space ] ];
+				copyWithoutAll: {
+				Character lf asString .
+				Character cr asString } ]
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> parseInterleavedDNA: aString [
+	" Parse aString. On success store the object built by buildDNAResults: in results and answer it; otherwise answer the result of signalInvalidObject: "
+	
+	| parseResults |
+	
+	parseResults := self parseString: aString.
+	^ self isSuccess
+		ifTrue: [ results := self buildDNAResults: parseResults ]
+		ifFalse: [ self signalInvalidObject: parseResults ].
+]
+
+{ #category : 'accessing-protein' }
+BioPhylipPetitParser >> parseInterleavedProtein: aString [
+	" Parse aString. On success store the object built by buildProteinResults: in results and answer it; otherwise answer the result of signalInvalidObject: "
+	
+	| parseResults |
+	
+	parseResults := self parseString: aString.
+	^ self isSuccess
+		ifTrue: [ results := self buildProteinResults: parseResults ]
+		ifFalse: [ self signalInvalidObject: parseResults ].
+]
+
+{ #category : 'accessing-private' }
+BioPhylipPetitParser >> parserForAnyButNumber [
+	" Answer a parser that consumes one or more non-digit characters followed by a number, and answers only the number part "
+	^ #digit asPParser negate plus , #number asPParser ==> [: n | n second ] 
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> speciesDNALineTokenizer [
+	" Answer a Parser for a named line: exactly 10 characters, none of them in speciesFobiddenNames, flattened, followed by a DNA sequence run "
+	
+	^ ((PP2PredicateObjectNode noneOf: self speciesFobiddenNames) times: 10) flatten ,
+		self dnaInterleaveSequenceTokenizer
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> speciesDNANamedBlockTokenizer [
+	" Answer a Parser for zero or more named lines (see speciesDNALineTokenizer), each ended by a newline "
+	
+	^ (self speciesDNALineTokenizer , #newline asPParser flatten) star
+]
+
+{ #category : 'accessing-private' }
+BioPhylipPetitParser >> speciesFobiddenNames [
+	" Private - Answer a new <OrderedCollection> with the Characters not allowed in a species name:
+	carriage return, line feed, square brackets, parentheses, colon, semicolon and comma "
+	^ {
+		Character cr.
+		Character lf.
+		$[.
+		$].
+		$(.
+		$).
+		$:.
+		$;.
+		$,
+	} asOrderedCollection
+]
+
+{ #category : 'accessing' }
+BioPhylipPetitParser >> taxaNumberFrom: aCollection [
+	" Answer, as a number, the first item of the first element of aCollection (the taxa count of the header) "
+	^ aCollection first first asNumber
+]
+
+{ #category : 'accessing-dna' }
+BioPhylipPetitParser >> tokenizeInterleavedDNA [
+	" Private - Tokenize the receiver's expression as DNA data: header line, named block, then name-less lines, repackaged by buildTokensBlock "
+
+	parser := 
+		( self firstLineTokenizer ,
+		self speciesDNANamedBlockTokenizer ,
+		self dnaInterleaveLineTokenizer ) ==> self buildTokensBlock.
+	^ self tokenize.
+	
+	
+]
+
+{ #category : 'accessing-protein' }
+BioPhylipPetitParser >> tokenizeInterleavedProtein [
+	" Private - Tokenize the receiver's expression as Protein data: header line, named lines (10-character name, then gapped protein letters), then name-less lines "
+
+	parser := 
+		self firstLineTokenizer ,
+		(((PP2PredicateObjectNode noneOf: self speciesFobiddenNames) times: 10) flatten ,
+		#proteinLetterGapped asPParser trimBlanks star flatten , 
+		#newline asPParser) star ,
+			(#proteinLetterGapped asPParser trimBlanks star flatten , #newline asPParser) star ==> self buildTokensBlock.
+	^ self tokenize.
+	
+]