#!/usr/bin/env perl

use Test::Most;

use autodie;
use feature qw(say);

use List::AllUtils;
use Path::Class qw(file);

use Bio::MUST::Core;
use Bio::MUST::Core::Utils qw(cmp_store);

# Class under test; used as the invocant of every loader call below.
my $class = 'Bio::MUST::Core::Ali';

# Full-size alignment: counting, loading, basic accessors and cloning.
{
    my $ali_file = file('test', 'AhHMA4.ali');

    # class-method count, called with only the path (no Ali object yet)
    my $n_counted = $class->instant_count($ali_file);
    is $n_counted, 250, 'counted expected number of seqs';

    my $ali = $class->load($ali_file);
    isa_ok $ali, $class, $ali_file;
    is $ali->filename, $ali_file, 'got expected filename for Ali file';
    is $ali->count_comments, 2, 'read expected number of comments';
    is $ali->count_seqs, 250, 'read expected number of seqs';

    # expected % written as 100 * (total - 197368) / total, total = 299500
    my $exp_perc_miss = 100.0 * (299500-197368) / 299500;
    cmp_ok $ali->perc_miss, '==', $exp_perc_miss, 'got expected missing %';

    my $exp_header = <<'EOT';
# HMA4
# P1B-type ATPase 4
EOT
    is $ali->header, $exp_header, 'got expected header';

    # a clone must be structurally identical to its source
    my $twin = $ali->clone;
    is_deeply $twin, $ali, 'got expected Ali clone';
}

# Expected full ids of the five seqs in test/AhHMA4_clustalw.ali, in file
# order; the expected-sequence arrays below are indexed in parallel with it.
my @exp_full_ids = (
    'Pseudovibrio sp._911045@374328480',
    'Burkholderia pseudomallei_320373@126440741',
    'Marinithermus hydrothermalis_869210@328949930',
    'Listeria monocytogenes_393117@254826436',
    'Staphylococcus epidermidis_904337@374818653',
);

my @exp_seqs = (
    'MELIMSNAEDRSQNISPE*******ELENQATKSCCSSQAEQPTKAESSCCSGKGDAQEQESSCCSSKNEAPVDDCCGSAKEHEAKHVHTHTGCGCASKNEVAIPDVTNWKGAR******SFRVEGLCCAEEMGILRRVVGPVVGDPEYLAFDVLNGKMIVSPVARDVTDEQIMKAVDSTGMKAVLFIEQEAADARAKQHRRLGTFTIASGLFWAAAIIVQATLFFSSTSSTDVFSVFDSVPSGPVEVLYMLAIVAGLRLVAPKGWYALRT*LRPDMNLLMLVAVAGAIGIGEWFEGATVAFLFSLSLYLESWSVGRARKAVAALMDIAPTVVRLLKPDGGEEEVAANSVKPGALFVVRGGDRIPLDGVVRKGVGSVDQAPITGESVPVMKEAGDDVYAGTINGEGSFEVEATKGADDTMLARIIRMVSEAQARRAAAEQWVEKFARVYTPAVMVLAVLLAVIPPLLFGAAWMDWFYRALVLLVIACPCALVISTPVSIVAGLTSAARNGVLIKGGVFLELPARLKALAFDKTGTITNGLPTVTDVYPLSGHSVEELLVR***AASLEARSSHPLAEAILTRAKEDG**********VAYQPAENVELLPGRGLSGQRNGKSYWLGS*RRFLNEKDFDIGE*ADAKARELEAEGKTVVAVGTDAHVCGLIALADTVRDTAEELVSQLHKAGVEKLVMLTGDNKATAERVAASVGIDEVRAELLPEDKVAAVEQLSHEYETVAMIGDGVNDAPAMARASFGVAMGAIGSDAAIETADIALMKDDLSRLPWLIHHSKRTLQIIHQNIAFAFVVKGLLVILTALGFASLWAAILGDVGATLIVVTNALRLLKDRAE****',
    '*MTEATRAENRQRTPEADRGGAQSGDARTSARAAGCCSHHHPHGEVETQANLAVQSSNRDSSDHAHAPHSHANGDHP*HDHDHDHDHDHDHDGAACCAPAPVAFAPLPGARKAAGGRVRSAFRIMQMDCPTEETLIRKKLG*AMSEVAALEFNLMQRMLAVEHVPG**AEAGIAAAIRSLGMTP******EQADAGASGRGALPAPADAPRPWWPLAVAGVA*******AAASEAATWLQLPVWLAAALALAAVATCGLGTYRKGWIALTN*GNLNINALMSIAVTGAMAIGQWPEAAMVMVLFTVAELIEARSLDRARNAIQSLMRLAPDTVTLRQPDGTWQPVDAAQVALGAIVRVKPGERIGLDGEIVAGRSTVNQAPITGESLPVEKAEGDAVYAGTINEAGSFEYRVTAAASNTTLARIIHAVEEAQGAKAPTQRFVDSFARVYTPIVFAIALVVAIAPPLVLDGAWRDWIYRALVLLVIACPCALVISTPVTIVSGLAAAARRGILVKGGVYLEQGRRLAWLALDKTGTITRGKPVQTD*FEMRAANVDAALVRG*LAARLAARSDHPVSQAVAAASAAQAGAGGAPRAKPASFADVADFEAIPGRGVRGKIDGVPYWLGN*HRLVEELDCCTSA*LEARLDELERQGKTVVMLIDGARVLGLFAVADTVKDTSRAAVAELHALGIKT*AMLTGDNPHTAQAIAQQVGIDDARGNQLPQDKLAAVEALAAGGRAVGMVGDGINDAPALARADIGFAMGAMGTDTAIETADVALMDDDLRKIPAFVRLSRATHRVLVQNIAFALAVKAVFVGLTVAGMGTMWMAVFADAGASLIVVGNGLRLLRRGQ*****',
    ' **********************************MKTETKTPPPVTTRTYVIEG************        ***************LDCADCARKIEDAVQRLPGVQEAR*************************************VSLATERLTVTSTDGSLGTDTLNQLLSPLGYRVR******DPEASMPKPTPWYRTPKGKSVLLAGTLLAVG*************ILTDILGLAESRLAYTLGTLIGVLPLARKGWANLRQGGFFDINVLVTLAAVGALFIEAEVEALIVVFLFLVGELLESIAAERARASVKALTQLIPETARLIQ*DGQEIEVPASELRPGHRVRVLPGMRVPADGTILEGESAVDESMLTGEPIPVPKGPGDSVYAGTVNTEGAMVVRVERGPEDHLAARILRLIEEAEATKSPTVRFIDRFSRYYTPAIVGIALLVALLPPLLFNAPWEVWTYRALALLLIGCPCALVLSAPAAITSGLARAARMGLLIKGGAALERIGQVRVVALDKTGTLTQGTPAVEA***IQAPNPRELLRL***AAAVEQYSTHPLAAAIVRKAQEEG**********IQPPPASEVRTTAGKFIEGQVEGRHVWLGS*PRYAP*************APVPDRTDGTAVAVFVDGKYSGLIVLRDQLRPDARQGIARMKALGIHP*VMLTGDHTAAAQHVARELGMD*YRAELLPEDKLRILQELKAEG*PVAFVGDGINDAPALAAADVGIAMGG*GTDAALESADAALVEPRITRIADLIGLSRAALSNIRQNIAVALGLKAVFLVTTLAGLTGLWLAILADTGATLIVTANALRLLRFTPPRL**',
    '     **************************************************************************MAEKTVYRVDGLSCTNCAAKFERNVKEIEGVTEAIVN***********************************FGASKITVTGEASIQQVEQAGAFEHLKIIPEKE*******SFTDPEHFTDHQSFIRKNWRLLLSGLFIAVG**********YASQIMNGEDFYLTNALFIFAIFIGGYSLFKEGFKNLLK*FEFTMETLMTIAIIGAAFIGEWAEGSIVVILFAVSEALERYSMDKARQSIRSLMDIAPKEALVRR*SGTDRMVHVDDIQIGDIMIIKPGQKIAMDGHVVKGYSAVNQAAITGESIPVEKNIDDSVFAGTLNEEGLLEVAVTKRVEDTTISKIIHLVEEAQGERAPAQAFVDTFAKYYTPAIIVIAALIATVPPLLFGGNWETWVYQGLSVLVVGCPCALVVSTPVAIVTAIGNAAKNGVLVKGGVYLEEIGGLKAIAFDKTGTLTKGVPVVTDYIELTEATNIQHNKNYIIMAALEQLSQHPLASAIIKYGETREMD********LTSINVNDFTSITGKGIRGTVDGNTYYVGSPVLFKELLASQFTDSIHRQVSDLQLKGKTAMLFGTNQKLISIVAVADEVRSSSQHVIKRLHELGIEKTIMLTGDNQATAQAIGQQVGVSEIEGELMPQDKLDYIKQLKINFGKVAMVGDGINDAPALAAATVGIAMGGAGTDTAIETADVALMGDDLQKLPFTVKLSRKTLQIIKQNITFSLVIKLIALLLVIPGWLTLWIAIMADMGATLLVTLNGLRLMKVKD*****',
    '**MDCSSCARTIEKALSPLDEVTNPKVNFSTGKLTVGLKSQNDINQVTQTVRKLGYDVEETKT****************NSKYITFSVEGMDCGSCAKSIEKHLNNLSYVNDAQVS***********************************FSTGKMQVDFEGNKTKNIEK****EVSKIGYSA*******TLSPTKKSSNSK**WRVFRKPIISTLFLILG**********LVVTLTTLP*VLIANLMYIIAIIVSGIKPLKSAYYAIKS*KSLDMNVLMSVAVIGAIFIGEYFEGAIVVLLFTIGTLLQTISIDKTRNSIQSLMDITSTTANVIT*ENGTTTKDLTDIRVGEILLIKPGDRVPLDGTITDGSSSLNQAPITGESIPVDKTINDEVYAGSINENGTLYIRVSKLVEDTTLSKIIHMVEEAQENKAPTQAFIDRFSEIYTPIVFVLALLVMVIPPLFSLGTWGEWLYKGLELLVIACPCALVISTPVAIVTAIGSAAKNGVLIKGGNHLEGLGTLSALAFDKTGTLTEGRPKVDTIKTIDANETTLLN****IAMSLESYSTHPISNAIVDY**AMQLN********VKKAYVTDFENIVGQGIKGKINESYVYAGN*VKLIESINKKINN*YKEEINKYEQEGFTVIIIASSSMIHGLITIADPLRSNIKQIIQQLNGTHIKNTIMLTGDNKSTAQKIAQLSGIKEVYAELMPGDKLAAIKDLQNKGYRVAMIGDGINDAPALAQSDVGIAMGGIGSDTAMETADVVLMSDDINQLTRTISISKKAKNIIKQNIYFSIIIKLIAFILVFPGLLTLWLAVLSDTGAAILVILNSLRLLRNRNNN***',
);

my @space_seqs = (
    'MELIMSNAEDRSQNISPE*******ELENQATKSCCSSQAEQPTKAESSCCSGKGDAQEQESSCCSSKNEAPVDDCCGSAKEHEAKHVHTHTGCGCASKNEVAIPDVTNWKGAR******SFRVEGLCCAEEMGILRRVVGPVVGDPEYLAFDVLNGKMIVSPVARDVTDEQIMKAVDSTGMKAVLFIEQEAADARAKQHRRLGTFTIASGLFWAAAIIVQATLFFSSTSSTDVFSVFDSVPSGPVEVLYMLAIVAGLRLVAPKGWYALRT*LRPDMNLLMLVAVAGAIGIGEWFEGATVAFLFSLSLYLESWSVGRARKAVAALMDIAPTVVRLLKPDGGEEEVAANSVKPGALFVVRGGDRIPLDGVVRKGVGSVDQAPITGESVPVMKEAGDDVYAGTINGEGSFEVEATKGADDTMLARIIRMVSEAQARRAAAEQWVEKFARVYTPAVMVLAVLLAVIPPLLFGAAWMDWFYRALVLLVIACPCALVISTPVSIVAGLTSAARNGVLIKGGVFLELPARLKALAFDKTGTITNGLPTVTDVYPLSGHSVEELLVR***AASLEARSSHPLAEAILTRAKEDG**********VAYQPAENVELLPGRGLSGQRNGKSYWLGS*RRFLNEKDFDIGE*ADAKARELEAEGKTVVAVGTDAHVCGLIALADTVRDTAEELVSQLHKAGVEKLVMLTGDNKATAERVAASVGIDEVRAELLPEDKVAAVEQLSHEYETVAMIGDGVNDAPAMARASFGVAMGAIGSDAAIETADIALMKDDLSRLPWLIHHSKRTLQIIHQNIAFAFVVKGLLVILTALGFASLWAAILGDVGATLIVVTNALRLLKDRAE****',
    '*MTEATRAENRQRTPEADRGGAQSGDARTSARAAGCCSHHHPHGEVETQANLAVQSSNRDSSDHAHAPHSHANGDHP*HDHDHDHDHDHDHDGAACCAPAPVAFAPLPGARKAAGGRVRSAFRIMQMDCPTEETLIRKKLG*AMSEVAALEFNLMQRMLAVEHVPG**AEAGIAAAIRSLGMTP******EQADAGASGRGALPAPADAPRPWWPLAVAGVA*******AAASEAATWLQLPVWLAAALALAAVATCGLGTYRKGWIALTN*GNLNINALMSIAVTGAMAIGQWPEAAMVMVLFTVAELIEARSLDRARNAIQSLMRLAPDTVTLRQPDGTWQPVDAAQVALGAIVRVKPGERIGLDGEIVAGRSTVNQAPITGESLPVEKAEGDAVYAGTINEAGSFEYRVTAAASNTTLARIIHAVEEAQGAKAPTQRFVDSFARVYTPIVFAIALVVAIAPPLVLDGAWRDWIYRALVLLVIACPCALVISTPVTIVSGLAAAARRGILVKGGVYLEQGRRLAWLALDKTGTITRGKPVQTD*FEMRAANVDAALVRG*LAARLAARSDHPVSQAVAAASAAQAGAGGAPRAKPASFADVADFEAIPGRGVRGKIDGVPYWLGN*HRLVEELDCCTSA*LEARLDELERQGKTVVMLIDGARVLGLFAVADTVKDTSRAAVAELHALGIKT*AMLTGDNPHTAQAIAQQVGIDDARGNQLPQDKLAAVEALAAGGRAVGMVGDGINDAPALARADIGFAMGAMGTDTAIETADVALMDDDLRKIPAFVRLSRATHRVLVQNIAFALAVKAVFVGLTVAGMGTMWMAVFADAGASLIVVGNGLRLLRRGQ*****',
    '                                   MKTETKTPPPVTTRTYVIEG                                   LDCADCARKIEDAVQRLPGVQEAR*************************************VSLATERLTVTSTDGSLGTDTLNQLLSPLGYRVR******DPEASMPKPTPWYRTPKGKSVLLAGTLLAVG*************ILTDILGLAESRLAYTLGTLIGVLPLARKGWANLRQGGFFDINVLVTLAAVGALFIEAEVEALIVVFLFLVGELLESIAAERARASVKALTQLIPETARLIQ*DGQEIEVPASELRPGHRVRVLPGMRVPADGTILEGESAVDESMLTGEPIPVPKGPGDSVYAGTVNTEGAMVVRVERGPEDHLAARILRLIEEAEATKSPTVRFIDRFSRYYTPAIVGIALLVALLPPLLFNAPWEVWTYRALALLLIGCPCALVLSAPAAITSGLARAARMGLLIKGGAALERIGQVRVVALDKTGTLTQGTPAVEA***IQAPNPRELLRL***AAAVEQYSTHPLAAAIVRKAQEEG**********IQPPPASEVRTTAGKFIEGQVEGRHVWLGS*PRYAP*************APVPDRTDGTAVAVFVDGKYSGLIVLRDQLRPDARQGIARMKALGIHP*VMLTGDHTAAAQHVARELGMD*YRAELLPEDKLRILQELKAEG*PVAFVGDGINDAPALAAADVGIAMGG*GTDAALESADAALVEPRITRIADLIGLSRAALSNIRQNIAVALGLKAVFLVTTLAGLTGLWLAILADTGATLIVTANALRLLRFTPPRL**',
    '                                                                               MAEKTVYRVDGLSCTNCAAKFERNVKEIEGVTEAIVN***********************************FGASKITVTGEASIQQVEQAGAFEHLKIIPEKE*******SFTDPEHFTDHQSFIRKNWRLLLSGLFIAVG**********YASQIMNGEDFYLTNALFIFAIFIGGYSLFKEGFKNLLK*FEFTMETLMTIAIIGAAFIGEWAEGSIVVILFAVSEALERYSMDKARQSIRSLMDIAPKEALVRR*SGTDRMVHVDDIQIGDIMIIKPGQKIAMDGHVVKGYSAVNQAAITGESIPVEKNIDDSVFAGTLNEEGLLEVAVTKRVEDTTISKIIHLVEEAQGERAPAQAFVDTFAKYYTPAIIVIAALIATVPPLLFGGNWETWVYQGLSVLVVGCPCALVVSTPVAIVTAIGNAAKNGVLVKGGVYLEEIGGLKAIAFDKTGTLTKGVPVVTDYIELTEATNIQHNKNYIIMAALEQLSQHPLASAIIKYGETREMD********LTSINVNDFTSITGKGIRGTVDGNTYYVGSPVLFKELLASQFTDSIHRQVSDLQLKGKTAMLFGTNQKLISIVAVADEVRSSSQHVIKRLHELGIEKTIMLTGDNQATAQAIGQQVGVSEIEGELMPQDKLDYIKQLKINFGKVAMVGDGINDAPALAAATVGIAMGGAGTDTAIETADVALMGDDLQKLPFTVKLSRKTLQIIKQNITFSLVIKLIALLLVIPGWLTLWIAIMADMGATLLVTLNGLRLMKVKD*****',
    '**MDCSSCARTIEKALSPLDEVTNPKVNFSTGKLTVGLKSQNDINQVTQTVRKLGYDVEETKT****************NSKYITFSVEGMDCGSCAKSIEKHLNNLSYVNDAQVS***********************************FSTGKMQVDFEGNKTKNIEK****EVSKIGYSA*******TLSPTKKSSNSK**WRVFRKPIISTLFLILG**********LVVTLTTLP*VLIANLMYIIAIIVSGIKPLKSAYYAIKS*KSLDMNVLMSVAVIGAIFIGEYFEGAIVVLLFTIGTLLQTISIDKTRNSIQSLMDITSTTANVIT*ENGTTTKDLTDIRVGEILLIKPGDRVPLDGTITDGSSSLNQAPITGESIPVDKTINDEVYAGSINENGTLYIRVSKLVEDTTLSKIIHMVEEAQENKAPTQAFIDRFSEIYTPIVFVLALLVMVIPPLFSLGTWGEWLYKGLELLVIACPCALVISTPVAIVTAIGSAAKNGVLIKGGNHLEGLGTLSALAFDKTGTLTEGRPKVDTIKTIDANETTLLN****IAMSLESYSTHPISNAIVDY**AMQLN********VKKAYVTDFENIVGQGIKGKINESYVYAGN*VKLIESINKKINN*YKEEINKYEQEGFTVIIIASSSMIHGLITIADPLRSNIKQIIQQLNGTHIKNTIMLTGDNKSTAQKIAQLSGIKEVYAELMPGDKLAAIKDLQNKGYRVAMIGDGINDAPALAQSDVGIAMGGIGSDTAMETADVVLMSDDINQLTRTISISKKAKNIIKQNIYFSIIIKLIAFILVFPGLLTLWLAVLSDTGAAILVILNSLRLLRNRNNN***',
);

my @trim_seqs = (
    'MELIMSNAEDRSQNISPE*******ELENQATKSCCSSQAEQPTKAESSCCSGKGDAQEQESSCCSSKNEAPVDDCCGSAKEHEAKHVHTHTGCGCASKNEVAIPDVTNWKGAR******SFRVEGLCCAEEMGILRRVVGPVVGDPEYLAFDVLNGKMIVSPVARDVTDEQIMKAVDSTGMKAVLFIEQEAADARAKQHRRLGTFTIASGLFWAAAIIVQATLFFSSTSSTDVFSVFDSVPSGPVEVLYMLAIVAGLRLVAPKGWYALRT*LRPDMNLLMLVAVAGAIGIGEWFEGATVAFLFSLSLYLESWSVGRARKAVAALMDIAPTVVRLLKPDGGEEEVAANSVKPGALFVVRGGDRIPLDGVVRKGVGSVDQAPITGESVPVMKEAGDDVYAGTINGEGSFEVEATKGADDTMLARIIRMVSEAQARRAAAEQWVEKFARVYTPAVMVLAVLLAVIPPLLFGAAWMDWFYRALVLLVIACPCALVISTPVSIVAGLTSAARNGVLIKGGVFLELPARLKALAFDKTGTITNGLPTVTDVYPLSGHSVEELLVR***AASLEARSSHPLAEAILTRAKEDG**********VAYQPAENVELLPGRGLSGQRNGKSYWLGS*RRFLNEKDFDIGE*ADAKARELEAEGKTVVAVGTDAHVCGLIALADTVRDTAEELVSQLHKAGVEKLVMLTGDNKATAERVAASVGIDEVRAELLPEDKVAAVEQLSHEYETVAMIGDGVNDAPAMARASFGVAMGAIGSDAAIETADIALMKDDLSRLPWLIHHSKRTLQIIHQNIAFAFVVKGLLVILTALGFASLWAAILGDVGATLIVVTNALRLLKDRAE',
    '*MTEATRAENRQRTPEADRGGAQSGDARTSARAAGCCSHHHPHGEVETQANLAVQSSNRDSSDHAHAPHSHANGDHP*HDHDHDHDHDHDHDGAACCAPAPVAFAPLPGARKAAGGRVRSAFRIMQMDCPTEETLIRKKLG*AMSEVAALEFNLMQRMLAVEHVPG**AEAGIAAAIRSLGMTP******EQADAGASGRGALPAPADAPRPWWPLAVAGVA*******AAASEAATWLQLPVWLAAALALAAVATCGLGTYRKGWIALTN*GNLNINALMSIAVTGAMAIGQWPEAAMVMVLFTVAELIEARSLDRARNAIQSLMRLAPDTVTLRQPDGTWQPVDAAQVALGAIVRVKPGERIGLDGEIVAGRSTVNQAPITGESLPVEKAEGDAVYAGTINEAGSFEYRVTAAASNTTLARIIHAVEEAQGAKAPTQRFVDSFARVYTPIVFAIALVVAIAPPLVLDGAWRDWIYRALVLLVIACPCALVISTPVTIVSGLAAAARRGILVKGGVYLEQGRRLAWLALDKTGTITRGKPVQTD*FEMRAANVDAALVRG*LAARLAARSDHPVSQAVAAASAAQAGAGGAPRAKPASFADVADFEAIPGRGVRGKIDGVPYWLGN*HRLVEELDCCTSA*LEARLDELERQGKTVVMLIDGARVLGLFAVADTVKDTSRAAVAELHALGIKT*AMLTGDNPHTAQAIAQQVGIDDARGNQLPQDKLAAVEALAAGGRAVGMVGDGINDAPALARADIGFAMGAMGTDTAIETADVALMDDDLRKIPAFVRLSRATHRVLVQNIAFALAVKAVFVGLTVAGMGTMWMAVFADAGASLIVVGNGLRLLRRGQ',
    '                                   MKTETKTPPPVTTRTYVIEG                                   LDCADCARKIEDAVQRLPGVQEAR*************************************VSLATERLTVTSTDGSLGTDTLNQLLSPLGYRVR******DPEASMPKPTPWYRTPKGKSVLLAGTLLAVG*************ILTDILGLAESRLAYTLGTLIGVLPLARKGWANLRQGGFFDINVLVTLAAVGALFIEAEVEALIVVFLFLVGELLESIAAERARASVKALTQLIPETARLIQ*DGQEIEVPASELRPGHRVRVLPGMRVPADGTILEGESAVDESMLTGEPIPVPKGPGDSVYAGTVNTEGAMVVRVERGPEDHLAARILRLIEEAEATKSPTVRFIDRFSRYYTPAIVGIALLVALLPPLLFNAPWEVWTYRALALLLIGCPCALVLSAPAAITSGLARAARMGLLIKGGAALERIGQVRVVALDKTGTLTQGTPAVEA***IQAPNPRELLRL***AAAVEQYSTHPLAAAIVRKAQEEG**********IQPPPASEVRTTAGKFIEGQVEGRHVWLGS*PRYAP*************APVPDRTDGTAVAVFVDGKYSGLIVLRDQLRPDARQGIARMKALGIHP*VMLTGDHTAAAQHVARELGMD*YRAELLPEDKLRILQELKAEG*PVAFVGDGINDAPALAAADVGIAMGG*GTDAALESADAALVEPRITRIADLIGLSRAALSNIRQNIAVALGLKAVFLVTTLAGLTGLWLAILADTGATLIVTANALRLLRFTPPRL',
    '                                                                               MAEKTVYRVDGLSCTNCAAKFERNVKEIEGVTEAIVN***********************************FGASKITVTGEASIQQVEQAGAFEHLKIIPEKE*******SFTDPEHFTDHQSFIRKNWRLLLSGLFIAVG**********YASQIMNGEDFYLTNALFIFAIFIGGYSLFKEGFKNLLK*FEFTMETLMTIAIIGAAFIGEWAEGSIVVILFAVSEALERYSMDKARQSIRSLMDIAPKEALVRR*SGTDRMVHVDDIQIGDIMIIKPGQKIAMDGHVVKGYSAVNQAAITGESIPVEKNIDDSVFAGTLNEEGLLEVAVTKRVEDTTISKIIHLVEEAQGERAPAQAFVDTFAKYYTPAIIVIAALIATVPPLLFGGNWETWVYQGLSVLVVGCPCALVVSTPVAIVTAIGNAAKNGVLVKGGVYLEEIGGLKAIAFDKTGTLTKGVPVVTDYIELTEATNIQHNKNYIIMAALEQLSQHPLASAIIKYGETREMD********LTSINVNDFTSITGKGIRGTVDGNTYYVGSPVLFKELLASQFTDSIHRQVSDLQLKGKTAMLFGTNQKLISIVAVADEVRSSSQHVIKRLHELGIEKTIMLTGDNQATAQAIGQQVGVSEIEGELMPQDKLDYIKQLKINFGKVAMVGDGINDAPALAAATVGIAMGGAGTDTAIETADVALMGDDLQKLPFTVKLSRKTLQIIKQNITFSLVIKLIALLLVIPGWLTLWIAIMADMGATLLVTLNGLRLMKVKD',
    '**MDCSSCARTIEKALSPLDEVTNPKVNFSTGKLTVGLKSQNDINQVTQTVRKLGYDVEETKT****************NSKYITFSVEGMDCGSCAKSIEKHLNNLSYVNDAQVS***********************************FSTGKMQVDFEGNKTKNIEK****EVSKIGYSA*******TLSPTKKSSNSK**WRVFRKPIISTLFLILG**********LVVTLTTLP*VLIANLMYIIAIIVSGIKPLKSAYYAIKS*KSLDMNVLMSVAVIGAIFIGEYFEGAIVVLLFTIGTLLQTISIDKTRNSIQSLMDITSTTANVIT*ENGTTTKDLTDIRVGEILLIKPGDRVPLDGTITDGSSSLNQAPITGESIPVDKTINDEVYAGSINENGTLYIRVSKLVEDTTLSKIIHMVEEAQENKAPTQAFIDRFSEIYTPIVFVLALLVMVIPPLFSLGTWGEWLYKGLELLVIACPCALVISTPVAIVTAIGSAAKNGVLIKGGNHLEGLGTLSALAFDKTGTLTEGRPKVDTIKTIDANETTLLN****IAMSLESYSTHPISNAIVDY**AMQLN********VKKAYVTDFENIVGQGIKGKINESYVYAGN*VKLIESINKKINN*YKEEINKYEQEGFTVIIIASSSMIHGLITIADPLRSNIKQIIQQLNGTHIKNTIMLTGDNKSTAQKIAQLSGIKEVYAELMPGDKLAAIKDLQNKGYRVAMIGDGINDAPALAQSDVGIAMGGIGSDTAMETADVVLMSDDINQLTRTISISKKAKNIIKQNIYFSIIIKLIAFILVFPGLLTLWLAVLSDTGAAILVILNSLRLLRNRNNN',
);

my @pad_seqs = (
    'MELIMSNAEDRSQNISPE*******ELENQATKSCCSSQAEQPTKAESSCCSGKGDAQEQESSCCSSKNEAPVDDCCGSAKEHEAKHVHTHTGCGCASKNEVAIPDVTNWKGAR******SFRVEGLCCAEEMGILRRVVGPVVGDPEYLAFDVLNGKMIVSPVARDVTDEQIMKAVDSTGMKAVLFIEQEAADARAKQHRRLGTFTIASGLFWAAAIIVQATLFFSSTSSTDVFSVFDSVPSGPVEVLYMLAIVAGLRLVAPKGWYALRT*LRPDMNLLMLVAVAGAIGIGEWFEGATVAFLFSLSLYLESWSVGRARKAVAALMDIAPTVVRLLKPDGGEEEVAANSVKPGALFVVRGGDRIPLDGVVRKGVGSVDQAPITGESVPVMKEAGDDVYAGTINGEGSFEVEATKGADDTMLARIIRMVSEAQARRAAAEQWVEKFARVYTPAVMVLAVLLAVIPPLLFGAAWMDWFYRALVLLVIACPCALVISTPVSIVAGLTSAARNGVLIKGGVFLELPARLKALAFDKTGTITNGLPTVTDVYPLSGHSVEELLVR***AASLEARSSHPLAEAILTRAKEDG**********VAYQPAENVELLPGRGLSGQRNGKSYWLGS*RRFLNEKDFDIGE*ADAKARELEAEGKTVVAVGTDAHVCGLIALADTVRDTAEELVSQLHKAGVEKLVMLTGDNKATAERVAASVGIDEVRAELLPEDKVAAVEQLSHEYETVAMIGDGVNDAPAMARASFGVAMGAIGSDAAIETADIALMKDDLSRLPWLIHHSKRTLQIIHQNIAFAFVVKGLLVILTALGFASLWAAILGDVGATLIVVTNALRLLKDRAE  ',
    '*MTEATRAENRQRTPEADRGGAQSGDARTSARAAGCCSHHHPHGEVETQANLAVQSSNRDSSDHAHAPHSHANGDHP*HDHDHDHDHDHDHDGAACCAPAPVAFAPLPGARKAAGGRVRSAFRIMQMDCPTEETLIRKKLG*AMSEVAALEFNLMQRMLAVEHVPG**AEAGIAAAIRSLGMTP******EQADAGASGRGALPAPADAPRPWWPLAVAGVA*******AAASEAATWLQLPVWLAAALALAAVATCGLGTYRKGWIALTN*GNLNINALMSIAVTGAMAIGQWPEAAMVMVLFTVAELIEARSLDRARNAIQSLMRLAPDTVTLRQPDGTWQPVDAAQVALGAIVRVKPGERIGLDGEIVAGRSTVNQAPITGESLPVEKAEGDAVYAGTINEAGSFEYRVTAAASNTTLARIIHAVEEAQGAKAPTQRFVDSFARVYTPIVFAIALVVAIAPPLVLDGAWRDWIYRALVLLVIACPCALVISTPVTIVSGLAAAARRGILVKGGVYLEQGRRLAWLALDKTGTITRGKPVQTD*FEMRAANVDAALVRG*LAARLAARSDHPVSQAVAAASAAQAGAGGAPRAKPASFADVADFEAIPGRGVRGKIDGVPYWLGN*HRLVEELDCCTSA*LEARLDELERQGKTVVMLIDGARVLGLFAVADTVKDTSRAAVAELHALGIKT*AMLTGDNPHTAQAIAQQVGIDDARGNQLPQDKLAAVEALAAGGRAVGMVGDGINDAPALARADIGFAMGAMGTDTAIETADVALMDDDLRKIPAFVRLSRATHRVLVQNIAFALAVKAVFVGLTVAGMGTMWMAVFADAGASLIVVGNGLRLLRRGQ   ',
    '                                   MKTETKTPPPVTTRTYVIEG                                   LDCADCARKIEDAVQRLPGVQEAR*************************************VSLATERLTVTSTDGSLGTDTLNQLLSPLGYRVR******DPEASMPKPTPWYRTPKGKSVLLAGTLLAVG*************ILTDILGLAESRLAYTLGTLIGVLPLARKGWANLRQGGFFDINVLVTLAAVGALFIEAEVEALIVVFLFLVGELLESIAAERARASVKALTQLIPETARLIQ*DGQEIEVPASELRPGHRVRVLPGMRVPADGTILEGESAVDESMLTGEPIPVPKGPGDSVYAGTVNTEGAMVVRVERGPEDHLAARILRLIEEAEATKSPTVRFIDRFSRYYTPAIVGIALLVALLPPLLFNAPWEVWTYRALALLLIGCPCALVLSAPAAITSGLARAARMGLLIKGGAALERIGQVRVVALDKTGTLTQGTPAVEA***IQAPNPRELLRL***AAAVEQYSTHPLAAAIVRKAQEEG**********IQPPPASEVRTTAGKFIEGQVEGRHVWLGS*PRYAP*************APVPDRTDGTAVAVFVDGKYSGLIVLRDQLRPDARQGIARMKALGIHP*VMLTGDHTAAAQHVARELGMD*YRAELLPEDKLRILQELKAEG*PVAFVGDGINDAPALAAADVGIAMGG*GTDAALESADAALVEPRITRIADLIGLSRAALSNIRQNIAVALGLKAVFLVTTLAGLTGLWLAILADTGATLIVTANALRLLRFTPPRL',
    '                                                                               MAEKTVYRVDGLSCTNCAAKFERNVKEIEGVTEAIVN***********************************FGASKITVTGEASIQQVEQAGAFEHLKIIPEKE*******SFTDPEHFTDHQSFIRKNWRLLLSGLFIAVG**********YASQIMNGEDFYLTNALFIFAIFIGGYSLFKEGFKNLLK*FEFTMETLMTIAIIGAAFIGEWAEGSIVVILFAVSEALERYSMDKARQSIRSLMDIAPKEALVRR*SGTDRMVHVDDIQIGDIMIIKPGQKIAMDGHVVKGYSAVNQAAITGESIPVEKNIDDSVFAGTLNEEGLLEVAVTKRVEDTTISKIIHLVEEAQGERAPAQAFVDTFAKYYTPAIIVIAALIATVPPLLFGGNWETWVYQGLSVLVVGCPCALVVSTPVAIVTAIGNAAKNGVLVKGGVYLEEIGGLKAIAFDKTGTLTKGVPVVTDYIELTEATNIQHNKNYIIMAALEQLSQHPLASAIIKYGETREMD********LTSINVNDFTSITGKGIRGTVDGNTYYVGSPVLFKELLASQFTDSIHRQVSDLQLKGKTAMLFGTNQKLISIVAVADEVRSSSQHVIKRLHELGIEKTIMLTGDNQATAQAIGQQVGVSEIEGELMPQDKLDYIKQLKINFGKVAMVGDGINDAPALAAATVGIAMGGAGTDTAIETADVALMGDDLQKLPFTVKLSRKTLQIIKQNITFSLVIKLIALLLVIPGWLTLWIAIMADMGATLLVTLNGLRLMKVKD   ',
    '**MDCSSCARTIEKALSPLDEVTNPKVNFSTGKLTVGLKSQNDINQVTQTVRKLGYDVEETKT****************NSKYITFSVEGMDCGSCAKSIEKHLNNLSYVNDAQVS***********************************FSTGKMQVDFEGNKTKNIEK****EVSKIGYSA*******TLSPTKKSSNSK**WRVFRKPIISTLFLILG**********LVVTLTTLP*VLIANLMYIIAIIVSGIKPLKSAYYAIKS*KSLDMNVLMSVAVIGAIFIGEYFEGAIVVLLFTIGTLLQTISIDKTRNSIQSLMDITSTTANVIT*ENGTTTKDLTDIRVGEILLIKPGDRVPLDGTITDGSSSLNQAPITGESIPVDKTINDEVYAGSINENGTLYIRVSKLVEDTTLSKIIHMVEEAQENKAPTQAFIDRFSEIYTPIVFVLALLVMVIPPLFSLGTWGEWLYKGLELLVIACPCALVISTPVAIVTAIGSAAKNGVLIKGGNHLEGLGTLSALAFDKTGTLTEGRPKVDTIKTIDANETTLLN****IAMSLESYSTHPISNAIVDY**AMQLN********VKKAYVTDFENIVGQGIKGKINESYVYAGN*VKLIESINKKINN*YKEEINKYEQEGFTVIIIASSSMIHGLITIADPLRSNIKQIIQQLNGTHIKNTIMLTGDNKSTAQKIAQLSGIKEVYAELMPGDKLAAIKDLQNKGYRVAMIGDGINDAPALAQSDVGIAMGGIGSDTAMETADVVLMSDDINQLTRTISISKKAKNIIKQNIYFSIIIKLIAFILVFPGLLTLWLAVLSDTGAAILVILNSLRLLRNRNNN ',
);

my @raw_seqs = (
    'MELIMSNAEDRSQNISPEELENQATKSCCSSQAEQPTKAESSCCSGKGDAQEQESSCCSSKNEAPVDDCCGSAKEHEAKHVHTHTGCGCASKNEVAIPDVTNWKGARSFRVEGLCCAEEMGILRRVVGPVVGDPEYLAFDVLNGKMIVSPVARDVTDEQIMKAVDSTGMKAVLFIEQEAADARAKQHRRLGTFTIASGLFWAAAIIVQATLFFSSTSSTDVFSVFDSVPSGPVEVLYMLAIVAGLRLVAPKGWYALRTLRPDMNLLMLVAVAGAIGIGEWFEGATVAFLFSLSLYLESWSVGRARKAVAALMDIAPTVVRLLKPDGGEEEVAANSVKPGALFVVRGGDRIPLDGVVRKGVGSVDQAPITGESVPVMKEAGDDVYAGTINGEGSFEVEATKGADDTMLARIIRMVSEAQARRAAAEQWVEKFARVYTPAVMVLAVLLAVIPPLLFGAAWMDWFYRALVLLVIACPCALVISTPVSIVAGLTSAARNGVLIKGGVFLELPARLKALAFDKTGTITNGLPTVTDVYPLSGHSVEELLVRAASLEARSSHPLAEAILTRAKEDGVAYQPAENVELLPGRGLSGQRNGKSYWLGSRRFLNEKDFDIGEADAKARELEAEGKTVVAVGTDAHVCGLIALADTVRDTAEELVSQLHKAGVEKLVMLTGDNKATAERVAASVGIDEVRAELLPEDKVAAVEQLSHEYETVAMIGDGVNDAPAMARASFGVAMGAIGSDAAIETADIALMKDDLSRLPWLIHHSKRTLQIIHQNIAFAFVVKGLLVILTALGFASLWAAILGDVGATLIVVTNALRLLKDRAE',
    'MTEATRAENRQRTPEADRGGAQSGDARTSARAAGCCSHHHPHGEVETQANLAVQSSNRDSSDHAHAPHSHANGDHPHDHDHDHDHDHDHDGAACCAPAPVAFAPLPGARKAAGGRVRSAFRIMQMDCPTEETLIRKKLGAMSEVAALEFNLMQRMLAVEHVPGAEAGIAAAIRSLGMTPEQADAGASGRGALPAPADAPRPWWPLAVAGVAAAASEAATWLQLPVWLAAALALAAVATCGLGTYRKGWIALTNGNLNINALMSIAVTGAMAIGQWPEAAMVMVLFTVAELIEARSLDRARNAIQSLMRLAPDTVTLRQPDGTWQPVDAAQVALGAIVRVKPGERIGLDGEIVAGRSTVNQAPITGESLPVEKAEGDAVYAGTINEAGSFEYRVTAAASNTTLARIIHAVEEAQGAKAPTQRFVDSFARVYTPIVFAIALVVAIAPPLVLDGAWRDWIYRALVLLVIACPCALVISTPVTIVSGLAAAARRGILVKGGVYLEQGRRLAWLALDKTGTITRGKPVQTDFEMRAANVDAALVRGLAARLAARSDHPVSQAVAAASAAQAGAGGAPRAKPASFADVADFEAIPGRGVRGKIDGVPYWLGNHRLVEELDCCTSALEARLDELERQGKTVVMLIDGARVLGLFAVADTVKDTSRAAVAELHALGIKTAMLTGDNPHTAQAIAQQVGIDDARGNQLPQDKLAAVEALAAGGRAVGMVGDGINDAPALARADIGFAMGAMGTDTAIETADVALMDDDLRKIPAFVRLSRATHRVLVQNIAFALAVKAVFVGLTVAGMGTMWMAVFADAGASLIVVGNGLRLLRRGQ',
    'MKTETKTPPPVTTRTYVIEGLDCADCARKIEDAVQRLPGVQEARVSLATERLTVTSTDGSLGTDTLNQLLSPLGYRVRDPEASMPKPTPWYRTPKGKSVLLAGTLLAVGILTDILGLAESRLAYTLGTLIGVLPLARKGWANLRQGGFFDINVLVTLAAVGALFIEAEVEALIVVFLFLVGELLESIAAERARASVKALTQLIPETARLIQDGQEIEVPASELRPGHRVRVLPGMRVPADGTILEGESAVDESMLTGEPIPVPKGPGDSVYAGTVNTEGAMVVRVERGPEDHLAARILRLIEEAEATKSPTVRFIDRFSRYYTPAIVGIALLVALLPPLLFNAPWEVWTYRALALLLIGCPCALVLSAPAAITSGLARAARMGLLIKGGAALERIGQVRVVALDKTGTLTQGTPAVEAIQAPNPRELLRLAAAVEQYSTHPLAAAIVRKAQEEGIQPPPASEVRTTAGKFIEGQVEGRHVWLGSPRYAPAPVPDRTDGTAVAVFVDGKYSGLIVLRDQLRPDARQGIARMKALGIHPVMLTGDHTAAAQHVARELGMDYRAELLPEDKLRILQELKAEGPVAFVGDGINDAPALAAADVGIAMGGGTDAALESADAALVEPRITRIADLIGLSRAALSNIRQNIAVALGLKAVFLVTTLAGLTGLWLAILADTGATLIVTANALRLLRFTPPRL',
    'MAEKTVYRVDGLSCTNCAAKFERNVKEIEGVTEAIVNFGASKITVTGEASIQQVEQAGAFEHLKIIPEKESFTDPEHFTDHQSFIRKNWRLLLSGLFIAVGYASQIMNGEDFYLTNALFIFAIFIGGYSLFKEGFKNLLKFEFTMETLMTIAIIGAAFIGEWAEGSIVVILFAVSEALERYSMDKARQSIRSLMDIAPKEALVRRSGTDRMVHVDDIQIGDIMIIKPGQKIAMDGHVVKGYSAVNQAAITGESIPVEKNIDDSVFAGTLNEEGLLEVAVTKRVEDTTISKIIHLVEEAQGERAPAQAFVDTFAKYYTPAIIVIAALIATVPPLLFGGNWETWVYQGLSVLVVGCPCALVVSTPVAIVTAIGNAAKNGVLVKGGVYLEEIGGLKAIAFDKTGTLTKGVPVVTDYIELTEATNIQHNKNYIIMAALEQLSQHPLASAIIKYGETREMDLTSINVNDFTSITGKGIRGTVDGNTYYVGSPVLFKELLASQFTDSIHRQVSDLQLKGKTAMLFGTNQKLISIVAVADEVRSSSQHVIKRLHELGIEKTIMLTGDNQATAQAIGQQVGVSEIEGELMPQDKLDYIKQLKINFGKVAMVGDGINDAPALAAATVGIAMGGAGTDTAIETADVALMGDDLQKLPFTVKLSRKTLQIIKQNITFSLVIKLIALLLVIPGWLTLWIAIMADMGATLLVTLNGLRLMKVKD',
    'MDCSSCARTIEKALSPLDEVTNPKVNFSTGKLTVGLKSQNDINQVTQTVRKLGYDVEETKTNSKYITFSVEGMDCGSCAKSIEKHLNNLSYVNDAQVSFSTGKMQVDFEGNKTKNIEKEVSKIGYSATLSPTKKSSNSKWRVFRKPIISTLFLILGLVVTLTTLPVLIANLMYIIAIIVSGIKPLKSAYYAIKSKSLDMNVLMSVAVIGAIFIGEYFEGAIVVLLFTIGTLLQTISIDKTRNSIQSLMDITSTTANVITENGTTTKDLTDIRVGEILLIKPGDRVPLDGTITDGSSSLNQAPITGESIPVDKTINDEVYAGSINENGTLYIRVSKLVEDTTLSKIIHMVEEAQENKAPTQAFIDRFSEIYTPIVFVLALLVMVIPPLFSLGTWGEWLYKGLELLVIACPCALVISTPVAIVTAIGSAAKNGVLIKGGNHLEGLGTLSALAFDKTGTLTEGRPKVDTIKTIDANETTLLNIAMSLESYSTHPISNAIVDYAMQLNVKKAYVTDFENIVGQGIKGKINESYVYAGNVKLIESINKKINNYKEEINKYEQEGFTVIIIASSSMIHGLITIADPLRSNIKQIIQQLNGTHIKNTIMLTGDNKSTAQKIAQLSGIKEVYAELMPGDKLAAIKDLQNKGYRVAMIGDGINDAPALAQSDVGIAMGGIGSDTAMETADVVLMSDDINQLTRTISISKKAKNIIKQNIYFSIIIKLIAFILVFPGLLTLWLAVLSDTGAAILVILNSLRLLRNRNNN',
);

# Small ClustalW-derived alignment: ids, raw seqs and the in-place mutators
# (spacify, trim, pad), then uniformize and degap on fresh copies.
{
    my $ali_file = file('test', 'AhHMA4_clustalw.ali');

    # collect the seq strings of all Seqs of an Ali into an ArrayRef
    my $seq_strs_of = sub {
        my $aln = shift;
        return [ map { $_->seq } $aln->all_seqs ];
    };

    my $ali = $class->load($ali_file);
    isa_ok $ali, $class, $ali_file;
    is $ali->filename, $ali_file, 'got expected filename for Ali file';
    is $ali->count_comments, 0, 'read expected number of comments';
    is $ali->count_seqs, 5, 'read expected number of seqs';

    my $exp_header = <<'EOT';
#
#
EOT
    is $ali->header, $exp_header, 'got expected empty header';

    my @ids_from_seq_ids = map { $_->full_id } $ali->all_seq_ids;
    is_deeply \@ids_from_seq_ids, \@exp_full_ids,
        'got expected full_ids from SeqIds';

    my @ids_from_seqs = map { $_->full_id } $ali->all_seqs;
    is_deeply \@ids_from_seqs, \@exp_full_ids,
        'got expected full_ids directly from Seqs';

    is_deeply $seq_strs_of->($ali), \@exp_seqs, 'got expected Seqs';

    # the three mutators below are applied cumulatively to the same Ali
    $ali->spacify_seqs;
    is_deeply $seq_strs_of->($ali), \@space_seqs,
        'got expected spacified Seqs';

    $ali->trim_seqs;
    is_deeply $seq_strs_of->($ali), \@trim_seqs,
        'got expected trimmed Seqs';

    $ali->pad_seqs;
    is_deeply $seq_strs_of->($ali), \@pad_seqs,
        'got expected padded Seqs';

    my $exp_perc_miss = 100.0 * (4275-3816) / 4275;
    cmp_ok $ali->perc_miss, '==', $exp_perc_miss, 'got expected missing %';

    # uniformize on a fresh copy must yield the same seqs as the three steps
    my $uni_ali = $class->load($ali_file);
    $uni_ali->uniformize;
    is_deeply $seq_strs_of->($uni_ali), \@pad_seqs,
        'got expected uniformized Seqs';

    my $raw_ali = $class->load($ali_file);
    $raw_ali->degap_seqs;
    is_deeply $seq_strs_of->($raw_ali), \@raw_seqs,
        'got expected degapped Seqs';
}

{
    # accessors
    my $ali_file = file('test', 'AhHMA4_clustalw.ali');
    my $ali = $class->load($ali_file);

    # keep only the seqs whose nomiss_seq_len exceeds 800
    my $is_long = sub { $_->nomiss_seq_len > 800 };
    my @long_seqs = $ali->filter_seqs($is_long);
    cmp_ok @long_seqs, '==', 2, 'rightly filtered seqs based on length';

    # spot-check one randomly picked seq per run (index is reported)
    my $pick = int( rand(@exp_full_ids) );
    explain $pick;
    my ($full_id, $exp_seq) = ( $exp_full_ids[$pick], $exp_seqs[$pick] );
    my $got_seq = $ali->get_seq_with_id($full_id)->seq;
    cmp_ok $got_seq, 'eq', $exp_seq, "got expected seq with id $full_id";

    # looking up an unknown id must yield undef
    my $absent_seq = $ali->get_seq_with_id('missing-id');
    ok !defined $absent_seq, 'got expected undef for missing seq';
}

# Seqs carrying the #NEW# tag vs preexisting ones, then tag clearing.
{
    my $ali_file = file('test', 'new_seqs.ali');
    my $ali = $class->load($ali_file);

    my @exp_new_ids = (
        'Arabidopsis thaliana@ABC123#NEW#',
        'Arabidopsis thaliana@XWZ789#NEW#',
    );
    my @got_new_ids = map { $_->full_id } $ali->all_new_seqs;
    is_deeply \@got_new_ids, \@exp_new_ids,
        'got expected seq_ids for new seqs';

    my @exp_old_ids = (
        'Arabidopsis thaliana@SOS777',
    );
    my @got_old_ids = map { $_->full_id } $ali->all_but_new_seqs;
    is_deeply \@got_old_ids, \@exp_old_ids,
        'got expected seq_ids for preexisting seqs';

    # once tags are cleared, no seq should be reported as new anymore
    $ali->clear_new_tags;
    cmp_ok $ali->all_new_seqs, '==', 0, 'rightly cleared new tags';
}

# Write the ClustalW-derived alignment back out in Ali and FASTA formats and
# compare each output with its reference file.
{
    my $infile = file('test', 'AhHMA4_clustalw.ali');
    my $ali = $class->load($infile);
    cmp_store(
        obj => $ali, method => 'store',
        file => 'AhHMA4_clustalw_uni.ali',
        test => 'wrote expected aligned Ali',
    );

    # The FASTA reference must be compared against the FASTA writer's output
    # (store_fasta, as for the other .fasta references in this file); using
    # plain store here would only exercise the Ali writer a second time.
    cmp_store(
        obj => $ali, method => 'store_fasta',
        file => 'AhHMA4_clustalw.fasta',
        test => 'wrote expected aligned .fasta file',
    );
}

# Sparse alignment written in Ali and FASTA formats.
{
    my $ali_file = file('test', 'sparse_seqs.ali');
    my $ali = $class->load($ali_file);

    # one row per writer: method, reference file, test label
    my @checks = (
        [ 'store',       'sparse_seqs_pad.ali',
            'wrote expected sparsely aligned Ali' ],
        [ 'store_fasta', 'sparse_seqs_pad.fasta',
            'wrote expected sparsely aligned .fasta file' ],
    );

    for my $check (@checks) {
        my ($method, $ref_file, $label) = @{$check};
        cmp_store(
            obj => $ali, method => $method,
            file => $ref_file,
            test => $label,
        );
    }
}

# PHYLIP writer with only the clean option set.
{
    my $ali_file = file('test', 'phylip.ali');
    my $ali      = $class->load($ali_file);

    cmp_store(
        obj    => $ali,
        method => 'store_phylip',
        file   => 'phylip.phy',
        test   => 'wrote expected .phy file',
        args   => { clean => 1 },
    );
}

# PHYLIP writer with short ids disabled and chunk set to -1.
{
    my $ali_file = file('test', 'phylip.ali');
    my $ali      = $class->load($ali_file);

    my $phy_opts = { clean => 1, short => 0, chunk => -1 };
    cmp_store(
        obj    => $ali,
        method => 'store_phylip',
        file   => 'phylip.p80',
        test   => 'wrote expected .p80 file',
        args   => $phy_opts,
    );
}

# PHYLIP reader, checked through the FASTA it produces.
{
    my $phy_file = file('test', 'phylip.phy');
    my $ali      = $class->load_phylip($phy_file);

    cmp_store(
        obj    => $ali,
        method => 'store_fasta',
        file   => 'phylip.fasta',
        test   => 'read expected .phy file',
    );
}

# Left-justified PHYLIP variant; must give the same FASTA reference.
{
    my $phy_file = file('test', 'phylip-lr.phy');
    my $ali      = $class->load_phylip($phy_file);

    cmp_store(
        obj    => $ali,
        method => 'store_fasta',
        file   => 'phylip.fasta',
        test   => 'read expected .phy file (left justified)',
    );
}

# PHYLIP variant with repeated ids; must give the same FASTA reference.
{
    my $phy_file = file('test', 'phylip-rep.phy');
    my $ali      = $class->load_phylip($phy_file);

    cmp_store(
        obj    => $ali,
        method => 'store_fasta',
        file   => 'phylip.fasta',
        test   => 'read expected .phy file (with repeated ids)',
    );
}

# Idealize a nucleotide alignment at several thresholds; one reference file
# per threshold value.
{
    my @thresholds = (1, 2, 0.25, 0.5);

    for my $threshold (@thresholds) {
        my $ali_file = file('test', 'nuclideal.ali');
        my $ali = $class->load($ali_file);

        # Note: this indirectly tests apply_mask method!
        $ali->idealize($threshold);

        my $ref_file = "nuclideal_${threshold}.ali";
        cmp_store(
            obj => $ali, method => 'store',
            file => $ref_file,
            test => 'wrote expected idealized nucleotide Ali',
        );
    }
}

# Idealize a protein alignment at several thresholds; one reference file
# per threshold value.
{
    my @thresholds = (1, 2, 0.25, 0.5);

    for my $threshold (@thresholds) {
        my $ali_file = file('test', 'protideal.ali');
        my $ali = $class->load($ali_file);

        # Note: this indirectly tests apply_mask method!
        $ali->idealize($threshold);

        my $ref_file = "protideal_${threshold}.ali";
        cmp_store(
            obj => $ali, method => 'store',
            file => $ref_file,
            test => 'wrote expected idealized protein Ali',
        );
    }
}

# Idealize an AA supermatrix at several thresholds; one reference file per
# threshold value.
{
    my @thresholds = (0.1, 0.3, 0.5, 0.7, 0.9);

    for my $threshold (@thresholds) {
        my $ali_file = file('test', 'superideal.ali');
        my $ali = $class->load($ali_file);

        # Note: this indirectly tests apply_mask method!
        $ali->idealize($threshold);

        my $ref_file = "superideal_${threshold}.ali";
        cmp_store(
            obj => $ali, method => 'store',
            file => $ref_file,
            test => 'wrote expected idealized AA supermatrix Ali',
        );
    }
}

# gapify_seqs called without a char (undef) and with 'X'.
{
    for my $char (undef, 'X') {
        my $ali_file = file('test', 'gapify.ali');
        my $ali = $class->load($ali_file);

        $ali->gapify_seqs($char);

        # the reference file is named after the char, or 'gap' when undef
        my $suffix = defined $char ? $char : 'gap';
        cmp_store(
            obj => $ali, method => 'store',
            file => 'gapify_' . $suffix . '.ali',
            test => 'wrote expected gapified Ali',
        );
    }
}

# Same gapification, but requested through the store_fasta gapify option.
{
    for my $char ('*', undef) {
        my $ali_file = file('test', 'gapify.ali');
        my $ali = $class->load($ali_file);

        # a true char selects the 'gap' reference, undef the 'X' one
        my $suffix = $char ? 'gap' : 'X';
        my $fasta_opts = { clean => 1, gapify => $char };

        cmp_store(
            obj => $ali, method => 'store_fasta',
            file => 'gapify_' . $suffix . '.fasta',
            test => 'wrote expected gapified fasta through store_fasta',
            args => $fasta_opts,
        );
    }
}


# TODO: test apply_mask and apply_list here?



# Stockholm reader: filename, protein/non-protein detection and Ali output.
{
    for my $stk ( qw(cbs upsk) ) {
        my $infile = 'test/' . $stk . '.stockholm';
        my $ali = $class->load_stockholm($infile);
        is $ali->filename, $infile, 'got expected filename from stockholm file';

        # only the 'cbs' file is expected to be detected as protein
        my $is_prot = $ali->is_protein;
        my $as_expected = $stk eq 'cbs' ? $is_prot : !$is_prot;
        ok( $as_expected, "rightly detected alignment type: $infile" );

        cmp_store(
            obj => $ali, method => 'store',
            file => "$stk.ali",
            test => 'wrote expected Ali from stockholm',
        );
    }
}

# Duplicate ids must be detected, yet the Ali must still be writable.
{
    my $ali_file = file('test', 'uniq.ali');
    my $ali = $class->load($ali_file);

    ok !$ali->has_uniq_ids, 'rightly detected duplicate ids';

    cmp_store(
        obj    => $ali,
        method => 'store',
        file   => 'uniq.ali',
        test   => 'wrote expected Ali in spite of duplicate ids',
    );
}

# Unaligned seqs: detection, reported width and unchanged output.
{
    my $ali_file = file('test', 'unaligned.ali');
    my $ali = $class->load($ali_file);

    ok !$ali->is_aligned, 'rightly detected lack of alignment';
    cmp_ok $ali->width, '==', 25, 'got expected unaligned Ali width';

    cmp_store(
        obj    => $ali,
        method => 'store',
        file   => 'unaligned.ali',
        test   => 'wrote expected unaligned Ali',
    );
}

# Expected nomiss_seq_len of each seq in test/complete.ali, in file order.
my @exp_nomiss_lens = (56, 54, 48, 54, 44, 50, 48, 50, 52, 34);

# Toggling alignment guessing off and on again on the same Ali.
{
    my $ali_file = file('test', 'complete.ali');
    my $ali = $class->load($ali_file);

    # with guessing disabled, the Ali must not report itself as aligned
    $ali->dont_guess;
    ok !$ali->is_aligned, 'rightly overrided guessing of alignment';
    cmp_store(
        obj    => $ali,
        method => 'store_fasta',
        file   => 'complete.fasta',
        test   => 'wrote expected unaltered FASTA from non-guessing Ali',
    );

    # with guessing re-enabled, alignment-dependent accessors are checked
    $ali->guess;
    ok $ali->is_aligned, 'rightly detected alignment';
    cmp_ok $ali->width, '==', 56, 'got expected Ali width';

    my @got_nomiss_lens = map { $_->nomiss_seq_len } $ali->all_seqs;
    is_deeply \@got_nomiss_lens, \@exp_nomiss_lens,
        'got expected seq lengths (excluding gaps and missing chars)';
}

# temp_fasta with various options, all on the same Ali object.
{
    my $ali_file = file('test', 'AhHMA4_clustalw.ali');
    my $ali = $class->load($ali_file);

    cmp_store(
        obj    => $ali,
        method => 'temp_fasta',
        file   => 'temp_degap.fasta',
        test   => 'wrote expected temp degapped .fasta file',
        args   => { degap => 1 },
    );

    # the Ali itself must still expose its original ids afterwards
    my @ids_after = map { $_->full_id } $ali->all_seq_ids;
    is_deeply \@ids_after, \@exp_full_ids,
        'rightly restored original ids after temp file creation';

    cmp_store(
        obj    => $ali,
        method => 'temp_fasta',
        file   => 'temp_degap_nowrap.fasta',
        test   => 'wrote expected temp degapped and unwrapped.fasta file',
        args   => { degap => 1, chunk => -1 },
    );

    # no args here: earlier degapping must not have altered the Ali
    cmp_store(
        obj    => $ali,
        method => 'temp_fasta',
        file   => 'temp.fasta',
        test   => 'wrote expected temp .fasta file (no degapping side-effect)',
    );

    cmp_store(
        obj    => $ali,
        method => 'temp_fasta',
        file   => 'temp_prefix.fasta',
        test   => 'wrote expected temp .fasta file (custom std_mapper prefix)',
        args   => { id_prefix => 'myseq' },
    );
}

# TODO: check that mutators indeed return the Ali object?

# TODO: consider keeping empty seqs when parsing Ali (not that easy)
# {
#     my $infile = file('test', 'empty_seqs.ali');
#     my $ali = $class->load($infile);
#     explain $ali;
#     cmp_ok $ali->count_seqs, '==', 3;
#     ok(List::AllUtils::all { $_->seq_len == 0 } $ali->all_seqs),
#         'rightly read an Ali full of empty seqs';
# }

# uc_seqs: the object returned by the call is the one written out
{
    my $ali_file = file('test', 'seqs4uc.ali');
    my $ali = $class->load($ali_file);

    my $upper_ali = $ali->uc_seqs;

    cmp_store(
        obj    => $upper_ali,
        method => 'store',
        file   => 'uc_seqs.ali',
        test   => 'wrote expected uppercased Ali',
    );
}

# recode_seqs: A and G are mapped to A, C and T are mapped to C
{
    my $ali_file = file('test', 'uc_seqs.ali');
    my $ali = $class->load($ali_file);

    my %base_for = (
        ( map { $_ => 'A' } qw(A G) ),
        ( map { $_ => 'C' } qw(C T) ),
    );

    my $recoded_ali = $ali->recode_seqs( \%base_for );

    cmp_store(
        obj    => $recoded_ali,
        method => 'store',
        file   => 'recode_seqs.ali',
        test   => 'wrote expected recoded Ali',
    );
}

# map_coords: six input positions for one seq id and their expected mapping
{
    my $ali_file = file('test', 'map_coords.ali');
    my $ali = $class->load($ali_file);

    my $seq_id = 'GIV-Norovirus Hum.GIV.1.POL_1338688@508124125';
    my @query_coords = ( 4, 25, 73, 89, 104, 116 );
    my @exp_coords   = ( 3, 23, 59, 71, 71, 74 );

    my $got_coords = $ali->map_coords( $seq_id, [ @query_coords ] );

    is_deeply $got_coords, \@exp_coords,
        'got expected mapped coordinates';
}

# seq_len_stats: called in list context, expected to yield five numbers
{
    my $fasta_file = file('test', 'gblocks.fasta');
    my $ali = $class->load($fasta_file);

    my @exp_stats = ( 711, 874, 1013, 1062, 1076 );
    my @got_stats = $ali->seq_len_stats;

    is_deeply \@got_stats, \@exp_stats,
        'got expected seq len stats';
}

# instant_store: transform a FASTA file on the fly through a coderef that
# cuts each seq into consecutive pieces.
{
    my $chunk = 200;

    # Receives one seq object and returns FASTA text made of numbered pieces
    # (ids are "<first word of full_id>.<n>"). Pieces are requested with
    # length $chunk, except the last one, which is requested with twice that
    # length. Returns undef when the seq is shorter than $chunk.
    my $split = sub {
        my $seq = shift;

        my @id_words = split /\s+/xms, $seq->full_id;
        my $base_id  = $id_words[0];
        my $last_start = $seq->seq_len - $chunk;

        my $fasta;
        my $piece_n = 0;
        my $start   = 0;
        while ($start <= $last_start) {
            my $is_last = $start + $chunk > $last_start;
            my $len     = $is_last ? 2 * $chunk : $chunk;
            my $piece   = $seq->edit_seq($start, $len);
            $fasta .= ">$base_id.$piece_n\n" . $piece . "\n";

            $start += $chunk;
            $piece_n++;
        }

        return $fasta;
    };

    my $store_opts = {
        infile  => file('test', 'insplit.fasta'),
        coderef => $split,
    };

    cmp_store(
        obj  => $class, method => 'instant_store',
        file => 'outsplit.fasta',
        test => 'got expected instantly transformed FASTA file',
        args => $store_opts,
    );
}

# No fixed plan is declared; the test count is settled here at the end.
done_testing;
