From 2bc8fb113b4062677b4c72f55fd4de51b15f09c0 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Fri, 15 Aug 2025 12:23:36 +0200 Subject: [PATCH 1/2] nextclade: first stab at nextclade genome dataset --- nextclade_genome/Snakefile | 224 +++ .../defaults/auspice_config_genome.json | 70 + nextclade_genome/defaults/clades_genome.tsv | 52 + nextclade_genome/defaults/colors.tsv | 59 + nextclade_genome/defaults/config.yaml | 19 + nextclade_genome/defaults/dropped_strains.txt | 0 .../defaults/genome_dataset/CHANGELOG.md | 3 + .../defaults/genome_dataset/README.md | 31 + .../defaults/genome_dataset/annotation.gff3 | 8 + .../defaults/genome_dataset/pathogen.json | 64 + .../defaults/genome_dataset/reference.fasta | 164 +++ .../defaults/genome_dataset/sequences.fasta | 1250 +++++++++++++++++ nextclade_genome/defaults/include_strains.txt | 101 ++ nextclade_genome/defaults/reference_genome.gb | 220 +++ 14 files changed, 2265 insertions(+) create mode 100644 nextclade_genome/Snakefile create mode 100644 nextclade_genome/defaults/auspice_config_genome.json create mode 100644 nextclade_genome/defaults/clades_genome.tsv create mode 100644 nextclade_genome/defaults/colors.tsv create mode 100644 nextclade_genome/defaults/config.yaml create mode 100644 nextclade_genome/defaults/dropped_strains.txt create mode 100644 nextclade_genome/defaults/genome_dataset/CHANGELOG.md create mode 100644 nextclade_genome/defaults/genome_dataset/README.md create mode 100644 nextclade_genome/defaults/genome_dataset/annotation.gff3 create mode 100644 nextclade_genome/defaults/genome_dataset/pathogen.json create mode 100644 nextclade_genome/defaults/genome_dataset/reference.fasta create mode 100644 nextclade_genome/defaults/genome_dataset/sequences.fasta create mode 100644 nextclade_genome/defaults/include_strains.txt create mode 100644 nextclade_genome/defaults/reference_genome.gb diff --git a/nextclade_genome/Snakefile b/nextclade_genome/Snakefile new file mode 100644 index 0000000..8e8f9ee --- 
/dev/null +++ b/nextclade_genome/Snakefile @@ -0,0 +1,224 @@ + +configfile: os.path.join(workflow.basedir, "defaults/config.yaml") + +rule all: + input: + tree_json = "dataset/genome/tree.json" + + +rule download: + """Downloading sequences and metadata from data.nextstrain.org""" + output: + sequences = "data/sequences.fasta.zst", + metadata = "data/metadata.tsv.zst" + params: + sequences_url = "https://data.nextstrain.org/files/workflows/rubella/sequences.fasta.zst", + metadata_url = "https://data.nextstrain.org/files/workflows/rubella/metadata.tsv.zst" + shell: + """ + curl -fsSL --compressed {params.sequences_url:q} --output {output.sequences} + curl -fsSL --compressed {params.metadata_url:q} --output {output.metadata} + """ + +rule decompress: + """Decompressing sequences and metadata""" + input: + sequences = "data/sequences.fasta.zst", + metadata = "data/metadata.tsv.zst" + output: + sequences = "data/sequences.fasta", + metadata = "data/metadata.tsv" + shell: + """ + zstd -d -c {input.sequences} > {output.sequences} + zstd -d -c {input.metadata} > {output.metadata} + """ + +rule filter: + input: + sequences = "data/sequences.fasta", + metadata = "data/metadata.tsv", + exclude = lambda w: config[w.build]["filter"]["exclude"], + include = lambda w: config[w.build]["filter"]["include"] + output: + sequences = "results/{build}/pre-filtered.fasta" + params: + group_by = lambda w: config[w.build]["filter"]["group_by"], + subsample_max_sequences = lambda w: config[w.build]["filter"]["subsample_max_sequences"], + min_date = lambda w: config[w.build]["filter"]["min_date"], + min_length = lambda w: config[w.build]["filter"]["min_length"], + strain_id = lambda w: config[w.build]["strain_id_field"] + shell: + """ + augur filter \ + --sequences {input.sequences} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.strain_id} \ + --exclude {input.exclude} \ + --include {input.include} \ + --output-sequences {output.sequences} \ + --group-by {params.group_by} 
\ + --subsample-max-sequences {params.subsample_max_sequences} \ + --min-length {params.min_length} + """ +# Align and translate with Nextclade; touch.txt is only created once nextclade3 has exited successfully. +rule align: + input: + sequences = "results/{build}/pre-filtered.fasta" + output: + sequences = "results/{build}/aligned.fasta", + translations = "results/{build}/translations/touch.txt" + params: + dataset = lambda w: config[w.build]['files']['dataset_files'], + translations = "results/{build}/translations" + threads: workflow.cores + shell: + """ + nextclade3 run \ + --jobs {threads} \ + --input-ref {params.dataset}/reference.fasta \ + --input-pathogen-json {params.dataset}/pathogen.json \ + --input-annotation {params.dataset}/annotation.gff3 \ + --output-fasta {output.sequences} \ + --output-translations {params.translations}/{{cds}}.fasta \ + --silent \ + {input.sequences} && touch {output.translations} + """ + + +rule tree: + """Building tree""" + input: + alignment = "results/{build}/aligned.fasta" + output: + tree = "results/{build}/tree_raw.nwk" + shell: + """ + augur tree \ + --alignment {input.alignment} \ + --output {output.tree} + """ + +rule refine: + """ + Refining tree + - no timetree is estimated (--timetree is not passed) + - root the tree at the midpoint + - look up strains in the metadata by the configured strain id column + - write the refined tree and the branch-length node data + """ + input: + tree = "results/{build}/tree_raw.nwk", + alignment = "results/{build}/aligned.fasta", + metadata = "data/metadata.tsv" + output: + tree = "results/{build}/tree.nwk", + node_data = "results/{build}/branch_lengths.json" + params: + strain_id = lambda w: config[w.build]["strain_id_field"] + shell: + """ + augur refine \ + --tree {input.tree} \ + --alignment {input.alignment} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.strain_id} \ + --output-tree {output.tree} \ + --output-node-data {output.node_data} \ + --root mid_point + """ + +rule ancestral: + message: + """ + Reconstructing ancestral sequences and mutations + - inferring ambiguous mutations + 
""" + input: + tree="results/{build}/tree.nwk", + alignment="results/{build}/aligned.fasta", + annotation=lambda w: config[w.build]['files']['reference'], + reference= lambda w: config[w.build]['files']['dataset_files'] + '/reference.fasta', + output: + node_data= "results/{build}/muts.json", + params: + inference="joint", + translations= "results/{build}/translations/%GENE.fasta", + genes = lambda w: config[w.build]['genes'] + shell: + """ + augur ancestral \ + --tree {input.tree} \ + --alignment {input.alignment} \ + --inference {params.inference} \ + --infer-ambiguous \ + --genes {params.genes} \ + --annotation {input.annotation} \ + --translations {params.translations:q} \ + --root-sequence {input.reference} \ + --output-node-data {output.node_data} + """ + +rule clades: + input: + tree = "results/{build}/tree.nwk", + muts = "results/{build}/muts.json", + clade_defs = lambda w: config[w.build]["files"]["clades"] + output: + clades = "results/{build}/clades.json" + shell: + """ + augur clades \ + --tree {input.tree} \ + --mutations {input.muts} \ + --clades {input.clade_defs} \ + --output {output.clades} + """ + + +rule export: + """Exporting data files for auspice""" + input: + tree = "results/{build}/tree.nwk", + metadata = "data/metadata.tsv", + branch_lengths = "results/{build}/branch_lengths.json", + clades = "results/{build}/clades.json", + muts = "results/{build}/muts.json", + colors = lambda w: config[w.build]["files"]["colors"], + auspice_config = lambda w: config[w.build]["files"]["auspice_config"] + output: + auspice_json = "auspice/measles_{build}.json" + params: + strain_id = lambda w: config[w.build]["strain_id_field"], + metadata_columns = lambda w: config[w.build]["export"]["metadata_columns"] + shell: + """ + augur export v2 \ + --tree {input.tree} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.strain_id} \ + --node-data {input.branch_lengths} {input.muts} {input.clades} \ + --colors {input.colors} \ + --metadata-columns 
{params.metadata_columns} \ + --auspice-config {input.auspice_config} \ + --include-root-sequence-inline \ + --output {output.auspice_json} + """ + + +rule assemble_dataset: + """Assembling the dataset for Nextstrain""" + input: + auspice_json = "auspice/measles_{build}.json", + output: + reference = "dataset/{build}/reference.fasta", + tree_json = "dataset/{build}/tree.json" + params: + dataset = lambda w: config[w.build]['files']['dataset_files'] + threads: workflow.cores + shell: + """ + cp {params.dataset}/* dataset/{wildcards.build}/ + cp {input.auspice_json} dataset/{wildcards.build}/tree.json + """ + diff --git a/nextclade_genome/defaults/auspice_config_genome.json b/nextclade_genome/defaults/auspice_config_genome.json new file mode 100644 index 0000000..219838a --- /dev/null +++ b/nextclade_genome/defaults/auspice_config_genome.json @@ -0,0 +1,70 @@ +{ + "title": "Nextclade dataset for Rubella virus -- full genome", + "maintainers": [ + {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} + ], + "build_url": "https://github.com/nextstrain/rubella", + "extensions": { + "nextclade": { + "ref_nodes":{ + "default":"__root__", + "search":[ + { + "name": "L78917", + "displayname": "Vaccine strain L78917 (RA 27/3)", + "description": "Show mutations relative to the vaccine strain RA 27/3", + "criteria":[ + { + "node": [{"name": ["L78917"]}] + } + ] + } + ] + } + } + }, + "colorings": [ + { + "key": "gt", + "title": "Genotype", + "type": "categorical" + }, + { + "key": "clade_membership", + "title": "Genotype (Nextstrain)", + "type": "categorical" + }, + { + "key": "region", + "title": "Region", + "type": "categorical" + }, + { + "key": "country", + "title": "Country", + "type": "categorical" + }, + { + "key": "is_reference", + "title": "WHO Reference", + "type": "categorical" + } + ], + "geo_resolutions": [ + "country", + "region" + ], + "display_defaults": { + "map_triplicate": true, + "color_by": "clade_membership" + }, + "filters": [ + 
"clade_membership", + "region", + "country", + "author" + ], + "metadata_columns": [ + "author" + ] +} diff --git a/nextclade_genome/defaults/clades_genome.tsv b/nextclade_genome/defaults/clades_genome.tsv new file mode 100644 index 0000000..b97a6ed --- /dev/null +++ b/nextclade_genome/defaults/clades_genome.tsv @@ -0,0 +1,52 @@ +clade gene site alt +# 1A nuc 8490 A +# 1A nuc 8745 T +# 1A(VAX) nuc 8327 T # including this excludes JF727653 +1A nuc 8884 C +# 1A nuc 8934 T ## this is the one that hits _very_ basal, bad choice + +1B nuc 8787 T +1B nuc 8781 A + +1C nuc 8781 T +1C nuc 8865 C +1C nuc 8994 C +1C nuc 9207 T + +1D nuc 8440 C +1D nuc 9303 T + +1E nuc 8301 G +1E nuc 8667 T +1E nuc 9231 A +1E nuc 9240 C + +1F nuc 8439 T +1F nuc 9306 T +1F nuc 9546 A +1F nuc 9651 C + +1G nuc 8265 C +1G nuc 8508 T +1G nuc 8577 T + +1H nuc 8388 A +1H nuc 8442 A +1H nuc 8637 T + +1I nuc 9012 A + +1J nuc 9255 G + +2A nuc 8565 T +2A nuc 8754 G +2A nuc 9249 C + +2B nuc 9213 C +2B nuc 9333 G +2B nuc 9348 A + +2C nuc 8376 C +2C nuc 8553 A +2C nuc 9027 T +2C nuc 9623 C diff --git a/nextclade_genome/defaults/colors.tsv b/nextclade_genome/defaults/colors.tsv new file mode 100644 index 0000000..e9fa118 --- /dev/null +++ b/nextclade_genome/defaults/colors.tsv @@ -0,0 +1,59 @@ +# Regions: These fields are identical to those in phylogenetic/defaults/colors.tsv. Any changes to one should also be made to the other. +region Asia #447CCD +region Oceania #5EA9A1 +region Africa #8ABB6A +region Europe #BEBB48 +region South America #E29E39 +region North America #E2562B +# +# MeV Genotypes reported in NCBI GenBank metadata: These fields are identical to those in phylogenetic/defaults/colors.tsv. Any changes to one should also be made to the other. 
+genotype_ncbi A #5E1D9D +genotype_ncbi B1 #4B26B1 +genotype_ncbi B2 #4138C3 +genotype_ncbi B3 #3F4FCC +genotype_ncbi C1 #4065CF +genotype_ncbi C2 #447ACD +genotype_ncbi D1 #4A8BC3 +genotype_ncbi D2 #529AB6 +genotype_ncbi D3 #5BA6A6 +genotype_ncbi D4 #66AE95 +genotype_ncbi D5 #73B583 +genotype_ncbi D6 #81B973 +genotype_ncbi D7 #91BC64 +genotype_ncbi D8 #A1BE58 +genotype_ncbi D9 #B1BD4E +genotype_ncbi D10 #C0BA47 +genotype_ncbi D11 #CEB541 +genotype_ncbi E #DAAD3D +genotype_ncbi F #E19F3A +genotype_ncbi G1 #E68E36 +genotype_ncbi G2 #E67832 +genotype_ncbi G3 #E35F2D +genotype_ncbi H1 #DF4328 +genotype_ncbi H2 #DB2823 +# +# MeV Genotypes assigned by augur clades +clade_membership A #5E1D9D +clade_membership B1 #4B26B1 +clade_membership B2 #4138C3 +clade_membership B3 #3F4FCC +clade_membership C1 #4065CF +clade_membership C2 #447ACD +clade_membership D1 #4A8BC3 +clade_membership D2 #529AB6 +clade_membership D3 #5BA6A6 +clade_membership D4 #66AE95 +clade_membership D5 #73B583 +clade_membership D6 #81B973 +clade_membership D7 #91BC64 +clade_membership D8 #A1BE58 +clade_membership D9 #B1BD4E +clade_membership D10 #C0BA47 +clade_membership D11 #CEB541 +clade_membership E #DAAD3D +clade_membership F #E19F3A +clade_membership G1 #E68E36 +clade_membership G2 #E67832 +clade_membership G3 #E35F2D +clade_membership H1 #DF4328 +clade_membership H2 #DB2823 diff --git a/nextclade_genome/defaults/config.yaml b/nextclade_genome/defaults/config.yaml new file mode 100644 index 0000000..d1dc09e --- /dev/null +++ b/nextclade_genome/defaults/config.yaml @@ -0,0 +1,19 @@ +genome: + files: + dataset_files: "defaults/genome_dataset" + reference: "defaults/reference_genome.gb" + clades: "defaults/clades_genome.tsv" + auspice_config: "defaults/auspice_config_genome.json" + colors: "defaults/colors.tsv" + filter: + exclude: "defaults/dropped_strains.txt" + include: "defaults/include_strains.txt" + group_by: "region" + subsample_max_sequences: 800 + min_date: 1950 + min_length: 9000 + 
+ strain_id_field: "accession" + genes: ['NSP', 'SP'] + export: + metadata_columns: "strain division location" + diff --git a/nextclade_genome/defaults/dropped_strains.txt b/nextclade_genome/defaults/dropped_strains.txt new file mode 100644 index 0000000..e69de29 diff --git a/nextclade_genome/defaults/genome_dataset/CHANGELOG.md b/nextclade_genome/defaults/genome_dataset/CHANGELOG.md new file mode 100644 index 0000000..f14c5a7 --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/CHANGELOG.md @@ -0,0 +1,3 @@ +## Unreleased + +Initial release. diff --git a/nextclade_genome/defaults/genome_dataset/README.md b/nextclade_genome/defaults/genome_dataset/README.md new file mode 100644 index 0000000..e6448c1 --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/README.md @@ -0,0 +1,31 @@ +# Rubella dataset + +| Key | Value | +| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| name | Rubella complete genome | +| authors | [Nextstrain](https://nextstrain.org) | +| reference | NC_076948.1 | +| workflow | https://github.com/nextstrain/rubella/tree/main/nextclade_genome | +| path | `nextstrain/measles/genome/WHO-2012` | + + +## Scope of this dataset + +This dataset assigns genotypes to measles samples based on [criteria outlined by the WHO](https://www.who.int/publications/i/item/WER8709). +The WHO has defined 24 measles genotypes based on N gene and H gene sequences from 28 reference strains. For new measles samples, genotypes can be assigned based on genetic similarity to the reference strains in the "N450" region (a 450 bp region of the N gene). + +The reference tree used in this dataset includes sequences for the 28 reference strains, along with (nearly) complete genomes of other representative strains for most genotypes. 
+This dataset can be used to assign genotypes to any sequence that includes at least 400 bp of the N450 region, including whole genome sequences. +In addition, this dataset implements simple quality control metrics based on the amount of missing sequence, the number of ambiguous nucleotides, frameshifts or stop codons, and clusters of mutations relative to sequences in the reference tree. + +## Features + +This dataset supports: + +- Assignment of genotypes +- Phylogenetic placement +- Sequence quality control (QC) + +## What are Nextclade datasets + +Read more about Nextclade datasets in the Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html diff --git a/nextclade_genome/defaults/genome_dataset/annotation.gff3 b/nextclade_genome/defaults/genome_dataset/annotation.gff3 new file mode 100644 index 0000000..6af4594 --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/annotation.gff3 @@ -0,0 +1,8 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +##sequence-region NC_076948.1 1 9761 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=11041 +NC_076948.1 RefSeq region 1 9761 . + . ID=NC_076948.1:1..9761;Dbxref=taxon:11041;collection-date=2008;country=USA: North Dakota;gbkey=Src;genome=genomic;genotype=2B;mol_type=genomic RNA;nat-host=Homo sapiens;note=infection originated in India;strain=RVi/Bismarck.ND.USA/23.08/2B +NC_076948.1 RefSeq CDS 41 6391 . + 0 Name=NSP;gbkey=CDS;gene=NSP;locus_tag=QKT28_gp1;protein_id=YP_010801281.1;ID=cds-YP_010801281.1;product=non-structural polyprotein p200;Dbxref=GenBank:YP_010801281.1,GeneID:80539971 +NC_076948.1 RefSeq CDS 6511 9702 . 
+ 0 Name=SP;gene=SP;gbkey=CDS;locus_tag=QKT28_gp2;protein_id=YP_010801282.1;ID=cds-YP_010801282.1;product=structural polyprotein;Dbxref=GenBank:YP_010801282.1,GeneID:80539970 diff --git a/nextclade_genome/defaults/genome_dataset/pathogen.json b/nextclade_genome/defaults/genome_dataset/pathogen.json new file mode 100644 index 0000000..366282a --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/pathogen.json @@ -0,0 +1,64 @@ +{ + "schemaVersion": "3.0.0", + "files": { + "reference": "reference.fasta", + "pathogenJson": "pathogen.json", + "genomeAnnotation": "annotation.gff3", + "treeJson": "tree.json", + "changelog": "CHANGELOG.md", + "readme": "README.md", + "examples": "sequences.fasta" + }, + "compatibility": { + "cli": "3.0.0-alpha.0", + "web": "3.0.0-alpha.0" + }, + "shortcuts": ["rubella"], + "defaultCds": "SP", + "attributes": { + "name": "Rubella", + "reference name": "RVi/Bismarck.ND.USA/23.08/2B", + "reference accession": "NC_076948.1" + }, + "alignmentParams":{ + "retryReverseComplement": true, + "penaltyGapExtend": 1, + "penaltyGapOpen": 10, + "penaltyGapOpenInFrame": 15, + "penaltyGapOpenOutOfFrame": 17, + "excessBandwidth": 20, + "minSeedCover": 0.1, + "kmerLength": 6, + "kmerDistance": 25, + "minMatchLength": 30, + "allowedMismatches": 15, + "windowSize": 30 + }, + "qc": { + "missingData": { + "enabled": true, + "missingDataThreshold": 2000, + "scoreBias": 500, + "scoreWeight": 50 + }, + "snpClusters": { + "enabled": true, + "windowSize": 100, + "clusterCutOff": 6, + "scoreWeight": 50 + }, + "mixedSites": { + "enabled": true, + "mixedSitesThreshold": 15, + "scoreWeight": 50 + }, + "frameShifts": { + "enabled": true, + "scoreWeight": 20 + }, + "stopCodons": { + "enabled": true, + "scoreWeight": 50 + } + } +} diff --git a/nextclade_genome/defaults/genome_dataset/reference.fasta b/nextclade_genome/defaults/genome_dataset/reference.fasta new file mode 100644 index 0000000..86b2328 --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/reference.fasta @@ -0,0 
+1,164 @@ +>NC_076948.1 Rubella virus strain RVi/Bismarck.ND.USA/23.08/2B, complete genome +CAATGGGAGCTAACGGACCTCGCTTAGGACTCCTATTCCCATGGAGAAACTCCTAGATGA +GGTTCTTGCCCCCGGTGGGCCTTATAACTTAACCGTCGGCAGTTGGGTAAGAGACCATGT +CCGCTCAATTGTCGAGGGTGCGTGGGAAGTGCGCGATGTTGTTACCGCTGCCCAAAAGCG +GGCCATCGTAGCTGTGATACCCAGACCTGTTTTCACGCAGATGCAGGTTAGTGATCACCC +AGCACTCCACGCAATTTCGCGGTATACCCGCCGCCATTGGATCGAGTGGGGCCCTAAAGA +AGCCCTACACGTCCTCATCGACCCCAGCCCGGGCCTGCTCCGCGAGGTCGCTCGCGTCGA +GCGCCGCTGGGTCGCATTGTGCCTCCACAGGACGGCACGCAAACTCGCCACCGCCCTGGC +CGAGACGGCCGGTGAGGCGTGGCACGCCGACTACGTGTGCGCGCTGCGTGGCGCACCGAG +CGGCCCCTTTTACGTCCACCCAGAGGACGTCCCGCACGGCGGTCGCGCCGTGGCGGACAG +ATGCTTGCTTTACTACACACCCATGCAGATGTGCGAGCTGATGCGCACCATCGACGCCAC +CCTGCTTGTGGCGGTTGACTTGTGGCCGGTCGCCCTTGCGGCGCACGTCGGTGACGACTG +GGACGACCTGGGCATCGCCTGGCACCTCGACCACGACGGGGGTTGCCCCGCCGACTGCCG +TGGCGCCGGCGCTGGGCCCACGCCCGGCTACACCCGCCCCTGCACTACGCGCATCTACCA +GGTCCTGCCGGACACCGCCCACCCCGGGCGCCTCTACCGGTGCGGGCCCCGCCTGTGGAC +ACGCGATTGCGCCGTAGCCGAACTTTCATGGGAGGTCGCCCAGCATTGCGGGCACCAGGC +GCGTATTCGCGCCGTGCGATGCGCCCTCGCCATTCGCCACGTGCGCAGCCTCCAACCCAG +CGCGCGCGTCCGGCTCCCGGACCTTGTCCATCTCGCCGAGGTGGGCCGGTGGCGGTGGTT +CAGCCTCCCCCGCCCCGTCTTTCAGCGTATGCTGTCCTATTGCAAGACCCTGAGCCCGGA +CGCGTACTACAGCGAGCGCGTGTTCAAGTTCAAGAACGCCCTGAGTCACAGCATCACGCT +CGCGGGCAACGTGCTGCAGGAGGGGTGGAAGGGCACGTGCGCCGAGGAGGACGCGTTGTG +TGCGTACGTGGCCTTCCGCGCGTGGCAGTCCAACGCGAGGCTGGCCGGGATCATGAAAGG +CGCGAAGCGCTGCGCCGCCGACTCCTTGAGCGTGGCCGGCTGGCTTGACACCATCTGGGA +CGCCATCAAGCGGTTCTTCGGCAGCGTGCCCCTCGCCGAGCGCATGGAGGAGTGGGAGCA +GGACGCCGCGGTCGCGGCCTTTGATCGCGGCCCCCTTGAGGACGGCGGGCACCACTTGGT +AACCGTGCAACCTCCCAAGCCGCCGCCCCGCCCCGAGATCGCCGCAACGTGGATTGTCCA +CGCCGCGAGCGCAGACCGCCACTGTGCGTGCGCCCCCCGCTGCGACGTCCCACGCGAACG +CCCCTCTGCGCCTCCGGGCCCGCCAGATGACGAGGCGCTCATCCCGCCGTGGCTGTTTGC +CGAGCACCGCGCCCTTCGTTGCCGCGAGTGGGATTTCGAGGCCCTTCGCGCGCGCGCCGA +CACGGCGGCCACGCCCGCCCCACTAGCCCCACGCCCCGCGCGCCACCCCACCGTGCTCTA +CCGCCACCCCGCCCACCACGGCCCGTGGCTCACTCTTGACGAGCCCGGCGAGGCTGACGC 
+GGCCCTGGTCTTATGCGACCCACTTGGCCAGCCGCTCCGGGGCCCGGAGCGCCATTTCAC +CGCCGGCGCGCACATGTGCGCGCAGGCGCGGGGGCTCCAGGCCTTTGTCCGCGTCGTGCC +CCCGCCCGAGCGCCCCTGGGCTGACGGGGGCGCTAGAACGTGGGCGAAGTTCTTCCGCAG +CTGCGCCTGGGCACAGCGCTTGCTCGGCGAGCCGGCAGTCATGCACCTCCCGTACACCGA +CGGCGACGTGCCGCAGCTGATTGCGCTGGCCCTGCGCACGCTGGCCCAACAGGGGGCCGC +CTTGGCACTCTCGGTGCGTGACCTGCCCGGGGGCGCGGCGTTCGACGCCAACGCGGTCAC +CGCCGCCGTGCGCGCGGGCTCCAGTCAGCCCGCGCCGGCCGCACCCCCCCCGGACAACTC +CTCGCCACCACGCCGTGCACGGCGGTCGCGGCGACCCCCGGACACCCGCGGCCCTCCGCC +CTCCGCGCCCGCCCGCGACCCGCCGCCGCCCGCCCCCAGCCCGCCCACGCCACCCCGCGC +GCGCGGCCCGGCCCCAGCCACCCTAGAGGAGCCCGCGGATCGCGCGCGTGACGCCGAGCA +GGAGGTTGCCTGCGACCCGAGCGGCCCCGCCGCGCCAACCAGGGCGGACCCAAACAGCGA +CATCGTCGAAAGCTACGCCCGCGCCGCCGGTCCTGTGCACCTTCGCGTCCGCGACATCAT +GGACCCCCCACCTGGTTGCAAGGTCGTGGTCAATGCCGCGAACGAGGGGCTGCTCGCCGG +ATCTGGCGTCTGCGGTGCCATCTTCGCCAAAGCCGCCGCGGCCCTCGCCGAGGACTGCCG +GCGCCTCGCCCCATGCCCCACCGGCGAGGCGGTGGCGACGCCCGGCCACGGCTGCGGGTA +TGCGCACATCATCCACGCGGTCGCTCCTCGGCGCCCTCCGGACCCCGCCGCCCTCGAGCA +GCGCGAAGCCCTGCTCGAACGCGCCTACCGCAGCATCGTCGCGCTGGCCGCCGCGCGGGG +GTGGGCGTGTGTTGCCTGCCCCCTCCTCGGCGCTGGCGTCTACGGCTGGTCTGCCGCGGA +GTCCCTTCGAGCCGCGCTCGCGGCCACGCGCGCCGAGCCCGCCGAGCGCGTGAGCCTGCA +TATATGCCACCCCGACCGCGCCACGCTGACGCACGCCTCCGTGCTCGTGGGCGCGGGGCT +CGCTGCCAGGCGCGTCAGCCCCCCTCCGACCGAGCCCCTCGCACCCCGCCCCGTCGGCGA +CCCGGGCCGATCGACCCAGCGCAGCGCGTCACCCCCGGCCACCCCCTCTGGGGAGGCCAC +CGCGCCTGAGTCGCGCGGGTGCCAGGGGTGCGAACTCTGCCGGTACACGCGCGTCACCAA +TGACCGCGCGTATGTCAACCTCTGGCTCGAGCGCGACCGCGGCGCCACCGGTTGGGCGAT +GCGCATCCCCGAAGTGGTCGTCTACGGGCCGGAACACCTCGCCGCGCACTTTCCATTAAA +CCACTACAGTGTGCTCAAGCCCGCGGAGGTGAGGCCCCCGCGGGGCATGTGCGGGAGTGA +CATGTGGCGCTGCCGCGGCTGGCAGGGCATGCCGCAGGTGCGGTGCACCCCCTCCAATGC +TCACGCCGCCCTGTGCCGCACAGGCGTGCCCCCTCGAGTGAGCACGCGAGGCGATGAGCG +AGACCCTAACACCTGCTGGCTCCGCGCCGCCGCCAACGTTGCGCAGGCTGCGCGCGCCTG +CGGCGCCTACACGAGTGCCGGGTGCCCCCGGTGCGCCTACGGCCGCGCTCTCAGCGAAGC +CCGCACCCACGAGGACTTCGCCGCCCTGAGCCAGCGGTGGAGCGCAAGCCACGCCGACGC +CTCCCCTGACGGCACCGGAGACCCCCTCGACCCCCTGATGGAGACCGTGGGCTGCGCCTG 
+TTCGCGCGTATGGGTCGGCTCCGAGCACGAGGCCCCCCCCGACCACCTCCTGGTGTCCCT +CCACCGTGCGCCCAACGGTCCGTGGGGCGTGGTGCTCGAAGTGCGCGCGCGCCCCGAGGG +GGGTAACCCCACCGGCCACTTTGTTTGCGCGGTCGGTGGCGGCCCACGCCGTGTTTCGGA +CCGCCCCCACCTTTGGCTCGCGGTTCCACTCTCCCGGGGCGGAGGCACTTGCGCCGCGAC +CGACGAGGGGCTGGCCCAGGCGTATTACGACGACCTCGAGGTGCGCCGCCTCGGGGACGA +CGCCATGGCTCGGGCGGCCCTCGCCGCGGTCCAACGCCCCCGCAAGGGCCCCTACAACAT +CATGGTATGGAACATGGCCGCGGGCGCTGGCAAGACCACTCGCATCCTCGCCGCCTTCAC +GCGCGAAGACCTGTACGTCTGCCCCACCAACGCGCTCCTGCACGAGATCCAGGCCAAACT +CCGCGCGCGCGACATCGACATCAAGAACGCCGCCACCTACGAGCGCGCGCTGACGAAACC +GCTTGCCGCCTACCGCCGTATCTACATCGACGAGGCGTTCACCCTCGGCGGCGAGTATTG +CGCGTTCGTTGCCAGCCAAACTACCGCGGAGGTGATCTGCGTCGGCGATCGGGACCAGTG +CGGCCCGCACTACGCCAACAACTGCCGCACCCCCGTCCCTGACCGCTGGCCCACCGAGCG +CTCACGCCACACTTGGCGTTTCCCCGATTGCTGGGCGGCTCGTTTGCGCGCGGGGCTCGA +CTACGACGTCGAGGGCGAGCACGCCGGCACTTTTGCCTGCAACCTCTGGGACGGCCGCCA +GGTCGACCTCCACCTCGCCTTCTCGCGCGAGACCGTGCGCCGCCTTCACGAGGCTGGCAT +ACGCGCATACACCGTGCGCGAGGCCCAGGGCATGAGCGTTGGCACCGCCTGCATCCACGT +CGGTCGGGACGGCACGGACGTGGCCCTGGCTCTGACACGCGACCTCGCCATCGTCAGCCT +GACCCGGGCCTCCGACGCCCTCTACCTCCACGAGCTCGAGGACGGCTCACTGCGCGCTGC +GGGGCTCAGCGCATTTCTCGACGCCGGGGCACTGGCGGAGCTCAAGGAGGTTCCCGCCGG +CATCGATCGCGTCGTTGCCGTCGAGCAGGCGCCGCCACCATTACCGCCCGCCGACGGCAT +CCCCGAGGCCCAGGACGTGCCGCCCTTTTGCCCCCGCACCTTGGAGGAGCTCGTCTTCGG +CCGCGCCGGCCACCCCCATTACGCGGACCTCAACCGCGTAACCGAGGGCGAGCGAGAGGT +GCGGTACATGCGCATCTCGCGTCACCTGCTCAACAAGAACCACACTGAAATGCCCGGGAC +GGAACGCGTTCTGAGCGCCGTTTGCGCTGTGCGGCGCTACCGCGCGGGTGAGGACGGGTC +GACCCTGCGCACCGCCGTGGCCCGCCAGCACCCGCGCCCCTTCCGCCAGATTCCACCCCC +GCGCGTCACTGCTGGAGTCGCTCAAGAGTGGCGCCTGACGTACCTGCGGGAGCGAATCGA +CCTCACCGACGTCTATACACAGATGGGCGTGGCCGCGCGAGAGCTCACCGACCGCTACGC +GCGCCGCTACCCCGAGATTTTTGCCGGCATGTGCACCGCGCAGAGCCTGAGCGTCCCCGC +CTTCCTCAAAGCCACCTTGAAGTGCGTGGACGCCGCCCTCGGCCCCAGGGACACCGAAGA +CTGCCATGCCGCCCAGGGGAAAGCCGGCCTCGAGATCCGCGCATGGGCCAAAGAGTGGGT +TCAGGTCATGTCCCCGCACTTCCGCGCCATCCAGAAGATCATTATGCGCGCCTTGCGCCC +GCAATTTCTCGTGGCCGCCGGGCACACGGAGCCCGAGGTTGACGCGTGGTGGCAGGCTCA 
+CTACACCACCAATGCCATCGAGGTCGATTTCACCGAGTTCGACATGAATCAGACCCTTGC +CACTCGGGACGTCGAGCTCGAGATCAGCGCCGCTCTCCTAGGCCTCCCCTGCGCCGAAGA +CTACCGCGCGCTCCGCGCCGGCAGCTACTGCACCCTGCGCGAACTGGGCATTACTGAGAC +CGGCTGCGAGCGCACAAGCGGCGAGCCCGCCACGCTACTGCACAACACCACCGTGGCCAT +GTGCATGGCTATGCGCATGGTCCCGAAGGGCGTGCGCTGGGCCGGGATTTTCCAGGGTGA +CGACATGGTCATCTTCCTCCCTGAGGGCGCACGCGGCGCGGCACTCAAGTGGACCCCCTC +TGAGGTCGGCTTGTTTGGCTTCCACATCCCGGTGAAGCACGTGAGCACCCCAACCCCCAG +CTTCTGCGGACATGTCGGCACCGCGGCCGGCCTCTTCCACGATGTCATGCACCAGGCAAT +CAAGGTGCTCTGCCGCCGCTTTGACCCCGACGTGCTCGAGGAACAGCAGGTAGCCCTCCT +TGACCGCCTCCGGGGAGTCTACGCGGCCCTGCCCGACACCGTTGCTGCCAACGCCGCGTA +CTACGACTACAGCGCGGAGCGCGTTCTCGCGATCGTGCGCGAACTCACCGCATACGCGCG +GGGGCGCGGCCTTGATCATCCGGCCACCATCGGCGCGCTTGAGGAAATCCAGACCCCCTA +CGCGCGCGCCAACCTCCACGACGCCGACTAGCGCCCGTGCACGTAGGGCCTTTAATCTTA +CTACTCTAACCAGGTCATCACCCACCGTTGTCTCGCCGCATCTGGTGGGTACTTCACTCC +TGCCATTTGGGAGAGCCCCAGGGTGCCCAAATGGCTTCCACTACCCCCATCACTATGGAG +GACCTCCAGAAGGCCCTCGAGGCACAATCCCGCGCCCTCCGCGCGGAGCTCGCCGCCGGC +GCCTTGCAGTCACGCCGGCCGCGGCCACAGCGACAGCGCGACTCCAGCACCTCCGGAGAC +GACTCTGGCCGCGACTCAGGAGGGCCTCGCCGCCGCCGCGGCAACCGGGGCCGCGGCCAG +CGTCAAGACTGGTCCAAGGCCCCCCCCCCCCCCGAGGAGCGGCAGGAGAGCCGCTCCCAG +ACTCCGGCCCCTAAGCCGCCGCGGGCGCCGCCACAGGTGCCCCAACCCCCGCGCATGCAA +ACCGGGCGCGGAGGCACTGCCCCGCGCCCCGAGCTGGGGCCGCCGACTAACCCATTCCAG +GCGGCCGTCGCGCGTGGCCTGCGCCCGCCCCTCCATGACCCCGACACCGAGGCACCCACT +GAGGCCTGTGTGACCTCATGGCTCTGGAGCGAGGGCGAGGGCGCCGTCTTCTACCGCGTC +GATCTGCACTTCACCAACTTGGGCACCCCCCCACTCGACGAGGACGGCCGCTGGGACCCG +GCGCTCATGTACAACCCCTGCGGGCCCGAGCCGCCCGCTCACGTCGTCCGTGCGTACAAC +CAACCTGCCGGCGACGTCAGGGGCATCTGGGGCAAGGGCGAGCGCACCTACGCCGAGCAG +GATTTCCGCGTCGGTGGCACGCGCTGGCACCGACTGTTGCGCATGCCAGTGCGCGGCCTC +GACGGCGACAGCGCCCCGCTCCCCCCCCATACTACCGAGCGTATCGAGACCCGCTCGGCG +CGCCATCCTTGGCGCATCCGCTTCGGTGCCCCTCAGGTCTTCCTCGCCGGGCTCTTGCTC +GCGGCCGTTGCCGTCGGCACCGCGCGCGCCGGGCTTCAGCCCCGCGCTGACATAGCGGCG +CCTCCCGCGCCGCCGCAGGCCCCCCGCGGGCACGGGAAGCACTACGGTCATCACCACCAT +CAATTGCCGTTTCTTGGGCACGACGGCCACCACGGGGGCACCTTGCGCGTCGGTGAGCAC 
+CACCGAAACGCCAGCGACGTGCTGCCCGGCCACTGGCTCCAAGGCGGGTGGGGTTGTTAC +AACCTGAGCGACTGGCATCAGGGCACTCACGTCTGTCACACTAAGCACATGGACTTCTGG +TGCGTGGAGCACGACCGGCCACCGCCCGCCACCCCGACGCCTCCCACCACCGCGGCGAAC +TCCACGACTGCCGCCACCCCCGCTACTACGCCGGCACCTTGCCACGCCGGCCTCAATGAC +AGCTGCGGCGGCTTCCTGTCCGGGTGCGGGCCGATGCGACTGCGCCATGGCGCGGACACC +CGGTGCGGTCGGTTAATCTGTGGGCTGTCCACCACTGCCCAGTACCCGCCCACCCGATTC +GGCTGCGCCATGCGGTGGGGCCTTCCCCCTTGGGAACTGGTCGTCCTTACGGCCCGTCCC +GAGGACGGCTGGACTTGCCGCGGCGTGCCCGCCCACCCAGGTACCCGCTGCCCAGAACTG +GTGAGCCCCATGGGGCGCGCGACTTGCTCCCCAGCTTCGGCCCTCTGGCTTGCCACAGCC +AATGCGCTGTCCCTTGACCATGCGCTCGCGGCCGTTGTCCTGTTGGTCCCGTGGGTTTTG +ATACTCATGGTATGCCGCCGCGCCTGCCGCCGCCGCGGCGCCGCCGCTGCCCTCACCGCG +GTCGTCCTGCAGGGGTACACCCCCCCCGCCTACGGCGAGGAGGCTTTCACCTACCTCTGC +ACTGCACCGGGGTGTGCCACTCAAACACCCGTCCCTGTGCGCCTCGCCGGCGTCCGCTTT +GAGTCCAAGATTGTGGACGGTGGCTGCTTTGCCCCATGGGACCTCGAGGCCACCGGAGCC +TGCATCTGCGAGATCCCCACCGATGTCTCGTGCGAGGGCCTGGGGGCCTGGGTCCCCGCA +GCCCCGTGCGCGCGCATCTGGAACGGCACGCAGCGCGCATGCACCTTCTGGGCTGTCAAC +GCCTACTCGTCCGGCGGGTACGCGCAGCTGGCGTCCTATTTCAACCCCGGCGGCAGCTAC +TACAAGCAATACCACCCCACCGCGTGCGACGTTGAACCTGCCTTCGGGCACAGCGACGCG +GCCTGCTGGGGCTTCCCCACCGACACCGTGATGAGTGTGTTCGCCCTCGCCAGCTACGTC +CAGCACCCCGACAAGACCGTCAGGGTCAAGTTCCACACAGAAACCAGGACCGTCTGGCAG +CTCTCCGTAGCCGGCGTGTCGTGCAACGTCACGACCGAACATCCGTTCTGTAACACGCCG +CACGGACAACTCGAGGTCCAGGTCCCGCCCGACCCCGGCGACCTGGTTGAGTACATTATG +AATTACACCGGCAATCAACAGTCCCGGTGGGGCCTCGGGAGCCCGAACTGCCACGGCCCC +GACTGGGCCTCCCCGGTTTGCCAGCGCCACTCTCCCGACTGTTCGCGGCTCGTGGGGGCC +ACGCCAGAGCGCCCCCGGCTGCGCCTCGTCGATGCCGACGACCCCCTTCTGCGCACCGCC +CCGGGGCCGGGCGAGGTGTGGGTCACGCCTGTCATAGGCTCTCAGGCGCGCAAGTGCGGA +CTCCACATACGCGCCGGACCGTACGGCCACGCCACCGTCGAAATGCCTGAGTGGATCCAC +GCCCACACTACCAGCGACCCCTGGCACCCGCCCGGCCCCTTGGGACTCAAGTTCAAGACA +GTCCGCCCAGTGGTCCTACCGCGCGCGTTAGCGCCCCCTCGCAACGTGCGCGTAACCGGC +TGCTACCAGTGTGGGACCCCCGCGCTGGTGGAGGGCCTTGCCCCAGGAGGAGGGAACTGC +CATCTCACCATCAACGGCGAGGACGCCGGCGCCTTTCCCCCTGGGAAGTTCGTCACCGCC +GCCCTCCTCAACACCCCCCCGCCCTACCAAGTGAGTTGCGGGGGTGAGAGCGACCGCGCG 
+AGCGCGCGGGTCATTGACCCCGCCGCGCAGTCGTTCACCGGCGTGGTGTATGGCACACAC +ACCACCGCTGTATCGGAGACCCGGCAGACCTGGGCGGAGTGGGCTGCTGCTCACTGGTGG +CAGCTCACTCTGGGCGTTATTTGCGCCCTCCTGTTTGCTGGCCTACTTGCTTGCTGTGCC +AAATGCTTATACTACTTGCGCGGCGCTATAGCGCCGCGCTAGTGGGCCCCCGCGCGAAAC +CCGCATTAGCCCACTAGATTTCCGCACCCGTTGCTGCATAG diff --git a/nextclade_genome/defaults/genome_dataset/sequences.fasta b/nextclade_genome/defaults/genome_dataset/sequences.fasta new file mode 100644 index 0000000..338ffe8 --- /dev/null +++ b/nextclade_genome/defaults/genome_dataset/sequences.fasta @@ -0,0 +1,1250 @@ +>U01990 +ATGGCCACACTTTTAAGGAGCTTAGCATTGTTCAAAAGAAACAAGGACAAACCACCCATT +ACATCAGGATCCGGTGGAGCCATCAGAGGAATCAAACACATTATTATAGTACCAATCCCT +GGAGATTCCTCAATTACCACTCGATCCAGACTTCTGGACCGGTTGGTCAGGTTAATTGGA +AACCCGGATGTAAGCGGGCCCAAATTAACAGGAGCACTAATAGGTATATTATCCTTATTT +GTGGAGTCTCCAGGTCAATTGATTCAGAGGATCACCGATGACCCTGACGTTAGCATAAGG +CTGTTGGAGGTTGTCCAGAGCGACCAGTCACAATCTGGCCTTACCTTTGCATCAAGAGGT +ACCAACATGGAGGATGAGGCGGACCAATACTTTTTACATGATGATTCAAGTAGTGGTGAT +CAATCCAGGTCTGGATGGTTCGAGAACAAGGAAATCTCAGATATTGAAGTGCAAGACCCT +GAGGGATTCAACATGATTCTGGGTACCATCCTAGCTCAAATTTGGGTCTTGCTCGCAAAG +GCGGTTACGGCCCCAGACACGGCAGCTGATTCGGAGCTAAGAAGGTGGATAAAGTACACC +CAACAAAGAAGGGTAGTTGGTGAATTTAGATTGGAGAGAAAATGGTTGGATGTGGTGAGG +AACAGGATTGCCGAAGACCTCTCCTTACGCCGGTTCATGGTCGCTCTAATCCTGGATATC +AAGAGAACACCCGGGAACAAACCCAGGATTGCTGAAATGATATGTGACATTGATACATAT +ATCGTAGAGGCAGGATTAGCCAGTTTTATCCTTACTATTAAGTTCGGGATAGAAACCATG +TATCCTGCTCTTGGACTGCATGAATTTGCTGGTGAGTTATCCACACTTGAGTCCTTGATG +AATCTTTACCAACAAATGGGGGAAACTGCACCCTACATGGTAATTCTAGAGAACTCAATT +CAGAACAAGTTCAGTGCAGGATCATACCCTCTGCTCTGGAGCTATGCCATGGGAGTAGGA +GTGGAACTTGAAAACTCCATGGGAGGTTTGAACTTTGGCCGATCTTACTTTGATCCAGCA +TATTTTAGATTAGGGCAAGAGATGGTTAGGAGATCAGCTGGAAAGGTCAGTTCCACATTG +GCATCTGAACTTGGTATCACGGCCGAGGATGCGAGGCTTGTTTCAGAGATTGCAATGCAT +ACCACTGAGGACAGGATCAGCAGAGCGGTTGGACCCAGGCAGGCCCAAGTGTCATTTCTA +CACGGTGATCAAAGTGAGAACGAGCTACCGGGATTGGGGGGGAAGGAGGACAGGAGGGTC +AAACAGAGTCGAGGAGAAACCAGGGAGAGCTACAGAGAAACCGGGCCCAGCAGAGCAGGT 
+GATGCGAGAGCTGCCCACCTTCCAACCGGCACACTCCTAGACATTGACACTGCATCGGAG +TCCAGCCAAGACCCACAGGACAGTCGAAGGTCGGCTGACGCCCTGCTCAGGCTGCAAGCC +ATGGCAGGAATCTCGGAAGAACAAGGCTCAGACATGGACACCCTTAGAGTGTACAATGAC +AGAGATCTTCTAGACTAGGTGCGAGAGGCCGCCGCGG +>PP620722 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAAGCT +CAAGTGTCGTTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAG +GAAGATAGGAGGGTCAGGCGGAGTCGGGGAGAAGCCAGGGAGAGTAGTGGAGAAATCGGG +TCCAGCAGATTGAGTGACGCGAGAGCTGCCCATCTTCCGACCAGCACACCCCTGGACATT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAGGCTATGGCAGGAATCCTGGAAGAACAAGGCTCAGACACGGACACCCCC +AGGGTGTACAATGACAGAGATCTCCTAGAC +>X16569 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNAGGGACAAGAGCAGGATTAGGGATATCCGAGATGGCCACACTT +TTAAGGAGCTTAGCATTGTTCAAAAGAAACAAGGACAAACCACCCATTACATCAGGATCC +GGTGGAGCCATCAGAGGAATCAAACACATTATTATAGTACCAATCCCTGGAGATTCCTCA +ATTACCACTCGATCCAGACTTCTGGACCGGTTGGTCAGGTTAATTGGAAACCCGGATGTG +AGCGGGCCCAAACTAACAGGGGCACTAATAGGTATACTATCCCTATTTGTGGAGTCTCCA +GGTCAATTGATTCAAAGGATCACCGATGACCCTGACGTTAGCATAAGGCTGTTAGAGGTT +GTCCAGAGTGACCAGTCACAATCTGGCCTTACCTTCGCATCAAGAGGTACCAACATGGAG +GATGAGGCGGACCAATACTTTTCACATGATGATCCAAGTAGTAGTGATCAATCCAGGTTC +GGATGGTTCGAGAACAAGGAAATCTCAGATATTGAAGTGCAAGACCCTGAGGGATTCAAC +ATGATTCTGGGTACCATCCTAGCCCAAATTTGGGTCTTGCTCGCCAAGGCGGTTACGGCC +CCAGACACGGCAGCTGACTCGGAGCTAAGAAGGTGGATAAAATACACCCAACAAAGAAGG +GTAGTTGGTGAATTTAGATTGGAGAGAAAATGGTTGGATGTGGTGAGGAACAGGATTGCC +GAGGACCTCTCCTTACGCCGATTCATGGTCGCCCTAATCCTGGATATCAAGAGAACACCC +GGGAACAAACCCAGGATTGCTGAAATGATATGTGACATTGATACATATATCGTAGAGGCA +GGATTAGCCAGTTTTATCCTGACTATTAAGTTTGGGATAGAAACTATGTATCCTGCTCTT +GGACTGCATGAATTTGCTGGTGAGTTATCCACACTTGAGTCCTTGATGAATCTTTACCAG +CAAATGGGAGAAACTGCACCCTACATGGTAATCCTGGAGAACTCAATTCAGAACAAGTTC +AGTGCAGGATCATACCCTCTGCTCTGGAGCTATGCCATGGGAGTAGGAGTGGAACTTGAA +AACTCCATGGGAGGTTTGAACTTTGGCCGATCTTACTTTGATCCAGCATACTTTAGATTA 
+GGGCAAGAGATGGTAAGGAGGTCAGCTGGAAAGGTCAGTTCCACATTGGCATCTGAACTC +GGTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGATTGCAATGCACACTACTGAGGAC +AGGATCAGCAGAGCGGTTGGACCCAGACAAGCCCAAGTGTCATTTCTACACGGTGATCAA +AGTGAGAATGAGCTCCCAAGATTGGGGGGCAAGGAAGATAGGAGGGCCAAACAGAGTCGA +GGAGAAGCCAGGGAGATCTACAGAGAAACCGGGCCCAGCAGAGCAAGTGATGCGAGAGCT +GCCCATCTTCCAACCGGCACACCCCTAGACATTGACACTGCATCGGAGTCCAGCCAAGAT +CAGCAGGACAGTCAAAGGTCAGCTGACGCCCTACTCAGGCTGCAAGCCATGGCAGGAATC +TTGGAAGAACAAGGCTCAGACACGGACACCCCTAGAGTGTACAATGACAGAGATCTTCTA +GACTAGGTGCGAGAGGCCGAGGACCAGAACAACATCCGCCTACCCTCCATCATTGTTATA +AAAAACTTAGGAACCAGGTCCACACAGCCGCCAGCCCATCAACCATCCACTCCCACGATT +GGAGCCGATGGCAGAAGAGCAGGCACGCCATGTCAAAAACGGACTGGAATGCATCCGGGC +TCTCAAGGCCGAGCCCATCGGCTCACTGGCCATCGAGGAAGCTATGGCAGCATGGTCAGA +AATATCAGACAACCCAGGACAGGAGCGAGCCACCTGCAAGGAAGAGAAGGCAGGCAGTTC +GGGTCTCAGCAAACCATGCCTCTCAGCAATTGGATCAACTGAAGGCGGTGCACCTCGCAT +CCGCGGTCAGGGATCTGGAGAGAGCGATGACGACGCTGAAACTTTGGGAATCCCCTCAAG +AGATCTCCAGGCATCAAGCACTGGGTTACAGTGTTATTATGTTTATGATCACAGCGGTGA +AGCGGTTAAGGGAGTCCAAGATGCTGACTCTATCATGGTTCAATCAGGCCTTGATGGTGA +TAGCACCCTCTCAGGAGGAGACAATGAATCTGAAAACAGCGATGTGGATATTGGCGAACC +TGATACCGAGGGATATGCTATCACTGACCGGGGATCTGCTCCCATCTCTATGGGGTTCAG +GGCTTCTGATGTTGAAACTGCAGAAGGAGGGGAGATCCACGAGCTCCTGAGACTCCAATC +CAGAGGCAACAACTTTCCGAAGCTTGGGAAAACTCTCAATGTTCCTCCGCCTCCGGACCC +CGGTAGGGCCAGCACTTCCGAGACACCCATTAAAAAGGGCACAGACGCGAGATTAGCCTC +ATTTGGAACGGGGATCGCGTCTTTATTGACAGGTGGTGCAACCCAATGTGCTCGAAAGTC +ACCCTCGGAACCATCAGGGCCAGGTGCACCTGCGGGGAATGTCCCCGAGTGTGCGAGCAA +TGCCGCACTGATACAGGAGTGGACACCCGAATCTGGTACCACAATCTCCCCGAGATCCCA +GAATAATGAAAAAGGGGGAGACTATTATGATGATGAGCTGTTCTCTGATGTCCAAGATAT +TAAAACAGCCTTGGCCAAAATACACGAGGATAATCAGAAGATAATCTCCAAGCTAGAATC +ACTGCTGTTATTGAAGGGAGAAGTTGAGTCAATTAAGAAGCAGATCAACAGGCAAAATAT +CAGCATATCCACCCTGGAAGGACACCTATCAAGCATCATGATCGCCATTCCTGGACTTGG +GAAGGATCCCAACGACCTCACTGCAGATGTCGAAATCAATCCCGACTTGAAACCCATCAT +AGGCAGAGATTCAGGTCGAGCACTGGCCGAAGTTCTCAAGAAACCCGCTGCCAGCCGACA +ACTCCAAGGAATGACAAATGGACGGACCAGTTCCAGAGGACAGCTGCTGAAGGAATTTCA 
+GCTAAAGCCGATCGGGAAAAAGATGAGCTCAGCCGTCGGGTTTGTTCCTGACACCGGCCC +TGCATCACGCAGTGTAATCCGCTCCATCATAAAATCTAGCCGGCTAGAGGAGGATCGGAA +ACGTTACCTGATGACTCTCCTTGATGATATCAAAGGAGCCAACGATCTTGCCAAGTTCCA +CCAGATGCTGATGAAGATAATAATGAAGTAGCTACAGCTCAACTCACCTGCCAACCCCAT +GCCAGTCGACTCAACTAGTACAACCTAAATCCATTATAAAAACTTAGGAGCAAAGTGATT +GCCTCCCAAGTTCCACAACGACAGAGATCCACGACCTCGACAAGTCGGCACGGGACATCA +AAGGGTCGATCGCCCCGACACAACCAACCACCCACAGCGACGGCAGGCTGGTGCCCCAGG +CCAGGGCCACAGATCCCGGTCTGGGCGACAGGAAGGACGAACGCCCCATGCACACGCCTC +CGCCGGGGGCCGCCGAGGACAGCGACCCCCCAGGACCCCCAATCGGGCGAGCACCCGGGT +CCCCGCCCCCAGGTGCCGGCAGACCCACAGCAAAACCCGAAGAACTCCCCAAAGAGGCCA +CCGAGCCCGACACAGTCGCCAGACGCACAGCAGGGCCCAACGAAAAACCGGTGCTCCACA +ACAACACCCCACCAACCCTCCCCACACCCCGGAGAAAGGCCCCAACAACAGGGAGCGTCC +CCAACGCAAACCAAGTCTGCAATGCGGCCAATCTGACACCGCTGGATACCCCGCAGAGGC +TCCGTGCCGTTTATATGAGCATCACCCGTCTTTCGGATAACGGGTACTACACCGTCCCTA +GAAGAATGCTGGAGTTCAGATCGGTCAATGCAGTGGCCCTCAACCTGCTGGCGACCCTCA +GGACCGACAAGGCGATTGGCCCTGGGAAGACCACAGACAACGCAGAGCAACCTCCTGAGG +CAACATTTCTGGTCCACATCGGGAACCCCAGGAGAAAGAAGAGTGAAGTCCACTCTGCCG +ATCATTGCAAAATGAAAATCGAAAAGATGGGCCCGGTTTCCGCACCCGGCGGGATAGGGG +GCACCAGTCTTCACATCAGAAGCACAGGCAAAACGAGCAAGACCCTCCATGCACAACTCG +GGCTCAAGAAGACCCCATGTTACCCACCGATGGACATCAACGAAGACCTCAATCGATCAC +TCTGGAGGAGCAGACGCAAGACAGCAAGAATCCAGGCAGCTCCACAGCCATCAGCCCCCC +AAGAACCCCGCACTCACGACGACGCGATCACAAATGACGACCAAGGACCATTCAAAGCTC +TGCAGACCGCAGTGACCAGCAATGCCCGAAAACGACCCCCCTCACAACGACAACCAGAAG +GCCCGGACAAAAAAGCCCCCTCCGAAGAACTCCACGGACCAAGCGAGAGGTCAGCCAGCA +GCTGACGGTAAGTGCGAACACCAGGCGGCCCCAGCCCAGAACAGTCCCGACACAAGGCCA +CCACCAGCCATCCCAATCTGCATCCTCCTCGTGGGACCCCCGAGGACCAACCCCCAAGGC +CGCCCCCGACCCAAACCACCAACCGCATCCCCACCACCCCCGGGAGAGAAACCCCCAGCA +ACTGGAAGGCCCCTCCCCCCCTCCCCCAACGCAAGAAGTCCACAACCGAACCGCACAAGC +GACCGAAGCGACCCAACTGCAGGCATCCGACTCCCTAGACAGACCCTCTCCCCCCGGCAA +ACCAAACAAAACTTAGGGCCAAGGAACATACACACCCAACAGAACCCAGACCCCGGCCCA +CGGCGCCGCGCCCCCAACCCCCGACAACCAGAGGGAGTCCCCAACCAATCCCGCCGATCC +CCCCGGTGCCCACAGGCAGGCACACCAACCCCCGAACAGACCCAGCACCCAGCCACCGAC 
+AATCCAAGACGGGGGGGCCCCCCCAAAAAAGAGCCCCCAGGGGCCGACAGCCAGCATCGC +GAGGAAGCCCACCCACCCCACACACGACCACGGCAACCAAACCAGAACCCAGACCACCCT +GGGTCACCAGCTCCCGGACTCGGCAATCACCCCGCAGAAAGGAAAGGTCACAACCCGTGT +ACCCCAGCCCCGGTCCGGCGGGCAGCCACTCAACCCGAACCAGCAACCAAGAGCGGTCCC +CGAAGGACCCCCAAACCGCAAAGGACACCAGCATCCCACAGCCTCCCCAAGTCCCCCGGT +CTCCTCCTCCTCCCGAAGGGACCAGAGGATCAATCCACCACATCCGACGACACTCAACTC +CCCACCTCCAAAGGAGACACCGGGAATCCCAGAATCAAGACTCATCTAATGTCCATCATG +GGTCTCAAGGCGAATGTCTCTGCCATACTCATGGCAGTACTGTTAACTCTCCAAACACCC +ACCGGTCAAATCCATTGGGGCAACCTCTCTAAGATCGGGGTGGTAGGAATAGGAAGTGCA +AGCTACAAAGTTATGACTCGTTCCAGCCATCAATCATTAGTCATAAAATTAATGCCCAAT +ATAACTCTCCTCAAGAACTGCACGAGGGCAGAGATCGCAGAATACAGGAGACTACTGAGA +ACAGTTTTGGAACCAATTAGAGATACACTTAATGCAATGACTCAGAATATAAGACCGGTT +CAGAGTGTAGCTTCAAGTAGGAGACACAAGAGATTTGCGGGAGTAGTCCTGGCAGGTGCA +GCCCTAGTCGTTGCCACAGCTGCTCAGATAACAGCCGGCATTGCACTTCACCAGTCCATG +CTAAACTCTCAAGCCATCGACAATTTGAGAGCGAGCCTGGAAACTACTAATCAGGCAATT +GAGGCAATCAGACAAGCAGGGCAGGAGATGATATTGGCTGTTCAGGGTGTCCAAGACTAC +ATCGATAATGAGCTGATACCGTCTATGAACCAACTATCTTGTGATTTAATCGGCCAGAAG +CTCGGGCTCAAATTGCTCAGATACTATACAGAAATCCTGTCATTATTTGGCCCCAGCTTA +CGGGACCCTATATCTGCGGAGATATCTATCCAGGCTCTGAGCTATGCGCTTGGAGGAGAT +ATCAATAAGGTGTTAGAAAAGCTCGGATACAGTGGAGGTGATTTACTGGGCATCTTAGAG +AGCAGAGGAATAAAGGCCCGGATAACTCACGTTGACACAGAGTCCTACTTCATTGTCCTC +AGTATAGCCTATCCGACGCTGTCCGAGATTAAGGGGGTGATTGTCCACCGGCTAGAGGGG +GTCTCGTACAACATAGGCTCTCAAGAGTGGTATACCACCGTGCCCAAGTATGTTGCAACC +CAAGGGTACCTTATCTCGAATTTTGATGAGTCATCGTGTACTTTCATGCCAGAGGGAACT +GTGTGCAGCCAAAATGCCTTGTACCCGATGAGTCCTCTGCTTCAAGAATGCCTCCGGGGG +TCCACCAAGTCCTGTGCTCGTACACTCGTATCCGGGTCTTTTGGGAACCGGTTCATTTTA +TCACAAGGGAACCTAATAGCCAATTGTGCATCAATCCTTTGCAAGTGCTACACAACAGGA +ACGATCATTAATCAAGACCCTGACAAGATCCTAACATACATTGCTGCCGACCACTGCCCG +GTAGTCGAGGTGAACGGCGTGACCATCCAAGTCGGGAGCAGGAGGTATCCAGACGCTGTG +TACTTGCACAAAATTGACCTCGGTCCTCCCATATCACTGGAGAGGTTGGACGTAGGGACA +AATCTGGGGAATGCAATTGCTAAGTTGGAGGATGCCAAGGAATTGTTGGAGTCATCGGAC +CAGATATTGAGGAGTATGAAAGGTTTATCGAGCACTAGCATAGTCTACATCCTGATTGCA 
+GTGTGTCTTGGAGGGTTGATAGGGATCCCCGCTTTAATATGTTGCTGCAGGGGGCGTTGT +AACAAAAAGGGAGAACAAGTTGGTATGTCAAGACCAGGCCTAAAGCCTGATCTTACAGGA +ACATCAAAATCCTATGTAAGATCGCTCTGATCCTCTACAACTCTTGAAACACAAATGTCC +CACAAGTCTCCTCTTCGTCATCAAGCAACCACCGCATCCGGCATCAAGCCCACCTGAAAT +TATCTCCGGCTTCCCTCTGGCCGAACGATATCGGTAGTTAATTAAAACTTAGGGTGCAAG +ATCATCCACAATGTCACCACAACGAGACCGGATAAATGCCTTCTATAAAGATAACCCCCA +TCCCAAAGGAAGTAGGATAGTTATTAACAGAGAACATCTTATGATTGATAGACCTTATGT +TTTGCTGGCTGTCCTGTTCGTCATGTTTCTGAGCTTGATCGGGTTGCTAGCCATTGCAGG +CATTAGACTTCATCGGGCAGCCATCTACACCGCAGAGATCCATAAAAGCCTCAGCACCAA +TCTAGATGTAACTAACTCAATCGAGCATCAGGTCAAAGACGTGCTGACACCACTCTTCAA +AATCATCGGTGATGAAGTGGGCCTGAGGACACCTCAGAGGTTCACTGACTTAGTGAAATT +CATCTCTGACAAGATTAAATTCCTTAATCCGGATAGGGAGTACGACTTCAGAGATCTCAA +TTGGTGTATCAACCCGCCAGAGAGAATCAAATTGGATTATGATCAATACTGTGCAGATGT +GGCTGCTGAAGAGCTCATGAATGCATTGGTGAACTCAACTCTCCTGGAGACCAGAACAAC +CAATCAGTTCCTAGCTGTCTCAAAAGGAAATTGCTCAGGGCCCACTACAATCAGAGGTCA +ATTCTCGAACATGTCGCTGTCCCTGTTGGACTTGTATTTAGGTCGAGGTTACAATGTGTC +ATCTATAGTAACTATGACATCCCAGGGAATGCATGGGGGAACTTACCTAGTGGAAAAACC +TAATCTGAGCAGCAAAGGGTCAGAGTTGTCACGACTGAGCATGTACCGAGTGTTTGAAGT +AGGTGTTATCAGAAATCCGGGTTTGGGGGCTCCGGTGTTCCATATGACAAACTATTTTGA +GCAACCAGTCAGTAATGATCTCAGCAACTGCATGGTGGCTTTGGGGGAGCTCAAACTCGC +AGCCCTTTGTCACGGGGAAGATTCTATCACAATTCCCTATCAGGGATCAGGGAAAGGTGT +CAGCATCCAGCTCATCAAGCTAGGTGTCTGGAAATCCCCAACAGACATGCAATCCTGGGT +CCCCTTATCAACGGATGATCCAGTGATAGACAGGCTTTACCTCTCATCTCACAGAGGTGT +TATCGCTGACAATCAAGCAAGATGGGCTGTCCCGACAACACGAACAGATGACAAGTTGCG +AATGGAGACATGCTTCCAGCAGGCGTGTAAGGGTAAAATCCAAGCACTCTGCGAGAATCC +CGAGTGGGCACCATTGAAGGATGGCAGGATTCCCTCATACGGGGTCTTGTCTGTTGATCT +GAGTCTGACAGTTGAGCTTAAAATCAAAATTGCTTCGGGATTCGGGCCATTGATCACACA +CGGTTCAGGGATGGACCTATACAAATCCAACCACAACAATGTGTATTGGCTGACTATCCC +GCCAATGAAGAACCTAGCCTTAGGTGTAATCAACACATTGGAGTGGATACCGAGATTCAA +GGTTAATCCCAACCTCTTCACTGTCCCAATTAAGGAAGCAGGCGGAGACTGCCATGCCCC +AACATACCTACCTGCGGAGGTGGACGGTGATGTCAAACTCAGTTCCAATCTGGTGATTCT +ACCTGGTCAAGATCTCCAATATGTCTTGGCAACCTACGATACTTCCAGGGTTGAACATGC 
+TGTGGTTTATTACGTTTACAGCCCAAGCCGCTCATTTTCTTACTTTTATCCTTTTAGGTT +GCCTATAAAGGGGGTCCCCATCGAATTACAAGTGGAATGCTTCACATGGGACCAAAAACT +CTGGTGCCGCCACTTCTGTGTGCTTGCGGACTCAGAGTCTGGTGGACATATCACTCACTC +TGGGATGGTAGGCATGGGAGTCAGCTGTACAGTCACCCGGGAAGACGGAGCCAATCGCAG +ATAGGGCTGCCAGTGAGTCAATCACATGATGTCACCCAGACATCAGGCATACCCACTAGT +GTGAAATAGACATCAGAATTAAGAAAAA +>PP319667 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGCAGAGCAGTTGGACCCAGACAAGCC +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAGTGAGCTGCCAAGATTGGGGGTTAAG +GAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACCGAG +TCTAGCAGAGCAAGTGATGTGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGACATT +GACACTGCATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCCGATGCCCTG +CTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACCCCT +AGAGTGTACAATGACAGAGACCTTCTAGAC +>PP229488 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATACATACCACTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAAGCT +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAG +GAAGATAGGAGGGTCAGACAGAGTCGGGGAGAAGCCAGGGAGAGCAACAGAGAAACCGGG +TCCAGCAGATTAAGTGATGCGAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGACACT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGAATCCTGGAAGAACAGAGTTCAGACACGGACACCCCC +AGGGTGTACAATGACAGAGATCTTCTAGAC +>OR027166 +GTCAGTTCCACATTGGCATCTGAACTCGGGATCACTGCTGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGCAGAACAGTTGGACCCAGACAAGCC +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAGTGAGCTGCCGAGATTGGGGGGTAAG +GAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACCGAG +CCTAGCAGAGCAAGTGATGTGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGACATT +GACACTGCATCGGAGTTCAGCCAAGATCCGCAGGAAAGTCGAAGGTCAGCCGATGCCCTG +CTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACCCCT +AGAGTGTACAATGACAGAGACCTTCTAGAC +>OR027164 +GTCAGTTCCACATTGGCATCTGAACTCGGCATCACTGCTGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGCAGAACAGTTGGACCCAGACAAGCC +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAGTGAGCTGCCGAGATTGGGGGGTAAG 
+GAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACCGAG +CCTAGCAGAGCAAGTGATGTGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGACATT +GACACTGCATCGGAGTTCAGCCAAGATCCGCAGGAAAGTCGAAGGTCAGCCGATGCCCTG +CTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACCCCT +AGAGTGTACAATGACAGAGACCTTCTAGAC +>OK424848 +ACCAAACAAAGTTGGGTAAGGATAGATCAATCAATGATCATATTCTAGTGCACTTAGGAT +TCAAGATCCTATTAACAGGGACAAGAGCAGGATTAGGGATATCCGAGATGGCCACACTTT +TAAGGAGCTTAGCATTGTTCAAAAGAAACAAGGACAAACCACCCATTACATCAGGATCCG +GTGGAGCCATCAGAGGAATCAAACACATTATTATAGTACCAATCCCTGGAGATTCCTCAA +TTACCACTCGATCCAGACTACTGGACCGGTTGGTCAGGCTAATTGGAAACCCGGATGTGA +GCGGGCCCAAACTAACAGGGGCACTAATAGGTATATTATCCTTATTTGTGGAGTCTCCAG +GTCAATTGATTCAGAGGATCACCGATGACCCTGACGTTAGCATCAGGCTGTTAGAGGTTG +TCCAGAGTGACCAGTCACAATCTGGCCTTACCTTCGCATCAAGAGGTACCAACATGGAGG +ATGAGGCGGACCAATACTTTTCACATGATGATCCAAGTAGTGGTGATCAATCCAGGTCCG +GATGGTTCGAGAACAAGGAAATCTCAGATATTGAAGTGCAAGACCCTGAGGGATTCAACA +TGATTCTGGGCACCATTCTAGCCCAAATTTGGGTCTTGCTCGCAAAGGCGGTTACGGCCC +CAGACACGGCAGCTGATTCGGAGCTAAGAAGGTGGATAAAGTACACCCAACAAAGAAGGG +TAGTTGGTGAATTTAGATTGGAGAGAAAATGGTTGGATGTGGTGAGGAACAGGATTGCCG +AGGACCTCTCCTTACGCCGATTCATGGTCGCTCTAATCCTGGATATCAAGAGGACACCCG +GGAACAAACCTAGGATTGCTGAAATGATATGTGACATTGATACATATATCGTAGAGGCAG +GACTAGCCAGTTTCATCTTGACCATTAAGTTTGGGATAGAAACCATGTATCCTGCTCTTG +GACTGCATGAATTTGCTGGAGAGTTATCCACACTTGAGTCCTTGATGAATCTTTACCAGC +AAATGGGAGAAACTGCACCCTACATGGTAATCCTGGAGAACTCAATTCAGAACAAGTTCA +GTGCAGGATCATACCCTCTACTCTGGAGCTATGCCATGGGAGTAGGAGTGGAACTTGAAA +ACTCCATGGGAGGTTTGAACTTTGGTCGATCTTACTTTGATCCAGCATATTTTAGATTAG +GGCAAGAGATGGTGAGGAGGTCAGCTGGAAAGGTCAGTTCCACATTGGCATCTGAACTCG +GTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGATTGCAATGCATACTACTGAGGACA +GGATCAGTAGAGCGGTTGGACCCAGACAAGCCCAAGTGTCATTTATACACGGTGATCAAA +GTGAAAATGAGCTACCAGGATTGGGGGGCAAGGAAGATAGGAGGGTCAAACAGGGTCGGG +GGGAAGCCAGGGAGAGCTACAGAGAAACCGGATCCAGTAGAGCAAGTGATGTGAGAGCTG +CCCATCTTCCAATCAGCACTCCCCTAGACGTTGACACTGCATCAGAGTCAGGCCAAGATC +CGCAGGACAGTCGAAGGTCAGCTGACGCCCTGCTCAGGTTGCAGGCCATGGCAGGAATCT 
+TGGAAGAACAAGGCTCAGATACAGACATCTCTCGGGTGTACAATGACAAAGATCTTCTAG +ACTAGGTGCGAGAGGCCGAGGACCAGAAGAACATCCGCCTACCCTCCATCATTGTTATAA +AAAACTTAGGAACCAGGTCCACACAGCCGCCAGCCAACCAACCATCCACTCCCACGACTG +GGGCCGATGGCAGAAGAGCAGGCACGCCATGTCAAAAACGGACTGGAATGCATCCGGGCT +CTCAAGGCCGAGCCCATCGGCTCACTGGCCGTCAAGGAAGCCTTGGCAGCATGGTCAGAA +ATATCAGACAACCCAGGACAGGACCGAGCCACCTGCAAGGAAGAGAAGGCAGGCAGTTCG +GGTCTCAGCAAACCATGCCTCTCAGCAATTGGATCAACTGAAGGCGGTGCACCTCGCATC +CGCGGTCAGGGATCTAGAGAGAGCGATGACGACGCTGAAACTGTGGGAATCCCCTCAAGA +AATCTCCAGGCATCAAGCACTGGGTTACAGTGTTATCATGTTTATGATCACAGCGGTGAA +GCGGTTAAGGGAATCCAAGATGCTGACTCTATCGTGGTTCAATCAGGCCTTGATGGTGAT +AGCACCCTCTCAGGAGGAGACGATGAATCTGAAAACAGCGATGTGGATATTGGCGAACCT +GATTCCGAGGGATATGCTATCACTGACCGGGGATCTACTCCCATCTCTATGGGGTTCAGG +GCTTCTGATGTTGAAACTGCAGAAGGAGGGGAGATCCACGAGCTCCTGAGACTCCAATCC +AGAGGCAACAACTTTCCGAAGCTTGGGAAAACTCTCAATGTTCCTCCGCCCCCGCAGCCC +GGTAGGGCCAGCACTTCCGAGACACCCATTAAAAAGGGCACAGACGCGAGATTAGCCTCA +TTTGGAACGGAGATCGCGTCTTTATTGACAGGTGGTGCAACCCAATGTGCTCGAAAGTCA +CCCTCGGAACCATCAGAGCCAGGTGCACCTGCGGGGAATGTCCCCGAGTGTGTGAGCAAT +GCCGCACTGATACAGGAGTGGACACCCGAATCTGGTACCACAATCTCCCCGAGATCCCAG +AATAATGAAGAAGGGGGAGACTATTATGATGATGAGCTGTTCTCCGATGTCCAAGACATC +AAAACAGCCTTGGCCAAAATACACGAGGATAATCAGAAGATAATCTCCAAGTTAGAATCA +CTGCTGTTATTGAAGGGAGAAGTTGAGTCAATTAAGAAGCAGATCAACAAGCAAAATATC +AGCATCTCCACCCTGGAAGGGCACCTCTCAAGCATCATGATCGCCATTCCTGGACTTGGG +AAGGATCCCAACGACCCCACTGCAGATGTCGAACTCAATCCCGACTTGAAACCCATCATA +GGCAGAGATTCAGGCCGAGCATTGGCCGAAGTTCTCAAGAAACCTGTTGCCAGCCGACAA +CTCCAAGGAATGACAAATGGACGGACCAGTTCCAGAGGACAGCTGCTGAAGGAATTTCAA +CTAAAGCCGATCGGGAAAAAGATGAGCTCAGCCGTCGGGTTTGTTCCTGACACCGGCCCT +GCATCACGCAGTGTAATCCGCTCCATTATAAAATCCAGCCGGCTAGAGGAGGATCGGAAG +CGTTACCTGATGACTCTCCTTGATGATATCAAAGGAGCCAACGATCTTGCCAAGTTCCAC +CAGATGCTGATGAAGATAATAATGAAGTAGCTACAGCTCAACTTACCTGCCAGCCTCACG +CCAGTCAACCCAATCAATACAGCCTAAATCCATTATAAAAAACTTAGGAGCAAAGTGATT +GCCTCCCAAGTTCCACAATGACAGAGATCTACGACTTCGACAAGTCGGCATGGGACATCA +AAGGGTCGATCGCTCCGATACAACCTACCACCTACAGTGATGGCAGGCTGGTGCCCCAGG 
+TCAGAGTCATAGATCCTGGTCTAGGTGACAGGAAGGATGAATGCTTTATGTACATGTTTC +TGCTGGGGGTTGTTGAGGACAGCGATCCCCTAGGGCCTCCAATCGGGCGAGCATTTGGGT +CCCTGCCCTTAGGAGTTGGTAGATCCACAGCAAAACCCGAAGAACTCCTCAAAGAAGCCA +CTGAGCTTGACATAGTTGTTAGACGTACAGCAGGGCTCAATGAAAAACTGGTGTTCTACA +ACAACACCCCACTAACTCTCCTCACACCTTGGAGAAAGGTCCTAACAACAGGAAGTGTCT +TCAACGCAAACCAAGTGTGCAATGCGGTTAATCTGATACCGCTGGATACCCCGCAGAGGT +TCCGTGTTGTTTATATGAGCATCACCCGTCTTTCGGATAACGGGTATTACACCGTTCCTA +GAAGAATGCTGGAATTCAGATCAGTCAATGCAGTAGCCTTCAACCTGCTGGTGACCCTTA +GGATTGACAAGGCGATTGGCCCCGGGAAGATCATCGACAATGCAGAGCAACTTCCTGAGG +CAACATTTATGGTCCACATCGGAAACTTCAGGAGAAAGAAGAGTGAAGTCTACTCTGCTG +ATTACTGCAAAATGAAAATCGAAAAGATGGGCCTGGTTTTTGCACTTGGTGGGATAGGGG +GCACCAGTCTTCACATTAGAAGCACAGGCAAAATGAGCAAGACTCTCCATGCACAACTCG +GGTTCAAGAAGACCTTATGTTACCCACTGATGGATATCAATGAAGACCTTAATCGATTAC +TCTGGAGGAGCAGATGCAAGATAGTAAGAATCCAGGCAGTTTTGCAGCCATCAGTCCCTC +AAGAATTCCGCATTTACGACGACGTGATCATAAATGATGACCAAGGACTATTCAAAGTTC +TGTAAACCGCAGTGCCCAGCAATCTCCAAAAACGACCCCTCTTATCATGACAGCAAGAAG +GCCCGGACAAAAAGGCCCCCCCCAAAAGACTCCACGGACCAGGCGAGAGGCCAGCCAGCA +GCCGATAGAAAGCGCGAACACCAGGCGGCCCAGGTGCAGAACAGCCCCAACACAAGGCCA +CCACCAGCTATCCCAATCTGCGTCATCCCCGTGGGACCCCCGAGGATCAACCCCCAAGGT +CGCCCCCGACCCAGACCACCAACCGCACCCCCACAGCCCCCAGGGAAGAAGCCCCCAACA +ACCGGAAGGCCTCCTCCCCCTTTCCCCCCCCCCCAACGCAAGAACTCCACAACCGAACCG +CACAAGCGACCGAGGTGACCCAACCGCAGGCATCCGACTCCCCAGATAGATCCTCTCCCT +CCGGCAAACTAAACAAAACTTAGGGCCAAGGAACATACACACCCGACAAAGCCCGGACCC +CGGCCCACGGCGCCGCGCCCCCATCCCCCGACAACCAGAGGGAGCCCCCAACCAATCCCG +CCGGCTCCCCCGGTGCCCACAGGTAGGCACACCAACCCCCAGACAGACCCAGCACCCAGC +CATCGACAATCCAAGACGGGGGGCCCCCCCAAAAAAGGCCCCCCAGGGGCCGACAGCCAA +CACCGCGGGGAAGCCCACCCACCCCACACACGACCACGGCAACCGAACCAGAACCCAGAC +CACCCTGGGCCACCGGCTCACAGGCTCGGCAATCACCCCGCAGAAAGGAAAGGCCACAAC +CCGCGCACTCCAGCCCCGATCCGGCGGGCGGCCACCCAACCCGAGCCAGCACCCAAGAGC +GACCCCTGAGGGACCCCCGAATCGCAAAGGACATCAGTATCCCATAGCCTCTCCAAGTCC +CCCGGTCTCCCCCACCTCTCGAAGGGACCAAAAGATCAATCTACCACATCCGACGACACT +CAATTCCCCACCCCCAAAGGAGACACCGGGAACCCCAGAATCAAGACTCATCCAGTGTCC 
+ATCATGGGTCTCAAGGTGAACGTCTCTGCCATATTCATGGCAGTACTGTTAACTCTCCAA +ACACCCACCGGTCAAATCCATTGGGGCAATCTCTCTAAGATAGGGGTGGTAGGGGTAGGA +AGTGCAAGCTACAAAGTTATGACTCGTTCCAGCCATCAATCATTAGTCATAAAATTAATG +CCCAATATAACTCTCCTCAATAACTGCACAAGGGTAGAGATTGCAGAATACAGGAGACTA +CTGAGGACAGTTTTGGAACCAATTAGAGATGCACTTAATGCAATGACCCAGAATATAAGA +CCGGTTCAGAGTGTAGCTTCAAGTAGGAGACACAAGAGATTTGCGGGAGTTGTCCTGGCA +GGTGCGGCCCTAGGTGTTGCCACAGCTGCTCAGATAACAGCCGGCATTGCACTTCACCAG +TCCATGCTGAACTCTCAAGCCATCGACAATCTGAGAGCGAGCCTGGAAACTACTAATCAG +GCAATTGAGGCAATCAGACAAGCAGGGCAGGAGATGATATTGGCTGTTCAAGGTGTCCAA +GACTACATCAATAATGAGCTGATACCGTCTATGAACCAACTATCTTGTGATTTAATCGGC +CAGAAGCTAGGGCTCAAATTGCTCAGATACTATACAGAAATCCTGTCATTGTTTGGCCCA +AGCTTACGGGACCCCATATCTGCGGAGATATCTATCCAGGCTTTGAGCTATGCGCTTGGA +GGAGATATCAATAAAGTGTTAGAAAAGCTCGGGTATAGTGGAGGTGATTTACTGGGCATC +TTAGAGAGCAGAGGAATAAAGGCCCGGATAACTCACGTCGACACAGAGTCCTACTTCATT +GTACTCAGTATAGCCTATCCGACACTGTCCGAGATTAAGGGGGTGATCGTCCACCGGCTA +GAGGGGGTCTCGTACAACATAGGATCTCAAGAGTGGTATACCACTGTGCCCAAGTATGTC +GCAACCCAAGGATACCTTATTTCGAATTTTGATGAGTCATCGTGTACTTTCATGCCAGAG +GGGACTGTGTGCAGCCAAAATGCCTTGTACCCGATGAGTCCCCTGCTCCAAGAATGCCTC +CGGGGGTCCACCAAGTCTTGTGCTCGTACACTCGTATCTGGGTCTTTTGGGAACCGGTTC +ATTTTATCACAAGGGAACCTAATAGCCAATTGTGCATCAATCCTTTGCAAGTGTTACACA +ACAGGAACGATCATTAATCAAGACCCTGACAAGATCCTGACATACATTGCTGCCGATCAC +TGCCCGGTGGTCGAGGTGAACGGCGTGACCATCCAAGTCGGGAGCAGGAGGTATCCGGAC +GCTGTGTACTTGCACAGAATTGACCTCGGTCCTCCCATATCATTGGAGAGGTTGGACGTG +GGGACAAATCTGGGGAATGCAATTGCTAAGTTGGAGGATGCCAAGGAATTGTTGGAGTCA +TCGGACCAGATATTGAGGAGTATGAAAGGTTTATCGAGCACCAGCATAGTTTACATCCTG +ATTGCAGTGTGTCTTGGAGGGTTGATAGGGATCCCCGCTTTAATATGTTGCTGCAGGGGG +CGTTGTAACAAAAAGGGAGAACAAGTTGGTATGTCAAGACCAGGCCTAAAGCCTGATCTT +ACAGGAACATCAAAATCCTATGTAAGGTCGCTTTGATCCTCTACAACTCTTGAAACACAA +ATGTCCCACAAGTCTCCTCTTCGTCATCAAGCAACCACCGCATCCAGCATCAAGCCCACC +TGAAATTGTCTCCGGCTTCCCTCTGACCGAGCAATATCGGTAGTTAATTAAAACTTAGGG +TGCAAGATCATCCACAATGTCACCGCAACGAGACCGGATAAATGCCTTCTACAAAGACAA +CCCCCATCCTAAGGGAAGTAGGATAGTTATTAACAGAGAACATCTTATGATTGATAGACC 
+TTATGTTTTGCTGGCTGTTCTATTCGTCATGTTTCTGAGCTTGATCGGGCTGCTAGCCAT +TGCAGGCATTAGATTGCATCGGGCAGCCATCTACACCGCGGAGATCCATAAAAGCCTCAG +CACCAATCTAGATGTAACTAACTCAATCGAGCATCAGGTCAAGGACGTGCTGACACCACT +CTTCAAGATCATCGGTGATGAAGTGGGCCTGAGGACACCTCAGAGATTCACTGACCTAGT +GAAATTCATCTCTGACAAGATTAAATTCCTTAATCCGGATAGGGAGTACGACTTCAGAGA +TCTCACTTGGTGTATTAACCCGCCAGAGAGAATCAAATTGGATTATGACCAATACTGTGC +AGATGTGGCTGCTGAAGAACTCATGAATGCATTGGTGAACTCAACTCTACTGGAGGCTAG +GGCAACCAATCAGTTCCTAGCTGTTTCAAAGGGAAACTGCTCAGGGCCCACTACAATCAG +AGGTCAATTCTCAAACATGTCGCTGTCCCTGTTGGACTTGTATTTAAGTCAAGGTTACAA +TGTGTCATCTATAGTCACTATGACATCCCAGGGAATGTACGGGGGAACTTACCTAGTGGA +AAAGCCTAATCTGAACAGCAAGGGGTCAGAGTTGTCACAACCGAGCATGCACCGAGTGTT +TGAGGTAGGTGTTATCAGAAATCCGGGCTTGGGGGCTCCGGTGTTCCATATGACAAATTA +TTTTGAGCAACCAGTCAGTAATGATTTCAGCAACTGCATGGTGGCTTTAGGGGAGCTCAA +ATTCGCAGCCCTTTGTCACAGGGGGGATTCTATCACAATTCCCTATCAAGGATCAGGGAA +AGCTGTCAGCTTCCAGCTCGTCAAGCTAGGTGTCTGGAAATCCCCAACCGACATGCAATC +CTGGGTCCCCCTATCAACAGATGATCCAGTGATAGACAGGCTTTACCTCTCATCTCACAG +AGGCGTTATCGATGACAATCAAGCAAAATGGGCTGTCCCGACAACACGGACAGATGACAA +GTTGCGAATGGAGACATGCTTCCAGCAGGCGTGTAAGGGTAAAATCCAAGCACTCTGCGA +GAATCCCGAGTGGGCACCATTGAAGGATAACAGGATTCCTTCATACGGGATCTTGTCTGT +CAATCTGAGTCTGACAGTTGAGCTTAAAATCAAAATTGCGTCAGGATTCGGGCCATTGAT +CACACACGGCTCAGGGATGGACCTGTACAAGTCCAACCACAACAATGTGTATTGGCTGAC +TATCCCGCCAATGAAGAACCTAGCCTTAGGTGTAATCAACACATTGGAGTGGGTACCGAG +ATTCAAGGTTAGTCCCGACCTCTTCACTGTTCCAATCAAGGAGGCAGGCGAGGACTGCCA +TGCCCCAACATACCTACCTGCGGAGGTGGACGGTGATGTCAAACTCAGTTCCAATCTGGT +GATTCTACCTGGTCAAGACCTTCAATACGTTTTGGCAACCTACGATACTTCCAGAGTTGA +ACATGCTGTGGTTTATTATGTTTACAGCCCAAGCCGCTCATTTTCTTACTTTTATCCTTT +TAGGTTGCCTATAAAGGGGGTCCCCATCGAATTACAAGTGGAATGCTTCACATGGGACCA +AAAACTCTGGTGCCGTCACTTCTGTGTGCTTGCGGACTCAGAGTCTGGTGGACATATCAC +TCACTCTGGGATGGTGGGCATGGGAGTCAGCTGCACAGTCACTCGGGAAGATGGAATAAA +CCGCAGATAGGGCTGCCGGTGAGCCGATCACATGATGTCACCCAGACATCAGGCATACCC +ACTAGTGTGAAATAGACATCAGAATTAAGAAAAACATAGGGTCCAAGTGGTTCCCCGTTA +TGGACTCGCTATCCGTCAACCAGATCCTATACCCCGAAGTTCACCTAGATAGCCCGATAG 
+TTACCAATAAGATAGTAGCTATCCTAGAGTATGCTCGAGTCCCTCACGCTTACAGCCTGG +AGGACCCTACACTGTGTCAGAACATCAAGCACCGCCTAAAAAACGGATTTTCCAACCAAA +TGATTATAAACAATGTGGAAGTTGGGAATGTCATTAAGTCCAAGCTTAGGAGTTATCCCG +CCCACACTCATATTCCATATCCAAATTGTAATCAGGATTTATTTAACATAGAAGACAAAG +AGTCAACAAGGAAGATCCGTGAGCTCCTCAAAAAGGGAAATTTGCTGTACTCCAAAGTCA +GTGATAAGGTTTTCCAATGCCTGAGGGACACTAACTCACGGCTTGGCCTAGGCTCCGAAT +TGAGGGAGGACATCAAGGAGAAAATTATTAACTTGGGAGTTTACATGCACAGCTCCCAAT +GGTTTGAGCCCTTTCTGTTTTGGTTTACAGTCAAGACTGAGATGAGGTCAGTGATTAAAT +CACAAACCCATACTTGCCATAGGAGGAGACACACACCTGCATTCTTCACTGGCAGTTCAG +TTGAGTTGCTAATCTCTCGTGACCTTGTGGCCATAATCAGTAAAGAGTCTCAACATGTAT +ATTACCTGACGTTTGAACTGGTTTTGATGTATTGTGATGTCATAGAGGGGAGGTTAATGA +CAGAGACCGCCATGACCATTGATGCCAGGTATACAGAACTTCTAGGAAGAGTCAGATACA +TGTGGAAACTGATAGATGGTTTCTTCCCTGCACTCGGGAATCCAACTTATCAGATTGTAG +CCATGCTGGAGCCACTTTCACTTGCTTACCTGCAGCTGAGGGATATAACAATAGAACTTA +GAGGTGCTTTCCTTAACCACTGCTTTACTGAGATACATGATGTTCTTGACCAAAACGGGT +TTTCTGATGAAGGTACTTATCATGAGTTAATTGAAGCCCTAGATTACATTTTCATAACTG +ATGACATACATCTGACAGGGGAGATTTTCTCATTTTTCAGAAGTTTCGGCCACCCCAGAC +TTGAAGCAGTAACGGCTGCTGAAAATGTTAGGAAATACATGAATCAGCCTAAAGTCATTG +TGTATGAGACTCTGATGAAAAGTCATGCCATATTTTGTGGAATCATAATCAACGGCTACC +GTGACAGGCACGGAGGCAGTTGGCCACCCCTGACCCTCCCCCTGCATGCTGCAGACACAA +TCCGGAATGCTCAAGCTTCAGGTGAAGGGTTAACACATGAGCAGTGCGTTGATAACTGGA +AGTCTTTTGCTGGAGTGAGATTTGGCTGCTTTATGCCTCTTAGCCTGGACAGTGATCTGA +CAATGTACCTAAAGGACAAGGCACTTGCTGCTCTCCAAAGGGAGTGGGATTCAGTTTACC +CGAAAGAGTTCCTGCGTTACGACCCTCCCAAGGGAACCGGGTCACGGAGGCTTGTAGATG +TTTTCCTTAATGATTCGAGCTTTGACCCATATGATATGATAATGTATGTTGTAAGTGGAG +ATTACCTCCGTGACCCTGAGTTCAACCTGTCTTACAGCCTGAAAGAAAAGGAGATCAAGG +AAACAGGTAGACTTTTTGCTAAAATGACTTACAAAATGAGGGCATGCCAAGTGATTGCTG +AAAATCTAATCTCAAACGGGATTGGCAAATATTTCAAGGACAATGGGATGGCCAAGGACG +AGCACGATTTGACTAAGGCACTCCACACTCTAGCTGTCTCAGGAGTCCCCAAAGATCTCA +AAGAAAGTCACAGAGGGGGGCCAGTCCTAAAAACTTACTCCCGAAGCCCAGTCCACACAA +GTACCAGGAACGTGAAAGCAGAAAAAGGGTTTATAGGATTCCCTCATGTAATTCGGCAGG +ACCAAGACACTGATCATCCGGAGAATATGGAAGCTTACGAGACAGTCAGTGCATTTATCA 
+CGACTGATCTCAAAAAGTACTGCCTTAATTGGAGATATGAGACCATCAGCTTATTTGCAC +AGAGGCTAAATGAGATTTACGGATTACCCTCATTTTTCCAGTGGCTGCATAAGAGGCTTG +AAACCTCTGTCCTCTATGTAAGTGACCCTCATTGCCCCCCTGACCTTGACGCCCATGTCC +CGTTATGCAAAGTCCCCAATGACCAAATCTTCATTAAGTACCCTATGGGAGGGATAGAAG +GGTATTGTCAGAAGCTGTGGACCATCAGCACCATTCCTTATTTATACCTGGCTGCGTATG +AGAGCGGAGTAAGGATTGCTTCGTTAGTGCAAGGGGACAATCAGACTATAGCTGTAACAA +AAAGGGTGCCCAGCACATGGCCTTACAACCTTAAGAAACGGGAAGCTGCTAGAGTAACTA +GAGATTACTTTGTAATTCTTAGGCAAAGGCTACATGACATTGGCCATCACCTCAAGGCAA +ATGAGACAATTGTTTCATCACATTTCTTTGTCTATTCAAAAGGAATATATTATGATGGGC +TACTTGTGTCCCAATCACTCAAGAGCATCGCAAGATGTGTATTCTGGTCAGAGACTATAG +TTGATGAAACAAGGGCAGCATGCAGTAATATTGCTACAACAATGGCTAAAAGCATCGAGA +GAGGTTATGACCGTTACCTTGCATATTCCCTGAACGTCCTAAAAGTAATACAGCAGATTC +TGATCTCTCTTGGCTTCACAATCAATTCAACCATGACCCGAGATGTAGTCATACCCCTCC +TCACAAACAATGATCTCTTAATAAGGATGGCACTGTTGCCCGCTCCTATTGGGGGAATGA +ATTATCTGAATATGAGCAGGCTGTTTGTCAGAAACATCGGTGATCCAGTAACATCATCAA +TTGCTGATCTCAAGAGAATGATTCTCGCATCACTAATGCCTGAAGAGACCCTCCATCAAG +TAATGACACAACAACCAGGGGACTCTTCATTCCTAGACTGGGCTAGCGACCCTTACTCAG +CAAATCTTGTATGCGTCCAGAGCATCACTAGACTCCTCAAGAACATAACTGCGAGGTTTG +TCCTGATCCATAGTCCAAACCCAATGTTAAAAGGGTTATTCCATGATGACAGTAAAGAAG +AGGACGAGGGACTGGCAGCATTCCTCATGGACAGGCATATTATAGTACCTAGGGCAGCTC +ATGAAATCCTGGATCATAGTGTTACAGGGGCACGAGAGTCTATTGCAGGCATGCTAGATA +CCACAAAAGGCCTGATTCGAGCCAGCATGAGGAAGGGGGGGTTAACCTCTCGAGTGATAA +CCAGATTGTCCAATTATGACTATGAACAATTTAGAGCAGGGATGGTGCTTTTGACAGGAA +GAAAGAGAAATGTCCTCATTGACAAAGAGTCATGTTCAGTGCAGCTGGCTAGAGCCCTAA +GAAGCCATATGTGGGCAAGGCTAGCTCGAGGACGGCCTATTTACGGCCTTGAGGTCCCTG +ATGTACTAGAATCTATGCGAGGCCACCTTATTCGGCGTCATGAGACATGTGTCATCTGCG +AGTGTGGATCAGTCAACTACGGATGGTTTTTTGTCCCTTCAGGTTGCCAACTGGATGATA +TTGACAAGGAAACATCATCCTTGAGAGTCCCATATATTGGTTCTACCACTGATGAGAGAA +CAGACATGAAGCTTGCCTTCGTAAGAGCCCCAAGTCGATCCTTGAGATCTGCTGTTAGAA +TAGCAACAGTGTACTCATGGGCTTACGGTGATGATGATAGCTCTTGGAATGAAGCCTGGT +TGTTGGCAAGGCAAAGGGCCAATGTGAGCCTGGAAGAGCTAAGGGTGATCACTCCCATCT +CAACTTCGACTAATTTAGCGCATAGGTTGAGGGATCGTAGCACTCAAGTGAAATACTCAG 
+GCACGTCTCTTGTCCGAGTGGCAAGGTATACCACAATCTCCAACGACAATCTCTCATTTG +TCATATCAGATAAGAAGGTTGATACTAACTTTATATACCAACAAGGAATGCTTCTAGGGT +TGGGTGTTTTAGAAACATTGTTTCGACTTGAGAAGGATACCGGATCATCTAACACTGTGT +TACATCTTCACGTCGAAACAGATTGTTGCGTGATCCCGATGATAGATCACCCCAGGATAC +CCAGCTCCCGCAAGCTAGAGCTGAGGGCAGAGCTATGTACCAACCCATTGATATATGATA +ATGCACCTTTAATTGACAGAGATGCAACAAGGCTATACACCCAGAGCCATAGGAGGCACC +TTGTGGAATTTGTTACATGGTCCACACCCCAACTATATCACATTCTAGCTAAGTCCACAG +CACTATCCATGATTGACCTGGTAACAAAATTTGAGAAGGACCATATGAATGAAATTTCAG +CTCTCATAGGGGATGACGATATCAATAGTTTCATAACTGAGTTTCTGCTTATAGAGCCAA +GATTATTCACTATCTACTTGGGCCAGTGTGCAGCCATCAATTGGGCATTTGATATACATT +ATCATAGACCATCGGGGAAATATCAGATGGGTGAGCTTTTGTCTTCATTCCTTTCTCGGA +TGAGCAAAGGAGTGTTTAAGGTACTTGTCAATGCTCTAAGCCACCCAAAGATCTACAAAA +AATTCTGGCATTGTGGTATTATAGAGCCTATCCATGGTCCTTCACTTGATGCTCAAAACT +TGCACACAACTGTGTGCAACATGGTTTACACATGCTATATGACCTACCTCGACCTGTTGT +TGAATGAAGAGTTAGAAGAGTTCACATTTCTTTTGTGCGAAAGTGACGAGGATGTAGTAC +CGGACAGATTCGACAATATCCAGGCAAAACACCTGTGTGTCCTAGCAGATTTGTACTGTC +AACCAGGGACCTGCCCACCAATTCGAGGTCTGAGACCGGTAGAGAAATGTGCAGTTCTAA +CCGATCATATCAAGGCAGAGGCTAGGTTATCTCCAGCAGGGTCTTCGTGGAACATAAATC +CAATTATTGTAGACCATTACTCATGCTCTCTGACTTATCTCCGGCGAGGATCGATCAAAC +AGATAAGATTGAGAGTTGATCCAGGATTCATTTTTGACGCCCTCGCTGATGTAAATGTCA +GTCAGCCAAAGATCTGCAGCAACAACATCTCAAATATGAGCATCAAGGATTTCAGACCCC +CACACGATGATGTTGCCAAATTGCTCAAAGATATCAACACAAGCAAGCACAATCTTCCCA +TTTCAGGGGGTAATCTCGCCAATTATGAAATCCATGCTTTCCGCAGAATCGGGTTAAACT +CATCTGCTTGCTACAAAGCTGTTGAGATATCAACATTAATTAGGAGATGCCTTGAGCCAG +GGGAAGACGGCTTGTTCTTGGGTGAGGGGTCAGGTTCTATGTTGATCACTTATAAGGAGA +TACTAAAACTAAACAAGTGCTTCTATAATAGTGGGGTTTCCGCCAATTCTAGATCTGGTC +AAAGGGAATTAGCACCCTATCCCTCCGAAGTTGGCCTTGTCGAACACAAAATGGGAGTAG +GTAATATTGTCAAGGTGCTTTTTAACGGGAGGCCCGAAGTCACGTGGGTAGGCAGTATAG +ATTGCTTCAATTTCATAGTCAGTAATATCCCTACCTCTAGTTTGGGGTTTATCCATTCAG +ATATAGAGACCTTACCTAATAAAGATACTATAGAGAAGCTAGAGGAATTGGCAGCCATCC +TATCGATGGCTCTGCTCCTTGGCAAAATAGGATCAATACTGGTGATTAAGCTTATGCCTT +TCAGCGGGGATTTTGTTCAGGGATTTATAAGTTCTGTAGGGTCTCATTATAGAGAAGTGA 
+ACCTTGTCTACCCTAGATACAGCAACTTCATATCTACTGAATCTTATTTAGTTATGACAG +ATCTCAAAGCTAACCGGTTAATGAATCCTGAAAAGATTAAGCAGCAGATAATTGAATCAT +CCGTGCGGACTTCACCTGGACTTATAGGTCACATCCTATCCATTAAGCAACTAAGCTGCA +TACAAGCGATTGTGGGAGACGCAGTTAGCAGAGGTGATATCAACCCTACTCTGAAAAAAC +TTACACCTATAGAGCAGGTGCTGATCAATTGCGGGTTGGCAATTAACGGACCTAAACTGT +GCAAAGAATTGATCCACCATGATGTTGCCTCAGGGCAAGATGGATTGCTTAACTCTATAC +TCATCCTCTACAGGGAGTTGGCAAGATTCAAAGAGAACCAAAGAAGTCAACAAGGGATGT +TCCACGCTTACCCCGTATTGGTAAGTAGCAGGCAACGAGAACTTATATCTAGGATCACCC +GCAAATTTTGGGGGCATATTCTTCTTTACTCCGGGAACAGAAAATTAATAAATCGGTTTA +TCCAGAATCTCAAGTCCGGTTATCTGATACTAGATTTACACCAGAATATCTTCGTTAAGA +ATCTATCTAAGTCAGAGAAACAGATTATCATGACGGGGGGTTTAAAACGTGAGTGGGTTT +TTAAGGTAACAGTCAAGGAGACCAAAGAATGGTATAAGTTAGTCGGATACAGTGCCCTGA +TTAAGGATTAATTGGTTGAACTCCGGAACCCTAATCCTGCCCTAGGTAGTTAGGCATTAT +TTGCAATATATTAAAGAAAACTTTGAAAATACGAAGTTTCTATTCCCAGCTTTGTCTGGT +>MN758619 +GTCAGTTCCACACTGGCATCTGAACTCGGTATCACAGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGACCAGCAGAGCAGTTGGACCCAGGCAAGCC +CAAGTCTCATTTTTACACGGTGATCAAAGTGAGAATGAGCTACCGGGATTGGGGGGCAAG +GAAGATAGAAGGGTCAAACAGAGTCGAGGGGAAACCAGGGAGAACTCCAGAGAAACCAGG +CCCAGCAGAGCAAGTGATGCGAGAGCCGCCCATCTCCCAACCAGCACACCCCCAGACATT +GACACTGCATCGGAGTACAGCCAAGAACCACAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGGATCCTGGAAGAACAAGGCTCGGACACAGATACCCCT +AGAGTGTACAACGATAGAGATCTTCTAGAC +>MK803443 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGCAGAGCAGTTGGACCCAGACAAGCC +CAAGTGTCATTTCTACACGGTGATCAAAGTGAAAGTGAGCTGCCGAGATTAGGGGGTAAG +GAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACCGAG +CCTAGCAGAGCAAGTGATGTGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGACATT +GACACTGCATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCCGATGCCCTG +CTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACCCCT +AGAGTGTACAATGACAGAGACCTTCTAGAC +>LC707232 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCGAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGTCGGACCCAGACAAGCC 
+CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTCCCAGGATTGGGGGGCAAG +GAAGATAGGAGGGTCAAACAGAGTCGGGGAGAAGCCAGGGAGAGCTACAGAGAGACCGGG +TCCAGCAGAGCAAGTGATGAGAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGACATT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGAATCTTGGAGGAGCAAGGCTCAGATACGGACACCCCT +AGGGTGTACAATGACAGTGATCTTCTAGAC +>LC706508 +GTCAGTTCCACATTGGCATCCGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGCCGGACCCAGACAAGCC +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTAGGGGGCAAG +GAAGACAGGAGGGTCAAACAGAGTCGAGGAGAAGCCAGGGAGAGCTACAGAGAAACCGAG +TCCAGCAGAGCAAGTGATGCGAGAGCTGCCCATCCTCCAACCAGCATGCCCCTAGACATT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCTCTG +CTCAGGCTGCAAGCCATGGCAGGAATCTTGGAAGAACAAGGCTCAGACACGGACACCCCT +AGGGTATACAATGACAGAGATCTTCTAGAT +>LC488774 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAAGCT +CAAGTGTCATTTCTGCACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAG +GAAGATAGGAGGGTCAGACAGAGTCGGGGAGAAGCCAGGGAGAGCAACAGAGAAACCGGG +TCCAGCAGATCAAGTGATGCGAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGACATT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTAGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGGATCCTGGAAGAACAAAGTTCAGACACGGACACCCCC +AGGGTGTACAATGACAGAGATCTTCTAGAC +>KX787040 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACCGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAAGCT +CAAGTGTCATTTATACACGGTGATCAAAGTGAAAATGAGCTACCAGGATTGGGGGGCAAG +GAAGATAGGAGGGTCAAACAGGGTCGGGGAGAAGCCAGGGAGAGCTACAGAGAAACCGGA +TCCAATAGAGCAAGTGATGCGAGAGCTGCCCATCTTCCAATCAGCACACCTCTAGACATT +GACACTGCATCAGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGTTGCAGGCCATGGCAGGAATCTTGGAAGAACAAGGCTCAGATACGGACATCTCT +AGGGTGTATAATGACAAAGATCTTCTAGAC +>KJ847285 +GTAAGTTCCACATTGGCATCTGAACTCGGTATCACAGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAGGACAGGACCAGTAGAGCGGTTGGACCCAGGCAAGCT +CAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCGGGATTGGGAGGTAAG 
+GAAGATAAGAAGGTCAAACAGAGTCGAGGAGAACCCAGGGAGAGCTATAGAGAAACCGGG +CCCAGCAGAGCAAATGATGCGAGAGCTGCCCACCTTCCAACCGGCACACCCCTAGACATT +GACACTGCATCGGAGTTTAGCCAAGACCCGCAAGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACATGGACACCCCT +AGAGTGTACAATGACAGAGATCTTCTAGAC +>JQ627680 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGAATACTACTGAGGACAGGATCAGTAGAGCAGTTGGGCCCAGACAA +GCCCAAGTATCATTTCTACACGGTGATCAGAGTGAGAATGAGCTGCCGAGATTGGGGGGC +AAGGAGGATAGAAGGGTCAAACAGAGCCGAGGAGAAGCCGGGGAGAGCTACAGAGAAACT +GGGCCCAGCAGAGCAAGTGATGCGAGAGCCACCCGTCTTCCAACCGGCACACCCCTAGAC +ATTGACACTGTATCGGAGTCCAGCCTAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGACACC +CCTAGAGTGTACAATGACAGAGATCTTCTAGACTAG +>JQ609271 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGACTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGGTCAGTAGAGCAGTCGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGTTACCAGGACTGGGGGGG +AAGGAGGACAGGAGGGTCAAACAGAGTAGGGGAGAAGCAAGGGAGAGCCACAGAGAAACC +GGATCCAGCAGAGCAAGTGATGCGAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGAC +ATTGACACTGCATCAGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGACTGCAAGCTATGGCAGGGATCTTGGAAGAACAAGGCTCAGACACGGACACT +CCCAAGGTGTACAATGACAGAGATCTTCTAGACTAG +>JN112839 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATAACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGAATCAGCAGAGCAGTTGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTGCCGAGATTGGGGGGT +AAGGAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACC +GAGCCCAGCAGAGCAAGTGATGCGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGAC +ATTGACACTGCATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCCGATGCC +CTGCTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACC +CCTAGAGTGTACAATGATAGAGACCTTCTAGACTAG +>JF739459 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACGGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCATACTACTGAAGACAGGATCAGTAGAGCGGTTGGACCCAGGCAAGCC +CAAGTTTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCGGGATTAGGGGGCAAG +GAAGATAGGAGGGTCAAACAGAGTCGAGGAGAAGCCAGGGAGAGCTCCAGAGAAACCGGG 
+CCCAACAGAGCAAGTGATGTGAGAGCTGCCCATCTCCCAACCAGCACACCCCCAGACATT +GACACTGCATCGGAGTACAGCCAAGACCCACAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAAGCCATGGCAGGGATTTTGGAAGAACAAGGCTCAGACACGGACACCCCT +AGAGTGTACAACGACAGAGATCTTCTAGAC +>GU440575 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTAGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGACCAGTAGAGCGGTTGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGC +AAGGAAGACAGGAGGGCCAAACAGAGTCGGGGAGAATCCAGGGAGAGCTACAGAGATACC +GGGTCCAGCAGAGCAAGTGATGTGAGGGCTGCCCATCTTCCAACCAACACACCCCTAGAC +ATTGACACCGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCCATGGCAGGAATCTTGGAAGAGCAAGGCTCAGACACGGACACC +CCTAGGGTGTACAATGATAGAGATCTTCTAGACTAG +>GQ374254 +AAGGTCAGTTCCACATTAGCATCTGAACTCGGTATCACTGCCGAAGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACAGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAA +TCCCAAGTGTCATTTCTACACGGTGATCAAAATGAAAGTGAGCTACCGAGATGGGGGGGT +AAGGAAGATATGAGGGTCAAACAGAGTCGGGGAGAAGCCAGAGAGAGCTACAGAGAAACC +GGGCCCAGCAGAGCAAGTGACGCAAGAGCTGCCCATCCTCCAACCGACACACCTTTAGAC +ATTGACACTGCATCGGAGTCTAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCT +CTGCTCAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGACACC +CCTAGAGTGTACAATGACAGAGACCTCCTAGACTAG +>FJ226469 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACGGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGTTGGACCCAGGCAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCGGGATTGGGAGGT +AAGGAAGATAAGAGAGTCAAACAGAGTCGAGGAGAAGCCAGGGAGAGCTATAGAGAAACT +GGGCACAGCAGAGCAAATGATGCGAGAGCTGCTGACCTTCCAACCGGCACATCCCTAGAC +ATTGACACTGCATCGGAGTTCAGCCAAGACCCACAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCCATGGCAGGGATCCCGGAAGAACAAGGCTCAGACATAGACACC +CCTAGAGTGTACAATGACAGAGATCTCCTAGACTAG +>DQ839359 +ATGGCCACACTTTTAAGGAGCTTAGCATTGTTCAAAAGAAACAAGGACAAACCACCCATC +ACATCAGGATCCGGTGGAGCTATCAGAGGAATCAAACACATTATTATAGTACCAATCCCT +GGAGATTCCTCAATTACCACTCGATCCAGGCTTCTGGACCGATTGGTCAGGTTAATTGGA +AACCCGGATGTGAGCGGGCCCAAACTAACAGGGGCACTAATCGGCATATTATCCTTATTT +GTGGAGTCTCCAGGTCAATTGATTCAGAGGATCACCGATGACCCTGACGTTAGCATAAGG 
+CTGTTAGAGGTTGTCCAGAGTGACCAGTCACAATCTGGCCTTACCTTCGCATCAAGAGGT +ACCAACATGGAGGATGAGGCGGACCAATACTTTTCACATGATGATCCAAGTAGTAGTGAT +CAATCCAGGTCCGGATGGTTCGAGAACAAGGAAATCTCAGATATTGAAGTGCAAGACCCT +GAAGGATTCAACATGATTCTGGGTACCATTCTAGCCCAAATTTGGGTCTTGCTCGCAAAG +GCGGTTACGGCTCCAGACACGGCAGCTGATTCGGAGCTAAGAAGGTGGATAAAATACACC +CAACAAAGAAGGGTAGTTGGTGAATTTCGATTGGAGAGAAAATGGTTGGATGTGGTGAGG +AACAGGATTGCCGAGGACCTCTCCTTACGCCGATTCATGGTCGCTCTAATCCTGGATATT +AAGAGAACACCCGGGAACAAACCTAGGATTGCTGAAATGATATGTGACATTGATACATAT +ATCGTAGAGGCAGGATTAGCCAGTTTTATCCTGACTATTAAGTTCGGGATAGAAACTATG +TATCCTGCTCTTGGACTGCATGAATTTGCTGGTGAATTATCCACACTTGAGTCCTTGATG +AATCTTTACCAGCAAATGGGAGAAACTGCTCCCTACATGGTAATCCTGGAGAACTCAATT +CAGAACAAGTTCAGTGCAGGATCATACCCTCTGCTCTGGAGCTATGCCATGGGAGTAGGA +GTGGAACTTGAAAACTCTATGGGAGGTTTGAACTTTGGCCGATCTTACTTTGATCCAGCA +TATTTTAGATTAGGGCAAGAGATGGTAAGGAGGTCAGCTGGAAAGGTCAGTTCCACATTA +GCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGATTGCAATGCAT +ACTACTGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAAGCCCAAGTGTCATTTCTA +CACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAGGAAGATAGGAGGGTC +AAACAGAGTCGGGGAGAAGCCAGGGAGAGCTACAGAGAAACCGGGTCCAGCAGAACAAGC +GATGCGAGAGCTGCCCATCTTCCAACCAACACACCCCTAGACATTGACACTGCATCGGAG +TCAAGCCAAGATCCGCAGGATAGTCGAAGGTCAGCTGACGCCCTGCTCAGGCTGCAAGCC +ATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACAGACACCCCTAGAGTGTACAATGAC +AGAGATCTTCTAGACTAG +>D87053 +AATGCACACTACTGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAAACCCAAGTGTC +ATTTCTACACGGTGATCAAAGTGAAAATGAGCTACCGAGATGGGGGGGCAAGGAAGATAT +GAGGGTCAAACAGAGTCGAGGAGAAGCCAGGGAGAGCTACAGAGAAACCGGGCCCAGTAG +AGCAAGTGATGCGAGAGCTGCCCATCTTCCAACCGACACACCCCTAGACATTGACACTGC +ATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGAAGCCCTGCTCAGGCT +GCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGACACCCCTAGAGTGTA +CAATGACAGAGATCTTCTAGACTAGGTGCGAGAGGCCGAG +>D01010 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACGGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCAGTTGGACCTAGACAA +GCCCAAGTGTCATTCCTACACGGTGATCAAAGTGAGAATGAGCTGCCGAGATTGGGGGGC 
+AAGGAGGACAGGAGGGTCAAACAGAGTCGAGGAGAAGCCGGGGAGAGCTACAGAGAAACC +GGGCCCAGCAGAACAAGTGATGCGAGACGTGCCCATCCTCCGACCGGCACACCCCTAGAC +ATTGACACTGCATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGACACC +CCTAGAGTGTACAATGACAGAGATCTTCTAGACTAA +>AY923194 +AAGGTCAGTTCCACATTAGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAA +GCCCAGGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGAAGC +AAGGAAGATAGGAGGGTCAAACAGAGTCGAGGAGAAGCCATGGAGAGCCACAGAGAAACC +GGGTCCGGTAGAACAAGTGATGCGAGAGCTGTCCATCTTCCAACCAGCACACCCCTAGAC +ATTGACACTGCATCGGAGTCAAGCCGAGATCCGCAGGATAGTCGAAGGTCAGCTGAAGCC +CTACTCAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGATACC +CCTAGAGTGTATAATGACAGAGATCTACTAGACTAG +>AY899309 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTC +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCAGTCGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGC +AAGGAAGATAGGAGAGCCAAACAGAGCCGAGGAGAAGCCAGGGAGAGCTACAGAGAAACT +GGGTCCAGCAGAGCAAGTGATGCAAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGAC +ATTGACAATGCATCGGAGACAAGCCAAGATCTGCAAGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAGCAAGGCTCAGACACGGACACC +CCCAGAGTGTACAATGACAGAGATCTTCTAGACTAG +>AF266288 +ACCAAACAAAGTTGGGTAAGGATAGATCAATCAATGATCATATTCTAGTGCACTTAGGAT +TCAAGATCCTATTATCAGGGACAAGAGCAGGATTAGGGATATCCGAGATGGCCACACTTT +TAAGGAGCTTAGCATTGTTCAAAAGAAACAAGGACAAACCACCCATTACATCAGGATCCG +GTGGAGCCATCAGAGGAATCAAACACATTATTATAGTACCAATCCCTGGAGATTCCTCAA +TTACCACTCGATCCAGACTTCTGGACCGGTTGGTCAGGTTAATTGGAAACCCGGATGTGA +GCGGGCCCAAACTAACAGGGGCACTAATAGGTATATTATCCTTATTTGTGGAGTCTCCAG +GTCAATTGATTCAGAGGATCACCGATGACCCTGACGTTAGCATAAGGCTGTTAGAGGTTG +TCCAGAGTGACCAGTCACAATCTGGCCTTACCTTCGCATCAAGAGGTACCAACATGGAGG +ATGAGGCGGACCAATACTTTTCACATGATGATCCAATTAGTAGTGATCAATCCAGGTTCG +GATGGTTCGAGAACAAGGAAATCTCAGATATTGAAGTGCAAGACCCTGAGGGATTCAACA +TGATTCTGGGTACCATCCTAGCCCAAATTTGGGTCTTGCTCGCAAAGGCGGTTACGGCCC 
+CAGACACGGCAGCTGATTCGGAGCTAAGAAGGTGGATAAAGTACACCCAACAAAGAAGGG +TAGTTGGTGAATTTAGATTGGAGAGAAAATGGTTGGATGTGGTGAGGAACAGGATTGCCG +AGGACCTCTCCTTACGCCGATTCATGGTCGCTCTAATCCTGGATATCAAGAGAACACCCG +GAAACAAACCCAGGATTGCTGAAATGATATGTGACATTGATACATATATCGTAGAGGCAG +GATTAGCCAGTTTTATCCTGACTATTAAGTTTGGGATAGAAACTATGTATCCTGCTCTTG +GACTGCATGAATTTGCTGGTGAGTTATCCACACTTGAGTCCTTGATGAACCTTTACCAGC +AAATGGGGGAAACTGCACCCTACATGGTAATCCTGGAGAACTCAATTCAGAACAAGTTCA +GTGCAGGATCATACCCTCTGCTCTGGAGCTATGCCATGGGAGTAGGAGTGGAACTTGAAA +ACTCCATGGGAGGTTTGAACTTTGGCCGATCTTACTTTGATCCAGCATATTTTAGATTAG +GGCAAGAGATGGTAAGGAGGTCAGCTGGAAAGGTCAGTTCCACATTGGCATCTGAACTCG +GTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGATTGCAATGCATACTACTGAGGACA +AGATCAGTAGAGCGGTTGGACCCAGACAAGCCCAAGTATCATTTCTACACGGTGATCAAA +GTGAGAATGAGCTACCGAGATTGGGGGGCAAGGAAGATAGGAGGGTCAAACAGAGTCGAG +GAGAAGCCAGGGAGAGCTACAGAGAAACCGGGCCCAGCAGAGCAAGTGATGCGAGAGCTG +CCCATCTTCCAACCGGCACACCCCTAGACATTGACACTGCATCGGAGTCCAGCCAAGATC +CGCAGGACAGTCGAAGGTCAGCTGACGCCCTGCTTAGGCTGCAAGCCATGGCAGGAATCT +CGGAAGAACAAGGCTCAGACACGGACACCCCTATAGTGTACAATGACAGAAATCTTCTAG +ACTAGGTGCGAGAGGCCGAGGACCAGAACAACATCCGCCTACCCTCCATCATTGTTATAA +AAAACTTAGGAACCAGGTCCACACAGCCGCCAGCCCATCAACCATCCACTCCCACGATTG +GAGCCGATGGCAGAAGAGCAGGCACGCCATGTCAAAAACGGACTGGAATGCATCCGGGCT +CTCAAGGCCGAGCCCATCGGCTCACTGGCCATCGAGGAAGCTATGGCAGCATGGTCAGAA +ATATCAGACAACCCAGGACAGGAGCGAGCCACCTGCAGGGAAGAGAAGGCAGGCAGTTCG +GGTCTCAGCAAACCATGCCTCTCAGCAATTGGATCAACTGAAGGCGGTGCACCTCGCATC +CGCGGCCAGGGACCTGGAGAGAGCGATGACGACGCTGAAACTTTGGGAATCCCCCCAAGA +AATCTCCAGGCATCAAGCACTGGGTTACAGTGTTATTATGTTTATGATCACAGCGGTGAA +GCGGTTAAGGGAATCCAAGATGCTGACTCTATCATGGTTCAATCAGGCCTTGATGGTGAT +AGCACCCTCTCAGGAGGAGACAATGAATCTGAAAACAGCGATGTGGATATTGGCGAACCT +GATACCGAGGGATATGCTATCACTGACCGGGGATCTGCTCCCATCTCTATGGGGTTCAGG +GCTTCTGATGTTGAAACTGCAGAAGGAGGGGAGATCCACGAGCTCCTGAGACTCCAATCC +AGAGGCAACAACTTTCCGAAGCTTGGGAAAACTCTCAATGTTCCTCCGCCCCCGGACCCC +GGTAGGGCCAGCACTTCCGAGACACCCATTAAAAAGGGCACAGACGCGAGATTAGCCTCA +TTTGGAACGGAGATCGCGTCTTTATTGACAGGTGGTGCAACCCAATGTGCTCGAAAGTCA 
+CCCTCGGAACCATCAGGGCCAGGTGCACCTGCGGGGAATGTCCCCGAGTGTGTGAGCAAT +GCCGCACTGATACAGGAGTGGACACCCGAATCTGGTACCACAATCTCCCCGAGATCCCAG +AATAATGAAGAAGGGGGAGACTATTATGATGATGAGCTGTTCTCTGATGTCCAAGATATT +AAAACAGCCTTGGCCAAAATACACGAGGATAATCAGAAGATAATCTCCAAGCTAGAATCA +CTGCTGTTATTGAAGGGAGAAGTTGAGTCAATTAAGAAGCAGATCAACAGGCAAAATATC +AGCATATCCACCCTGGAAGGACACCTCTCAAGCATCATGATCGCCATTCCTGGACTTGGG +AAGGATCCCAACGACCCCACTGCAGATGTCGAAATCAATCCCGACTTGAAACCCATCATA +GGCAGAGATTCAGGCCGAGCACTGGCCGAAGTTCTCAAGAAACCCGTTGCCAGCCGACAA +CTCCAAGGAATGACAAATGGACGGACCAGTTCCAGAGGACAGCTGCTGAAGGAATTTCAG +CTAAAGCCGATCGGGAAAAAGATGAGCTCAGCCGTCGGGTTTGTTCCTGACACCGGCCCT +GCATCACGCAGTGTAATCCGCTCCATTATAAAATCCAGCCGGCTAGAGGAGGATCGGAAG +CGTTACCTGATGACTCTCCTTGATGATATCAAAGGAGCCAATGATCTTGCCAAGTTCCAC +CAGATGCTGATGAAGATAATAATGAAGTAGCTACAGCTCAACTTACCTGCCAACCCCATG +CCAGTCGACCCAACTAGTACAACCTAAATCCATTATAAAAAACTTAGGAGCAAAGTGATT +GCCTCCCAAGTTCCACAATGACAGAGATCTACGACTTCGACAAGTCGGCATGGGACATCA +AAGGGTCGATCGCTCCGATACAACCCACCACCTACAGTGATGGCAGGCTGGTGCCCCAGG +TCAGAGTCATAGATCCTGGTCTAGGCGACAGGAAGGATGAATGCTTTATGTACATGTTTC +TGCTGGGGGTTGTTGAGGGCAGCGATCCCCTAGGGCCTCCAATCGGGCGAGCATTTGGGT +CCCTGCCCTTAGGTGTTGGCAGATCCACAGCAAAGCCCGAAGAACTCCTCAAAGAGGCCA +CTGAGCTTGACATAGTTGTTAGACGTACAGCAGGGCTCAATGAAAAACTGGTGTTCTACA +ACAACACCCCACTAACTCTCCTCACACCTTGGAGAAAGGTCCTAACAACAGGGAGTGTCT +TCAACGCAAACCAAGTGTGCAATGCGGTTAATCTGATACCGCTCGATACCCCGCAGAGGT +TCCGTGTTGTTTATATGAGCATCACCCGTCTTTCGGATAACGGGTATTACACCGTTCCTA +GAAGAATGCTGGAATTCAGATCGGTCAATGCAGTGGCCTTCAACCTGCTGGTGACCCTTA +GGATTGACAAGGCGATAGGCCCTGGGAAGATCATCGACAATACAGAGCAACTTCCTGAGG +CAACATTTATGGTCCACATCGGGAACTTCAGGAGAAAGAAGAGTGAAGTCTACTCTGCCG +ATTATTGCAAAATGAAAATCGAAAAGATGGGCCTGGTTTTTGCACTTGGTGGGATAGGGG +GCACCAGTCTTCACATTAGAAGCACAGGCAAGATGAGCAAGACTCTCCATGCACAACTCG +GGTTCAAGAAGACCTTATGTTACCCGCTGATGGATATCAATGAAGACCTTAATCGATTAC +TCTGGAGGAGCAGATGCAAGATAGTAAGAATCCAGGCAGTTTTGCAGCCATCAGTTCCTC +AAGAATTCCGCATTTACGACGACGTGATCATAAATGATGACCAAGGACTATTCAAAGTTC +TGTAGACCGTAGTGCCCAGCAATGCCCGAAAACGACCCCCCTCACAATGACAGCCAGAAG 
+GCCCGGACAAAAAAGCCCCCTCCGAAAGACTCCACGGACCAAGCGAGAGGCCAGCCAGCA +GCCGACGGCAAGCGCGAACACCAGGCGGCCCCAGCACAGAACAGCCCTGACACAAGGCCA +CCACCAGCCACCCCAATCTGCATCCTCCTCGTGGGACCCCCGAGGACCAACCCCCAAGGC +TGCCCCCGATCCAAACCACCAACCGCATCCCCACCACCCCCGGGAAAGAAACCCCCAGCA +ATTGGAAGGCCCCTCCCCCTCTTCCTCAACACAAGAACTCCACAACCGAACCGCACAAGC +GACCGAGGTGACCCAACCGCAGGCATCCGACTCCCTAGACAGATCCTCTCTCCCCGGCAA +ACTAAACAAAACTTAGGGCCAAGGAACATACACACCCAACAGAACCCAGACCCCGGCCCA +CGGCGCCGCGCCCCCAACCCCCGACAACCAGAGGGAGCCCCCAACCAATCCCGCCGGTTC +CCCCGGTGCCCACAGGCAGGGACACCAACCCCCGAACAGACCCAGCACCCAACCATCGAC +AATCCAAGACGGGGGGGCCCCCCCAAAAAAAGCCCCCCAGGGGCCGACAGCCAGCACCGC +GAGGAAGCCCACCCACCCCACACACGACCACGGCAACCAAACCAGAACCCAGACCACCCT +GGGCCACCAGCTCCCAGACTCGGCCATCACCCCGCAGAAAGGAAAGGCCACAACCCGCGC +ACCCCAGCCCCGATCCGGCGGGGAGCCACCCAACCCGAACCAGCACCCAAGAGCGATCCC +CGAAGGACCCCCGAACCGCAAAGGACATCAGTATCCCACAGCCTCTCCAAGTCCCCCGGT +CTCCTCCTTTTCTCGAAGGGACCAAAAGATCAATCCACCACACCCGACGACACTCAACTC +CCCACCCCTAAAGGAGACACCGGGAATCCCAGAATCAAGACTCATCCAATGTCCATCATG +GGTCTCAAGGTGAACGTCTCTGCCATATTCATGGCAGTACTGTTAACTCTCCAGACACCC +ACCGGTCAAATCCATTGGGGCAATCTCTCTAAGATAGGGGTGGTAGGAATAGGAAGTGCA +AGCTACAAAGTTATGACTCGTTCCAGCCATCAATCATTAGTCATAAAATTAATGCCCAAT +ATAACTCTCCTCAATAACTGCACGAGGGTAGAGATTGCAGAATACAGGAGACTACTGAGA +ACAGTTTTGGAACCAATTAGAGATGCACTTAATGCAATGACCCAGAATATAAGACCGGTT +CAGAGTGTAGCTTCAAGTAGGAGACACAAGAGATTTGCGGGAGTAGTCCTGGCAGGTGCG +GCCCTAGGCGTTGCCACAGCTGCTCAGATAACAGCCGGCATTGCACTTCACCAGTCCATG +CTGAACTCTCAAGCCATCGACAATCTGAGAGCGAGCCTGGAAACTACTAATCAGGCAATT +GAGGCAATCAGACAAGCAGGGCAGGAGATGATATTGGCTGTTCAGGGTGTCCAAGACTAC +ATCAATAATGAGCTGATACCGTCTATGAACCAACTATCTTGTGATTTAATCGGCCAGAAG +CTCGGGCTCAAATTGCTCAGATACTATACAGAAATCCTGTCATTATTTGGCCCCAGCTTA +CGGGACCCCATATCTGCGGAGATATCTATCCAGGCTTTGAGCTATGCGCTTGGAGGAGAC +ATCAATAAGGTGTTAGAAAAGCTCGGATACAGTGGAGGTGATTTACTGGGCATCTTAGAG +AGCAGAGGAATAAAGGCCCGGATAACTCACGTCGACACAGAGTCCTACTTCATTGTCCTC +AGTATAGCCTATCCGACGCTGTCCGAGATTAAGGGGGTGATTGTCCACCGGCTAGAGGGG +GTCTCGTACAACATAGGCTCTCAAGAGTGGTATACCACTGTGCCCAAGTATGTTGCAACC 
+CAAGGGTACCTTATCTCGAATTTTGATGAGTCATCGTGTACTTTCATGCCAGAGGGGACT +GTGTGCAGCCAAAATGCCTTGTACCCGATGAGTCCTCTGCTCCAAGAATGCCTCCGGGGG +TCCACCAAGTCCTGTGCTCGTACACTCGTATCCGGGTCTTTTGGGAACCGGTTCATTTTA +TCACAAGGGAACCTAATAGCCAATTGTGCATCAATCCTTTGCAAGTGTTACACAACAGGA +ACGATCATTAATCAAGACCCTGACAAGATCCTAACATACATTGCTGCCGATCACTGCCCG +GTAGTCGAGGTGAACGGCGTGACCATCCAAGTCGGGAGCAGGAGGTATCCAGACGCTGTG +TACTTGCACAGAATTGACCTCGGTCCTCCCATATCATTGGAGAGGTTGGACGTAGGGACA +AATCTGGGGAATGCAATTGCTAAGTTGGAGGATGCCAAGGAATTGTTGGAGTCATCGGAC +CAGATATTGAGGAGTATGAAAGGTTTATCGAGCACTAGCATAGTCTACATCCTGATTGCA +GTGTGTCTTGGAGGGTTGATAGGGATCCCCGCTTTAATATGTTGCTGCAGGGGGCGTTGT +AACAAAAAGGGAGAACAAGTTGGTATGTCAAGACCAGGCCTAAAGCCTGATCTTACGGGA +ACATCAAAATCCTATGTAAGGTCGCTCTGATCCTCTACAACTCTTGAAACACAAATGTCC +CACAAGTCTCCTCTTCGTCATCAAGCAACCACCGCACCCAGCATCAAGCCCACCTGAAAT +TATCTCCGGCTTCCCTCTGGCCGAACAATATCGGTAGTTAATTAAAACTTAGGGTGCAAG +ATCATCCACAATGTCACCACAACGAGACCGGATAAATGCCTTCTACAAAGATAACCCCCA +TCCCAAGGGAAGTAGGATAGTCATTAACAGAGAACATCTTATGATTGATAGACCTTATGT +TTTGCTGGCTGTTCTGTTTGTCATGTCTCTGAGCTTGATCGGGTTGCTAGCCATTGCAGG +CATTAGACTTCATCGGGCAGCCATCTACACCGCAGAGATCCATAAAAGCCTCAGCACCAA +TCTAGATGTAACTAACTCAATCGAGCATCAGGTCAAGGACGTGCTGACACCACTCTTCAA +AATCATCGGTGATGAAGTGGGCCTGAGGACACCTCAGAGATTCACTGACCTAGTGAAATT +CATCTCTGACAAGATTAAATTCCTTAATCCGGATAGGGAGTACGACTTCAGAGATCTCAC +TTGGTGTATCAACCCGCCAGAGAGAATCAAATTGGATTATGATCAATACTGTGCAGATGT +GGCTGCTGAAGAGCTCATGAATGCATTGGTGAACTCAACTCTACTGGAGACCAGAACAAC +CAATCAGTTCCTAGCTGTCTCAAAGGGAAACTGCTCAGGGCCCACTACAATCAGAGGTCA +ATTCTCAAACATGTCGCTGTCCCTGTTAGACTTGTATTTAAGTCGAGGTTACAATGTGTC +ATCTATAGTCACTATGACATCCCAGGGAATGTATGGGGGAACTTACCTAGTGGAAAAGCC +TAATCTGAGCAGCAAAAGGTCAGAGTTGTCACAACTGAGCATGTACCGAGTGTTTGAAGT +AGGTGTTATCAGAAATCCGGGTTTGGGGGCTCCGGTGTTCCATATGACAAACTATCTTGA +GCAACCAGTCAGTAATGATCTCAGCAACTGTATGGTGGCTTTGGGGGAGCTCAAACTCGC +AGCCCTTTGTCACGGGGAAGATTCTATCACAATTCCCTATCAGGGATCAGGGAAAGGTGT +CAGCTTCCAGCTCGTCAAGCTAGGTGTCTGGAAATCCCCAACCGACATGCAATCCTGGGT +CCCCTTATCAACGGATGATCCAGTGATAGACAGGCTTTACCTCTCATCTCACAGAGGTGT 
+TATCGCTGACAATCAAGCAAAATGGGCTGTCCCGACAACACGAACAGATGACAAGTTGCG +AATGGAGACATGCTTCCAACAGGCGTGTAAGGGTAAAATCCAAGCACTCTGCGAGAATCC +CGAGTGGGCACCATTGAAGGATAACAGGATTCCTTCATACGGGGTCTTGTCTGTTGATCT +GAGTCTGACAGTTGAGCTTAAAATCAAAATTGCTTCGGGATTCGGGCCATTGATCACACA +CGGTTCAGGGATGGACCTATACAAATCCAACCACAACAATGTGTATTGGCTGACTATCCC +GCCAATGAAGAACCTAGCCTTAGGTGTAATCAACACATTGGAGTGGATACCGAGATTCAA +GGTTAGTCCCAACCTCTTCACTGTCCCAATTAAGGAAGCAGGCGAAGACTGCCATGCCCC +AACATACCTACCTGCGGAGGTGGATGGTGATGTCAAACTCAGTTCCAATCTGGTGATTCT +ACCTGGTCAAGATCTCCAATATGTTTTGGCAACCTACGATACTTCCAGGGTTGAACATGC +TGTGGTTTATTACGTTTACAGCCCAGGCCGCTCATTTTCTTACTTTTATCCTTTTAGGTT +GCCTATAAAGGGGGTCCCCATCGAATTACAAGTGGAATGCTTCACATGGGACCAAAAACT +CTGGTGCCGTCACTTCTGTGTGCTTGCGGACTCAGAATCTGGTGGACATATCACTCACTC +TGGGATGGTGGGCATGGGAGTCAGCTGCACAGTCACCCGGGAAGATGGAACCAATCGCAG +ATAGGGCTGCTAGTGAACCAATCACATGATGTCACCCAGACATCAGGCATACCCACTAGT +GTGAAATAGACATCAGAATTAAGAAAAACGTAGGGTCCAAGTGGTTCCCCGTTATGGACT +CGCTATCTGTCAACCAGATCTTATACCCTGAAGTTCACCTAGATAGCCCGATAGTTACCA +ATAAGATAGTAGCCATCCTGGAGTATGCTCGAGTCCCTCACGCTTACAGCCTGGAGGACC +CTACACTGTGTCAGAACATCAAGCACCGCCTAAAAAACGGATTTTCCAACCAAATGATTA +TAAACAATGTGGAAGTTGGGAATGTCATCAAGTCCAAGCTTAGGAGTTATCCGGCCCACT +CTCATATTCCATATCCAAATTGTAATCAGGATTTATTTAACATAGAAGACAAAGAGTCAA +CGAGGAAGATCCGTGAACTCCTCAAAAAGGGGAATTCGCTGTACTCCAAAGTCAGTGATA +AGGTTTTCCAATGCTTAAGGGACACTAACTCACGGCTTGGCCTAGGCTCCGAATTGAGGG +AGGACATCAAGGAGAAAGTTATTAACTTGGGAGTTTACATGCACAGCTCCCAGTGGTTTG +AGCCCTTTCTGTTTTGGTTTACAGTCAAGACTGAGATGAGGTCAGTGATTAAATCACAAA +CCCATACTTGCCATAGGAGGAGACACACACCTGTATTCTTCACTGGTAGTTCAGTTGAGT +TGCTAATCTCTCGTGACCTTGTTGCTATAATCAGTAAAGAGTCTCAACATGTATATTACC +TGACATTTGAACTGGTTTTGATGTATTGTGATGTCATAGAGGGGAGGTTAATGACAGAGA +CCGCTATGACTATTGATGCTAGGTATACAGAGCTTCTAGGAAGAGTCAGATACATGTGGA +AACTGATAGATGGTTTCTTCCCTGCACTCGGGAATCCAACTTATCAAATTGTAGCCATGC +TGGAGCCTCTTTCACTTGCTTACCTGCAGCTGAGGGATATAACAGTAGAACTCAGAGGTG +CTTTCCTTAACCACTGCTTTACTGAAATACATGATGTTCTTGACCAAAACGGGTTTTCTG +ATGAAGGTACTTATCATGAGTTAATTGAAGCTCTAGATTACATTTTCATAACTGATGACA 
+TACATCTGACAGGGGAGATTTTCTCATTTTTCAGAAGTTTCGGCCACCCCAGACTTGAAG +CAGTAACGGCTGCTGAAAATGTTAGGAAATACATGAATCAGCCTAAAGTCATTGTGTATG +AGACTCTGATGAAAGGTCATGCCATATTTTGTGGAATCATAATCAACGGCTATCGTGACA +GGCACGGAGGCAGTTGGCCACCGCTGACCCTCCCCCTGCATGCTGCAGACACAATCCGGA +ATGCTCAAGCTTCAGGTGAAGGGTTAACACATGAGCAGTGCGTTGATAACTGGAAATCTT +TTGCTGGAGTGAAATTTGGCTGCTTTATGCCTCTTAGCCTGGATAGTGATCTGACAATGT +ACCTAAAGGACAAGGCACTTGCTGCTCTCCAAAGGGAATGGGATTCAGTTTACCCGAAAG +AGTTCCTGCGTTACGACCCTCCCAAGGGAACCGGGTCACGGAGGCTTGTAGATGTTTTCC +TTAATGATTCGAGCTTTGACCCATATGATGTGATAATGTATGTTGTAAGTGGAGCTTACC +TCCATGACCCTGAGTTCAACCTGTCTTACAGCCTGAAAGAAAAGGAGATCAAGGAAACAG +GTAGACTTTTTGCTAAAATGACTTACAAAATGAGGGCATGCCAAGTGATTGCTGAAAATC +TAATCTCAAACGGGATTGGCAAATATTTTAAGGACAATGGGATGGCCAAGGATGAGCACG +ATTTGACTAAGGCACTCCACACTCTAGCTGTCTCAGGAGTCCCCAAAGATCTCAAAGAAA +GTCACAGGGGGGGGCCAGTCTTAAAAACCTACTCCCGAAGCCCAGTCCACACAAGTACCA +GGAACGTGAGAGCAGCAAAAGGGTTTATAGGGTTCCCTCAAGTAATTCGGCAGGACCAAG +ACACTGATCATCCGGAGAATATGGAAGCTTACGAGACAGTCAGTGCATTTATCACGACTG +ATCTCAAGAAGTACTGCCTTAATTGGAGATATGAGACCATCAGCTTGTTTGCACAGAGGC +TAAATGAGATTTACGGATTGCCCTCATTTTTCCAGTGGCTGCATAAGAGGCTTGAGACCT +CTGTCCTGTATGTAAGTGACCCTCATTGCCCCCCCGACCTTGACGCCCATATCCCGTTAT +ATAAAGTCCCCAATGATCAAATCTTCATTAAGTACCCTATGGGAGGTATAGAAGGGTATT +GTCAGAAGCTGTGGACCATCAGCACCATTCCCTATCTATACCTGGCTGCTTATGAGAGCG +GAGTAAGGATTGCTTCGTTAGTGCAAGGGGACAATCAGACCATAGCCGTAACAAAAAGGG +TACCCAGCACATGGCCCTACAACCTTAAGAAACGGGAAGCTGCTAGAGTAACTAGAGATT +ACTTTGTAATTCTTAGGCAAAGGCTACATGATATTGGCCATCACCTCAAGGCAAATGAGA +CAATTGTTTCATCACATTTTTTTGTCTATTCAAAAGGAATATATTATGATGGGCTACTTG +TGTCCCAATCACTCAAGAGCATCGCAAGATGTGTATTCTGGTCAGAGACTATAGTTGATG +AAACAAGGGCAGCATGCAGTAATATTGCTACAACAATGGCTAAAAGCATCGAGAGAGGTT +ATGACCGTTACCTTGCATATTCCCTGAACGTCCTAAAAGTGATACAGCAAATTCTGATCT +CTCTTGGCTTCACAATCAATTCAACCATGACCCGGGATGTAGTCATACCCCTCCTCACAA +ACAACGACCTCTTAATAAGGATGGCACTGTTGCCCGCTCCTATTGGGGGGATGAATTATC +TGAATATGAGCAGGCTGTTTGTCAGAAACATCGGTGATCCAGTAACATCATCAATTGCTG +ATCTCAAGAGAATGATTCTCGCCTCACTAATGCCTGAAGAGACCCTCCATCAAGTAATGA 
+CACAACAACCGGGGGACTCTTCATTCCTAGACTGGGCTAGCGACCCTTACTCAGCAAATC +TTGTATGTGTCCAGAGCATCACTAGACTCCTCAAGAACATAACTGCAAGGTTTGTCCTGA +TCCATAGTCCAAACCCAATGTTAAAAGGATTATTCCATGATGACAGTAAAGAAGAGGACG +AGGGACTGGCGGCATTCCTCATGGACAGGCATATTATAGTACCTAGGGCAGCTCATGAAA +TCCTGGATCATAGTGTCACAGGGGCAAGAGAGTCTATTGCAGGCATGCTGGATACCACAA +AAGGCCTGATTCGAGCCAGCATGAGGAAGGGGGGGTTAACCTCTCGAGTGATAACCAGAT +TGTCCAATTATGACTATGAACAATTCAGAGCAGGGATGGTGCTATTGACAGGAAGAAAGA +GAAATGTCCTCATTGACAAAGAGTCATGTTCAGTGCAGCTGGCGAGAGCTCTAAGAAGCC +ATATGTGGGCGAGGCTAGCTCGAGGACGGCCTATTTACGGCCTTGAGGTCCCTGATGTAC +TAGAATCTATGCGAGGCCACCTTATTCGGCGTCATGAGACATGTGTCATCTGCGAGTGTG +GATCAGTCAACTACGGATGGTTTTTTGTCCCCTCGGGTTGCCAACTGGATGATATTGACA +AGGAAACATCATCCTTGAGAGTCCCATATATTGGTTCTACCACTGATGAGAGAACAGACA +TGAAGCTTGCCTTCGTAAGAGCCCCAAGTCGATCCTTGCGATCTGCTGTTAGAATAGCAA +CAGTGTACTCATGGGCTTACGGTGATGATGATAGCTCTTGGAACGAAGCCTGGTTGTTGG +CTAGGCAAAGGGCCAATGTGAGCCTGGAGGAGCTAAGGGTGATCACTCCCATCTCAACTT +CGACTAATTTAGCGCATAGGTTGAGGGATCGTAGCACTCAAGTGAAATACTCAGGTACAT +CCCTTGTCCGAGTGGCGAGGTATACCACAATCTCCAACGACAATCTCTCATTTGTCATAT +CAGATAAGAAGGTTGATACTAACTTTATATACCAACAAGGAATGCTTCTAGGGTTGGGTG +TTTTAGAAACATTGTTTCGACTCGAGAAAGATACCGGATCATCTAACACGGTATTACATC +TTCACGTCGAAACAGATTGTTGCGTGATCCCGATGATAGATCATCCCAGGATACCCAGCT +CCCGCAAGCTAGAGCTGAGGGCAGAGCTATGTACCAACCCATTGATATATGATAATGCAC +CTTTAATTGACAGAGATGCAACAAGGCTATACACCCAGAGCCATAGGAGGCACCTTGTGG +AATTTGTTACATGGTCCACACCCCAACTATATCACATTTTAGCTAAGTCCACAGCACTAT +CTATGATTGACCTGGTAACAAAATTTGAGAAGGACCATATGAATGAAATTTCAGCTCTCA +TAGGGGATGACGATATCAATAGTTTCATAACTGAGTTTCTGCTCATAGAGCCAAGATTAT +TCACTATCTACTTGGGCCAGTGTGCGGCCATCAATTGGGCATTTGATGTACATTATCATA +GACCATCAGGGAAATATCAGATGGGTGAGCTGTTGTCATCGTTCCTTTCTAGAATGAGCA +AAGGAGTGTTTAAGGTGCTTGTCAATGCTCTAAGCCACCCAAAGATCTACAAGAAATTCT +GGCATTGTGGTATTATAGAGCCTATCCATGGTCCTTCACTTGATGCTCAAAACTTGCACA +CAACTGTGTGCAACATGGTTTACACATGCTATATGACCTACCTCGACCTGTTGTTGAATG +AAGAGTTAGAAGAGTTCACATTTCTCTTGTGTGAAAGCGACGAGGATGTAGTACCGGACA +GATTCGACAACATCCAGGCAAAACACTTATGTGTTCTGGCAGATTTGTACTGTCAACCAG 
+GGACCTGCCCACCAATTCGAGGTCTAAGACCGGTAGAGAAATGTGCAGTTCTAACCGACC +ATATCAAGGCAGAGGCTAGGTTATCTCCAGCAGGATCTTCGTGGAACATAAATCCAATTA +TTGTAGACCATTACTCATGCTCTCTGACTTATCTCCGGCGAGGATCGATCAAACAGATAA +GATTGAGAGTTGATCCAGGATTCATTTTCGACGCCCTCGCTGAGGTAAATGTCAGTCAGC +CAAAGATCGGCAGCAACAACATCTCAAATATGAGCATCAAGGATTTCAGACCCCCACACG +ATGATGTTGCAAAATTGCTCAAAGATATCAACACAAGCAAGCACAATCTTCCCATTTCAG +GGGGCAATCTCGCCAATTATGAAATCCATGCTTTCCGCAGAATCGGGTTGAACTCATCTG +CTTGCTACAAAGCTGTTGAGATATCAACATTAATTAGGAGATGCCTTGAGCCAGGGGAAG +ACGGCTTGTTCTTGGGTGAGGGATCGGGTTCTATGTTGATCACTTATAAGGAGATACTTA +AACTAAACAAGTGCTTCTATAATAGTGGGGTTTCCGCCAATTCTAGATCTGGTCAAAGGG +AATTAGCACCCTATCCCTCCGAAGTTGGCCTTGTCGAACACAGAATGGGAGTAGGTAATA +TTGTCAAAGTGCTCTTTAACGGGAGGCCCGAAGTCACGTGGGTAGGCAGTGTAGATTGCT +TCAATTTCATAGTTAGTAATATCCCTACCTCTAGTGTGGGGTTTATCCATTCAGATATAG +AGACCTTGCCTAACAAAGATACTATAGAGAAGCTAGAGGAATTGGCAGCCATCTTATCGA +TGGCTCTGCTCCTGGGCAAAATAGGATCAATACTGGTGATTAAGCTTATGCCTTTCAGCG +GGGATTTTGTTCAGGGATTTATAAGTTATGTAGGGTCTCATTATAGAGAAGTGAACCTTG +TATACCCTAGATACAGCAACTTCATATCTACTGAATCTTATTTGGTTATGACAGATCTCA +AGGCTAACCGGCTAATGAATCCTGAAAAGATTAAGCAGCAGATAATTGAATCATCTGTGA +GGACTTCACCTGGACTTATAGGTCACATCCTATCCATTAAGCAACTAAGCTGCATACAAG +CAATTGTGGGAGACGCAGTTAGTAGAGGTGATATCAATCCTACTCTGAAAAAACTTACAC +CTATAGAGCAGGTGCTGATCAATTGCGGGTTGGCAATTAACGGACCTAAGCTGTGCAAAG +AATTGATCCACCATGATGTTGCCTCAGGGCAAGATGGATTGCTTAATTCTATACTCATCC +TCTACAGGGAGTTGGCAAGATTCAAAGACAACCAAAGAAGTCAACAAGGGATGTTCCACG +CTTACCCCGTATTGGTAAGTAGCAGGCAACGAGAACTTATATCTAGGATCACCCGCAAAT +TTTGGGGGCACATTCTTCTTTACTCCGGGAACAGAAAGTTGATAAATAAGTTTATCCAGA +ATCTCAAGTCCGGCTATCTGATACTAGACTTACACCAGAATATCTTCGTTAAGAATCTAT +CCAAGTCAGAGAAACAGATTATTATGACGGGGGGTTTGAAACGTGAGTGGGTTTTTAAGG +TAACAGTCAAGGAGACCAAAGAATGGTATAAGTTAGTCGGATACAGTGCCCTGATTAAGG +ACTAATTGGTTGAACTCCGGAACCCTAATCCTGCCCTAGGTGGTTAGGCATTATTTGCAA +TATATTAAAGAAAACTTTGAAAATACGAAGTTTCTATTCCCAGCTTTGTCTGGT +>AF243457 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATCGCAATGCATACTACTGAGGACAGGACCAGTAGAGCGGTTGGACCCAGACAA 
+GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGC +AAGGAAGACAGGAGGGTCAAACAGGGTCGGGGAGAAGCCAGGGAGAGCTACAGAGAAACC +AGGTCCAGCAGAGCAAGTGATGCGAGAGCTGCCCATCTTCCAACCAGCACACCCCAAGAC +ATTGACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGCCGACGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCCATGGCAGGAATCTTGGAAGAACAAGGCTCAGACACGGACACC +CCTAGGGTGTACAATGATAGAGATCTTCTAGACTAG +>AF243449 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGACGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGTTGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGC +AAGGAAGACAGGAGGGTCAAACAGAGTCGAGGAGAAGCCAGGGAGAGCTACAGAGATACC +GGGTCCAGCAGAGCAAGTGATGCAAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGAC +ATTGACACTGCATCGGAGTCAAGCCAAGATCCTCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGGCTCAGACACGGACACC +CCTCGAGTGTACAATGACAGAGATCTTCTAGACTAG +>AB695127 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAA +GCTCAAGTGTCGTTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGC +AAGGAAGATAGGAGGGTCAGGCAGAGTCGGGGAGAAGCCAGGGAGAGTAATAGAGAAATC +GGGTCCAGCAGATTAAGTGACGCGAGAGCTGCCCATCTTCCGACCAGCACACCCCTAGAC +ATTGACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCC +CTGCTCAGGCTGCAAGCTATGGCAGGAATCCTGGAAGAACAAGGCTCAGACACGGACACC +CCCAGGGTGTACAATGACAGAGATCTTCTAGACTAG +>MZ712086 +GTGATCAATCCAGGTTCGGATGGTTCGAGAACAAGGAAATCTCAGATATCGAAGTGCAAG +ACCCTGAGGGATTTAACATGATTCTGGGTACCATTCTAGNNNNNNNNNNNNNNNNNNNCG +CAAAGGCGGTTACGGCCCCAGACACGGCAGCTGATTCGGAGCTAAGAAGGTGGATAAAGT +ACACCCAACAAAGAAGGGTAGTTGGTGAATTTAGATTGGAGNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNCTCCTTACGCCGATTCATGGTCGCTCTAATCCTGG +ATATCAAGAGGACACCCGGGAACAAACCTAGGATTGCCGAAATGATATGTGACATTGATA +CATATATCGTAGAGGCAGGATTAGCCAGTTTTATCCTGACTATTAAGTTTGGGATAGAAA +CTATGTATCATGCTCTTGGACTGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCT +CAATTCAGAACAAGTTCAGTGCAGGATCATACCCTCTGCTATGGAGCTATGCCATGGGAG 
+TAGGAGTGGAACTTGAAAACTCTATGGGGGGTTTGAACTTTGGTCGATCTTACTTTGATC +CAGCATATTTTAGATTAGGGCAAGAGATGGTGAGGAGGTCAGCTGGGAAGGTCAGTTCCA +CATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGATTGCAA +TGCATACTACTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAAGCTCAAGTGTCAT +TTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAGGAAGATAGGA +GGGTCAGACAGAGTNNNNNNNNNGCCAGGGAGAGCAACAGAGAAACCGGGTCCAGCAGAT +CAAGTGATGCGAGAGCTGCCCATCTTCCAACCAGCACACCCCTAGACATTGACACTGCAT +CGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGCCNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNTCATTGTTATAAAAAACTTAGGAACCAGGTCCACACAGCCGCCAGCCAACCAAC +CATCCACTCCCACGACTGGGGCCGATGGCAGAGGAGCAGGNNNNNNNNNNNNNNNNNNNN +NNNNNNNGCATCCGGGCTCTCAAGGCCGAGCCCATCGGCTCACTGGCCGTCGAGGAAGCC +ATGGCAGCATGGTCAGAAATATCAGACAACCCAGGACAGGACCGAGCCACCTGCAAGGAA +GAGAAGGCAGGCAGTTCGGGTCTCAGCAAACCATGCCTCTCAGCAATTGGATCAACTGAA +GGCGGTGCACCTCGCATCCGCGGTCAGGGATCTGGAGAGAGCGATGACGACGCTGAAGCT +TTGGGAATCCCCTCAAGAAATCTCCAGGCATCAAGCACTGGGTTACAGTGTTATCATGTT +TATGATCACAGCGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGGGATCTGCTCCC +ATCTCTATGGGGTTCAGGGCTTCTGATGTTGAAACTGCAGAAGGAGGTGAGATCCACGAG +CTCCTGAGACTCCAATCCAGAGGCAACAACTTTCCGAAGCTTGGGAAAACTCTCAATGTT +CCTCNNNNNNNNNNCCCCGGTAGGGCCAGCGCTTCCGAGACACCCATTAAAAAGGGCACA +GACGCGAGATTAGCCTCATTTGGAACGGAGATCGCGTCTTTATTGACAGGTGGTGCAACC +CAATGTGCTCGAAAGTCACCCTCGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCACCCGAATCTGGTACCACA +ATCTCCCCGAGATCCCAGAATAATGAAGAAGGGGGAGACTATTATGATGATGAGCTGTTC +TCCGATGTCCAAGACATCNNNNNNNNNNNNNNNNNNNNNCACGAGGATAATCAGAAGATA +ATCTCTAAACTAGAATCACTGCTGTTATTGAAGGGAGAAGTTGAGTCAATTAAGAAGCAG +ATCAACAAGCAAAATATCAGCATATCCACCCTGGAAGGACACCTCTCAAGCATCATGATC +GCCATTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATGTCGAACTCAATCCC +GACTTGAAACCCATCATAGGCAGAGATTCCGGCCGAGCACTGGCCGAAGTTCTCAAGAAA 
+CCCGCTGCCAGCCGACAACTCCAAGGAATGACAAATGGACGGACCAGTTCCAGAGGACAG +CTGCTGAAGGAATTTCAACTAAAGCCGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATTATAAAATCCAGTCGG +CTAGAGGAGGATCGGAAGCGTTACCTGATGACTCTCCTTGATGATATCAAAGGAGCTAAC +GATCTCACTAAGTTCCACCAGATGCTGATGAAGATAATAATGAAGTAGCTACAGCTCAAC +CTACCTGCCAACCTCATGCCAGTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNTCTGCTGGGGGTTGTTGAGGACAGCGATCCCYTAGGGTCTCCAA +TCGGGCGAGCATTTGGGTCYCTGCCCTTAGGTGTTGGTAGATCCACAGCAAAACCCGAAG +AGCTCCTCAAAGAGGCCACTGAGCTTGACATAGTTGTCAGACGTACAGCAGGGCTCAATG +AAAAACTGGTGTTCTACAACAACACCCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNGAGTGTCTTCAACGCAAACCAAGTGTGCAATGCAGTTAATCTGATACCGC +TGGATACCCCGCAGAGGTTCCGTGTTGTTTATATGAGCATCACCCGTCTTTCGGATAACG +GGTATTACACCGTTCCTAGAAGAATGCTGGAGTTCAGATCGGTCAATGCAGTGGCCTTCA +ACCTGCTGGTGACCCTTAGGATTGACAAGGCGATTGGCCCTGGGAAGATCATCGACAATG +CAGAGCAACTTCCTGAGGCAACATTTATGGTCCACATCGGGAACTTCAGGAGAAAGAAGA +ATGAAGTCTACTCTGCCGATTATTGCAAGATGAAAATCGAAAAGATGGGCCTGGTTTTTG +CACTTGGGGGGATAGGAGGCACCAGTCTTCACATTAGAAGCACAGGCAAGATGAGCAAGA +CTCTCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTGCCCGAAAACGACCCCCCG +CATAATGACAGCCAAAAGGCCCGGACAAAAAAGCCCCCTCCAAAAGACTCCACAGATCAA +GCGAGAGGCCAGCCAACAGCTGACGGCAAGCGTGAACACCAGGCGGCCCAAGCACAGAAC +AGCCCCGACCCAAGGCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNGGTCGCCCCCGACCCAGACCACCAACCGTATCCCCATAGCCCCCG +GGAAAGAAACCCCCAGCAACTGGAAGGTCCCTCCCCCCCTCCCTCAACCCAAAAGCCCCG +CAACCGAATCGCACAAGCGACCGAGGTGACYYAACCGCAGGCATCCGACTCCCCAGACAG +ATCCTCTCCCCCCGGCAAACTAAACAAAACTTAGGGCCAAGGAACATACACACCCGACAG +AACCCAGACCCCGGCCCACGACGCCGCGCCCCCATCCCCCGACAACCAGAGAGAGCCCCC 
+AACCAATCCCGCCGGCTCCCCCGGCGCCCACAGGCAGGCACACCAACCCCYGAACNNNNN +NNNNNCCCAGCCACCGACAGTCCAAGAGGGGGGGCCCCCCCCAAAAAAAGGCCCCCAGGG +GCCGACAGCCAGCACCGCAAGGAAGCCCACCCACCCCACACACGACCACGGCAACCGAAC +CAGAATCCAGACCACCCTGGGCCACCAGCTCCCAGATTCGGCCACCACCCCGCAGAAAGG +AAAGGCCACAATCCGCACACCCCAGCCCCGATCCGGCGGACAGCCACCCAACCCGAACCA +GCACCCAAGANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCGACATCAGCATCCCACAGC +CTCCCCAAGTCCCTCGATCTCCTCYTCTTCTTGAAGGGACCAAAAGATCAATCCACCACA +CCCGACGACACTCGATTCCCCACCCCTAAAGGAGACACCGAGAATCCCAGAATCAAGACT +CNNNNNNTGTCCATCATGGGTCTCAAGGTGAACGTCTCTGTCATAGTCATGGCAGTACTG +TTAACTCTTCAAACACCAACCGGTCAAATCCATTGGGGCAATCTCNNNNNNNNAGGGGTG +GTAGGGATAGGAAGTGCAAGCTACAAAGTTATGACTCGTTCCAGCCATCAATCATTAGTC +ATAAAGTTAATGCCCAATATAACTCTCCTCAACANNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCAAGTAGGAGACACAAGAGATTTGCGGGA +GTTGTTCTGGCAGGTGCGGCCCTAGGCGTTGCCACAGCTGCTCAGATAACAGCCGGTATT +GCACTTCACCAGTCCATGCTGAACTCTCAAGCCATCGACAATCTGAGAGCGAGCCTGGAA +ACTACCAATCAGGCAATTGAGGCAATCAGGCAAGCAGGGCAGGAGATGATATTGGCTGTT +CAGGGTGTCCAAGACTACATCAATAATGAGCTGATACCGTCTATGAACCAACTATCTTGT +GATTTAATCGGCCAGAAGCTAGGGCTCAAGTTGCTCAGATACTATACAGAAATCCTGTCA +TTATTTGGCCCCAGCTTACGGGRCCCCATATCKGCGGAGATATCTATCCAGGCTTTGAGC +TATGCGCTTGGAGGAGATATCAATAAGGTATTGGAAAAGCTCGGATACAGTGGAGGTGAT +CTACTGGGCATCTTGGAGAGCAGAGGAATAAAGGCCCGGATAACTCACGTCGACACAGAG +TCCTACTTCATTGTACTCAGTATAGCCTATCCGACGCTGTCCGAGATTAAAGGGGTGATT +GTCCACCGGNNNNAGGGGGTCTCGTACAACATAGGCTCTCAAGAGTGGTATACCACTGTG +CCCAAGTATGTTGCAACCCAAGGGTACCTTATCTCGAATTTTGATGAGTCATCGTGCACT +TTCATGCCAGAGGGGACTGTGTGCAGCCAAAATGCCTTGTACCCGATGAGTCCTCTGCTC +CANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNCTGCCGGGTGGTTGAAGTGAATGGTGTGACCATCCAAGTCGGGAGCAGG +AGGTATCCAGACGCTGTGTACTTGCACAGGATTGACCTCGGTCCTCCCATATCATTGNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TGCTGCAGGGGGCGTTGTAACAARAAGGGAGAACAAGTTGGTATGTCAAGACCAGGCCTA +AAGCCTGATCTTACAGGAACATCAAAATCCTATGTAAGGTCACTCTGATCCTCTACAACT +CTTGGAACACAAATGTCCCACAAGTCTCCTCTTCGTCATCAAGCAACCACCGGAGCCAGC +ATCAAGCCCATCTGAAATTGCCTCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNACCACTCTTCAAGATCATCGGTGATGAAGTGGGCTTGAGGACACCTCAGAGATT +CACTGACCTAGTGAAATTCATCTCTGACAAGATTAAATTCCTTAATCCGGATAGGGAATA +CGACTTCAGAGATCTCASTTGGTGTATCAACCCGCCAGAGAGAATCAAATTGGATTATGA +TCAATACTGTGCAGATGTGGCTGCTGAAGAACTCATGAATGCACTGGTGAACTCAACTCT +ACTGGAGGCCAGGGCAACCAATCAGTTCCTAGCTGTCTCAAAGGGNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNACCTGGTCAAGATCTCCAATATGTTTTGGCAACCTACGATAC +TTCCAGAGTTGAACATGCTGTAGTTTATTACGTTTACAGCCCAAGCCGCTCATTTTCTTA +CTTTTATCCTTTTAGGTTGCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGCGGACTCGGAATCTGG +TGGACATATCACTCACTCTGGGATGGTGGGCATGGGAGTCAGCTGCACAGCCACTCGGGA +AGATGGAACCAACCGCAGATAGGGCTGCCAGTGAGCCAATCACATGATGTCACCCAGACA +TCAGGCATCCCCACTAGTGTGGAATAGACATCAGAATTAAGAAAAACGTAGGGTCCAAGT +GGTTCCTCGTTATGGACTCGCTATCTGTCAACCAGATCTTATACCCTGAAGTTCACCTAG +ATAGCCCGATAGTCACCAATAAGATAGTAGCTATCCTGGAGTATGCTCGAGTCCCTCACG +CTTACAGCCTGGAGGACTCTACACTGTATCAGAACATCAAGCACCGCCTNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAGTCTACATGC +ATAGCTCCCAATGGTTTGAGCCCTTCCTGTTTTGGTTCACAGTCAAGACTGAGATGAGGT +CAGTGATTAAATCGCAAACCCATACTTGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNCTCGTGACCTTGTTGCTATAATCAGTAAAGAGT +CTCAACATGTATATTACCTGACGTTTGAACTGGTTTTGATGTATTGTGATGTCATAGAGG +GGAGATTAATGACAGAGACCGCTATGACCATTGATGCTAGGTATACAGAGCTTCTAGGAA +GAGTCAGATACATGTGGAAACTGATAGATGGTTTCTTCCCTGCACTCGGGAATTCAACTT +ATCAAATTGTAGCCATGCTGGAGCCACTTTCACTTGCTTACCTGCAACTGAGGGACATAA +CNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAGGGCATGCC +AAGTGATTGCTGAAAATCTAATCTCAAACGGGATTGGCAAATATTTTAAGRACAATGGGA +TGGCCAAGGACGAGCACGATTTGACTAAGGCACTCCACACTCTAGCTGTCTCAGGAGTCC +CCAAAGATCTCAAAGAAAGTCACAGGGGGGGGCCAGTCTCAAAAACCCACTCCCGAAGCC +CAGTCCACACAAGTACCAGGAACGTGAAAGCAGAAAAAGGGTTTATAGGATTCCNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNACCCGGAGAATATGGAAGCTTACGAGACAGTCA +GTGCATTTATCACAACTGATCTCAAGAAGTACTGCCTTAATTGGAGATATGAGACCATCA +GCTTATTTNNNNNNNNNNNNNNNNNNNNNNNNNNATTACCCTCATTTTTCCAGTGGCTGC +ATAAGAGGCTTGAAACCTCTGTCCTCTATGTAAGTGACCCTCATTGCCCCCCTGACCTTG +ACGCCCATGTCCCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNATGGACCATCAGCACCATTCCCTATTTATACC +TGGCTGCTTATGAAAGCGGAGTAAGGATTGCTTCATTAGTGCAAGGGGACAATCAGACCA +TAGCCGTAACAAAAAGGGTACCCAGCACATGGCCTTACAACCTCAAGAAACGGGAAGCTG +CTAGAGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNCTCTTAGCTTCACAATCAATTCAACCATGACTCGGGATGTAG +TCATACCCCTCCTCACAAACAACGATCTCTTAATCAGGATGGCACTGTTGCCCGCTCCTA +TTGGGGGGATGAATTATCTGAATATGAGCAGGCTGTTTGTCAGAAACATCGGTGATCCAG +TAACATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGGGGGGTTAACCT +CTCGAGTGATAACCAGATTATCCAATTATGACTATGAACAATTTAGAGCAGGGATGGTGC +TATTGACAGGAAGAAAGAGAAATGTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTGG 
+CTAGAGCCCYAAGAAGCCATATGTGGGCAAGGCTAGCTCGAGGACGGCCTATTTACGGCC +TTGAGGTCCCTGATGTACTAGAATCTATGCGAGGCCACCTTATTCGGCGTCATGAGACAT +GTGTCATCTGCGAGTGTGGATCAGTCAACTACGGATGGTTTTTTGTCCCCTCGGGTTGCC +AACTGGATGATATTGACAAGGAAACATCATCCTTGAGAGTCCCATATATTGGTTCTACCA +CTGATGAGAGAACAGACATGAAGCTTGCCTTCGTAAGAGCCCCANNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTATGGTGATGATGATAGCTCTTGGA +ACGAGGCCTGGTTGTTGGCAAGGCAAAGGGCCAATGTGAGCCTGGAGGAGCTAAGGGTGA +TCACTCCCATCTCAACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNCAGGCACATCTCTTGTCCGAGTGGCAAGGTATACCACAATCTCCAACGACA +ATCTCTCATTTGTCATATCAGATAAGAAGGTTGATACTAACTTTATATACCAACAAGGAA +TGCTTCTAGGGTTAGGTATCTTAGAAACATTGTTTCGACTCGAGAAAGATACCGNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATCCATGGTCCTTCGCTTG +ATGCTCAAAATTTGCACACAACTGTGTGCAACATGGTTTACACATGTTATATGACCTACC +TCGACCTGCTGTTGAATGAAGAGTTAGAAGAGTTCACATTTCTTTTGTGTGAAAGCGACG +AGGATGTAGTACCGGACAGATTCGACAATATCCAGGCAAAACACCTGTGTGNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCCGTCAGGGTYTTCGT +GGAACATAAATCCAATTGTTGTAGACCATTACTCATGCTCTCTGACTTATCTCCGGCGAG +GATCGATCAAGCAGATAAGATTGAGAGTTGATCCAGGATTCATTTTTGACGCCCTCGCTG +AGGTAAATGTCAGTCAGCCAAAGATCGGGAGCAACAACTTCCCAAATATGAGCATCAAGG +ATTTTAGGCCTCCTCACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCCTTGTCGAACACA +GAATGGGGGTAGGTAATATTGTCAAGGTGCTCTTTAATGGGAGGCCCGAAGTCACGTGGG +TAGGCAGTATAGATTGCTTCAATTTCATAGTCAGTAATATCCCCACCTCTAGTGTGGGGT +TTATCCATTCAGATATAGAGACCTTACCTAACAAAGACACTATAGAGAAGCTAGAGGAAT +TGGCAGCCATCTTATCGATGGCTCTGCTCCTTGGCAAAATAGGATCAATACTGGTGATTA +AGCTTATGCCTTTCAGCGGGGATTTTGTTCAGGGATTTATAAGCTATGTAGGGTCTCATT +ATAGAGAAGTGAACCTTGTCTACCCTAGATACAGCAACTTCATATCTACTGAATCTTATT +TAGTTATGACAGATCTCAAAGCTAACCGGCTAATGAACCCTGAAAAGATTAAGCAGNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNGGGGGCATATTCTTCTGTACTCTGGGAACAGAAAGTTGA +TAAATCGGTTTATCCAAAATCTCAAGTCCGGCTATCTAATACTAGACTTACACCAGAATA +TCTTCGTTAAGAATCTATCTAAGTCAGAGAAAGAGATTATTATGACGGGGGGTTTGAAAC +GTGAGTGGGTTTTTAGGGTAACAGTCAAGGAGACCAAAGAATGGTATAAGTTAGTCGGAT +ACAGCGCCCTGATTAAGGA +>JX026868 +AAGGTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTT +TCAGAGATTGCAATGCATACTACTGAGGACAGGATCAGCAGAGCAGTTGGACCCAGACAA +GCCCAAGTGTCATTTCTACACGGTGATCAAAGTGAGAGTGAGCTGCCGAGATTGGGGGGT +AAGGAGGACAGGAGGGTCAAACAGAACCGAGGAGAAGCCGGGGAGAGCCACAGAGAAACC +GAGCCTAGCAGAGCAAGTGATGTGAGAGCTGCCCATCCTCCAACCGGCACACCCCTAGAC +ATTGACACTGCATCGGAGTTCAGCCAAGATCCGCAGGACAGTCGAAGGTCAGCCGATGCC +CTGCTTAGGCTGCAAGCCATGGCAGGAATCTCGGAAGAACAAGACTCAGACACGGACACC +CCTAGAGTGTACAATGACAGAGACCTTCTAGGAYTAGGTG +>KU684406 +GTCAGTTCCACATTGGCATCTGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCA +GAGATTGCAATGCACAATATTGAGGACAGGACCAGTAGAGCAGTTGGACCCAGACAAGCT +CAAGTGTCGTTTCTACACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAG 
+GAAGATAGGAGGGTCAGGCGGAGTAGGGGAGAAGCCAGGGAGAGATAGTGAGAAATCGGG +TCCAGCAGATTGAGTGACGCGAGAGCTGCCCATCTTCCGACCAGCACACCCCTGGACATT +GACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGGTCAGCTGACGCCCTG +CTCAGGCTGCAGGCTATGGCAGGAATCCTGGAAGAACAAGGCTCAGACACGGACACCCCC +AGGGTGTACAATGACAGAGATCTCCTAGAC diff --git a/nextclade_genome/defaults/include_strains.txt b/nextclade_genome/defaults/include_strains.txt new file mode 100644 index 0000000..3d0d805 --- /dev/null +++ b/nextclade_genome/defaults/include_strains.txt @@ -0,0 +1,101 @@ +# WHO Genotypes extracted from Table 1, +# https://www.who.int/publications/i/item/WER8832 + +# 1A +#AB047330 # WHO genotype, unused because it distorts the tree +AF188704 # WHO genotype +JF727653 # WHO genotype +JX524186 # selected as mid-basal sample relative to WHO samples +M30776 # WHO genotype + +# 1B +AY968207 # WHO genotype +AY968208 # WHO genotype +AY968209 # WHO genotype +JX524184 # selected as more basal relative to WHO samples +OM735621 # selected as more basal relative to WHO samples +OM735623 # selected as mid-basal relative to WHO samples + +# 1C +AY968211 # WHO genotype +AY968212 # WHO genotype +AY968217 # WHO genotype +JN635283 # selected as more basal relative to WHO samples +OM735624 # selected as semi-basal relative to WHO samples + +# 1D +AB072384 # selected as less basal relative to WHO samples +AB233426 # selected as mid-basal relative to WHO samples +AY968214 # WHO genotype +AY968216 # WHO genotype + +# 1E +AY968210 # WHO genotype +AY968221 # WHO genotype +DQ085331 # selected as more basal relative to WHO samples +JX679263 # selected as more basal relative to WHO samples +OM735633 # selected as more basal relative to WHO samples + +# 1F +AY326351 # selected as more basal relative to WHO samples +AY968213 # WHO genotype +AY968215 # WHO genotype +FJ875035 # selected as more basal relative to WHO samples +OM735681 # selected as more basal relative to WHO samples + +# 1G +AM258945 # WHO genotype +EF588970 # WHO genotype +EF588978 # WHO 
genotype +FN546969 # selected as more basal relative to WHO samples +KC962558 # selected as mid-basal relative to WHO samples + +# 1H +AM258953 # WHO genotype +DQ897934 # selected as more basal relative to WHO samples +EF199893 # selected as more basal relative to WHO samples +EF199889 # selected as more basal relative to WHO samples +HG326276 # WHO genotype + +# 1I +AF039122 # WHO genotype +AF039132 # selected as more basal relative to WHO samples +AY161351 # selected as more basal relative to WHO samples +AY161359 # selected as mid-basal relative to WHO samples +AY161360 # WHO genotype + +# 1J +AB285129 # WHO genotype +AB285130 # WHO genotype +JQ900444 # selected as more basal relative to WHO samples +JQ900445 # selected as more basal relative to WHO samples +KU601207 # selected as more basal relative to WHO samples + +# 2A +AY258322 # WHO genotype +AY258323 # WHO genotype +FJ875030 # cladistic with AY258323 +OM735684 # cladistic with AY258322 + +# 2B +AY968218 # WHO genotype +AY968219 # WHO genotype +AY968220 # WHO genotype +JN635293 # selected as mid-basal relative to WHO samples +KC288129 # selected as mid-basal relative to WHO samples + +# 2C +AY247016 # selected as more basal relative to WHO samples +AY247017 # selected as more basal relative to WHO samples +DQ085340 # WHO genotype +DQ388279 # WHO genotype + +# 1A "emperical" clade; samples _not_ from WHO paper; based on a clade +# observed in the whole genome tree, with three samples selected +# randomly as representative samples +KU958641 # vax derived +MK787190 # vax derived +OQ296622 # vax derived + +# RA 27/3 vaccine +L78917 diff --git a/nextclade_genome/defaults/reference_genome.gb b/nextclade_genome/defaults/reference_genome.gb new file mode 100644 index 0000000..1fa7b33 --- /dev/null +++ b/nextclade_genome/defaults/reference_genome.gb @@ -0,0 +1,220 @@ +LOCUS NC_076948 9761 bp ss-RNA linear VRL 05-MAY-2023 +DEFINITION Rubella virus strain RVi/Bismarck.ND.USA/23.08/2B, complete genome. 
+ACCESSION NC_076948 +VERSION NC_076948.1 +DBLINK BioProject: PRJNA485481 +KEYWORDS RefSeq. +SOURCE Rubella virus + ORGANISM Rubella virus + Viruses; Riboviria; Orthornavirae; Kitrinoviricota; Alsuviricetes; + Hepelivirales; Matonaviridae; Rubivirus; Rubivirus rubellae. +REFERENCE 1 (bases 1 to 9761) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (03-MAY-2023) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 2 (bases 1 to 9761) + AUTHORS Kirkness,E.F., Halpin,R., Bera,J., Fedorova,N., Overton,L., + Stockwell,T., Amedeo,P., Bishop,B., Chen,H., Edworthy,P., Gupta,N., + Katzel,D., Li,K., Schobel,S., Shrivastava,S., Thovarai,V., Wang,S., + Abernathy,E., Chen,M.H., Icenogle,J., Wentworth,D.E. and Bellini,W. + TITLE Direct Submission + JOURNAL Submitted (26-AUG-2011) J. Craig Venter Institute, 9704 Medical + Center Drive, Rockville, MD 20850, USA +COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final + NCBI review. The reference sequence is identical to JN635296. + COMPLETENESS: full length. 
+FEATURES Location/Qualifiers + source 1..9761 + /organism="Rubella virus" + /mol_type="genomic RNA" + /strain="RVi/Bismarck.ND.USA/23.08/2B" + /host="Homo sapiens" + /db_xref="taxon:11041" + /geo_loc_name="USA: North Dakota" + /collection_date="2008" + /genotype="2B" + /note="infection originated in India" + CDS 41..6391 + /ID="cds-YP_010801281.1" + /Dbxref="GenBank:YP_010801281.1,GeneID:80539971" + /Name="NSP" + /gbkey="CDS" + /gene="NSP" + /locus_tag="NSP" + /product="non-structural polyprotein p200" + /protein_id="YP_010801281.1" + CDS 6511..9702 + /ID="cds-YP_010801282.1" + /Dbxref="GenBank:YP_010801282.1,GeneID:80539970" + /Name="SP" + /gbkey="CDS" + /gene="SP" + /locus_tag="SP" + /product="structural polyprotein" + /protein_id="YP_010801282.1" +ORIGIN + 1 caatgggagc taacggacct cgcttaggac tcctattccc atggagaaac tcctagatga + 61 ggttcttgcc cccggtgggc cttataactt aaccgtcggc agttgggtaa gagaccatgt + 121 ccgctcaatt gtcgagggtg cgtgggaagt gcgcgatgtt gttaccgctg cccaaaagcg + 181 ggccatcgta gctgtgatac ccagacctgt tttcacgcag atgcaggtta gtgatcaccc + 241 agcactccac gcaatttcgc ggtatacccg ccgccattgg atcgagtggg gccctaaaga + 301 agccctacac gtcctcatcg accccagccc gggcctgctc cgcgaggtcg ctcgcgtcga + 361 gcgccgctgg gtcgcattgt gcctccacag gacggcacgc aaactcgcca ccgccctggc + 421 cgagacggcc ggtgaggcgt ggcacgccga ctacgtgtgc gcgctgcgtg gcgcaccgag + 481 cggccccttt tacgtccacc cagaggacgt cccgcacggc ggtcgcgccg tggcggacag + 541 atgcttgctt tactacacac ccatgcagat gtgcgagctg atgcgcacca tcgacgccac + 601 cctgcttgtg gcggttgact tgtggccggt cgcccttgcg gcgcacgtcg gtgacgactg + 661 ggacgacctg ggcatcgcct ggcacctcga ccacgacggg ggttgccccg ccgactgccg + 721 tggcgccggc gctgggccca cgcccggcta cacccgcccc tgcactacgc gcatctacca + 781 ggtcctgccg gacaccgccc accccgggcg cctctaccgg tgcgggcccc gcctgtggac + 841 acgcgattgc gccgtagccg aactttcatg ggaggtcgcc cagcattgcg ggcaccaggc + 901 gcgtattcgc gccgtgcgat gcgccctcgc cattcgccac gtgcgcagcc tccaacccag + 961 cgcgcgcgtc cggctcccgg accttgtcca tctcgccgag gtgggccggt ggcggtggtt + 1021 
cagcctcccc cgccccgtct ttcagcgtat gctgtcctat tgcaagaccc tgagcccgga + 1081 cgcgtactac agcgagcgcg tgttcaagtt caagaacgcc ctgagtcaca gcatcacgct + 1141 cgcgggcaac gtgctgcagg aggggtggaa gggcacgtgc gccgaggagg acgcgttgtg + 1201 tgcgtacgtg gccttccgcg cgtggcagtc caacgcgagg ctggccggga tcatgaaagg + 1261 cgcgaagcgc tgcgccgccg actccttgag cgtggccggc tggcttgaca ccatctggga + 1321 cgccatcaag cggttcttcg gcagcgtgcc cctcgccgag cgcatggagg agtgggagca + 1381 ggacgccgcg gtcgcggcct ttgatcgcgg cccccttgag gacggcgggc accacttggt + 1441 aaccgtgcaa cctcccaagc cgccgccccg ccccgagatc gccgcaacgt ggattgtcca + 1501 cgccgcgagc gcagaccgcc actgtgcgtg cgccccccgc tgcgacgtcc cacgcgaacg + 1561 cccctctgcg cctccgggcc cgccagatga cgaggcgctc atcccgccgt ggctgtttgc + 1621 cgagcaccgc gcccttcgtt gccgcgagtg ggatttcgag gcccttcgcg cgcgcgccga + 1681 cacggcggcc acgcccgccc cactagcccc acgccccgcg cgccacccca ccgtgctcta + 1741 ccgccacccc gcccaccacg gcccgtggct cactcttgac gagcccggcg aggctgacgc + 1801 ggccctggtc ttatgcgacc cacttggcca gccgctccgg ggcccggagc gccatttcac + 1861 cgccggcgcg cacatgtgcg cgcaggcgcg ggggctccag gcctttgtcc gcgtcgtgcc + 1921 cccgcccgag cgcccctggg ctgacggggg cgctagaacg tgggcgaagt tcttccgcag + 1981 ctgcgcctgg gcacagcgct tgctcggcga gccggcagtc atgcacctcc cgtacaccga + 2041 cggcgacgtg ccgcagctga ttgcgctggc cctgcgcacg ctggcccaac agggggccgc + 2101 cttggcactc tcggtgcgtg acctgcccgg gggcgcggcg ttcgacgcca acgcggtcac + 2161 cgccgccgtg cgcgcgggct ccagtcagcc cgcgccggcc gcaccccccc cggacaactc + 2221 ctcgccacca cgccgtgcac ggcggtcgcg gcgacccccg gacacccgcg gccctccgcc + 2281 ctccgcgccc gcccgcgacc cgccgccgcc cgcccccagc ccgcccacgc caccccgcgc + 2341 gcgcggcccg gccccagcca ccctagagga gcccgcggat cgcgcgcgtg acgccgagca + 2401 ggaggttgcc tgcgacccga gcggccccgc cgcgccaacc agggcggacc caaacagcga + 2461 catcgtcgaa agctacgccc gcgccgccgg tcctgtgcac cttcgcgtcc gcgacatcat + 2521 ggacccccca cctggttgca aggtcgtggt caatgccgcg aacgaggggc tgctcgccgg + 2581 atctggcgtc tgcggtgcca tcttcgccaa agccgccgcg gccctcgccg aggactgccg + 2641 gcgcctcgcc ccatgcccca 
ccggcgaggc ggtggcgacg cccggccacg gctgcgggta + 2701 tgcgcacatc atccacgcgg tcgctcctcg gcgccctccg gaccccgccg ccctcgagca + 2761 gcgcgaagcc ctgctcgaac gcgcctaccg cagcatcgtc gcgctggccg ccgcgcgggg + 2821 gtgggcgtgt gttgcctgcc ccctcctcgg cgctggcgtc tacggctggt ctgccgcgga + 2881 gtcccttcga gccgcgctcg cggccacgcg cgccgagccc gccgagcgcg tgagcctgca + 2941 tatatgccac cccgaccgcg ccacgctgac gcacgcctcc gtgctcgtgg gcgcggggct + 3001 cgctgccagg cgcgtcagcc cccctccgac cgagcccctc gcaccccgcc ccgtcggcga + 3061 cccgggccga tcgacccagc gcagcgcgtc acccccggcc accccctctg gggaggccac + 3121 cgcgcctgag tcgcgcgggt gccaggggtg cgaactctgc cggtacacgc gcgtcaccaa + 3181 tgaccgcgcg tatgtcaacc tctggctcga gcgcgaccgc ggcgccaccg gttgggcgat + 3241 gcgcatcccc gaagtggtcg tctacgggcc ggaacacctc gccgcgcact ttccattaaa + 3301 ccactacagt gtgctcaagc ccgcggaggt gaggcccccg cggggcatgt gcgggagtga + 3361 catgtggcgc tgccgcggct ggcagggcat gccgcaggtg cggtgcaccc cctccaatgc + 3421 tcacgccgcc ctgtgccgca caggcgtgcc ccctcgagtg agcacgcgag gcgatgagcg + 3481 agaccctaac acctgctggc tccgcgccgc cgccaacgtt gcgcaggctg cgcgcgcctg + 3541 cggcgcctac acgagtgccg ggtgcccccg gtgcgcctac ggccgcgctc tcagcgaagc + 3601 ccgcacccac gaggacttcg ccgccctgag ccagcggtgg agcgcaagcc acgccgacgc + 3661 ctcccctgac ggcaccggag accccctcga ccccctgatg gagaccgtgg gctgcgcctg + 3721 ttcgcgcgta tgggtcggct ccgagcacga ggcccccccc gaccacctcc tggtgtccct + 3781 ccaccgtgcg cccaacggtc cgtggggcgt ggtgctcgaa gtgcgcgcgc gccccgaggg + 3841 gggtaacccc accggccact ttgtttgcgc ggtcggtggc ggcccacgcc gtgtttcgga + 3901 ccgcccccac ctttggctcg cggttccact ctcccggggc ggaggcactt gcgccgcgac + 3961 cgacgagggg ctggcccagg cgtattacga cgacctcgag gtgcgccgcc tcggggacga + 4021 cgccatggct cgggcggccc tcgccgcggt ccaacgcccc cgcaagggcc cctacaacat + 4081 catggtatgg aacatggccg cgggcgctgg caagaccact cgcatcctcg ccgccttcac + 4141 gcgcgaagac ctgtacgtct gccccaccaa cgcgctcctg cacgagatcc aggccaaact + 4201 ccgcgcgcgc gacatcgaca tcaagaacgc cgccacctac gagcgcgcgc tgacgaaacc + 4261 gcttgccgcc taccgccgta tctacatcga cgaggcgttc 
accctcggcg gcgagtattg + 4321 cgcgttcgtt gccagccaaa ctaccgcgga ggtgatctgc gtcggcgatc gggaccagtg + 4381 cggcccgcac tacgccaaca actgccgcac ccccgtccct gaccgctggc ccaccgagcg + 4441 ctcacgccac acttggcgtt tccccgattg ctgggcggct cgtttgcgcg cggggctcga + 4501 ctacgacgtc gagggcgagc acgccggcac ttttgcctgc aacctctggg acggccgcca + 4561 ggtcgacctc cacctcgcct tctcgcgcga gaccgtgcgc cgccttcacg aggctggcat + 4621 acgcgcatac accgtgcgcg aggcccaggg catgagcgtt ggcaccgcct gcatccacgt + 4681 cggtcgggac ggcacggacg tggccctggc tctgacacgc gacctcgcca tcgtcagcct + 4741 gacccgggcc tccgacgccc tctacctcca cgagctcgag gacggctcac tgcgcgctgc + 4801 ggggctcagc gcatttctcg acgccggggc actggcggag ctcaaggagg ttcccgccgg + 4861 catcgatcgc gtcgttgccg tcgagcaggc gccgccacca ttaccgcccg ccgacggcat + 4921 ccccgaggcc caggacgtgc cgcccttttg cccccgcacc ttggaggagc tcgtcttcgg + 4981 ccgcgccggc cacccccatt acgcggacct caaccgcgta accgagggcg agcgagaggt + 5041 gcggtacatg cgcatctcgc gtcacctgct caacaagaac cacactgaaa tgcccgggac + 5101 ggaacgcgtt ctgagcgccg tttgcgctgt gcggcgctac cgcgcgggtg aggacgggtc + 5161 gaccctgcgc accgccgtgg cccgccagca cccgcgcccc ttccgccaga ttccaccccc + 5221 gcgcgtcact gctggagtcg ctcaagagtg gcgcctgacg tacctgcggg agcgaatcga + 5281 cctcaccgac gtctatacac agatgggcgt ggccgcgcga gagctcaccg accgctacgc + 5341 gcgccgctac cccgagattt ttgccggcat gtgcaccgcg cagagcctga gcgtccccgc + 5401 cttcctcaaa gccaccttga agtgcgtgga cgccgccctc ggccccaggg acaccgaaga + 5461 ctgccatgcc gcccagggga aagccggcct cgagatccgc gcatgggcca aagagtgggt + 5521 tcaggtcatg tccccgcact tccgcgccat ccagaagatc attatgcgcg ccttgcgccc + 5581 gcaatttctc gtggccgccg ggcacacgga gcccgaggtt gacgcgtggt ggcaggctca + 5641 ctacaccacc aatgccatcg aggtcgattt caccgagttc gacatgaatc agacccttgc + 5701 cactcgggac gtcgagctcg agatcagcgc cgctctccta ggcctcccct gcgccgaaga + 5761 ctaccgcgcg ctccgcgccg gcagctactg caccctgcgc gaactgggca ttactgagac + 5821 cggctgcgag cgcacaagcg gcgagcccgc cacgctactg cacaacacca ccgtggccat + 5881 gtgcatggct atgcgcatgg tcccgaaggg cgtgcgctgg gccgggattt tccagggtga + 5941 
cgacatggtc atcttcctcc ctgagggcgc acgcggcgcg gcactcaagt ggaccccctc + 6001 tgaggtcggc ttgtttggct tccacatccc ggtgaagcac gtgagcaccc caacccccag + 6061 cttctgcgga catgtcggca ccgcggccgg cctcttccac gatgtcatgc accaggcaat + 6121 caaggtgctc tgccgccgct ttgaccccga cgtgctcgag gaacagcagg tagccctcct + 6181 tgaccgcctc cggggagtct acgcggccct gcccgacacc gttgctgcca acgccgcgta + 6241 ctacgactac agcgcggagc gcgttctcgc gatcgtgcgc gaactcaccg catacgcgcg + 6301 ggggcgcggc cttgatcatc cggccaccat cggcgcgctt gaggaaatcc agacccccta + 6361 cgcgcgcgcc aacctccacg acgccgacta gcgcccgtgc acgtagggcc tttaatctta + 6421 ctactctaac caggtcatca cccaccgttg tctcgccgca tctggtgggt acttcactcc + 6481 tgccatttgg gagagcccca gggtgcccaa atggcttcca ctacccccat cactatggag + 6541 gacctccaga aggccctcga ggcacaatcc cgcgccctcc gcgcggagct cgccgccggc + 6601 gccttgcagt cacgccggcc gcggccacag cgacagcgcg actccagcac ctccggagac + 6661 gactctggcc gcgactcagg agggcctcgc cgccgccgcg gcaaccgggg ccgcggccag + 6721 cgtcaagact ggtccaaggc cccccccccc cccgaggagc ggcaggagag ccgctcccag + 6781 actccggccc ctaagccgcc gcgggcgccg ccacaggtgc cccaaccccc gcgcatgcaa + 6841 accgggcgcg gaggcactgc cccgcgcccc gagctggggc cgccgactaa cccattccag + 6901 gcggccgtcg cgcgtggcct gcgcccgccc ctccatgacc ccgacaccga ggcacccact + 6961 gaggcctgtg tgacctcatg gctctggagc gagggcgagg gcgccgtctt ctaccgcgtc + 7021 gatctgcact tcaccaactt gggcaccccc ccactcgacg aggacggccg ctgggacccg + 7081 gcgctcatgt acaacccctg cgggcccgag ccgcccgctc acgtcgtccg tgcgtacaac + 7141 caacctgccg gcgacgtcag gggcatctgg ggcaagggcg agcgcaccta cgccgagcag + 7201 gatttccgcg tcggtggcac gcgctggcac cgactgttgc gcatgccagt gcgcggcctc + 7261 gacggcgaca gcgccccgct ccccccccat actaccgagc gtatcgagac ccgctcggcg + 7321 cgccatcctt ggcgcatccg cttcggtgcc cctcaggtct tcctcgccgg gctcttgctc + 7381 gcggccgttg ccgtcggcac cgcgcgcgcc gggcttcagc cccgcgctga catagcggcg + 7441 cctcccgcgc cgccgcaggc cccccgcggg cacgggaagc actacggtca tcaccaccat + 7501 caattgccgt ttcttgggca cgacggccac cacgggggca ccttgcgcgt cggtgagcac + 7561 caccgaaacg ccagcgacgt 
gctgcccggc cactggctcc aaggcgggtg gggttgttac + 7621 aacctgagcg actggcatca gggcactcac gtctgtcaca ctaagcacat ggacttctgg + 7681 tgcgtggagc acgaccggcc accgcccgcc accccgacgc ctcccaccac cgcggcgaac + 7741 tccacgactg ccgccacccc cgctactacg ccggcacctt gccacgccgg cctcaatgac + 7801 agctgcggcg gcttcctgtc cgggtgcggg ccgatgcgac tgcgccatgg cgcggacacc + 7861 cggtgcggtc ggttaatctg tgggctgtcc accactgccc agtacccgcc cacccgattc + 7921 ggctgcgcca tgcggtgggg ccttccccct tgggaactgg tcgtccttac ggcccgtccc + 7981 gaggacggct ggacttgccg cggcgtgccc gcccacccag gtacccgctg cccagaactg + 8041 gtgagcccca tggggcgcgc gacttgctcc ccagcttcgg ccctctggct tgccacagcc + 8101 aatgcgctgt cccttgacca tgcgctcgcg gccgttgtcc tgttggtccc gtgggttttg + 8161 atactcatgg tatgccgccg cgcctgccgc cgccgcggcg ccgccgctgc cctcaccgcg + 8221 gtcgtcctgc aggggtacac cccccccgcc tacggcgagg aggctttcac ctacctctgc + 8281 actgcaccgg ggtgtgccac tcaaacaccc gtccctgtgc gcctcgccgg cgtccgcttt + 8341 gagtccaaga ttgtggacgg tggctgcttt gccccatggg acctcgaggc caccggagcc + 8401 tgcatctgcg agatccccac cgatgtctcg tgcgagggcc tgggggcctg ggtccccgca + 8461 gccccgtgcg cgcgcatctg gaacggcacg cagcgcgcat gcaccttctg ggctgtcaac + 8521 gcctactcgt ccggcgggta cgcgcagctg gcgtcctatt tcaaccccgg cggcagctac + 8581 tacaagcaat accaccccac cgcgtgcgac gttgaacctg ccttcgggca cagcgacgcg + 8641 gcctgctggg gcttccccac cgacaccgtg atgagtgtgt tcgccctcgc cagctacgtc + 8701 cagcaccccg acaagaccgt cagggtcaag ttccacacag aaaccaggac cgtctggcag + 8761 ctctccgtag ccggcgtgtc gtgcaacgtc acgaccgaac atccgttctg taacacgccg + 8821 cacggacaac tcgaggtcca ggtcccgccc gaccccggcg acctggttga gtacattatg + 8881 aattacaccg gcaatcaaca gtcccggtgg ggcctcggga gcccgaactg ccacggcccc + 8941 gactgggcct ccccggtttg ccagcgccac tctcccgact gttcgcggct cgtgggggcc + 9001 acgccagagc gcccccggct gcgcctcgtc gatgccgacg acccccttct gcgcaccgcc + 9061 ccggggccgg gcgaggtgtg ggtcacgcct gtcataggct ctcaggcgcg caagtgcgga + 9121 ctccacatac gcgccggacc gtacggccac gccaccgtcg aaatgcctga gtggatccac + 9181 gcccacacta ccagcgaccc ctggcacccg cccggcccct 
tgggactcaa gttcaagaca + 9241 gtccgcccag tggtcctacc gcgcgcgtta gcgccccctc gcaacgtgcg cgtaaccggc + 9301 tgctaccagt gtgggacccc cgcgctggtg gagggccttg ccccaggagg agggaactgc + 9361 catctcacca tcaacggcga ggacgccggc gcctttcccc ctgggaagtt cgtcaccgcc + 9421 gccctcctca acaccccccc gccctaccaa gtgagttgcg ggggtgagag cgaccgcgcg + 9481 agcgcgcggg tcattgaccc cgccgcgcag tcgttcaccg gcgtggtgta tggcacacac + 9541 accaccgctg tatcggagac ccggcagacc tgggcggagt gggctgctgc tcactggtgg + 9601 cagctcactc tgggcgttat ttgcgccctc ctgtttgctg gcctacttgc ttgctgtgcc + 9661 aaatgcttat actacttgcg cggcgctata gcgccgcgct agtgggcccc cgcgcgaaac + 9721 ccgcattagc ccactagatt tccgcacccg ttgctgcata g +// From afd060380faa9349b70c63e137d33d2983cb81bd Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Fri, 22 Aug 2025 18:38:10 +0200 Subject: [PATCH 2/2] nextclade: remove measles content from readme --- .../defaults/genome_dataset/README.md | 15 ++++++--------- .../defaults/genome_dataset/pathogen.json | 4 ++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/nextclade_genome/defaults/genome_dataset/README.md b/nextclade_genome/defaults/genome_dataset/README.md index e6448c1..da0db8f 100644 --- a/nextclade_genome/defaults/genome_dataset/README.md +++ b/nextclade_genome/defaults/genome_dataset/README.md @@ -2,21 +2,18 @@ | Key | Value | | ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| name | Measles complete genome +| name | Rubella complete genome | authors | [Nextstrain](https://nextstrain.org) | -| reference | NC_001498.1 | -| workflow | https://github.com/nextstrain/measles/tree/main/nextclade | -| path | `nextstrain/measles/genome/WHO-2012` | +| reference | NC_076948 | +| workflow | https://github.com/nextstrain/rubella/tree/main/nextclade_genome | +| path | `nextstrain/rubella/genome` | ## Scope of this dataset -This dataset assigns genotypes to measles samples 
based on [criteria outlined by the WHO](https://www.who.int/publications/i/item/WER8709). -The WHO has defined 24 measles genotypes based on N gene and H gene sequences from 28 reference strains. For new measles samples, genotypes can be assigned based on genetic similarity to the reference strains in the "N450" region (a 450 bp region of the N gene). +This dataset assigns genotypes to rubella samples based on guidance by the WHO (["Rubella virus nomenclature update: 2013"](https://www.who.int/publications/i/item/WER8832)). -The reference tree used in this dataset includes sequences for the 28 reference strains, along with (nearly) complete genomes of other representative strains for most genotypes. -This dataset can be used to assign genotypes to any sequence that includes at least 400 bp of the N450 region, including whole genome sequences. -In addition, this dataset implements simple quality control metrics based to the amount of missing sequence, the number of ambiguous nucleotides, frameshifts or stop codons, and clusters of mutations relative to sequences in the reference tree. +The reference tree used in this dataset includes uses a complete rubella virus genome, whole the nomenclature by the WHO is typically defined based on the E1 segment. ## Features diff --git a/nextclade_genome/defaults/genome_dataset/pathogen.json b/nextclade_genome/defaults/genome_dataset/pathogen.json index 366282a..43790f6 100644 --- a/nextclade_genome/defaults/genome_dataset/pathogen.json +++ b/nextclade_genome/defaults/genome_dataset/pathogen.json @@ -17,8 +17,8 @@ "defaultCds": "N", "attributes": { "name": "Rubella", - "reference name": "Ichinose-B95a", - "reference accession": "NC_001498 " + "reference name": "RVi/Bismarck.ND.USA/23.08/2B", + "reference accession": "NC_076948" }, "alignmentParams":{ "retryReverseComplement": true,