diff --git a/data-upload/csv_parser.py b/data-upload/csv_parser.py
new file mode 100644
index 00000000..1a108b32
--- /dev/null
+++ b/data-upload/csv_parser.py
@@ -0,0 +1,91 @@
+"""
+CSV Parser for openPIP 2.0
+Accepts a simplified flat CSV and normalizes it to the same
+ParsedInteraction model used by the PSI-MI TAB parser so that
+the same validation and DB insertion pipeline handles both formats.
+
+Minimum required CSV columns: protein_a, protein_b
+Optional columns: interaction_type, score, publication,
+                  author, dataset, year
+"""
+
+import csv
+import re
+from models import ParsedInteraction, Protein, Dataset, InteractionCategory
+
+
+REQUIRED_CSV_COLUMNS = {"protein_a", "protein_b"}  # header names parse_csv refuses to run without
+
+
+def parse_csv(filepath: str) -> list:
+    """
+    Parse a CSV file and return a list of ParsedInteraction objects.
+
+    Raises ValueError when the header lacks protein_a or protein_b.
+    Rows where either protein cell is blank are skipped without error.
+    """
+    def cell(row: dict, column: str) -> str:
+        # DictReader fills cells missing from a short row with None, so
+        # row.get(column, "") alone would still hand back None.
+        return (row.get(column) or "").strip()
+
+    def protein(identifier: str) -> Protein:
+        # A UniProt-shaped token is stored as uniprot_id, else as gene_name.
+        if _looks_like_uniprot(identifier):
+            return Protein(uniprot_id=identifier, gene_name=None)
+        return Protein(uniprot_id=None, gene_name=identifier)
+
+    interactions = []
+    # utf-8-sig drops the BOM some spreadsheet exports prepend (it would
+    # otherwise become part of the first header name); newline='' is what
+    # the csv module asks for so quoted line breaks survive.
+    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
+        reader = csv.DictReader(f)
+        headers = set(reader.fieldnames or [])
+
+        missing = REQUIRED_CSV_COLUMNS - headers
+        if missing:
+            raise ValueError(
+                f"CSV is missing required columns: {missing}. "
+                f"At minimum 'protein_a' and 'protein_b' are required."
+            )
+
+        for row in reader:
+            protein_a_raw = cell(row, "protein_a")
+            protein_b_raw = cell(row, "protein_b")
+            if not protein_a_raw or not protein_b_raw:
+                continue
+
+            interaction_type_raw = cell(row, "interaction_type")
+            category = InteractionCategory(
+                category_name=interaction_type_raw
+            ) if interaction_type_raw else None
+
+            dataset = Dataset(
+                pubmed_id=cell(row, "publication") or None,
+                author=cell(row, "author") or None,
+                name=cell(row, "dataset") or None,
+                year=cell(row, "year") or None,
+            )
+
+            interactions.append(ParsedInteraction(
+                protein_a=protein(protein_a_raw),
+                protein_b=protein(protein_b_raw),
+                score=cell(row, "score") or None,
+                category=category,
+                dataset=dataset,
+                raw=dict(row),
+            ))
+
+    return interactions
+
+
+def _looks_like_uniprot(s: str) -> bool:
+    """
+    Return True when the whole of `s` is shaped like a UniProt accession.
+    Examples: P12345, Q67890, A0A000. fullmatch is used because `$` under
+    re.match also matches just before a trailing line break.
+    """
+    return re.fullmatch(
+        r'[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}', s
+    ) is not None
\ No newline at end of file
diff --git a/data-upload/mitab_parser.py b/data-upload/mitab_parser.py
new file mode 100644
index 00000000..d1ba44c9
--- /dev/null
+++ b/data-upload/mitab_parser.py
@@ -0,0 +1,287 @@
+"""
+PSI-MI TAB v2.7 Parser for openPIP 2.0
+Replaces the PHP upload parsing logic with a clean Python implementation.
+Spec: https://psicquic.github.io/MITAB27Format.html
+
+Output maps directly to openpip.sql schema via models.py
+"""
+
+import csv
+import re
+import io
+from dataclasses import dataclass
+from typing import Optional
+from models import ParsedInteraction, Protein, Organism, Dataset, InteractionCategory
+
+
+# Column names in file order; the trailing comment gives 1-based positions.
+MITAB27_COLUMNS = [
+    "unique_id_a", "unique_id_b",                                # 1-2
+    "alt_id_a", "alt_id_b",                                      # 3-4
+    "alias_a", "alias_b",                                        # 5-6
+    "interaction_detection_method",                              # 7
+    "author", "publication_id",                                  # 8-9
+    "taxid_a", "taxid_b",                                        # 10-11
+    "interaction_type",                                          # 12
+    "source_database", "interaction_id",                         # 13-14
+    "confidence_score",                                          # 15
+    "complex_expansion",                                         # 16
+    "bio_role_a", "bio_role_b",                                  # 17-18
+    "exp_role_a", "exp_role_b",                                  # 19-20
+    "interactor_type_a", "interactor_type_b",                    # 21-22
+    "xref_a", "xref_b", "xref_interaction",                      # 23-25
+    "annotation_a", "annotation_b", "annotation_interaction",    # 26-28
+    "host_organism_taxid",                                       # 29
+    "parameters",                                                # 30
+    "creation_date", "update_date",                              # 31-32
+    "checksum_a", "checksum_b", "checksum_interaction",          # 33-35
+    "negative",                                                  # 36
+    "features_a", "features_b",                                  # 37-38
+    "stoichiometry_a", "stoichiometry_b",                        # 39-40
+    "participant_identification_a",                              # 41
+    "participant_identification_b",                              # 42
+]
+
+# Derived from the list so the two can never disagree.
+MITAB27_COLUMN_COUNT = len(MITAB27_COLUMNS)
+
+
+@dataclass
+class MITABField:
+    """One db:value(description) entry of a PSI-MI TAB cell, as built by parse_field."""
+    db: str  # text before the first ":"; parse_field uses "unknown" for unparsed entries
+    value: str  # text after the ":" with surrounding double quotes removed
+    description: Optional[str] = None  # text inside the trailing "(...)", if any
+
+
+class MITABParseError(Exception):
+    """Raised when a PSI-MI TAB row cannot be parsed (e.g. too few columns)."""
+
+
+# ─────────────────────────────────────────────────────────────
+# Field-level parsing
+# ─────────────────────────────────────────────────────────────
+
+# db:"quoted value"(description) is tried first so a quoted value may contain
+# "(" or "|"; the second alternative is the lenient unquoted/legacy form.
+_FIELD_RE = re.compile(
+    r'^([^:(]+):(?:"([^"]+)"|"?([^"(|]+?)"?)(?:\((.+)\))?$'
+)
+
+
+def parse_field(raw: str) -> list:
+    """
+    Parse one PSI-MI TAB cell into a list of MITABField objects.
+    Handles: db:value(description) | db:value | - (empty)
+
+    Entries that do not look like db:value (e.g. a free-text author such as
+    "Smith et al. (2020)") are kept whole with db="unknown".
+    """
+    results = []
+    # Blank and "-" entries carry no value; this also covers a whole-cell "-".
+    for entry in _split_pipe(raw):
+        entry = entry.strip()
+        if not entry or entry == "-":
+            continue
+        match = _FIELD_RE.match(entry)
+        if match is None:
+            results.append(MITABField(db="unknown", value=entry))
+            continue
+        db, quoted, bare, desc = match.groups()
+        value = bare if quoted is None else quoted
+        results.append(MITABField(db=db.strip(), value=value.strip(),
+                                  description=desc.strip() if desc else None))
+    return results
+
+
+def _split_pipe(s: str) -> list:
+    """
+    Split `s` on "|" separators that sit outside double-quoted sections.
+
+    Quote characters stay in the returned pieces. A trailing empty piece
+    (input ending in "|", or empty input) is not emitted.
+    """
+    pieces = []
+    start = 0
+    quoted = False
+    for pos, ch in enumerate(s):
+        if ch == '"':
+            quoted = not quoted
+        elif ch == '|' and not quoted:
+            pieces.append(s[start:pos])
+            start = pos + 1
+    return pieces + [s[start:]] if start < len(s) else pieces
+
+
+# ─────────────────────────────────────────────────────────────
+# Extraction helpers — map MITABFields to openpip.sql columns
+# ─────────────────────────────────────────────────────────────
+
+def _extract_id(fields: list, db_names: tuple) -> Optional[str]:
+    """Return the value of the first field whose lowercased db is in db_names.
+
+    Gives None when no field matches; db_names must already be lowercase.
+    """
+    return next((fld.value for fld in fields if fld.db.lower() in db_names), None)
+
+
+def _extract_description(fields: list) -> Optional[str]:
+    """
+    Extract the gene name from alias fields, where the VALUE is the name and
+    the description gives its type, e.g. uniprotkb:BRCA1(gene name).
+    Preference: description exactly "gene name", then any description
+    containing it (e.g. "gene name synonym"), then the first non-empty value.
+    """
+    typed = [(f.description.strip().lower(), f.value)
+             for f in fields if f.description]
+    candidates = (
+        [v for d, v in typed if d == "gene name"]
+        + [v for d, v in typed if "gene name" in d]
+        + [f.value for f in fields if f.value]
+    )
+    return candidates[0] if candidates else None
+
+
+def _extract_taxid(fields: list) -> Optional[str]:
+    """Return the value part of the first taxid field, e.g. "9606" from
+    taxid:9606(human); None when the cell holds no taxid entry.
+    """
+    taxids = (fld.value for fld in fields if fld.db.lower() == "taxid")
+    return next(taxids, None)
+
+
+def _extract_common_name(fields: list) -> Optional[str]:
+    """Return the description of the first taxid field that has one, e.g.
+    "human" from taxid:9606(human); None when no taxid carries a description.
+    """
+    names = (f.description for f in fields if f.db.lower() == "taxid" and f.description)
+    return next(names, None)
+
+
+# ─────────────────────────────────────────────────────────────
+# Core builder — maps one parsed row to openpip.sql schema
+# ─────────────────────────────────────────────────────────────
+
+_UNIPROT_DBS = ("uniprotkb", "uniprot", "uniprot/swiss-prot", "uniprot/trembl")
+_ENTREZ_DBS = ("entrez", "entrezgene", "entrezgene/locuslink",
+               "entrez gene/locuslink")
+
+
+def _build_protein(ids: list, aliases: list) -> Protein:
+    """Map identifier fields and alias fields of one interactor to a Protein."""
+    return Protein(
+        uniprot_id=_extract_id(ids, _UNIPROT_DBS),
+        ensembl_id=_extract_id(ids, ("ensembl",)),
+        entrez_id=_extract_id(ids, _ENTREZ_DBS),
+        gene_name=_extract_description(aliases),
+    )
+
+
+def _first_numeric_score(fields: list) -> Optional[str]:
+    """Return the first confidence value that parses as a float, formatted
+    for interaction.score (varchar(10) in DB, kept as string); else None."""
+    for f in fields:
+        try:
+            return str(round(float(f.value), 6))[:10]
+        except ValueError:
+            continue
+    return None
+
+
+def _build_interaction(row: list, raw: dict) -> ParsedInteraction:
+    """
+    Build a ParsedInteraction from a raw tab-separated row.
+    Maps PSI-MI TAB 2.7 columns directly to openpip.sql schema fields.
+
+    `row` needs at least 15 cells (indexes 0-14 are read); `raw` is stored
+    on the result as given.
+    """
+    alias_a  = parse_field(row[4])
+    alias_b  = parse_field(row[5])
+    author   = parse_field(row[7])
+    pub_id   = parse_field(row[8])
+    taxid_a  = parse_field(row[9])
+    taxid_b  = parse_field(row[10])
+    int_type = parse_field(row[11])
+    conf     = parse_field(row[14])
+
+    # protein table — unique ids are searched before alternative ids
+    ids_a = parse_field(row[0]) + parse_field(row[2])
+    ids_b = parse_field(row[1]) + parse_field(row[3])
+
+    # interaction_category table — prefer the readable term over the MI id
+    category = None
+    if int_type:
+        category = InteractionCategory(
+            category_name=int_type[0].description or int_type[0].value
+        )
+
+    # dataset table — only built when the row names a publication or author
+    pubmed = _extract_id(pub_id, ("pubmed",))
+    author_str = author[0].value if author else None
+    dataset = Dataset(
+        pubmed_id=pubmed,
+        author=author_str,
+    ) if (pubmed or author_str) else None
+
+    # organism table is filled straight from the two taxid cells
+    return ParsedInteraction(
+        protein_a=_build_protein(ids_a, alias_a),
+        protein_b=_build_protein(ids_b, alias_b),
+        organism_a=Organism(taxid_id=_extract_taxid(taxid_a),
+                            common_name=_extract_common_name(taxid_a)),
+        organism_b=Organism(taxid_id=_extract_taxid(taxid_b),
+                            common_name=_extract_common_name(taxid_b)),
+        score=_first_numeric_score(conf),
+        category=category,
+        dataset=dataset,
+        raw=raw,
+    )
+
+
+# ─────────────────────────────────────────────────────────────
+# Public API
+# ─────────────────────────────────────────────────────────────
+
+_MIN_COLUMNS = 15  # _build_interaction reads row[0] .. row[14]
+
+
+def _parse_rows(lines) -> list:
+    """
+    Turn an iterable of MITAB text lines into ParsedInteraction objects.
+
+    QUOTE_NONE keeps the csv module from treating a cell that starts with
+    a double quote as a quoted field that may swallow tabs and newlines;
+    MITAB quoting is handled later by parse_field.
+    """
+    interactions = []
+    reader = csv.reader(lines, delimiter='\t', quoting=csv.QUOTE_NONE)
+    for row in reader:
+        # skip blank lines and '#' header/comment lines
+        if not any(cell.strip() for cell in row) or row[0].startswith('#'):
+            continue
+        if len(row) < _MIN_COLUMNS:
+            raise MITABParseError(
+                f"Line {reader.line_num}: Expected at least {_MIN_COLUMNS} "
+                f"columns, got {len(row)}."
+            )
+        # short rows (older MITAB versions) are padded with the empty marker
+        row.extend('-' for _ in range(MITAB27_COLUMN_COUNT - len(row)))
+        interactions.append(_build_interaction(row, dict(zip(MITAB27_COLUMNS, row))))
+    return interactions
+
+
+def parse_mitab27(filepath: str) -> list:
+    """
+    Parse a PSI-MI TAB 2.7 file.
+    Returns a list of ParsedInteraction objects; raises MITABParseError
+    for a data row with fewer than 15 columns.
+    """
+    with open(filepath, 'r', encoding='utf-8', newline='') as f:
+        return _parse_rows(f)
+
+
+def parse_mitab27_from_string(content: str) -> list:
+    """Parse PSI-MI TAB 2.7 from a raw string (same rules as parse_mitab27).
+    Useful for API upload endpoints that receive file content directly."""
+    return _parse_rows(io.StringIO(content, newline=''))
\ No newline at end of file
diff --git a/data-upload/models.py b/data-upload/models.py
new file mode 100644
index 00000000..0839cdaf
--- /dev/null
+++ b/data-upload/models.py
@@ -0,0 +1,72 @@
+"""
+Data models for openPIP 2.0 — Python dataclasses that directly mirror
+the openpip.sql database schema. These are the target output objects
+of the PSI-MI TAB and CSV parsers in this directory.
+
+Table mappings:
+    Protein             -> protein table
+    Organism            -> organism table
+    Dataset             -> dataset table
+    InteractionCategory -> interaction_category table
+    ParsedInteraction   -> interaction table + interaction_dataset +
+                           interaction_interaction_category +
+                           interaction_support_information
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+@dataclass
+class Protein:
+    """Maps to the `protein` table in openpip.sql; every field defaults to None."""
+    gene_name: Optional[str] = None  # CSV: a protein cell that is not UniProt-shaped; MITAB: alias value
+    protein_name: Optional[str] = None  # not set by the CSV or MITAB parser
+    uniprot_id: Optional[str] = None  # CSV: a UniProt-shaped protein cell; MITAB: uniprotkb/uniprot id
+    ensembl_id: Optional[str] = None  # MITAB only: ensembl id from the unique/alt id cells
+    entrez_id: Optional[str] = None  # MITAB only: entrez id from the unique/alt id cells
+    sequence: Optional[str] = None  # not set by the CSV or MITAB parser
+    description: Optional[str] = None  # not set by the CSV or MITAB parser
+
+
+@dataclass
+class Organism:
+    """Maps to the `organism` table in openpip.sql; every field defaults to None."""
+    taxid_id: Optional[str] = None  # value of a MITAB taxid entry, e.g. "9606", kept as text
+    common_name: Optional[str] = None  # description of that taxid entry, e.g. "human"
+    scientific_name: Optional[str] = None  # not set by the CSV or MITAB parser
+
+
+@dataclass
+class Dataset:
+    """Maps to the `dataset` table in openpip.sql; every field defaults to None."""
+    name: Optional[str] = None  # CSV `dataset` column
+    pubmed_id: Optional[str] = None  # CSV `publication` column / MITAB pubmed: entry
+    author: Optional[str] = None  # CSV `author` column / first MITAB author entry
+    year: Optional[str] = None  # CSV `year` column, kept as text
+    interaction_status: Optional[str] = None  # not set by the CSV or MITAB parser
+    description: Optional[str] = None  # not set by the CSV or MITAB parser
+
+
+@dataclass
+class InteractionCategory:
+    """Maps to the `interaction_category` table in openpip.sql"""
+    category_name: Optional[str] = None  # interaction type text, e.g. "physical association"
+
+
+@dataclass
+class ParsedInteraction:
+    """
+    Common output of the MITAB and CSV parsers. Maps to the `interaction`
+    table plus related junction tables: interaction_dataset,
+    interaction_interaction_category, interaction_support_information
+    """
+    protein_a: Protein = field(default_factory=Protein)  # interactor A
+    protein_b: Protein = field(default_factory=Protein)  # interactor B
+    organism_a: Organism = field(default_factory=Organism)  # left at defaults by the CSV parser
+    organism_b: Organism = field(default_factory=Organism)  # left at defaults by the CSV parser
+    score: Optional[str] = None  # kept as text, not float; None when the row has no score
+    category: Optional[InteractionCategory] = None  # None when the row names no interaction type
+    dataset: Optional[Dataset] = None  # None when a MITAB row names no pubmed id or author
+    support_info: dict = field(default_factory=dict)  # not filled by the CSV or MITAB parser
+    raw: dict = field(default_factory=dict)  # the source row keyed by column name
\ No newline at end of file
diff --git a/data-upload/requirements.txt b/data-upload/requirements.txt
index 7cb6656b..9deb423b 100644
--- a/data-upload/requirements.txt
+++ b/data-upload/requirements.txt
@@ -1 +1,2 @@
 selenium
+pytest
\ No newline at end of file
diff --git a/data-upload/tests/__pycache__/test_parser.cpython-311-pytest-9.0.2.pyc b/data-upload/tests/__pycache__/test_parser.cpython-311-pytest-9.0.2.pyc
new file mode 100644
index 00000000..72d76796
Binary files /dev/null and b/data-upload/tests/__pycache__/test_parser.cpython-311-pytest-9.0.2.pyc differ
diff --git a/data-upload/tests/sample.mitab27.txt b/data-upload/tests/sample.mitab27.txt
new file mode 100644
index 00000000..788a5a4b
--- /dev/null
+++ b/data-upload/tests/sample.mitab27.txt
@@ -0,0 +1,4 @@
+#unique_id_a	unique_id_b	alt_id_a	alt_id_b	alias_a	alias_b	interaction_detection_method	author	publication_id	taxid_a	taxid_b	interaction_type	source_database	interaction_id	confidence_score	complex_expansion	bio_role_a	bio_role_b	exp_role_a	exp_role_b	interactor_type_a	interactor_type_b	xref_a	xref_b	xref_interaction	annotation_a	annotation_b	annotation_interaction	host_organism_taxid	parameters	creation_date	update_date	checksum_a	checksum_b	checksum_interaction	negative	features_a	features_b	stoichiometry_a	stoichiometry_b	participant_identification_a	participant_identification_b
+uniprotkb:P12345	uniprotkb:Q67890	ensembl:ENSP00000001	ensembl:ENSP00000002	uniprotkb:BRCA1_HUMAN(gene name)	uniprotkb:TP53_HUMAN(gene name)	psi-mi:"MI:0018"(two hybrid)	Smith et al. (2020)	pubmed:12345678	taxid:9606(human)	taxid:9606(human)	psi-mi:"MI:0915"(physical association)	psi-mi:"MI:0469"(IntAct)	intact:EBI-12345	author-score:0.85	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
+uniprotkb:P98765	uniprotkb:Q11111	-	-	uniprotkb:MYC_HUMAN(gene name)	uniprotkb:MAX_HUMAN(gene name)	psi-mi:"MI:0096"(pull down)	Jones et al. (2021)	pubmed:87654321	taxid:9606(human)	taxid:9606(human)	psi-mi:"MI:0915"(physical association)	psi-mi:"MI:0469"(IntAct)	intact:EBI-67890	author-score:0.72	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
+uniprotkb:A00001	uniprotkb:A00002	-	-	-	-	psi-mi:"MI:0018"(two hybrid)	Brown et al. (2019)	pubmed:11111111	taxid:9606(human)	taxid:9606(human)	psi-mi:"MI:0407"(direct interaction)	psi-mi:"MI:0469"(IntAct)	intact:EBI-99999	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-	-
\ No newline at end of file
diff --git a/data-upload/tests/test_parser.py b/data-upload/tests/test_parser.py
new file mode 100644
index 00000000..5220f663
--- /dev/null
+++ b/data-upload/tests/test_parser.py
@@ -0,0 +1,350 @@
+import sys
+import os
+
+# Make sure imports work from tests/ subfolder
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from mitab_parser import parse_mitab27, parse_field
+from csv_parser import parse_csv
+from validator import validate_file
+from models import ParsedInteraction, Protein, Organism, Dataset
+
+# ─────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────
+
+SAMPLE_MITAB = os.path.join(os.path.dirname(__file__), "sample.mitab27.txt")  # fixture: "#" header line + 3 data rows
+
+
+# ─────────────────────────────────────────────
+# parse_field tests
+# ─────────────────────────────────────────────
+
+def test_parse_field_basic():
+    """A bare db:value entry gives one field with no description"""
+    result = parse_field("uniprotkb:P12345")
+    assert len(result) == 1
+    assert result[0].db == "uniprotkb"
+    assert result[0].value == "P12345"
+    assert result[0].description is None
+
+
+def test_parse_field_with_description():
+    """db:value(description): the parenthesised text becomes description"""
+    result = parse_field("uniprotkb:P12345(BRCA1_HUMAN)")
+    assert len(result) == 1
+    assert result[0].db == "uniprotkb"
+    assert result[0].value == "P12345"
+    assert result[0].description == "BRCA1_HUMAN"
+
+
+def test_parse_field_empty_dash():
+    """A lone "-" means no value in PSI-MI TAB"""
+    result = parse_field("-")
+    assert result == []
+
+
+def test_parse_field_empty_string():
+    """Empty string also means no value"""
+    result = parse_field("")
+    assert result == []
+
+
+def test_parse_field_multiple_values():
+    """Pipe-separated entries come back as separate fields, in cell order"""
+    result = parse_field("uniprotkb:P12345|ensembl:ENSP00000001")
+    assert len(result) == 2
+    assert result[0].db == "uniprotkb"
+    assert result[0].value == "P12345"
+    assert result[1].db == "ensembl"
+    assert result[1].value == "ENSP00000001"
+
+
+def test_parse_field_psi_mi_quoted():
+    """Quoted value psi-mi:"MI:0018"(two hybrid): value holds MI:0018, description is split off"""
+    result = parse_field('psi-mi:"MI:0018"(two hybrid)')
+    assert len(result) == 1
+    assert result[0].db == "psi-mi"
+    assert "MI:0018" in result[0].value
+    assert result[0].description == "two hybrid"
+
+
+# ─────────────────────────────────────────────
+# parse_mitab27 file tests
+# ─────────────────────────────────────────────
+
+def test_parse_file_row_count():
+    """Sample file has 3 data rows (the leading '#' header line is skipped)"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert len(interactions) == 3
+
+
+def test_parse_file_protein_a_uniprot():
+    """protein_a.uniprot_id correctly extracted from unique_id_a"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_a.uniprot_id == "P12345"
+
+
+def test_parse_file_protein_b_uniprot():
+    """protein_b.uniprot_id correctly extracted from unique_id_b"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_b.uniprot_id == "Q67890"
+
+
+def test_parse_file_protein_a_ensembl():
+    """ensembl_id extracted from alt_id_a"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_a.ensembl_id == "ENSP00000001"
+
+
+def test_parse_file_protein_b_ensembl():
+    """ensembl_id extracted from alt_id_b"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_b.ensembl_id == "ENSP00000002"
+
+
+def test_parse_file_gene_name_a():
+    """gene_name is the VALUE of the alias_a entry whose description is (gene name)"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_a.gene_name == "BRCA1_HUMAN"
+
+
+def test_parse_file_gene_name_b():
+    """gene_name is the VALUE of the alias_b entry whose description is (gene name)"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].protein_b.gene_name == "TP53_HUMAN"
+
+
+def test_parse_file_score():
+    """confidence value author-score:0.85 is stored as the string '0.85'"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].score == "0.85"
+
+
+def test_parse_file_score_missing():
+    """missing score stored as None"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    # third data row has "-" in the confidence column
+    assert interactions[2].score is None
+
+
+def test_parse_file_organism_taxid():
+    """organism taxid is the value of taxid:9606(human), kept as a string"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].organism_a.taxid_id == "9606"
+
+
+def test_parse_file_interaction_category():
+    """category_name comes from the interaction_type description (readable term)"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].category is not None
+    assert "physical association" in interactions[0].category.category_name.lower()
+
+
+def test_parse_file_dataset_pubmed():
+    """dataset.pubmed_id is the value of the pubmed: entry in publication_id"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert interactions[0].dataset.pubmed_id == "12345678"
+
+
+def test_parse_file_dataset_author():
+    """dataset.author keeps the free-text author cell"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert "Smith" in interactions[0].dataset.author
+
+
+def test_parse_file_returns_parsed_interaction():
+    """Each row returns a ParsedInteraction instance"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    for ix in interactions:
+        assert isinstance(ix, ParsedInteraction)
+
+
+def test_parse_file_protein_objects():
+    """protein_a and protein_b are Protein instances"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert isinstance(interactions[0].protein_a, Protein)
+    assert isinstance(interactions[0].protein_b, Protein)
+
+
+def test_parse_file_raw_preserved():
+    """Raw row dict, keyed by MITAB column name, is preserved for debugging"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    assert "unique_id_a" in interactions[0].raw
+    assert "unique_id_b" in interactions[0].raw
+
+
+# ─────────────────────────────────────────────
+# CSV parser tests
+# ─────────────────────────────────────────────
+
+def test_csv_parser_basic(tmp_path):
+    """A CSV with required and optional columns yields one interaction per data row"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b,interaction_type,score,publication\n"
+        "P12345,Q67890,physical association,0.9,12345678\n"
+        "P98765,Q11111,direct interaction,0.5,87654321\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert len(interactions) == 2
+
+
+def test_csv_parser_protein_a(tmp_path):
+    """A UniProt-shaped protein_a cell is stored as uniprot_id"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b\n"
+        "P12345,Q67890\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert interactions[0].protein_a.uniprot_id == "P12345"
+
+
+def test_csv_parser_protein_b(tmp_path):
+    """A UniProt-shaped protein_b cell is stored as uniprot_id"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b\n"
+        "P12345,Q67890\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert interactions[0].protein_b.uniprot_id == "Q67890"
+
+
+def test_csv_parser_score(tmp_path):
+    """score is passed through as the cell text, not converted to float"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b,score\n"
+        "P12345,Q67890,0.95\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert interactions[0].score == "0.95"
+
+
+def test_csv_parser_missing_required_columns(tmp_path):
+    """A header without protein_a / protein_b raises ValueError naming the column"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "gene,score\n"
+        "BRCA1,0.9\n"
+    )
+    try:
+        parse_csv(str(csv_file))
+        assert False, "Should have raised ValueError"  # AssertionError is not caught below
+    except ValueError as e:
+        assert "protein_a" in str(e) or "protein_b" in str(e)
+
+
+def test_csv_parser_returns_parsed_interaction(tmp_path):
+    """CSV parser returns ParsedInteraction instances"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b\n"
+        "P12345,Q67890\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert isinstance(interactions[0], ParsedInteraction)
+
+
+def test_csv_parser_pubmed(tmp_path):
+    """publication column maps to dataset.pubmed_id"""
+    csv_file = tmp_path / "test.csv"
+    csv_file.write_text(
+        "protein_a,protein_b,publication\n"
+        "P12345,Q67890,12345678\n"
+    )
+    interactions = parse_csv(str(csv_file))
+    assert interactions[0].dataset.pubmed_id == "12345678"
+
+
+# ─────────────────────────────────────────────
+# Validator tests
+# ─────────────────────────────────────────────
+
+def test_validator_valid_file():
+    """All rows in sample file pass validation (warnings are allowed)"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    results = validate_file(interactions)
+    assert all(r.is_valid for r in results)
+
+
+def test_validator_missing_protein_a():
+    """Row with no protein_a identifier fails validation with an error message"""
+    ix = ParsedInteraction(
+        protein_a=Protein(),  # no uniprot_id, no gene_name
+        protein_b=Protein(uniprot_id="Q67890")
+    )
+    from validator import validate_interaction
+    result = validate_interaction(ix, line_num=1)
+    assert not result.is_valid
+    assert len(result.errors) > 0
+
+
+def test_validator_missing_protein_b():
+    """Row with no protein_b identifier fails validation"""
+    ix = ParsedInteraction(
+        protein_a=Protein(uniprot_id="P12345"),
+        protein_b=Protein()  # no identifier of any kind
+    )
+    from validator import validate_interaction
+    result = validate_interaction(ix, line_num=1)
+    assert not result.is_valid
+
+
+def test_validator_warns_no_uniprot():
+    """Interactors identified only by gene_name are valid but produce a warning"""
+    ix = ParsedInteraction(
+        protein_a=Protein(gene_name="BRCA1"),  # has gene_name but no uniprot_id
+        protein_b=Protein(gene_name="TP53")
+    )
+    from validator import validate_interaction
+    result = validate_interaction(ix, line_num=1)
+    assert result.is_valid  # not an error, just a warning
+    assert len(result.warnings) > 0
+
+
+def test_validator_returns_list():
+    """validate_file returns a list with one result per interaction"""
+    interactions = parse_mitab27(SAMPLE_MITAB)
+    results = validate_file(interactions)
+    assert isinstance(results, list)
+    assert len(results) == len(interactions)
+
+
+# ─────────────────────────────────────────────
+# Run all tests manually if needed
+# ─────────────────────────────────────────────
+
+if __name__ == "__main__":
+    # Only the CSV tests need pytest's tmp_path fixture; everything else
+    # is a plain function and can be called directly.
+    manual_tests = [
+        # parse_field tests
+        test_parse_field_basic, test_parse_field_with_description,
+        test_parse_field_empty_dash, test_parse_field_empty_string,
+        test_parse_field_multiple_values, test_parse_field_psi_mi_quoted,
+        # mitab parser tests
+        test_parse_file_row_count,
+        test_parse_file_protein_a_uniprot, test_parse_file_protein_b_uniprot,
+        test_parse_file_protein_a_ensembl, test_parse_file_protein_b_ensembl,
+        test_parse_file_gene_name_a, test_parse_file_gene_name_b,
+        test_parse_file_score, test_parse_file_score_missing,
+        test_parse_file_organism_taxid,
+        test_parse_file_interaction_category,
+        test_parse_file_dataset_pubmed, test_parse_file_dataset_author,
+        test_parse_file_returns_parsed_interaction,
+        test_parse_file_protein_objects, test_parse_file_raw_preserved,
+        # validator tests
+        test_validator_valid_file,
+        test_validator_missing_protein_a, test_validator_missing_protein_b,
+        test_validator_warns_no_uniprot, test_validator_returns_list,
+    ]
+    for manual_test in manual_tests:
+        manual_test()
+
+    # CSV tests need tmp_path — skip in manual run
+    print("CSV tests require pytest — run: pytest tests/")
+
+    print(f"\nAll {len(manual_tests)} manual tests passed.")
\ No newline at end of file
diff --git a/data-upload/validator.py b/data-upload/validator.py
new file mode 100644
index 00000000..a56e53bb
--- /dev/null
+++ b/data-upload/validator.py
@@ -0,0 +1,83 @@
+"""
+Validation layer for openPIP 2.0
+Validates ParsedInteraction objects before DB insertion.
+Works for both PSI-MI TAB and CSV parsed data since both
+produce the same ParsedInteraction model.
+"""
+
+from dataclasses import dataclass
+from models import ParsedInteraction
+
+
+@dataclass
+class ValidationResult:
+    is_valid: bool  # validate_interaction sets this to True when `errors` is empty
+    errors: list  # messages that block DB insertion
+    warnings: list  # messages that are logged but do not block insertion
+
+
+# interaction.score column width as noted in mitab_parser (varchar(10))
+_MAX_SCORE_LENGTH = 10
+
+
+def validate_interaction(ix: ParsedInteraction, line_num: int) -> ValidationResult:
+    """
+    Validate a single ParsedInteraction.
+    Errors block DB insertion. Warnings are logged but allowed.
+
+    Errors:   an interactor with no uniprot_id, gene_name or ensembl_id.
+    Warnings: an identified interactor without a UniProt ID; a score that
+              is not numeric; a score longer than 10 characters.
+    `line_num` is only used as the "Line N" prefix of each message.
+    Returns a ValidationResult whose is_valid is True exactly when no
+    error was recorded.
+    """
+    errors = []
+    warnings = []
+
+    for label, protein in (("A", ix.protein_a), ("B", ix.protein_b)):
+        # At least one identifier required for each interactor
+        has_identifier = bool(
+            protein.uniprot_id or protein.gene_name or protein.ensembl_id
+        )
+        if not has_identifier:
+            errors.append(
+                f"Line {line_num}: Interactor {label} has no identifier "
+                f"(uniprot_id, gene_name, or ensembl_id required)"
+            )
+        # Warn if no UniProt ID — UniProt REST annotation will be skipped
+        elif not protein.uniprot_id:
+            warnings.append(
+                f"Line {line_num}: No UniProt ID for interactor {label} "
+                f"— UniProt annotation fetch will be skipped"
+            )
+
+    if ix.score is not None:
+        # Warn if score present but not numeric
+        try:
+            float(ix.score)
+        except (TypeError, ValueError):
+            warnings.append(
+                f"Line {line_num}: Score '{ix.score}' is not numeric"
+            )
+        # The CSV parser passes scores through untouched, while the MITAB
+        # parser notes interaction.score is varchar(10) and trims to fit.
+        if len(str(ix.score)) > _MAX_SCORE_LENGTH:
+            warnings.append(
+                f"Line {line_num}: Score '{ix.score}' is longer than "
+                f"{_MAX_SCORE_LENGTH} characters and may not fit the DB column"
+            )
+
+    return ValidationResult(
+        is_valid=len(errors) == 0,
+        errors=errors,
+        warnings=warnings,
+    )
+
+
+def validate_file(interactions: list) -> list:
+    """Validate every interaction; result i belongs to interactions[i].
+
+    The "line" number passed on is the 1-based position in `interactions`,
+    which is not necessarily the line number in the uploaded file."""
+    return [validate_interaction(ix, pos) for pos, ix in enumerate(interactions, 1)]
\ No newline at end of file