diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py index 2da6c7c..8088c6a 100644 --- a/lib/inat_inferrer.py +++ b/lib/inat_inferrer.py @@ -640,7 +640,7 @@ def limit_leaf_scores_that_include_humans(self, leaf_scores, strategy=None): return top_results.head(1) # if requesting a more limited approach to human exclusion - if strategy == "limited" and self.taxonomy.mammals_taxon is not None: + if self.taxonomy.mammals_taxon is not None: mammals_results = top_results.query( f"left > {self.taxonomy.mammals_taxon['left']} and " f"right < {self.taxonomy.mammals_taxon['right']}" diff --git a/tests/fixtures/taxonomy.csv b/tests/fixtures/taxonomy.csv index deefb95..bce3457 100644 --- a/tests/fixtures/taxonomy.csv +++ b/tests/fixtures/taxonomy.csv @@ -24,3 +24,7 @@ parent_taxon_id,taxon_id,rank_level,leaf_class_id,iconic_class_id,spatial_class_ 43367,43575,30,,,23,Hominidae 43575,43583,20,,,24,Homo 43583,43584,10,,,25,Homo sapiens +40151,43698,40,,,26,Rodentia +43698,44185,30,,,27,Muridae +44185,44678,20,,,28,Mus +44678,44705,10,,,29,Mus musculus diff --git a/tests/test_inat_inferrer.py b/tests/test_inat_inferrer.py index 0643adc..df93fd9 100644 --- a/tests/test_inat_inferrer.py +++ b/tests/test_inat_inferrer.py @@ -112,6 +112,8 @@ def test_results_are_unchanged_if_they_dont_include_humans(self, inatInferrer): for n in range(20): results.append({ "taxon_id": n, + "left": 0, + "right": 0, "combined_score": 1.0 - (n * 0.01) }) results = pd.DataFrame(results) @@ -120,46 +122,98 @@ def test_results_are_unchanged_if_they_dont_include_humans(self, inatInferrer): def test_results_are_unchanged_if_they_dont_include_humans_in_top_10(self, inatInferrer): assert inatInferrer.taxonomy.human_taxon["name"] == "Homo sapiens" assert inatInferrer.taxonomy.human_taxon["taxon_id"] == 43584 + results = [] for n in range(20): results.append({ "taxon_id": n, + "left": 0, + "right": 0, "combined_score": 1.0 - (n * 0.01) }) results.append({ "taxon_id": inatInferrer.taxonomy.human_taxon["taxon_id"], + "left": inatInferrer.taxonomy.human_taxon["left"], + "right": inatInferrer.taxonomy.human_taxon["right"], "combined_score": 0.001 }) results = pd.DataFrame(results) assert results.equals(inatInferrer.limit_leaf_scores_that_include_humans(results)) def test_results_are_empty_if_humans_are_in_top_10_but_not_first(self, inatInferrer): + # ... and the results contain another mammal assert inatInferrer.taxonomy.human_taxon["name"] == "Homo sapiens" assert inatInferrer.taxonomy.human_taxon["taxon_id"] == 43584 + mouse_taxon = inatInferrer.taxonomy.df.query("name == 'Mus musculus'").iloc[0] + results = [] for n in range(5): results.append({ "taxon_id": n, + "left": 0, + "right": 0, "combined_score": 1.0 - (n * 0.01) }) + results.append({ + "taxon_id": mouse_taxon["taxon_id"], + "left": mouse_taxon["left"], + "right": mouse_taxon["right"], + "combined_score": 0.002 + }) results.append({ "taxon_id": inatInferrer.taxonomy.human_taxon["taxon_id"], + "left": inatInferrer.taxonomy.human_taxon["left"], + "right": inatInferrer.taxonomy.human_taxon["right"], "combined_score": 0.001 }) results = pd.DataFrame(results) assert inatInferrer.limit_leaf_scores_that_include_humans(results).empty + def test_humans_excluded_if_humans_are_in_top_10_but_not_first(self, inatInferrer): + assert inatInferrer.taxonomy.human_taxon["name"] == "Homo sapiens" + assert inatInferrer.taxonomy.human_taxon["taxon_id"] == 43584 + + results = [] + for n in range(5): + results.append({ + "taxon_id": n, + "left": 0, + "right": 0, + "combined_score": 1.0 - (n * 0.01) + }) + results.append({ + "taxon_id": inatInferrer.taxonomy.human_taxon["taxon_id"], + "left": inatInferrer.taxonomy.human_taxon["left"], + "right": inatInferrer.taxonomy.human_taxon["right"], + "combined_score": 0.001 + }) + results = pd.DataFrame(results) + assert results.head(5).equals(inatInferrer.limit_leaf_scores_that_include_humans(results)) + def test_results_are_empty_if_humans_are_first_by_small_margin(self, inatInferrer): + # ... and the results contain another mammal assert inatInferrer.taxonomy.human_taxon["name"] == "Homo sapiens" assert inatInferrer.taxonomy.human_taxon["taxon_id"] == 43584 + mouse_taxon = inatInferrer.taxonomy.df.query("name == 'Mus musculus'").iloc[0] + results = [] results.append({ "taxon_id": inatInferrer.taxonomy.human_taxon["taxon_id"], + "left": inatInferrer.taxonomy.human_taxon["left"], + "right": inatInferrer.taxonomy.human_taxon["right"], "combined_score": 1.0 }) + results.append({ + "taxon_id": mouse_taxon["taxon_id"], + "left": mouse_taxon["left"], + "right": mouse_taxon["right"], + "combined_score": 0.95 + }) for n in range(5): results.append({ "taxon_id": n, + "left": 0, + "right": 0, "combined_score": 0.9 - (n * 0.01) }) results = pd.DataFrame(results) @@ -171,12 +225,16 @@ def test_only_humans_returned_if_first_by_large_margin(self, inatInferrer): results = [] human_result = { "taxon_id": inatInferrer.taxonomy.human_taxon["taxon_id"], + "left": inatInferrer.taxonomy.human_taxon["left"], + "right": inatInferrer.taxonomy.human_taxon["right"], "combined_score": 1.0 } results.append(human_result) for n in range(5): results.append({ "taxon_id": n, + "left": 0, + "right": 0, "combined_score": 0.5 - (n * 0.01) }) results = pd.DataFrame(results) diff --git a/tests/test_model_taxonomy_dataframe.py b/tests/test_model_taxonomy_dataframe.py index c38a120..7461d56 100644 --- a/tests/test_model_taxonomy_dataframe.py +++ b/tests/test_model_taxonomy_dataframe.py @@ -41,15 +41,15 @@ def test_human_taxon(self, capsys, taxonomy): def test_print(self, capsys, taxonomy): ModelTaxonomyDataframe.print(taxonomy.df) captured = capsys.readouterr() - assert "├──Animalia :: 0:33" in captured.out - assert "│ └──Chordata :: 1:32" in captured.out + assert "├──Animalia :: 0:41" in captured.out + assert "│ └──Chordata :: 1:40" in captured.out def test_print_with_aggregated_combined_score(self, capsys, taxonomy): taxonomy.df["aggregated_combined_score"] = 1 ModelTaxonomyDataframe.print(taxonomy.df) captured = capsys.readouterr() - assert "├──Animalia :: 0:33" in captured.out - assert "│ └──Chordata :: 1:32" in captured.out + assert "├──Animalia :: 0:41" in captured.out + assert "│ └──Chordata :: 1:40" in captured.out def test_print_with_lambda(self, capsys, taxonomy): ModelTaxonomyDataframe.print(taxonomy.df, display_taxon_lambda=(