diff --git a/.github/workflows/CICD-dev.yml b/.github/workflows/CICD-dev.yml index 09a7e85..ab9bd5e 100644 --- a/.github/workflows/CICD-dev.yml +++ b/.github/workflows/CICD-dev.yml @@ -62,7 +62,7 @@ jobs: refresh-dev-staging-deployment: name: Refresh Dev Staging Deployment needs: build-and-push-dev-docker-image - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Refresh Dev inatvisionapi uses: actions/github-script@v6 @@ -89,7 +89,7 @@ jobs: name: Notify Slack needs: build-and-push-dev-docker-image if: ${{ success() || failure() }} - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: iRoachie/slack-github-actions@v2.3.2 if: env.SLACK_WEBHOOK_URL != null diff --git a/.github/workflows/CICD-main.yml b/.github/workflows/CICD-main.yml index 0e4c956..53c4ddb 100644 --- a/.github/workflows/CICD-main.yml +++ b/.github/workflows/CICD-main.yml @@ -65,7 +65,7 @@ jobs: refresh-main-staging-deployment: name: Refresh Main Staging Deployment needs: build-and-push-main-docker-image - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Refresh Main inatvisionapi uses: actions/github-script@v6 @@ -92,7 +92,7 @@ jobs: name: Notify Slack needs: build-and-push-main-docker-image if: ${{ success() || failure() }} - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: iRoachie/slack-github-actions@v2.3.2 if: env.SLACK_WEBHOOK_URL != null diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py index f7943db..2da6c7c 100644 --- a/lib/inat_inferrer.py +++ b/lib/inat_inferrer.py @@ -613,7 +613,10 @@ def common_ancestor_from_aggregated_scores( return common_ancestor - def limit_leaf_scores_that_include_humans(self, leaf_scores): + def limit_leaf_scores_that_include_humans(self, leaf_scores, strategy=None): + if strategy == "never_exclude": + return leaf_scores + if self.taxonomy.human_taxon is None: return leaf_scores @@ -636,6 +639,19 @@ def limit_leaf_scores_that_include_humans(self, leaf_scores): if human_score_margin > 1.5: return top_results.head(1) + # if requesting a more limited approach to human exclusion + if strategy == "limited" and self.taxonomy.mammals_taxon is not None: + mammals_results = top_results.query( + f"left > {self.taxonomy.mammals_taxon['left']} and " + f"right < {self.taxonomy.mammals_taxon['right']}" + ) + # if there is only 1 Mammals taxon (human), return all non-human results + if mammals_results.index.size == 1: + non_human_results = top_results.query( + f"taxon_id != {self.taxonomy.human_taxon['taxon_id']}" + ) + return non_human_results + # otherwise return no results return leaf_scores.head(0) diff --git a/lib/inat_vision_api.py b/lib/inat_vision_api.py index 025973f..3d65503 100644 --- a/lib/inat_vision_api.py +++ b/lib/inat_vision_api.py @@ -178,7 +178,8 @@ def score_image(self, form, file_path, lat, lng, filter_taxon_id, geomodel, embedding = predictions_for_image["features"] return InatVisionAPIResponses.aggregated_object_response( leaf_scores, aggregated_scores, self.inferrer, - embedding=embedding + embedding=embedding, + human_exclusion_strategy=form.human_exclusion.data ) # legacy dict response @@ -192,6 +193,7 @@ def score_image(self, form, file_path, lat, lng, filter_taxon_id, geomodel, self.inferrer, common_ancestor_rank_type=common_ancestor_rank_type, embedding=embedding, + human_exclusion_strategy=form.human_exclusion.data, debug=self.debug ) diff --git a/lib/inat_vision_api_responses.py b/lib/inat_vision_api_responses.py index f9a56c0..abb56c1 100644 --- a/lib/inat_vision_api_responses.py +++ b/lib/inat_vision_api_responses.py @@ -21,10 +21,13 @@ def array_response(leaf_scores, inferrer): @staticmethod def object_response(leaf_scores, inferrer, common_ancestor_rank_type=None, - embedding=None, debug=False): + embedding=None, debug=False, human_exclusion_strategy=None): leaf_scores = InatVisionAPIResponses.limit_leaf_scores_for_response(leaf_scores) leaf_scores = InatVisionAPIResponses.update_leaf_scores_scaling(leaf_scores) - post_human_exclusion_scores = inferrer.limit_leaf_scores_that_include_humans(leaf_scores) + + post_human_exclusion_scores = inferrer.limit_leaf_scores_that_include_humans( + leaf_scores, strategy=human_exclusion_strategy + ) human_exclusion_cleared_results = False if not leaf_scores.empty and post_human_exclusion_scores.empty: human_exclusion_cleared_results = True @@ -85,7 +88,9 @@ def aggregated_tree_response(aggregated_scores, inferrer): return "
" + "
".join(printable_tree) + "
" @staticmethod - def aggregated_object_response(leaf_scores, aggregated_scores, inferrer, embedding=None): + def aggregated_object_response( + leaf_scores, aggregated_scores, inferrer, embedding=None, human_exclusion_strategy=None + ): top_leaf_combined_score = aggregated_scores.query( "leaf_class_id.notnull()" ).sort_values( @@ -100,7 +105,10 @@ def aggregated_object_response(leaf_scores, aggregated_scores, inferrer, embeddi "normalized_aggregated_combined_score", ascending=False ).head(100) - top_100_leaves = inferrer.limit_leaf_scores_that_include_humans(top_100_leaves) + + top_100_leaves = inferrer.limit_leaf_scores_that_include_humans( + top_100_leaves, strategy=human_exclusion_strategy + ) aggregated_scores = InatVisionAPIResponses.update_aggregated_scores_scaling( aggregated_scores diff --git a/lib/model_taxonomy_dataframe.py b/lib/model_taxonomy_dataframe.py index 180d024..78839a1 100644 --- a/lib/model_taxonomy_dataframe.py +++ b/lib/model_taxonomy_dataframe.py @@ -7,6 +7,7 @@ class ModelTaxonomyDataframe: def __init__(self, path, thresholds_path): self.load_mapping(path, thresholds_path) self.set_human_taxon() + self.set_mammals_taxon() def load_mapping(self, path, thresholds_path): self.df = pd.read_csv( @@ -78,6 +79,20 @@ def set_human_taxon(self): self.human_taxon = human_rows.iloc[0] + def set_mammals_taxon(self): + self.mammals_taxon = None + if self.human_taxon is None: + return + + mammals_rows = self.df.query( + f"name == 'Mammalia' and left < {self.human_taxon['left']} and " + f"right > {self.human_taxon['right']}" + ) + if mammals_rows.empty: + return + + self.mammals_taxon = mammals_rows.iloc[0] + @staticmethod def children(df, taxon_id): if taxon_id == 0: diff --git a/lib/templates/home.html b/lib/templates/home.html index a331292..53d3353 100644 --- a/lib/templates/home.html +++ b/lib/templates/home.html @@ -49,6 +49,12 @@

Slim vs Legacy Model


+ +

diff --git a/lib/web_forms.py b/lib/web_forms.py index e37942f..b1cc17c 100644 --- a/lib/web_forms.py +++ b/lib/web_forms.py @@ -15,4 +15,5 @@ class ImageForm(FlaskForm): aggregated = StringField("aggregated") return_embedding = StringField("return_embedding") common_ancestor_rank_type = StringField("common_ancestor_rank_type") + human_exclusion = StringField("human_exclusion") format = StringField("format")