diff --git a/scripts/assign-colors b/scripts/assign-colors index e42a44d..295914d 100755 --- a/scripts/assign-colors +++ b/scripts/assign-colors @@ -14,7 +14,7 @@ if __name__ == '__main__': parser.add_argument('--ordering', type=str, required=True, help="""Input TSV file defining the color ordering where the first - column is the field and the second column is the trait in that field. + column is the category and the second column is the trait in that category. Blank lines are ignored. Lines starting with '#' will be ignored as comments.""") parser.add_argument('--color-schemes', type=str, required=True, help="Input color schemes where each line is a different color scheme separated by tabs.") @@ -23,6 +23,16 @@ if __name__ == '__main__': metadata. If the metadata includes a 'focal' column that only contains boolean values, then restrict colors to traits for rows where 'focal' is set to True.""") + parser.add_argument('--ignore-categories', type=str, default=[], nargs='*', + help="""Do not create colors for these categories even if they are + included in the metadata and ordering TSV. This is useful for ignoring + categories in specific builds even if they share the same default + ordering TSV.""") + parser.add_argument('--force-include-categories', type=str, default=[],nargs='*', + help="""Force include all color orderings for these categories even if + there are traits not included in the metadata TSV. This is useful for + creating colorings for traits not (yet) present in metadata to solve + bootstrapping issue.""") parser.add_argument('--output', type=str, required=True, help="Output colors TSV file to be passed to augur export.") args = parser.parse_args() @@ -42,6 +52,8 @@ if __name__ == '__main__': else: name = array[0] trait = array[1] + if name in args.ignore_categories: + continue if name not in assignment: assignment[name] = [trait] else: @@ -53,7 +65,7 @@ if __name__ == '__main__': if args.metadata: metadata = pd.read_csv(args.metadata, delimiter='\t') for name, trait in assignment.items(): - if name in metadata: + if name in metadata and name not in args.force_include_categories: if 'focal' in metadata and metadata['focal'].dtype == 'bool': focal_list = metadata.loc[metadata['focal'], name].unique() subset_focal = [x for x in assignment[name] if x in focal_list]