From 76bb89f380ce41363bf2438e33cf405b503d08ea Mon Sep 17 00:00:00 2001 From: Bugale Bugalit Date: Fri, 16 Jan 2026 15:39:35 +0200 Subject: [PATCH] fix: handle subtitles with dialogue in name --- buganime/buganime.py | 3 +- tests/data/9.json | 630 +++++++++++++++++++++++++++++++++++++++++ tests/test_buganime.py | 1 + 3 files changed, 633 insertions(+), 1 deletion(-) create mode 100644 tests/data/9.json diff --git a/buganime/buganime.py b/buganime/buganime.py index ba3510d..7f66d23 100644 --- a/buganime/buganime.py +++ b/buganime/buganime.py @@ -72,7 +72,8 @@ def _get_subtitle_stream_index() -> int: for i, stream in enumerate(subtitle_streams): match stream: case {'tags': {'language': str() as lang}} if lang in ('en', 'eng'): - if all(x not in stream['tags'].get('title', '').upper() for x in ('S&S', 'SIGNS', 'FORCED')) and \ + if (all(x not in stream['tags'].get('title', '').upper() for x in ('S&S', 'SIGNS', 'FORCED')) or + ('DIALOGUE' in stream['tags'].get('title', '').upper())) and \ stream['codec_name'].lower() in SUPPORTED_SUBTITLE_CODECS: relevant_streams.append((i, stream)) if not relevant_streams: diff --git a/tests/data/9.json b/tests/data/9.json new file mode 100644 index 0000000..cfe4c65 --- /dev/null +++ b/tests/data/9.json @@ -0,0 +1,630 @@ +{ + "streams": [ + { + "index": 0, + "codec_name": "h264", + "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10", + "profile": "High", + "codec_type": "video", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "width": 1920, + "height": 1080, + "coded_width": 1920, + "coded_height": 1080, + "closed_captions": 0, + "film_grain": 0, + "has_b_frames": 2, + "sample_aspect_ratio": "1:1", + "display_aspect_ratio": "16:9", + "pix_fmt": "yuv420p", + "level": 40, + "color_range": "tv", + "color_space": "bt709", + "color_transfer": "bt709", + "color_primaries": "bt709", + "chroma_location": "left", + "field_order": "progressive", + "refs": 1, + "is_avc": "true", + "nal_length_size": "4", + "r_frame_rate": "30000/1001", + "avg_frame_rate": "30000/1001", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "bits_per_raw_sample": "8", + "extradata_size": 58, + "disposition": { + "default": 1, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "title": "ToonsHub AMZN.WEB-DL", + "BPS": "7025693", + "DURATION": "00:04:00.039000000", + "NUMBER_OF_FRAMES": "7194", + "NUMBER_OF_BYTES": "210805048", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 1, + "codec_name": "eac3", + "codec_long_name": "ATSC A/52B (AC-3, E-AC-3)", + "codec_type": "audio", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "sample_fmt": "fltp", + "sample_rate": "48000", + "channels": 2, + "channel_layout": "stereo", + "bits_per_sample": 0, + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "bit_rate": "224000", + "disposition": { + "default": 1, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "language": "jpn", + "title": "Japanese (日本語)", + "BPS": "224000", + "DURATION": "00:04:00.064000000", + "NUMBER_OF_FRAMES": "7502", + "NUMBER_OF_BYTES": "6721792", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 2, + "codec_name": "ass", + "codec_long_name": "ASS (Advanced SSA) subtitle", + "codec_type": "subtitle", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 240064, + "duration": "240.064000", + "extradata_size": 1224, + "disposition": { + "default": 1, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "language": "eng", + "title": "English [Dialogue & (Still half-arsed) Signs (geckyzz)]", + "BPS": "250", + "DURATION": "00:03:37.100000000", + "NUMBER_OF_FRAMES": "83", + "NUMBER_OF_BYTES": "6794", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 3, + "codec_name": "ass", + "codec_long_name": "ASS (Advanced SSA) subtitle", + "codec_type": "subtitle", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 240064, + "duration": "240.064000", + "extradata_size": 1224, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "language": "eng", + "title": "English [Dialogue & Signs(?) (OceanVeil; Official)]", + "BPS": "226", + "DURATION": "00:03:37.100000000", + "NUMBER_OF_FRAMES": "81", + "NUMBER_OF_BYTES": "6135", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 4, + "codec_name": "ass", + "codec_long_name": "ASS (Advanced SSA) subtitle", + "codec_type": "subtitle", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 240064, + "duration": "240.064000", + "extradata_size": 1224, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "language": "ind", + "title": "Indonesian (bahasa Indonesia) [Dialog & Markah (seadanya) (geckyzz)]", + "BPS": "255", + "DURATION": "00:03:37.100000000", + "NUMBER_OF_FRAMES": "83", + "NUMBER_OF_BYTES": "6939", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 5, + "codec_name": "ass", + "codec_long_name": "ASS (Advanced SSA) subtitle", + "codec_type": "subtitle", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/1000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 240064, + "duration": "240.064000", + "extradata_size": 802, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "language": "jpn", + "title": "Japanese (日本語) [転写 (OpenAI Whispher, geckyzz)]", + "BPS": "178", + "DURATION": "00:03:31.670000000", + "NUMBER_OF_FRAMES": "68", + "NUMBER_OF_BYTES": "4722", + "_STATISTICS_WRITING_APP": "mkvmerge v96.0 ('It's My Life') 64-bit", + "_STATISTICS_WRITING_DATE_UTC": "2026-01-06 16:07:23", + "_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES" + } + }, + { + "index": 6, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 151520, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "Exo2-Bold.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 7, + "codec_name": "otf", + "codec_long_name": "OpenType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 4533028, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "NotoSansJP-Regular.otf", + "mimetype": "application/vnd.ms-opentype" + } + }, + { + "index": 8, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 395104, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "timesbd.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 9, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 237720, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "timesbi.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 10, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 246400, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "timesi.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 11, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 333612, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "Ubuntu-B.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 12, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 353824, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "Ubuntu-R.ttf", + "mimetype": "application/x-truetype-font" + } + }, + { + "index": 13, + "codec_name": "ttf", + "codec_long_name": "TrueType font", + "codec_type": "attachment", + "codec_tag_string": "[0][0][0][0]", + "codec_tag": "0x0000", + "r_frame_rate": "0/0", + "avg_frame_rate": "0/0", + "time_base": "1/90000", + "start_pts": 0, + "start_time": "0.000000", + "duration_ts": 21605760, + "duration": "240.064000", + "extradata_size": 386440, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 0, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "tags": { + "filename": "Ubuntu-RI.ttf", + "mimetype": "application/x-truetype-font" + } + } + ], + "format": { + "filename": "C:\\Temp\\Torrents\\[TokekHutan] I'll Live a Long Life to Dote on My Favorite Stepbrother! - S01E01v2 [AMZN.WEB-DL 1080P AVC, EAC3, MULTi][85B04D6B].mkv", + "nb_streams": 14, + "nb_programs": 0, + "format_name": "matroska,webm", + "format_long_name": "Matroska / WebM", + "start_time": "0.000000", + "duration": "240.064000", + "size": "224282704", + "bit_rate": "7474097", + "probe_score": 100, + "tags": { + "encoder": "libebml v1.4.5 + libmatroska v1.7.1", + "creation_time": "2026-01-06T16:07:23.000000Z" + } + } +} \ No newline at end of file diff --git a/tests/test_buganime.py b/tests/test_buganime.py index 9353d8a..38a744c 100644 --- a/tests/test_buganime.py +++ b/tests/test_buganime.py @@ -105,6 +105,7 @@ def test_parse_filename(path: str, result: buganime.TVShow | buganime.Movie) -> ('6.json', transcode.VideoInfo(audio_index=1, subtitle_index=0, width=1920, height=1080, fps='30/1', frames=7425)), ('7.json', transcode.VideoInfo(audio_index=1, subtitle_index=0, width=1920, height=1080, fps='24000/1001', frames=0)), ('8.json', transcode.VideoInfo(audio_index=2, subtitle_index=3, width=1920, height=1080, fps='24000/1001', frames=36240)), + ('9.json', transcode.VideoInfo(audio_index=1, subtitle_index=0, width=1920, height=1080, fps='30000/1001', frames=7194)), ]