Audio-Classification-pytorch/preprocessing.py at main · siuoly/Audio-Classification-pytorch · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/bin/python

from config import config
from pprint import pprint
import glob
from pathlib import Path
import os
import soundfile as sf
import librosa as ra
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
from tool.data_path import (dataset_arg,
                            get_audio_filenames,
                            get_source_audio_paths,
                            get_resample_audio_paths,
                            get_feature_paths)
import pandas as pd
# Preprocessing section of the project configuration; every function below reads its options from here.
process_arg = config['preprocessing']


# Fixed number of samples every resampled clip is padded/trimmed to
# (new_sr * times; presumably samples-per-second times seconds -- confirm against the config).
audio_size = process_arg["new_sr"] * process_arg["times"]
def resample_fixlength_and_save(file, new_path):
    """Resample one audio file, force it to the fixed clip length, and save it.

    The audio is loaded at ``process_arg['new_sr']``, padded or trimmed to
    ``audio_size`` samples along its last axis, and written to ``new_path``.

    Args:
        file: Path of the source audio file.
        new_path: Destination path; its file name must equal ``file``'s.

    Raises:
        RuntimeError: If the source and destination file names differ.
    """
    if file.name != new_path.name:
        raise RuntimeError("resample file", file.name, new_path.name)
    # parents=True tolerates nested destination folders; exist_ok=True keeps
    # concurrent workers from failing when they share a destination folder.
    new_path.parent.mkdir(parents=True, exist_ok=True)
    target_sr = process_arg['new_sr']
    wav, _ = ra.load(file, sr=target_sr)
    if wav.shape[-1] != audio_size:
        wav = ra.util.fix_length(wav, size=audio_size)
    sf.write(new_path, wav, target_sr)


def create_resampled_folder(resample_audio_folder, n_jobs=12):
    """Create the resampled-audio folder and fill it in parallel.

    Args:
        resample_audio_folder: Folder to create; it must not exist yet
            (``mkdir`` is called without ``exist_ok``).
        n_jobs: Number of joblib workers used for resampling. Defaults to 12,
            the value that was previously hard-coded.

    Raises:
        FileExistsError: If ``resample_audio_folder`` already exists.
    """
    print("create resample audio folder ", resample_audio_folder)
    resample_audio_folder.mkdir(parents=True)
    audio_paths = get_source_audio_paths()
    new_audio_paths = get_resample_audio_paths()

    # NOTE(review): zip stops at the shorter sequence, so a length mismatch
    # between source and destination paths would silently skip files.
    arguments = zip(audio_paths, new_audio_paths)
    Parallel(n_jobs=n_jobs)(
        delayed(resample_fixlength_and_save)(file, new_path)
        for file, new_path in tqdm(arguments, total=len(audio_paths)))


def get_an_filename():
    """Return one source audio path to use as a preprocessing sample.

    Returns:
        The source audio path at index 1 when at least two exist,
        otherwise the one at index 0.

    Raises:
        IndexError: If there are no source audio files at all.
    """
    filenames = get_source_audio_paths()
    # Index 1 is the historical choice; fall back to index 0 so a dataset
    # with a single file does not fail with IndexError.
    return filenames[1] if len(filenames) > 1 else filenames[0]


# preprocessing : resample, melspectrogram
def show_preprocessing_message():
    """Print a preview of the preprocessing applied to one sample file.

    Loads the sample at its native rate, resamples it to the configured
    rate, computes its mel spectrogram, and prints the shapes and settings.
    """
    file = get_an_filename()
    wav, sr = ra.load(file, sr=None)

    target_sr = process_arg['new_sr']
    new_wav = ra.resample(wav, orig_sr=sr, target_sr=target_sr)

    mel_options = process_arg['mel_arg']
    mel = ra.feature.melspectrogram(y=new_wav, sr=target_sr, **mel_options)

    print("audio folder", dataset_arg["audio_folder"])
    print(f"wav shape :{wav.shape}, sr: {sr}")
    print(f"new wav shape:{new_wav.shape}, sr: {process_arg['new_sr']} ")
    print("mel argument:")
    pprint(mel_options)
    print(f"spectrogram feature shape:{mel.shape}")


def get_mel_of_file(file):
    """Load an audio file and return its mel spectrogram with a channel axis.

    The audio is loaded at ``process_arg['new_sr']`` and converted with the
    options in ``process_arg['mel_arg']``. When ``process_arg['dbscale']`` is
    True the power spectrogram is converted to decibels.

    Args:
        file: Path of the audio file to load.

    Returns:
        The spectrogram with a leading axis of size 1 prepended, i.e.
        (f, t) --> (1, f, t) (assuming the loaded audio is mono -- confirm).
    """
    wav, sr = ra.load(file, sr=process_arg['new_sr'])
    mel = ra.feature.melspectrogram(y=wav, sr=sr, **process_arg['mel_arg'])
    if process_arg["dbscale"] is True:
        mel = ra.power_to_db(mel)
    # Always add the channel axis: previously it was added only in the
    # dB branch, so the output rank depended on the dbscale setting and
    # normalized_data (axis=(1, 2)) failed on the 2-D result.
    return mel[None, ...]  # (f,t) --> (1,f,t)


# Planned for removal later.
# def get_stft_and_phase_of_file(file):
#     wav, sr = ra.load(file, sr=process_arg['new_sr'])
#     stft = ra.stft(y=wav, **process_arg['stft_arg'])
#     if config["in_channel"] == 2:
#         phase = np.angle(stft)
#         return np.stack([np.abs(stft)**2, phase])
#     else:
#         return np.abs(stft)**2

def processing_a_audio(file):
    """Compute the configured feature for one audio file.

    Only the "mel" feature is supported. Delta stacking and normalization
    are applied afterwards when enabled in ``process_arg``.

    Args:
        file: Path of the audio file to process.

    Returns:
        The feature array produced by the configured steps.

    Raises:
        RuntimeError: If ``process_arg['feature']`` is not "mel".
    """
    # Guard clause: reject anything other than the mel feature up front.
    if process_arg["feature"] != "mel":
        raise RuntimeError(f"Unknown processing feature {process_arg['feature']}")

    result = get_mel_of_file(file)
    if process_arg["delta"]:
        result = get_detlta_feature(result)
    if process_arg["normalize"]:
        result = normalized_data(result)
    return result


def save_feature(save_path: Path, feature: np.ndarray):
    """Save a feature array with ``np.save``, creating its folder if needed.

    Args:
        save_path: Destination file path for the array.
        feature: Array to store.

    Returns:
        ``save_path``, unchanged.
    """
    # parents=True so feature paths nested more than one level below an
    # existing folder can still be written; exist_ok=True keeps parallel
    # workers from failing when they share a folder.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    np.save(save_path, feature)
    return save_path


def processing_and_save_a_file(file: Path, save_path: Path):
    """Extract the feature of one audio file and store it at ``save_path``.

    Args:
        file: Source audio path, e.g. ``xx.wav``.
        save_path: Destination feature path, e.g. ``xx.wav.npy``.

    Raises:
        RuntimeError: If the audio stem does not match the feature file's
            stem with its last four characters dropped.
    """
    # "xx.wav.npy" has stem "xx.wav"; dropping the last 4 characters
    # leaves "xx", which must equal the stem of "xx.wav".
    expected_stem = save_path.stem[:-4]
    names_match = file.stem == expected_stem
    if not names_match:
        raise RuntimeError("filename is not save filename", file.stem, save_path.stem)
    save_feature(save_path, processing_a_audio(file))


def make_data_folder():
    """Create the feature folder named by ``config['feature_folder']``.

    Returns:
        True if the folder was created by this call, False if it already
        existed (in which case nothing is changed).
    """
    data_folder = Path(config['feature_folder'])
    if not data_folder.exists():
        data_folder.mkdir(parents=True, exist_ok=True)
        print( f"create data folder:{data_folder}" )
        return True

    print( f"feature folder {data_folder} exist." )
    return False


def get_detlta_feature(feature):
    """Stack a feature with its first- and second-order deltas.

    Args:
        feature: Feature array to extend.

    Returns:
        ``feature``, its order-1 delta and its order-2 delta, concatenated
        along the first axis in that order.
    """
    first_order = ra.feature.delta(feature, order=1)
    second_order = ra.feature.delta(feature, order=2)
    stacked = [feature, first_order, second_order]
    return np.concatenate(stacked)


def normalized_data(x):
    """Standardize each leading-axis slice of a 3-D array to zero mean, unit std.

    Args:
        x: Array with axes (0, 1, 2); statistics are taken over axes 1 and 2.

    Returns:
        ``(x - mean) / std`` computed per slice along axis 0. A slice whose
        standard deviation is zero (constant values) is returned as all
        zeros instead of NaN/inf.
    """
    axis = (1, 2)  # of axes (0, 1, 2), reduce over the last two
    mean = x.mean(axis=axis, keepdims=True)
    std = x.std(axis=axis, keepdims=True)
    # Avoid division by zero for constant slices (e.g. silent audio).
    safe_std = np.where(std == 0, 1.0, std)
    return (x - mean) / safe_std


def main(show=True):
    """Run the preprocessing pipeline: resample audio, then extract features.

    Args:
        show: When true, print a preview of the preprocessing on one sample
            file before doing any work.

    The resampled audio folder is created only if it does not exist yet.
    ``dataset_arg['audio_folder']`` is then pointed at it, so the source
    audio paths gathered afterwards are read with that setting in place.
    Feature extraction is skipped entirely when the feature folder already
    exists.
    """
    meta = pd.read_csv(dataset_arg['meta_file'])
    if show:
        show_preprocessing_message()

    resample_root = Path(process_arg['resample_audio_folder'])
    resample_audio_folder = resample_root / f"{config['dataset']}_sr{process_arg['new_sr']}"
    if resample_audio_folder.exists():
        print("resample audio folder ", resample_audio_folder, "exists")
    else:
        create_resampled_folder(resample_audio_folder)

    # Redirect the audio folder to the resampled copies before listing paths.
    dataset_arg['audio_folder'] = str(resample_audio_folder) +'/'

    created = make_data_folder()
    if not created:
        return  # leave preprocessing: features already exist

    audio_paths = get_source_audio_paths()
    arguments = zip(audio_paths, get_feature_paths(meta))

    pool = Parallel(n_jobs=12)
    pool(delayed(processing_and_save_a_file)(file, new_path)
         for file, new_path in tqdm(arguments, total=len(audio_paths)))

# Script entry point: run the whole pipeline, including the preview printout.
if __name__ == "__main__":
    main(show=True)