[DEV] update normalisation preprocess
This commit is contained in:
parent
93912b54a3
commit
4b3c96595e
@ -14,7 +14,10 @@ import math
|
||||
import json
|
||||
import resampy
|
||||
import numpy as np
|
||||
import scipy.io.wavfile as wavfile
|
||||
import scipy.io.wavfile
|
||||
import scipy.signal
|
||||
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-i", "--input", help="Input directory",
|
||||
@ -81,21 +84,137 @@ for elem in audio_corpus_element:
|
||||
debug.error(" ==> missing field 'audio_filename' ...")
|
||||
filename = os.path.join(os.path.dirname(elem), input_data["audio_filename"])
|
||||
if filename[-3:] == "wav":
|
||||
sample_rate, audio_data = wavfile.read(filename)
|
||||
sample_rate, audio_data = scipy.io.wavfile.read(filename)
|
||||
else:
|
||||
debug.error("Not supported file type: '" + str(filename[-3:]) + "' suported: [wav]")
|
||||
debug.info("Read: " + str(len(audio_data)) + " sample(s)")
|
||||
|
||||
audio_16k = resampy.resample(audio_data, 48000, 16000)
|
||||
|
||||
#######################################################
|
||||
## Step 1: Generate speech areas (work on the input, because I am sure that this data is not modified)
|
||||
#######################################################
|
||||
audio_data_absolute = np.absolute(audio_data)
|
||||
shape = np.copy(audio_data_absolute)
|
||||
# average of the ambient noise
|
||||
avg = 0
|
||||
# count represents the number of initial samples used to calibrate the generic ambient noise
|
||||
count = 0
|
||||
count_all = 0
|
||||
set_value = 32767
|
||||
# number of samples used for the ambient noise calculation
|
||||
ambiant_basic_size = 48000/4 # : 1/4 s ==> the corpus is managed to have 1 second before the real voice data starts
|
||||
for sample_audio in audio_data_absolute:
|
||||
#shape[count_all] = 0
|
||||
count_all += 1
|
||||
if count < ambiant_basic_size:
|
||||
# Remove the 0 padding
|
||||
if count == 0 and sample_audio == 0:
|
||||
continue
|
||||
elif count == 0:
|
||||
debug.info("start data " + str(count_all) + " samples (at value 0)")
|
||||
count += 1
|
||||
avg += sample_audio
|
||||
elif count == ambiant_basic_size:
|
||||
count += 1
|
||||
avg /= ambiant_basic_size
|
||||
debug.info("basic ambiant is: " + str(avg) + " start annalyse at : " + str(count_all/48000) + " sec")
|
||||
if avg <= 327.67:
|
||||
avg = int(327.67)
|
||||
else:
|
||||
avg = int(avg * 1.1)
|
||||
## set_value = avg
|
||||
debug.info(" inspect at " + str(avg))
|
||||
else:
|
||||
if sample_audio >= avg:
|
||||
shape[count_all-1] = set_value;
|
||||
|
||||
#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_ori.wav")
|
||||
#scipy.io.wavfile.write(filename_out, 48000, audio_data)
|
||||
#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_abs.wav")
|
||||
#scipy.io.wavfile.write(filename_out, 48000, audio_data_absolute)
|
||||
#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_shape1.wav")
|
||||
#scipy.io.wavfile.write(filename_out, 48000, shape)
|
||||
|
||||
windows = int(48000 * 0.02)
|
||||
count_all = 0
|
||||
count = 0
|
||||
for sample_audio in shape:
|
||||
count_all += 1
|
||||
if sample_audio == set_value:
|
||||
count = windows
|
||||
continue
|
||||
count -= 1
|
||||
if count >= 0:
|
||||
shape[count_all-1] = set_value;
|
||||
shape = np.flip(shape)
|
||||
count_all = 0
|
||||
count = 0
|
||||
for sample_audio in shape:
|
||||
count_all += 1
|
||||
if sample_audio == set_value:
|
||||
count = windows
|
||||
continue
|
||||
count -= 1
|
||||
if count >= 0:
|
||||
shape[count_all-1] = set_value;
|
||||
else:
|
||||
shape[count_all-1] = 0
|
||||
shape = np.flip(shape)
|
||||
|
||||
filename_out = os.path.join(args.output, input_data["audio_filename"] + "_shape.wav")
|
||||
scipy.io.wavfile.write(filename_out, 48000, shape)
|
||||
|
||||
count_all = 0
|
||||
table_voice_detected = [[0,False]]
|
||||
previous = False
|
||||
# use numpy slicing: [start:stop:step] https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#basic-slicing-and-indexing
|
||||
for sample_audio in shape[3:len(shape):3]:
|
||||
count_all += 1
|
||||
if previous == False:
|
||||
if sample_audio == set_value:
|
||||
table_voice_detected.append([count_all, True])
|
||||
previous = True
|
||||
else:
|
||||
if sample_audio == 0:
|
||||
table_voice_detected.append([count_all, False])
|
||||
previous = False
|
||||
|
||||
#######################################################
|
||||
## Step 2: Resample
|
||||
#######################################################
|
||||
audio_16k = resampy.resample(audio_data, 48000, 16000, filter='kaiser_best')
|
||||
#audio_16k = resampy.resample(audio_data, 48000, 16000, filter='kaiser_fast')
|
||||
|
||||
debug.info("write: " + str(len(audio_16k)) + " sample(s)")
|
||||
|
||||
filename_out = os.path.join(args.output, input_data["audio_filename"])
|
||||
wavfile.write(filename_out, 16000, audio_16k)
|
||||
input_data["audio_sample_rate"] = 16000
|
||||
scipy.io.wavfile.write(filename_out, 16000, audio_16k)
|
||||
# create new data format:
|
||||
output_data = {
|
||||
"value": input_data["value"],
|
||||
"language": input_data["language"],
|
||||
"audio_sample_rate": 16000,
|
||||
"audio_filename": input_data["audio_filename"],
|
||||
"VAD": table_voice_detected,
|
||||
"action": [
|
||||
{
|
||||
"type": "resampling",
|
||||
"tool": "resampy",
|
||||
"desc": "48000 ==> 16000",
|
||||
"src": elem,
|
||||
},{
|
||||
"type": "auto VAD",
|
||||
"tool": "internal_1",
|
||||
"desc": "When control the input data, with small noise, we can detect voice, just with signal power ...",
|
||||
"src": elem,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
filename_out_json = os.path.join(args.output, os.path.basename(elem))
|
||||
with open(filename_out_json, 'w') as outfile:
|
||||
json.dump(input_data, outfile, indent="\t")
|
||||
json.dump(output_data, outfile, indent="\t")
|
||||
|
||||
|
||||
debug.info("Finish")
|
||||
|
Loading…
x
Reference in New Issue
Block a user