[DEV] update normalisation preprocess

2019-04-01 22:21:19 +02:00 · 2019-04-01 22:21:19 +02:00 · 4b3c96595e
commit 4b3c96595e
parent 93912b54a3
1 changed files with 125 additions and 6 deletions
--- a/normalizer_corpus/audio-reco-corpus/preprocessCorpus.py
+++ b/normalizer_corpus/audio-reco-corpus/preprocessCorpus.py
@ -14,7 +14,10 @@ import math
 import json
 import resampy
 import numpy as np
-import scipy.io.wavfile as wavfile
+import scipy.io.wavfile
 import scipy.signal
 parser = argparse.ArgumentParser()
 parser.add_argument("-i", "--input", help="Input directory",
@ -81,21 +84,137 @@ for elem in audio_corpus_element:
 		debug.error(" ==> missing field 'audio_filename' ...")
 	filename = os.path.join(os.path.dirname(elem), input_data["audio_filename"])
 	if filename[-3:] == "wav":
-		sample_rate, audio_data = wavfile.read(filename)
+		sample_rate, audio_data = scipy.io.wavfile.read(filename)
 	else:
 		debug.error("Not supported file type: '" + str(filename[-3:]) + "' suported: [wav]")
 	debug.info("Read: " + str(len(audio_data)) + " sample(s)")
-	audio_16k = resampy.resample(audio_data, 48000, 16000)
+	
 	#######################################################
 	## Step 1: Generate speech areas (work on the input, because I am sure that this data is not modified)
 	##
 	## Outline:
 	##   1. estimate the ambient noise level from the first non-zero samples,
 	##   2. mark every later sample at or above the threshold with 'set_value',
 	##   3. widen each marked area by 'windows' samples forward, then backward,
 	##   4. record the speech / non-speech transitions in 'table_voice_detected'.
 	## NOTE(review): the constant 48000 is used everywhere below instead of the
 	## 'sample_rate' read from the file -- presumably the corpus is always 48 kHz; confirm.
 	#######################################################
 	audio_data_absolute = np.absolute(audio_data)
 	# 'shape' starts as a copy of the rectified signal and is progressively turned into a mask
 	shape = np.copy(audio_data_absolute)
 	# average of the ambient noise (becomes the detection threshold once computed)
 	# NOTE(review): the accumulator type depends on the dtype of the WAV data -- confirm it cannot overflow
 	avg = 0
 	# count is the number of samples accumulated so far to calibrate the ambient noise
 	count = 0
 	# count_all is the 1-based position of the current sample in the input
 	count_all = 0
 	# marker written in 'shape' where voice is detected
 	set_value = 32767
 	# number of samples used for the ambient noise calculation
 	ambiant_basic_size = 48000/4 # 1/4 s ==> the corpus is expected to have 1 second of silence before the real voice data starts
 	for sample_audio in audio_data_absolute:
 		#shape[count_all] = 0
 		count_all += 1
 		if count < ambiant_basic_size:
 			# Calibration phase: skip the leading 0 padding, then accumulate the samples
 			if count == 0 and sample_audio == 0:
 				continue
 			elif count == 0:
 				debug.info("start data " + str(count_all) + " samples (at value 0)")
 			count += 1
 			avg += sample_audio
 		elif count == ambiant_basic_size:
 			# Calibration done: this branch runs once and turns the sum into a threshold.
 			# The sample seen here is used neither for the average nor for the detection.
 			count += 1
 			avg /= ambiant_basic_size
 			debug.info("basic ambiant is: " + str(avg) + " start annalyse at : " + str(count_all/48000) + " sec")
 			# 327.67 is 1% of 32767: use it as the minimal threshold,
 			# otherwise take the measured average plus a 10% margin
 			if avg <= 327.67:
 				avg = int(327.67)
 			else:
 				avg = int(avg * 1.1)
 			## set_value = avg
 			debug.info("    inspect at " + str(avg))
 		else:
 			# Detection phase: mark every sample at or above the threshold
 			if sample_audio >= avg:
 				shape[count_all-1] = set_value;
 	#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_ori.wav")
 	#scipy.io.wavfile.write(filename_out, 48000, audio_data)
 	#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_abs.wav")
 	#scipy.io.wavfile.write(filename_out, 48000, audio_data_absolute)
 	#filename_out = os.path.join(args.output, input_data["audio_filename"] + "_shape1.wav")
 	#scipy.io.wavfile.write(filename_out, 48000, shape)
 	# hold time applied around each marked sample: 0.02 s worth of samples at 48000 Hz
 	windows = int(48000 * 0.02)
 	# Forward pass: the 'windows' samples following a marked sample are marked too.
 	# Only the element that was just read is rewritten, so the iteration is not disturbed.
 	# Samples outside of any window keep their value here; they are cleared in the next pass.
 	count_all = 0
 	count = 0
 	for sample_audio in shape:
 		count_all += 1
 		if sample_audio == set_value:
 			count = windows
 			continue
 		count -= 1
 		if count >= 0:
 			shape[count_all-1] = set_value;
 	# Backward pass: same widening on the reversed array, and every sample that is
 	# still not marked is forced to 0, so 'shape' only contains 0 or 'set_value' afterwards
 	shape = np.flip(shape)
 	count_all = 0
 	count = 0
 	for sample_audio in shape:
 		count_all += 1
 		if sample_audio == set_value:
 			count = windows
 			continue
 		count -= 1
 		if count >= 0:
 			shape[count_all-1] = set_value;
 		else:
 			shape[count_all-1] = 0
 	# restore the original time order
 	shape = np.flip(shape)
 	# export the mask next to the output audio (written as 48000 Hz)
 	filename_out = os.path.join(args.output, input_data["audio_filename"] + "_shape.wav")
 	scipy.io.wavfile.write(filename_out, 48000, shape)
 	# Build the transition table: a list of [position, voice_detected] entries.
 	# Only one sample out of 3 is inspected and 'count_all' counts the inspected samples,
 	# so the positions are expressed in 48000/3 = 16000 Hz samples.
 	count_all = 0
 	table_voice_detected = [[0,False]]
 	previous = False
 	# use numpy slicing: [start:stop:step] https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#basic-slicing-and-indexing
 	for sample_audio in shape[3:len(shape):3]:
 		count_all += 1
 		if previous == False:
 			# silence ==> voice transition
 			if sample_audio == set_value:
 				table_voice_detected.append([count_all, True])
 				previous = True
 		else:
 			# voice ==> silence transition
 			if sample_audio == 0:
 				table_voice_detected.append([count_all, False])
 				previous = False
 	#######################################################
 	## Step 2: Resample
 	#######################################################
 	audio_16k = resampy.resample(audio_data, 48000, 16000, filter='kaiser_best')
 	#audio_16k = resampy.resample(audio_data, 48000, 16000, filter='kaiser_fast')
 	debug.info("write: " + str(len(audio_16k)) + " sample(s)")
 	filename_out = os.path.join(args.output, input_data["audio_filename"])
-	wavfile.write(filename_out, 16000, audio_16k)
+	scipy.io.wavfile.write(filename_out, 16000, audio_16k)
-	input_data["audio_sample_rate"] = 16000
+	# create new data format:
 	output_data = {
 		"value": input_data["value"],
 		"language": input_data["language"],
 		"audio_sample_rate": 16000,
 		"audio_filename": input_data["audio_filename"],
 		"VAD": table_voice_detected,
 		"action": [
 			{
 				"type": "resampling",
 				"tool": "resampy",
 				"desc": "48000 ==> 16000",
 				"src": elem,
 			},{
 				"type": "auto VAD",
 				"tool": "internal_1",
 				"desc": "When control the input data, with small noise, we can detect voice, just with signal power ...",
 				"src": elem,
 			},
 		],
 	}
 	filename_out_json = os.path.join(args.output, os.path.basename(elem))
 	with open(filename_out_json, 'w') as outfile:
-		json.dump(input_data, outfile, indent="\t")
+		json.dump(output_data, outfile, indent="\t")
 debug.info("Finish")