[DEV] encode in 16k personal corpus
This commit is contained in:
parent
d64ac95dd9
commit
d8dcf70f1b
269
debug.py
Normal file
269
debug.py
Normal file
@ -0,0 +1,269 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
##
|
||||||
|
## @author Edouard DUPIN
|
||||||
|
##
|
||||||
|
## @copyright 2012, Edouard DUPIN, all right reserved
|
||||||
|
##
|
||||||
|
## @license MPL v2.0 (see license file)
|
||||||
|
##
|
||||||
|
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Verbosity threshold compared against by every log function (see set_level).
debug_level = 3
# Colour-mode flag; print_compilator() only highlights when it is True.
debug_color = False

# Colour prefixes concatenated into log lines. They start out empty and are
# filled with escape sequences by enable_color().
color_default = color_red = color_green = color_yellow = ""
color_blue = color_purple = color_cyan = ""

# Lock taken around each console print by the log functions of this module.
debug_lock = threading.Lock()
|
||||||
|
|
||||||
|
##
## @brief Select the verbosity threshold of the console log system
## @param[in] id (int) New log level:
##              0: None
##              1: error
##              2: warning
##              3: info
##              4: debug
##              5: verbose
##              6: extreme_verbose
##
def set_level(id):
    # Rebind the module-wide threshold read by every log function.
    global debug_level
    debug_level = id
|
||||||
|
|
||||||
|
##
## @brief Get the current debug level
## @return The value of the log level. Show: @ref set_level
##
def get_level():
    # Read-only access: no 'global' declaration is needed to read a module variable.
    return debug_level
|
||||||
|
|
||||||
|
##
## @brief Turn on coloured output: raise the colour flag and load every
##        colour prefix with its escape sequence
##
def enable_color():
    global debug_color
    global color_default, color_red, color_green, color_yellow
    global color_blue, color_purple, color_cyan
    debug_color = True
    color_default = "\033[00m"
    color_red = "\033[31m"
    color_green = "\033[32m"
    color_yellow = "\033[33m"
    color_blue = "\033[01;34m"
    color_purple = "\033[35m"
    color_cyan = "\033[36m"
|
||||||
|
|
||||||
|
##
## @brief Disable color of the console Log system: clear the colour flag and
##        reset every colour prefix to an empty string
##
def disable_color():
    global debug_color
    global color_default, color_red, color_green, color_yellow
    global color_blue, color_purple, color_cyan
    # The flag must go back to False here (it used to be set to True), otherwise
    # print_compilator() keeps running its highlighting pass after colours are off.
    debug_color = False
    color_default = ""
    color_red = ""
    color_green = ""
    color_yellow = ""
    color_blue = ""
    color_purple = ""
    color_cyan = ""
|
||||||
|
|
||||||
|
##
## @brief Print an extreme verbose log (displayed from log level 6)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the colour prefixes, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def extreme_verbose(input, force=False):
    if debug_level >= 6 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_blue + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a verbose log (displayed from log level 5)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the colour prefixes, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def verbose(input, force=False):
    if debug_level >= 5 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_blue + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a log every time, whatever the current log level
## @param[in] input (string) Value to print; it is concatenated with the colour
##            prefixes, so a non-string raises TypeError
## @param[in] force (bool) Accepted like in the other log functions but not
##            read: the message is always displayed
##
def display(input, force=False):
    # 'with' releases the lock even when building or printing the message
    # raises, so one failed call cannot block every later log.
    with debug_lock:
        print(color_blue + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a debug log (displayed from log level 4)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the colour prefixes, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def debug(input, force=False):
    if debug_level >= 4 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_green + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print an info log (displayed from log level 3)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the default-colour suffix, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def info(input, force=False):
    if debug_level >= 3 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a warning log, prefixed with "[WARNING] " (displayed from log level 2)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the prefix and colours, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def warning(input, force=False):
    if debug_level >= 2 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_purple + "[WARNING] " + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a todo log, prefixed with "[TODO] " (displayed from log level 3)
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the prefix and colours, so a non-string raises TypeError
## @param[in] force (bool) force display (no check of log level)
##
def todo(input, force=False):
    if debug_level >= 3 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_purple + "[TODO] " + input + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print an error log, prefixed with "[ERROR] " (displayed from log level 1),
##        then stop the program when a crash is requested
## @param[in] input (string) Value to print if level is enough; it is concatenated
##            with the prefix and colours, so a non-string raises TypeError
## @param[in] thread_id (int) Current thread ID of the builder thread (not read
##            by this implementation)
## @param[in] force (bool) force display (no check of log level)
## @param[in] crash (bool) When set (default), terminate by raising SystemExit
##            with exit status -1
##
def error(input, thread_id=-1, force=False, crash=True):
    if debug_level >= 1 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_red + "[ERROR] " + input + color_default)
    # NOTE(review): the nesting of this check could not be recovered from the
    # original layout; it is kept at function level so that a crash request
    # stops the program even when the message itself is filtered out.
    if crash:
        # Raise SystemExit directly: the exit() builtin is installed by the
        # 'site' module for interactive use and is not guaranteed to exist.
        raise SystemExit(-1)
|
||||||
|
|
||||||
|
|
||||||
|
##
## @brief Print a log for a specific element action like generating .so or binary ...
##        (displayed from log level 3)
## @param[in] type (string) type of action. Like: "copy file", "StaticLib", "Prebuild", "Library" ...
## @param[in] lib (string) Name of the library/binary/package that action is done
## @param[in] dir (string) build direction. ex: "<==", "==>" ...
## @param[in] name (string) Destination of the data
## @param[in] force (bool) force display (no check of log level)
##
def print_element(type, lib, dir, name, force=False):
    if debug_level >= 3 or force:
        # 'with' releases the lock even when building or printing the message
        # raises, so one failed call cannot block every later log.
        with debug_lock:
            print(color_cyan + type + color_default
                  + " : " + color_yellow + lib + color_default
                  + " " + dir
                  + " " + color_blue + name + color_default)
|
||||||
|
|
||||||
|
##
## @brief Print a compilation return (output), highlighted when colours are enabled
## @param[in] my_string (string) Std-error/std-info that is generate by the build system
##
def print_compilator(my_string):
    if debug_color:
        # Turn the literal two-character sequences "\n" and "\t" into real
        # newline / tabulation characters.
        my_string = my_string.replace('\\n', '\n')
        my_string = my_string.replace('\\t', '\t')
        # Highlight the diagnostic keywords.
        my_string = my_string.replace('error:', color_red+'error:'+color_default)
        my_string = my_string.replace('warning:', color_purple+'warning:'+color_default)
        my_string = my_string.replace('note:', color_green+'note:'+color_default)
        # Highlight "name.ext:" tokens: they are first wrapped in plain-text
        # markers by the regex, then the markers are swapped for colour codes.
        my_string = re.sub(r'([/\w_-]+\.\w+):', r'-COLORIN-\1-COLOROUT-:', my_string)
        my_string = my_string.replace('-COLORIN-', color_yellow)
        my_string = my_string.replace('-COLOROUT-', color_default)

    # 'with' releases the lock even when print() raises (for example on an
    # encoding error or a closed output), so later logs are not blocked.
    with debug_lock:
        print(my_string)
|
||||||
|
|
||||||
|
##
## @brief Snapshot the colour prefixes currently in use
## @return A map with keys: "default","red","green","yellow","blue","purple","cyan"
##         whose values are the module's colour strings at call time
##
def get_color_set() :
    # Only reads the module variables, so no 'global' declaration is required.
    return dict(
        default=color_default,
        red=color_red,
        green=color_green,
        yellow=color_yellow,
        blue=color_blue,
        purple=color_purple,
        cyan=color_cyan,
    )
|
File diff suppressed because one or more lines are too long
102
proprocessCorpus.py
Executable file
102
proprocessCorpus.py
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
##
|
||||||
|
## @author Edouard DUPIN
|
||||||
|
##
|
||||||
|
## @license MPL v2.0 (see license file)
|
||||||
|
##
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tools
|
||||||
|
import debug
|
||||||
|
import argparse
|
||||||
|
import math
|
||||||
|
import json
|
||||||
|
import resampy
|
||||||
|
import numpy as np
|
||||||
|
import scipy.io.wavfile as wavfile
|
||||||
|
|
||||||
|
# ---------------------------------------------
# -- Command line
# ---------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", help="Input directory",
                    default="")
parser.add_argument("-o", "--output", help="Output directory",
                    default="out")
parser.add_argument("-v", "--verbose", help="display all LOGS",
                    default=False,
                    action='store_true')
args = parser.parse_args()

debug.info("***********************************")

# ---------------------------------------------
# -- check input
# ---------------------------------------------
if args.verbose:
    debug.set_level(6)
if not args.input:
    debug.error("must set an input directory")

tools.create_directory(args.output)

debug.info("==================================================================================================")
debug.info("== Preprocess corpus data: " + args.input + " to " + args.output)
debug.info("==================================================================================================")

debug.info("Get list of corpus files:")
audio_corpus_element = tools.get_list_of_file_in_path(args.input, ["*.json"], recursive=True)

debug.info("Corpus count " + str(len(audio_corpus_element)) + " element(s)")

# Metadata fields that must be present with exactly this value, in check order.
expected_fields = (
    ("audio_format", "int16"),
    ("audio_channel", 1),
    ("audio_sample_rate", 48000),
)

for elem_id, elem in enumerate(audio_corpus_element):
    debug.info("---------------------------[ " + str(elem_id) + " / " + str(len(audio_corpus_element)) + " ]---------------------------------------")
    debug.info("Element: " + elem)
    with open(elem) as json_file:
        input_data = json.load(json_file)
    # Example of a metadata file:
    # {
    #     "user": "Edouard DUPIN",
    #     "value": "bonjour",
    #     "language": "FR_fr",
    #     "time": 3088499332851,
    #     "audio_format": "int16",
    #     "audio_channel": 1,
    #     "audio_sample_rate": 48000,
    #     "audio_filename": "FR_fr_Edouard DUPIN_3088499332851.raw"
    # }
    for field, expected in expected_fields:
        if field not in input_data:
            debug.error(" ==> missing field '" + field + "' ...")
        if input_data[field] != expected:
            debug.error(" ==> field '" + field + "' have wrong value: '" + str(input_data[field]) + "' suported: [" + str(expected) + "]")
    if "audio_filename" not in input_data:
        debug.error(" ==> missing field 'audio_filename' ...")

    filename = os.path.join(os.path.dirname(elem), input_data["audio_filename"])
    if filename[-3:] == "wav":
        sample_rate, audio_data = wavfile.read(filename)
    else:
        debug.error("Not supported file type: '" + str(filename[-3:]) + "' suported: [wav]")
    debug.info("Read: " + str(len(audio_data)) + " sample(s)")

    if np.issubdtype(audio_data.dtype, np.integer):
        # Resample in floating point, then round and clamp back to the source
        # integer type. scipy picks the WAV sample format from the array dtype,
        # so this keeps the written file in the same PCM format as the input
        # (the metadata still announces "int16") instead of depending on the
        # dtype resampy happens to return for integer input.
        resampled = resampy.resample(audio_data.astype(np.float32), 48000, 16000)
        limits = np.iinfo(audio_data.dtype)
        audio_16k = np.clip(np.rint(resampled), limits.min, limits.max).astype(audio_data.dtype)
    else:
        audio_16k = resampy.resample(audio_data, 48000, 16000)

    debug.info("write: " + str(len(audio_16k)) + " sample(s)")

    filename_out = os.path.join(args.output, input_data["audio_filename"])
    wavfile.write(filename_out, 16000, audio_16k)
    # The copied metadata must describe the re-encoded audio.
    input_data["audio_sample_rate"] = 16000
    filename_out_json = os.path.join(args.output, os.path.basename(elem))
    with open(filename_out_json, 'w') as outfile:
        json.dump(input_data, outfile, indent="\t")


debug.info("Finish")
|
||||||
|
|
126
tools.py
Normal file
126
tools.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
##
|
||||||
|
## @author Edouard DUPIN
|
||||||
|
##
|
||||||
|
## @copyright 2012, Edouard DUPIN, all right reserved
|
||||||
|
##
|
||||||
|
## @license MPL v2.0 (see license file)
|
||||||
|
##
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import errno
|
||||||
|
import fnmatch
|
||||||
|
import stat
|
||||||
|
|
||||||
|
|
||||||
|
##
## @brief Create a directory (and its missing parents) when nothing exists at this path
## @param[in] path (string) Directory to create. An empty path or a path that
##            already exists (whatever its type) is left untouched.
##
def create_directory(path):
    # An empty path (e.g. the dirname of a bare file name) means "current
    # directory": nothing to create, and os.makedirs("") would raise.
    if not path or os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except OSError as err:
        # Tolerate only the case where the directory was created by someone
        # else in the meantime; every other failure is reported to the caller.
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise
|
||||||
|
|
||||||
|
##
## @brief Create the directory that contains a file
## @param[in] file (string) Path of the file whose parent directory is needed
##
def create_directory_of_file(file):
    path = os.path.dirname(file)
    # A bare file name has an empty dirname: the file lives in the current
    # directory, so there is nothing to create (and os.makedirs("") raises).
    if path:
        create_directory(path)
|
||||||
|
|
||||||
|
##
## @brief Write a text file, creating its parent directory first
## @param[in] path (string) File to write (opened in "w" mode, so existing content is replaced)
## @param[in] data (string) Content to write
## @return (bool) True once the data is written
##
def file_write_data(path, data):
    print(" write file: " + path)
    create_directory_of_file(path)
    # 'with' closes the file even when write() raises.
    with open(path, "w") as out_file:
        out_file.write(data)
    return True
|
||||||
|
|
||||||
|
##
## @brief Delete a regular file or a symbolic link; any other path is ignored
## @param[in] path (string) Element to remove
##
def remove_file(path):
    if os.path.isfile(path) or os.path.islink(path):
        os.remove(path)
|
||||||
|
|
||||||
|
##
## @brief Read the whole content of a file
## @param[in] path (string) File to read
## @param[in] binary (bool) Open the file in binary mode ("rb") instead of text mode ("r")
## @return The file content, or an empty string "" when the path is not a regular file
##
def file_read_data(path, binary=False):
    print("path= " + path)
    if not os.path.isfile(path):
        return ""
    # 'with' closes the file even when read() raises.
    with open(path, "rb" if binary else "r") as in_file:
        return in_file.read()
|
||||||
|
|
||||||
|
##
## @brief Copy the content of one file to another location, creating the
##        destination directory beforehand
## @param[in] src (string) File to copy
## @param[in] dst (string) Destination file name
##
def copy_file(src, dst):
    message = "copy " + src + " ==> " + dst
    print(message)
    # The destination directory must exist before shutil can write into it.
    create_directory_of_file(dst)
    shutil.copyfile(src, dst)
|
||||||
|
|
||||||
|
##
## @brief Copy the files found directly in a directory (sub-directories are not visited)
## @param[in] src (string) Either a directory (all its files are copied) or a
##            "directory/pattern" path whose last component is used as an fnmatch filter
## @param[in] dst (string) Destination directory
##
def copy_anything(src, dst):
    print(" copy anything : '" + str(src) + "'")
    print(" to : '" + str(dst) + "'")
    resolved = os.path.realpath(src)
    if os.path.isdir(resolved):
        tmp_path, tmp_rule = resolved, ""
    else:
        # Not a directory: search in the parent, filtering on the last component.
        tmp_path, tmp_rule = os.path.dirname(resolved), os.path.basename(src)

    for root, _dirnames, filenames in os.walk(tmp_path):
        # Path of the visited directory relative to the start point.
        relative = root[len(tmp_path):].lstrip('/\\')
        if relative != "":
            # First sub-directory reached: only the top level is copied.
            return
        selected = fnmatch.filter(filenames, tmp_rule) if len(tmp_rule) > 0 else filenames
        for file_name in selected:
            copy_file(os.path.join(tmp_path, relative, file_name),
                      os.path.join(dst, relative, file_name))
|
||||||
|
|
||||||
|
|
||||||
|
##
## @brief Get list of all Files in a specific path (filtered with fnmatch patterns)
## @param[in] path (string) Directory to search files in
## @param[in] filter (list of string) fnmatch patterns (ex: ["*.json"]); a file
##            name matching several patterns is listed once per matching pattern
## @param[in] recursive (bool) List file with recursive search
## @param[in] remove_path (string) Prefix to remove from each returned path,
##            together with the one character that follows it (the separator)
## @return (list) Files requested; an empty list when the path is not a directory
##
def get_list_of_file_in_path(path, filter, recursive = False, remove_path=""):
    out = []
    tmp_path = os.path.realpath(path)
    if not os.path.isdir(tmp_path):
        print("[E] path does not exist : '" + str(path) + "'")
        # Stop here: there is nothing to walk (the walk used to fail with a
        # NameError because the search root was never set in this case).
        return out

    for root, _dirnames, filenames in os.walk(tmp_path):
        # Path of the visited directory relative to the start point.
        delta_root = root[len(tmp_path):].lstrip('/\\')
        if not recursive and delta_root != "":
            # First sub-directory reached: the top level is complete.
            return out
        matches = []
        for pattern in filter:
            matches.extend(fnmatch.filter(filenames, pattern))
        for file_name in matches:
            add_file = os.path.join(tmp_path, delta_root, file_name)
            if len(remove_path) != 0:
                if add_file[:len(remove_path)] != remove_path:
                    # The prefix does not match: report it and keep the full path.
                    print("[E] Request remove start of a path that is not the same: '" + add_file[:len(remove_path)] + "' demand remove of '" + str(remove_path) + "'")
                else:
                    add_file = add_file[len(remove_path)+1:]
            out.append(add_file)
    return out
|
Loading…
x
Reference in New Issue
Block a user