Commit 665a8dac322f0a4232d39c379136a945f4d76081
1 parent
b9a54507e8
Exists in
master
! follow the white rabbit !
Showing 6 changed files with 232 additions and 28 deletions Side-by-side Diff
1 | +#---------------# | |
2 | +# OTMEDIA LIA # | |
3 | +# HOWTO # | |
4 | +# version 1.0 # | |
5 | +#---------------# | |
6 | + | |
7 | +1\ Main options | |
8 | +--------------- | |
9 | + | |
10 | +There are five main options for otmedia scripts. | |
11 | +-h : for help | |
12 | +-D : Debug mode | |
13 | +-v n : Verbose mode 1 low to 3 high | |
14 | +-c : Check results | |
15 | +-r : force to rerun a script, without deleting work already done | |
16 | + | |
17 | +2\ Main scripts | |
18 | +--------------- | |
19 | + 2.1\ FirstPass.sh | |
20 | + ----------------- | |
21 | + | |
22 | + FirstPass.sh does speaker diarization and transcription of an audio file. It first converts the file to wav format (16000Hz, 16 bits, mono) if not already done. | |
23 | + If a .SRT file is present in the same directory as the audio file, it will be copied as well. | |
24 | + | |
25 | + $> FirstPass.sh [options] 110624FR2_20002100.wav result_directory | |
26 | + | |
27 | + Options: | |
28 | + -f n : number of forks for speeral | |
29 | + | |
30 | + Output : result_directory/110624FR2_20002100/res_p1/ | |
31 | + | |
32 | + 2.2\ SecondPass.sh | |
33 | + ------------------ | |
34 | + | |
35 | + SecondPass.sh does speaker adaptation and transcription based on the first pass. | |
36 | + | |
37 | + $> SecondPass.sh [options] result_directory/110624FR2_20002100/ | |
38 | + | |
39 | + Options: | |
40 | + -f n : number of forks for speeral | |
41 | + | |
42 | + Output : result_directory/110624FR2_20002100/res_p2/ | |
43 | + | |
44 | + 2.3\ ConfPass.sh | |
45 | + ---------------- | |
46 | + | |
47 | + ConfPass.sh computes confidence measures using the second or third pass. | |
48 | + | |
49 | + $> ConfPass.sh [options] result_directory/110624FR2_20002100/ <res_p2|res_p3> | |
50 | + | |
51 | + Output : result_directory/110624FR2_20002100/conf/res_p2/scored_ctm/ | |
52 | + and result_directory/110624FR2_20002100.usf file | |
53 | + | |
54 | + 2.4\ ExploitConfidencePass.sh | |
55 | + ----------------------------- | |
56 | + | |
57 | + It exploits the confidence pass measures to : | |
58 | + - boost confident zones | |
59 | + - find alternatives in non-confident zones (using SOLR DB) | |
60 | + - extend the lexicon | |
61 | + | |
62 | + $> ExploitConfidencePass.sh [options] result_directory/110624FR2_20002100 | |
63 | + | |
64 | + Output : result_directory/110624FR2_20002100/trigg/speeral | |
65 | + result_directory/110624FR2_20002100/LEX/speeral/_ext | |
66 | + | |
67 | + 2.5\ ThirdPass.sh | |
68 | + ------------------ | |
69 | + | |
70 | + ThirdPass.sh does transcription using the SecondPass speaker adaptation, the ExploitConfidencePass trigg files and the new lexicon. | |
71 | + | |
72 | + $> ThirdPass.sh [options] result_directory/110624FR2_20002100/ | |
73 | + | |
74 | + Options : | |
75 | + -f n : number of forks for speeral | |
76 | + | |
77 | + Output : result_directory/110624FR2_20002100/conf/res_p3 | |
78 | + | |
79 | + 2.6\ RecomposePass.sh | |
80 | + -------------------- | |
81 | + | |
82 | + RecomposePass.sh copies the results that are missing in ThirdPass from the Second and First Pass. | |
83 | + | |
84 | + $> RecomposePass.sh [options] result_directory/110624FR2_20002100/ | |
85 | + | |
86 | + Output : result_directory/110624FR2_20002100/res_all | |
87 | + | |
88 | + 2.7\ ScoringRes.sh | |
89 | + ------------------ | |
90 | + | |
91 | + ScoringRes.sh runs different scoring tools to score the results, using the SRT file if it exists. | |
92 | + | |
93 | + $> ScoringRes.sh [options] result_directory/110624FR2_20002100/ | |
94 | + | |
95 | + Output : result_directory/110624FR2_20002100/scoring | |
96 | + | |
97 | + 2.8\ CheckResults.sh | |
98 | + -------------------- | |
99 | + | |
100 | + CheckResults.sh parses the result directories to summarize the work already done. | |
101 | + | |
102 | + $> CheckResults.sh [options] result_directory | |
103 | + | |
104 | + Output : "Directory name #plp #res_p1 #treil_p2 #treil_p3 usf_p2 usf_p3" | |
105 | + #plp number of plp files | |
106 | + #res_p1 number of .res files at first pass | |
107 | + #treil_p2 number of .treil files at second pass | |
108 | + #treil_p3 number of .treil files at third pass | |
109 | + usf_p2 usf file from confidence pass result on second pass (OK|ERR|NAN) | |
110 | + usf_p3 usf file from confidence pass result on third pass (OK|ERR|NAN) | |
111 | + | |
112 | +3\ OneScriptToRuleThemAll.sh | |
113 | +---------------------------- | |
114 | + | |
115 | + The script to do all OTMEDIA LIA passes in one call. | |
116 | + | |
117 | + $> OneScriptToRuleThemAll.sh [options] 110624FR2_20002100.wav result_directory | |
118 | + | |
119 | + Options : (default options are available) | |
120 | + -a Do every pass | |
121 | + -1 Do First pass | |
122 | + -2 Do Second pass | |
123 | + -3 Do Third pass | |
124 | + -C Do Confidence pass | |
125 | + -e Do Exploit Confidence pass | |
126 | + -R Do Recompose pass | |
127 | + -s Do Scoring pass |
INSTALL
1 | +#---------------# | |
2 | +# OTMEDIA LIA # | |
3 | +# INSTALL # | |
4 | +# version : 1.0 # | |
5 | +#---------------# | |
6 | + | |
7 | +OTMEDIA LIA ready to use ? Really ? | |
8 | +No ! You have to manually configure some features. | |
9 | +Let's see... | |
10 | + | |
11 | +SUMMARY | |
12 | +------- | |
13 | + | |
14 | +1\ Before installation | |
15 | +2\ install.sh script | |
16 | +3\ SOLR install | |
17 | + | |
18 | + | |
19 | +1\ Before installation | |
20 | +---------------------- | |
21 | + | |
22 | +- Check and install dependencies. | |
23 | +- On a 64-bit architecture, be sure you can run 32-bit programs. | |
24 | +- Have 300 Go of free space. | |
25 | +- Have access to the network and the nyx server. | |
26 | + | |
27 | +2\ install.sh script | |
28 | +-------------------- | |
29 | + | |
30 | +install.sh script will do most of the work. | |
31 | +It will check dependencies and configure pass tools. | |
32 | +By default it will do a complete install (300 Go). | |
33 | + | |
34 | +You can modify its behavior by editing install.sh : | |
35 | + | |
36 | +To disable lexicon adaptation using SOLR DB set EXPLOITCONFPASS to 0 (this accounts for most of the space, about 290 Go). | |
37 | +To disable confidence measure put CONFPASS to 0. | |
38 | +To disable second and third pass put PASS2 to 0. | |
39 | + | |
40 | +run install.sh and follow the white rabbit. | |
41 | + | |
42 | +3\ SOLR install | |
43 | +--------------- | |
44 | + | |
45 | +The install.sh script downloads otmedia-2013-04.tar.gz and untars it in OTMEDIA_HOME/tools/SOLR/ . | |
46 | +See SOLR.INSTALL file to install OTMEDIA SOLR DB. |
README
... | ... | @@ -5,11 +5,11 @@ |
5 | 5 | \___/ |_| |_| |_|_____|____/___/_/ \_\ |_____|___/_/ \_\ |
6 | 6 | |
7 | 7 | |
8 | -#-------------------# | |
9 | -# OTMEDIA LIA # | |
10 | -# README # | |
11 | -# version 1.0 # | |
12 | -#-------------------# | |
8 | +#---------------# | |
9 | +# OTMEDIA LIA # | |
10 | +# README # | |
11 | +# version 1.0 # | |
12 | +#---------------# | |
13 | 13 | |
14 | 14 | DESCRIPTION |
15 | 15 | ----------- |
... | ... | @@ -22,6 +22,13 @@ |
22 | 22 | Web Site : http://www.otmedia.fr |
23 | 23 | |
24 | 24 | OTMEDIA LIA project is a set of tools to transcribe radio and TV shows. |
25 | + It does multiple things : | |
26 | + - First pass : default transcription with speeral and speaker diarization. | |
27 | + - Second pass : speaker adaptation and a second transcription pass with speeral. | |
28 | + - Confidence pass : compute confidence measures from the transcription output. | |
29 | + - Exploit Confidence Measure : use SOLR DB data to extend the lexicon on low confidence measure and create trigg files. | |
30 | + - Third pass : second pass using the new lexicon and trigg files. | |
31 | + | |
25 | 32 | |
26 | 33 | DEPENDENCIES |
27 | 34 | ------------ |
... | ... | @@ -57,7 +64,7 @@ |
57 | 64 | |
58 | 65 | Perl is a programming language. |
59 | 66 | |
60 | -iconvi ( >= 2.0.0) | |
67 | +iconv ( >= 2.0.0) | |
61 | 68 | Available from : http://www.gnu.org |
62 | 69 | and debian package |
63 | 70 | |
64 | 71 | |
65 | 72 | |
... | ... | @@ -89,15 +96,16 @@ |
89 | 96 | |
90 | 97 | Quick install below. |
91 | 98 | |
92 | - Before launch installation : | |
99 | + Before launching installation : | |
93 | 100 | |
94 | 101 | Be certain that all dependencies are satisfied. |
102 | + Have 300 Go of free space for a complete install. | |
95 | 103 | |
96 | 104 | Issue the following commands to the shell : |
97 | 105 | $> ./install.sh |
98 | 106 | $> export OTMEDIA_HOME=path/to/OTMEDIA/directory |
99 | 107 | |
100 | - Read SOLR.INSTALL part 3/ to install SOLRDB. | |
108 | + Read SOLR.INSTALL part 3 to install SOLRDB. | |
101 | 109 | |
102 | 110 | RUNNING |
103 | 111 | ------- |
... | ... | @@ -113,6 +121,7 @@ |
113 | 121 | ---------- |
114 | 122 | |
115 | 123 | Many. |
124 | + For bug reports, please contact Pascal Nocera at pascal.nocera@univ-avignon.fr | |
116 | 125 | |
117 | 126 | COPYRIGHT |
118 | 127 | --------- |
SOLR.INSTALL
install.sh
1 | 1 | #!/bin/bash |
2 | 2 | |
3 | 3 | #-------------------# |
4 | +# OTMEDIA LIA # | |
4 | 5 | # Install script # |
5 | -# OTMEDIA # | |
6 | +# version : 1.0.0 # | |
6 | 7 | #-------------------# |
7 | 8 | |
8 | 9 | # Color variables |
... | ... | @@ -30,7 +31,7 @@ |
30 | 31 | # and 1 to enable |
31 | 32 | # |
32 | 33 | PASS1=1 # First Pass |
33 | -PASS2=1 # Second Pass | |
34 | +PASS2=1 # Second and Third Pass | |
34 | 35 | CONFPASS=1 # Confidence Pass |
35 | 36 | EXPLOITCONFPASS=1 # SOLR query and trigg |
36 | 37 | |
... | ... | @@ -98,6 +99,15 @@ |
98 | 99 | exit 1; |
99 | 100 | fi |
100 | 101 | echo -e "python : \t ${txtgrn}OK${txtrst}" |
102 | + | |
103 | + ## csh shell | |
104 | + test=$(whereis csh) | |
105 | + if [ "$test" == "csh:" ] | |
106 | + then | |
107 | + echo -e "${txtpur}ERROR${txtrst} csh shell not found\n You have to install csh shell\n sudo apt-get install csh" | |
108 | + exit 1; | |
109 | + fi | |
110 | + echo -e "csh shell : \t ${txtgrn}OK${txtrst}" | |
101 | 111 | fi |
102 | 112 | |
103 | 113 | ## Perl |
... | ... | @@ -118,15 +128,6 @@ |
118 | 128 | fi |
119 | 129 | echo -e "iconv : \t ${txtgrn}OK${txtrst}" |
120 | 130 | |
121 | -## csh shell | |
122 | -test=$(whereis csh) | |
123 | -if [ "$test" == "csh:" ] | |
124 | -then | |
125 | - echo -e "${txtpur}ERROR${txtrst} csh shell not found\n You have to install csh shell\n sudo apt-get install csh" | |
126 | - exit 1; | |
127 | -fi | |
128 | -echo -e "csh shell : \t ${txtgrn}OK${txtrst}" | |
129 | - | |
130 | 131 | ## SRI LM |
131 | 132 | if [ -z "$SRILM" ] && [ -z "$MACHINE_TYPE" ] |
132 | 133 | then |
... | ... | @@ -136,8 +137,6 @@ |
136 | 137 | export SRILM_BIN=$SRILM/bin/$MACHINE_TYPE |
137 | 138 | echo -e "SRILM toolkit : \t ${txtgrn}OK${txtrst}" |
138 | 139 | |
139 | - | |
140 | - | |
141 | 140 | ### Speeral Configuration ### |
142 | 141 | |
143 | 142 | echo -e "\n\t${txtblu}Speeral configuration${txtrst}\n" |
... | ... | @@ -190,7 +189,7 @@ |
190 | 189 | if [ "$test" == "" ] |
191 | 190 | then |
192 | 191 | echo -e "${txtpur}ERROR${txtrst} TOMCAT seems to not be installed)\n You have to install TOMCAT\n" |
193 | - exit 1; | |
192 | + #exit 1; | |
194 | 193 | fi |
195 | 194 | echo -e "\nTOMCAT : \t ${txtgrn}OK${txtrst}\n" |
196 | 195 | # SOLR secondly |
... | ... | @@ -234,6 +233,7 @@ |
234 | 233 | cp ~/.bashrc.org ~/.bashrc |
235 | 234 | export OTMEDIA_HOME=$PWD |
236 | 235 | echo "export OTMEDIA_HOME=$PWD" >> ~/.bashrc |
236 | +echo "export $PATH=$PATH:$PWD/main_tools" >> ~/.bashrc | |
237 | 237 | echo "export SRILM_BIN=$SRILM/bin/$MACHINE_TYPE" >> ~/.bashrc |
238 | 238 | #echo "export LIA_TAGG_LANG=french" >> ~/.bashrc |
239 | 239 | #echo "export LIA_TAGG=$OTMEDIA_HOME/tools/lia_ltbox/lia_tagg/" >> ~/.bashrc |
240 | 240 | |
... | ... | @@ -252,7 +252,14 @@ |
252 | 252 | echo -e "do : source ~/.bashrc" |
253 | 253 | echo -e "or set variable :\n" |
254 | 254 | echo "export OTMEDIA_HOME=$PWD" |
255 | +echo "export PATH=$PATH:$OTMEDIA_HOME/main_tools" | |
255 | 256 | echo "export SRILM_BIN=$SRILM/bin/$MACHINE_TYPE" |
257 | + | |
258 | + | |
259 | +echo " \\\\ " | |
260 | +echo " ,-~~~-\\\\_" | |
261 | +echo " ( .\ " | |
262 | +echo " @\___(__--'" | |
256 | 263 | |
257 | 264 | echo "${txtgrn}Yes${txtylw}I${txtpur}Rastafari{$txtrst}" |
main_tools/ExploitConfidencePass.sh
... | ... | @@ -47,7 +47,7 @@ |
47 | 47 | #---------------# |
48 | 48 | # Parse Options # |
49 | 49 | #---------------# |
50 | -while getopts ":hDv:cf:r" opt | |
50 | +while getopts ":hDv:cr" opt | |
51 | 51 | do |
52 | 52 | case $opt in |
53 | 53 | h) |
... | ... | @@ -57,7 +57,6 @@ |
57 | 57 | echo -e "\t\t-D :\tDEBUG mode on" |
58 | 58 | echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode" |
59 | 59 | echo -e "\t\t-c :\tCheck process, stop if error detected" |
60 | - echo -e "\t\t-f n :\tspecify a speeral forks number (default 1)" | |
61 | 60 | echo -e "\t\t-r n :\tforce rerun without deleting files" |
62 | 61 | exit 1 |
63 | 62 | ;; |
... | ... | @@ -69,9 +68,6 @@ |
69 | 68 | ;; |
70 | 69 | c) |
71 | 70 | CHECK=1 |
72 | - ;; | |
73 | - f) | |
74 | - FORKS="--forks $OPTARG" | |
75 | 71 | ;; |
76 | 72 | r) |
77 | 73 | RERUN=1 |