Scripts/utils/nnet-cpu/update_learning_rates.pl
#!/usr/bin/perl -w
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This script takes three command-line arguments.
# The first is a log file such as exp/tri4b_nnet/log/combine.10.log,
# which is the output of nnet-combine.  The second is a model file such as
# exp/tri4b_nnet/11.tmp.mdl, whose learning rates we will update; the third
# is the output nnet file, e.g. exp/tri4b_nnet/11.mdl.

# This script assumes that the "combine" step is called as:
#  nnet-combine <old-model> <new-model-1> <new-model-2> ... <new-model-n> <validation-examples> <output-model>
# It gets from the logging output a line like this:
#  LOG (nnet-combine:CombineNnets():combine-nnet.cc:184) Combining nnets, validation objf per frame changed from -1.43424 to -1.42067, scale factors are  [ 0.727508 0.79889 0.299533 0.137696 -0.0479123 0.210445 0.0195638 0.123843 0.167453 0.0193894 -0.0128672 0.178384 0.0516549 0.0958205 0.125495 ]
# [in this case the first 3 numbers correspond to the <old-model>], and for each
# updatable layer it works out the total weight on the new models.
# It interprets this as being (for each layer) a step length along the path
# old-model -> new-model.
# Basically, we change the learning rate by a factor equal to this step length,
# subject to limits on the change [by default we limit it to halving/doubling].
# It's fairly obvious why we would want to do this.
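# As a worked illustration of the rule above (using the example scale factors
# quoted in the log line): there are 15 scale factors and 5 source nnets, so
# there are 15 / 5 = 3 updatable layers.  The factors are ordered source-major,
# so source 0 (the <old-model>) owns entries 0..2, source 1 owns entries 3..5,
# and so on.  Excluding source 0, the step length for layer 0 is the sum of the
# layer-0 entries of the remaining sources:
#   0.137696 + 0.0195638 + 0.0193894 + 0.0516549 ~= 0.228
# which is below the default minimum factor of 0.5, so the factor is clamped to
# 0.5 and that layer's learning rate would be halved (subject to the
# $min_learning_rate floor).  This is exactly what the loop over layers further
# down computes.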
# These options can be useful if we want to splice the input
# features across time.

$sources_to_exclude = 1;  # may make this configurable later.
$min_learning_rate_factor = 0.5;
$max_learning_rate_factor = 2.0;
$min_learning_rate = 0.0001;  # Put a floor on the learning rate, because if it
                              # gets too small the changes become zero due to roundoff.

if (@ARGV > 0) {
  for ($x = 1; $x < 10; $x++) {
    if ($ARGV[0] eq "--min-learning-rate-factor") {
      $min_learning_rate_factor = $ARGV[1];
      shift; shift;
    }
    if ($ARGV[0] eq "--max-learning-rate-factor") {
      $max_learning_rate_factor = $ARGV[1];
      shift; shift;
    }
    if ($ARGV[0] eq "--min-learning-rate") {
      $min_learning_rate = $ARGV[1];
      shift; shift;
    }
  }
}

if (@ARGV != 3) {
  print STDERR "Usage: update_learning_rates.pl [options] <log-file-for-nnet-combine> <nnet-in> <nnet-out>
Options:
  --min-learning-rate-factor   # minimum factor to change learning rate by (default: 0.5)
  --max-learning-rate-factor   # maximum factor to change learning rate by (default: 2.0)
  --min-learning-rate          # floor on the updated learning rates (default: 0.0001)
";
  exit(1);
}

($combine_log, $nnet_in, $nnet_out) = @ARGV;

open(L, "<$combine_log") || die "Opening log file \"$combine_log\"";

while (<L>) {
  if (m/Objective functions for the source neural nets are\s+\[(.+)\]/) {
    ## a line like:
    ## LOG (nnet-combine:GetInitialScaleParams():combine-nnet.cc:66) Objective functions for the source neural nets are  [ -1.37002 -1.52115 -1.52103 -1.50189 -1.51912 ]
    @A = split(" ", $1);
    $num_sources = @A;  # number of source neural nets (dimension of @A); 5 in this case.
  }
  ## a line like:
  ## LOG (nnet-combine:CombineNnets():combine-nnet.cc:184) Combining nnets, validation objf per frame changed from -1.37002 to -1.36574, scale factors are  [ 0.819379 0.696122 0.458798 0.040513 -0.0448875 0.171431 0.0274615 0.139143 0.133846 0.0372585 0.114193 0.17944 0.0491838 0.0668778 0.0328936 ]
  if (m/Combining nnets.+scale factors are\s+\[(.+)\]/) {
    @scale_factors = split(" ", $1);
  }
}
if (!defined $num_sources) {
  die "Log file $combine_log did not have expected format: no line with \"Objective functions\"\n";
}
if (!@scale_factors) {
  die "Log file $combine_log did not have expected format: no line with \"Combining nnets\"\n";
}
$num_scales = @scale_factors;  # length of the array.
if ($num_scales % $num_sources != 0) {
  die "Error interpreting log file $combine_log: $num_sources does not divide $num_scales\n";
}
close(L);

open(P, "nnet-am-info $nnet_in |") || die "Opening pipe from nnet-am-info";
@learning_rates = ();
while (<P>) {
  if (m/learning rate = ([^,]+),/) {
    push @learning_rates, $1;
  }
}
close(P);

$num_layers = $num_scales / $num_sources;
$num_info_learning_rates = @learning_rates;
if ($num_layers != $num_info_learning_rates) {
  die "From the log file we expect there to be $num_layers updatable components, but the output of nnet-am-info showed $num_info_learning_rates\n";
}

for ($layer = 0; $layer < $num_layers; $layer++) {
  # Get the sum of the weights for this layer from all the non-excluded sources.
  $sum = 0.0;
  for ($source = $sources_to_exclude; $source < $num_sources; $source++) {
    $index = ($source * $num_layers) + $layer;
    $sum += $scale_factors[$index];
  }
  $learning_rate_factor = $sum;
  if ($learning_rate_factor > $max_learning_rate_factor) {
    $learning_rate_factor = $max_learning_rate_factor;
  }
  if ($learning_rate_factor < $min_learning_rate_factor) {
    $learning_rate_factor = $min_learning_rate_factor;
  }
  $old_learning_rate = $learning_rates[$layer];
  $new_learning_rate = $old_learning_rate * $learning_rate_factor;
  if ($new_learning_rate < $min_learning_rate) {
    $new_learning_rate = $min_learning_rate;
  }
  print STDERR "For layer $layer, sum of weights of non-excluded sources is $sum, learning-rate factor is $learning_rate_factor\n";
  $learning_rates[$layer] = $new_learning_rate;
}

$lrates_string = join(":", @learning_rates);

$ret = system("nnet-am-copy --learning-rates=$lrates_string $nnet_in $nnet_out");

exit($ret != 0);
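# Illustrative invocation (file names taken from the comments at the top of
# this file; the learning-rate values shown in the resulting command are
# made-up examples):
#
#   Scripts/utils/nnet-cpu/update_learning_rates.pl exp/tri4b_nnet/log/combine.10.log \
#     exp/tri4b_nnet/11.tmp.mdl exp/tri4b_nnet/11.mdl
#
# For a model with three updatable layers, this would end up running something like:
#
#   nnet-am-copy --learning-rates=0.01:0.02:0.005 exp/tri4b_nnet/11.tmp.mdl exp/tri4b_nnet/11.mdl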