  #!/usr/bin/env perl
  use warnings; #sed replacement for -w perl parameter
  # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS*, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABILITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  
  # This script takes three command-line arguments.
  # The first is a log-file such as exp/tri4b_nnet/log/combine.10.log,
  # which is the output of nnet-combine.  The second is a file such
  # as exp/tri4b_nnet/11.tmp.mdl, i.e. a model file, for which we will
  # update the learning rates; the third is the output nnet file e.g.
  # exp/tri4b_nnet/11.mdl
  
  # This script assumes that the nnet-combine program was invoked as:
  # nnet-combine <old-model> <new-model-1> <new-model-2> ... <new-model-n> <validation-examples> <output-model>.
  # It gets from the logging output a line like this:
  # LOG (nnet-combine:CombineNnets():combine-nnet.cc:184) Combining nnets, validation objf per frame changed from -1.43424 to -1.42067, scale factors are  [ 0.727508 0.79889 0.299533 0.137696 -0.0479123 0.210445 0.0195638 0.123843 0.167453 0.0193894 -0.0128672 0.178384 0.0516549 0.0958205 0.125495 ]
  # [in this case the 1st 3 numbers correspond to the <old-model> ] and for each
  # updatable layer, it works out the total weight on the new models.
  # It interprets this as being (for each layer) a step length along
  # the path old-model -> new-model.
  # Basically, we change each layer's learning rate by a factor equal to this
  # step length, subject to limits on the change [by default, limited to
  # halving or doubling].  The motivation is that if the combination keeps
  # putting a lot of weight on the new models for a particular layer, that
  # layer's learning rate can safely be increased, and conversely.
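  #
  # For example (the option value and iteration numbers here are only
  # illustrative), a training script might invoke this as:
  #   utils/nnet-cpu/update_learning_rates.pl --max-learning-rate-factor 1.5 \
  #     exp/tri4b_nnet/log/combine.10.log exp/tri4b_nnet/11.tmp.mdl exp/tri4b_nnet/11.mdl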
  
  # Default configuration values; the learning-rate-factor limits and the
  # minimum learning rate can be overridden from the command line.
  $sources_to_exclude = 1; # number of leading sources (the <old-model>) to
    # exclude from the weight sum; may make this configurable later.
  $min_learning_rate_factor = 0.5;
  $max_learning_rate_factor = 2.0;
  $min_learning_rate = 0.0001; # Put a floor because if too small,
    # the changes become zero due to roundoff.
  
  if (@ARGV > 0) {
    for ($x = 1; $x < 10; $x++) {
      last if (@ARGV == 0 || $ARGV[0] !~ m/^--/); # stop once all options are consumed.
      if ($ARGV[0] eq "--min-learning-rate-factor") {
        $min_learning_rate_factor = $ARGV[1];
        shift; shift;
      }
      if ($ARGV[0] eq "--max-learning-rate-factor") {
        $max_learning_rate_factor = $ARGV[1];
        shift; shift;
      }
      if ($ARGV[0] eq "--min-learning-rate") {
        $min_learning_rate = $ARGV[1];
        shift; shift;
      }
    }
  }
  
  
  if (@ARGV != 3) {
    print STDERR "Usage: update_learning_rates.pl [options] <log-file-for-nnet-combine> <nnet-in> <nnet-out>
  Options:
     --min-learning-rate-factor       #  minimum factor to change learning rate by (default: 0.5)
     --max-learning-rate-factor       #  maximum factor to change learning rate by (default: 2.0)
  ";
     exit(1);
  }
  
  ($combine_log, $nnet_in, $nnet_out) = @ARGV;
  
  open(L, "<$combine_log") || die "Opening log file \"$combine_log\"";
  
  
  while(<L>) {
    if (m/Objective functions for the source neural nets are\s+\[(.+)\]/) {
      ## a line like:
      ##  LOG (nnet-combine:GetInitialScaleParams():combine-nnet.cc:66) Objective functions for the source neural nets are  [ -1.37002 -1.52115 -1.52103 -1.50189 -1.51912 ]
      @A = split(" ", $1);
      $num_sources = @A; # number of source neural nets (dimension of @A); 5 in this case.
    }
    ## a line like:
    ## LOG (nnet-combine:CombineNnets():combine-nnet.cc:184) Combining nnets, validation objf per frame changed from -1.37002 to -1.36574, scale factors are  [ 0.819379 0.696122 0.458798 0.040513 -0.0448875 0.171431 0.0274615 0.139143 0.133846 0.0372585 0.114193 0.17944 0.0491838 0.0668778 0.0328936 ]
    if (m/Combining nnets.+scale factors are\s+\[(.+)\]/) {
      @scale_factors = split(" ", $1);
    }
  }
  
  if (!defined $num_sources) {
    die "Log file $combine_log did not have expected format: no line with \"Objective functions\"
  ";
  }
  if (@scale_factors == 0) {
    die "Log file $combine_log did not have expected format: no line with \"Combining nnets\"
  ";
  }
  
  
  $num_scales = @scale_factors; # length of the array.
  if ($num_scales % $num_sources != 0) {
    die "Error interpreting log file $combine_log: $num_sources does not divide $num_scales
  ";
  }
  close(L);
  
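  # Read off the current learning rate of each updatable component by parsing
  # the "learning rate = ..." fields printed by nnet-am-info.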
  open(P, "nnet-am-info $nnet_in |") || die "Opening pipe from nnet-am-info";
  @learning_rates = ();
  while(<P>) {
    if (m/learning rate = ([^,]+),/) {
      push @learning_rates, $1;
    }
  }
  close(P);
  
  $num_layers = $num_scales / $num_sources;
  
  $num_info_learning_rates = @learning_rates;
  if ($num_layers != $num_info_learning_rates) {
    die "From log file we expect there to be $num_layers updatable components, but from the output of nnet-am-info we saw $num_info_learning_rates";
  }
  
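  # For each updatable layer, sum the combination weights that were put on the
  # new models (sources $sources_to_exclude .. num_sources-1).  The scale
  # factors are laid out source by source: e.g. in the example log lines above
  # (5 sources, 15 scale factors, hence 3 layers), layer 0 would use
  # scale_factors[3], [6], [9] and [12], since scale_factors[0..2] belong to
  # the <old-model>.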
  for ($layer = 0; $layer < $num_layers; $layer++) {
    # getting the sum of the weights for this layer from all the non-excluded sources.
    $sum = 0.0;
    for ($source = $sources_to_exclude; $source < $num_sources; $source++) {
      $index = ($source * $num_layers) + $layer;
      $sum += $scale_factors[$index];
    }
    $learning_rate_factor = $sum;
    if ($learning_rate_factor > $max_learning_rate_factor) { $learning_rate_factor = $max_learning_rate_factor; }
    if ($learning_rate_factor < $min_learning_rate_factor) { $learning_rate_factor = $min_learning_rate_factor; }
    $old_learning_rate = $learning_rates[$layer];
    $new_learning_rate = $old_learning_rate * $learning_rate_factor;
    if ($new_learning_rate < $min_learning_rate) { $new_learning_rate = $min_learning_rate; }
    print STDERR "For layer $layer, sum of weights of non-excluded sources is $sum, learning-rate factor is $learning_rate_factor
  ";
    $learning_rates[$layer] = $new_learning_rate;
  }
  
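  # Write the output model, installing the updated per-layer learning rates as
  # a colon-separated list via nnet-am-copy's --learning-rates option.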
  $lrates_string = join(":", @learning_rates);
  
  $ret = system("nnet-am-copy --learning-rates=$lrates_string $nnet_in $nnet_out");
  
  exit($ret != 0);