Yannick Estève / ONTRAC-Kaldi

Blame view

src/lm/mikolov-rnnlm-lib.cc 33.9 KB
  // lm/mikolov-rnnlm-lib.cc
  
  // Copyright       2015  Guoguo Chen  Hainan Xu
  //            2010-2012  Tomas Mikolov
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // This file is based on version 0.3e of the RNNLM language modeling
  // toolkit by Tomas Mikolov.  Changes made by authors other than
  // Tomas Mikolov are licensed under the Apache License, the short form
  // os which is below.  The original code by Tomas Mikolov is licensed
  // under the BSD 3-clause license, whose text is further below.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  //
  //
  // Original BSD 3-clause license text:
  // Copyright (c) 2010-2012 Tomas Mikolov
  //
  // All rights reserved. Redistribution and use in source and binary forms, with
  // or without modification, are permitted provided that the following conditions
  // are met: 1. Redistributions of source code must retain the above copyright
  // notice, this list of conditions and the following
  // disclaimer. 2. Redistributions in binary form must reproduce the above
  // copyright notice, this list of conditions and the following disclaimer in the
  // documentation and/or other materials provided with the
  // distribution. 3. Neither name of copyright holders nor the names of its
  // contributors may be used to endorse or promote products derived from this
  // software without specific prior written permission. THIS SOFTWARE IS PROVIDED
  // BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
  // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  // EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  // OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <math.h>
  #include "lm/mikolov-rnnlm-lib.h"
  #include "util/table-types.h"
  
  namespace rnnlm {
  
  ///// fast exp() implementation
  static union {
    double d;
    struct {
      int j, i;
    } n;
  } d2i;
  #define EXP_A (1048576 / M_LN2)
  #define EXP_C 60801
  #define FAST_EXP(y) (d2i.n.i = EXP_A * (y) + (1072693248 - EXP_C), d2i.d)
  
  CRnnLM::CRnnLM() {
    version = 10;
    filetype = TEXT;
  
    use_lmprob = 0;
    gradient_cutoff = 15;
    dynamic = 0;
  
    train_file[0] = 0;
    valid_file[0] = 0;
    test_file[0] = 0;
    rnnlm_file[0] = 0;
  
    alpha_set = 0;
    train_file_set = 0;
  
    alpha = 0.1;
    beta = 0.0000001;
    // beta = 0.00000;
    alpha_divide = 0;
    logp = 0;
    llogp = -100000000;
    iter = 0;
  
    min_improvement = 1.003;
  
    train_words = 0;
    vocab_max_size = 100;
    vocab_size = 0;
    vocab = (struct vocab_word *)calloc(vocab_max_size,
                                        sizeof(struct vocab_word));
  
    layer1_size = 30;
  
    direct_size = 0;
    direct_order = 0;
  
    bptt = 0;
    bptt_block = 10;
    bptt_history = NULL;
    bptt_hidden = NULL;
    bptt_syn0 = NULL;
  
    gen = 0;
  
    independent = 0;
  
    neu0 = NULL;
    neu1 = NULL;
    neuc = NULL;
    neu2 = NULL;
  
    syn0 = NULL;
    syn1 = NULL;
    sync = NULL;
    syn_d = NULL;
    syn_db = NULL;
    // backup
    neu0b = NULL;
    neu1b = NULL;
    neucb = NULL;
    neu2b = NULL;
  
    neu1b2 = NULL;
  
    syn0b = NULL;
    syn1b = NULL;
    syncb = NULL;
  
    rand_seed = 1;
  
    class_size = 100;
    old_classes = 0;
  
    srand(rand_seed);
  
    vocab_hash_size = 100000000;
    vocab_hash  =  reinterpret_cast<int *>(calloc(vocab_hash_size, sizeof(int)));
  }
  
  CRnnLM::~CRnnLM() {
    int i;
  
    if (neu0 != NULL) {
      free(neu0);
      free(neu1);
      if (neuc != NULL) free(neuc);
      free(neu2);
  
      free(syn0);
      free(syn1);
      if (sync != NULL) free(sync);
  
      if (syn_d != NULL) free(syn_d);
  
      if (syn_db != NULL) free(syn_db);
  
      free(neu0b);
      free(neu1b);
      if (neucb != NULL) free(neucb);
      free(neu2b);
  
      free(neu1b2);
  
      free(syn0b);
      free(syn1b);
      if (syncb != NULL) free(syncb);
  
      for (i = 0; i < class_size; i++) {
        free(class_words[i]);
      }
      free(class_max_cn);
      free(class_cn);
      free(class_words);
  
      free(vocab);
      free(vocab_hash);
  
      if (bptt_history != NULL) free(bptt_history);
      if (bptt_hidden != NULL) free(bptt_hidden);
      if (bptt_syn0 != NULL) free(bptt_syn0);
  
      // todo: free bptt variables too
    }
  }
  
  real CRnnLM::random(real min, real max) {
    return rand() / (real)RAND_MAX * (max - min) + min;
  }
  
  void CRnnLM::setRnnLMFile(const std::string &str) {
    strcpy(rnnlm_file, str.c_str());
  }
  
  void CRnnLM::setRandSeed(int newSeed) {
    rand_seed = newSeed;
    srand(rand_seed);
  }
  
  void CRnnLM::readWord(char *word, FILE *fin) {
    int a = 0, ch;
  
    while (!feof(fin)) {
      ch = fgetc(fin);
  
      if (ch == 13) continue;
  
      if ((ch == ' ') || (ch == '\t') || (ch == '
  ')) {
        if (a > 0) {
          if (ch == '
  ') ungetc(ch, fin);
          break;
        }
  
        if (ch == '
  ') {
          strcpy(word, const_cast<char *>("</s>"));
          return;
        } else {
          continue;
        }
      }
  
      word[a] = ch;
      a++;
  
      if (a >= MAX_STRING) {
        // printf("Too long word found!
  ");   //truncate too long words
        a--;
      }
    }
    word[a] = 0;
  }
  
  int CRnnLM::getWordHash(const char *word) {
    unsigned int hash, a;
  
    hash = 0;
    for (a = 0; a < strlen(word); a++) {
      hash = hash * 237 + word[a];
    }
    hash = hash % vocab_hash_size;
  
    return hash;
  }
  
  int CRnnLM::searchVocab(const char *word) {
    int a;
    unsigned int hash;
  
    hash = getWordHash(word);
  
    if (vocab_hash[hash] == -1) return -1;
    if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
  
    for (a = 0; a < vocab_size; a++) {        // search in vocabulary
      if (!strcmp(word, vocab[a].word)) {
        vocab_hash[hash] = a;
        return a;
      }
    }
  
    return -1;              // return OOV if not found
  }
  
  void CRnnLM::sortVocab() {
    int a, b, max;
    vocab_word swap;
  
    for (a = 1; a < vocab_size; a++) {
      max = a;
      for (b = a + 1; b < vocab_size; b++) {
        if (vocab[max].cn < vocab[b].cn) max = b;
      }
  
      swap = vocab[max];
      vocab[max] = vocab[a];
      vocab[a] = swap;
    }
  }
  
  void CRnnLM::saveWeights() {      // saves current weights and unit activations
    int a, b;
  
    for (a = 0; a < layer0_size; a++) {
      neu0b[a].ac = neu0[a].ac;
      neu0b[a].er = neu0[a].er;
    }
  
    for (a = 0; a < layer1_size; a++) {
      neu1b[a].ac = neu1[a].ac;
      neu1b[a].er = neu1[a].er;
    }
  
    for (a = 0; a < layerc_size; a++) {
      neucb[a].ac = neuc[a].ac;
      neucb[a].er = neuc[a].er;
    }
  
    for (a = 0; a < layer2_size; a++) {
      neu2b[a].ac = neu2[a].ac;
      neu2b[a].er = neu2[a].er;
    }
  
    for (b = 0; b < layer1_size; b++) {
      for (a = 0; a < layer0_size; a++) {
        syn0b[a + b * layer0_size].weight = syn0[a + b * layer0_size].weight;
      }
    }
  
    if (layerc_size > 0) {
      for (b = 0; b < layerc_size; b++) {
        for (a = 0; a < layer1_size; a++) {
          syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight;
        }
      }
  
      for (b = 0; b < layer2_size; b++) {
        for (a = 0; a < layerc_size; a++) {
          syncb[a + b * layerc_size].weight = sync[a + b * layerc_size].weight;
        }
      }
    } else {
      for (b = 0; b < layer2_size; b++) {
        for (a = 0; a < layer1_size; a++) {
          syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight;
        }
      }
    }
  
    // for (a = 0; a < direct_size; a++) syn_db[a].weight = syn_d[a].weight;
  }
  
  void CRnnLM::initNet() {
    int a, b, cl;
  
    layer0_size = vocab_size + layer1_size;
    layer2_size = vocab_size + class_size;
  
    neu0 = (struct neuron *)calloc(layer0_size, sizeof(struct neuron));
    neu1 = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
    neuc = (struct neuron *)calloc(layerc_size, sizeof(struct neuron));
    neu2 = (struct neuron *)calloc(layer2_size, sizeof(struct neuron));
  
    syn0 = (struct synapse *)calloc(layer0_size * layer1_size,
                                    sizeof(struct synapse));
    if (layerc_size == 0) {
      syn1 = (struct synapse *)calloc(layer1_size * layer2_size,
                                      sizeof(struct synapse));
    } else {
      syn1 = (struct synapse *)calloc(layer1_size * layerc_size,
                                      sizeof(struct synapse));
      sync = (struct synapse *)calloc(layerc_size * layer2_size,
                                      sizeof(struct synapse));
    }
  
    if (syn1 == NULL) {
      printf("Memory allocation failed
  ");
      exit(1);
    }
  
    if (layerc_size > 0)
      if (sync == NULL) {
        printf("Memory allocation failed
  ");
        exit(1);
      }
  
    syn_d =
      reinterpret_cast<direct_t *>(calloc(static_cast<long long>(direct_size),
                                           sizeof(direct_t)));
  
    if (syn_d == NULL) {
      printf("Memory allocation for direct"
       " connections failed (requested %lld bytes)
  ",
       static_cast<long long>(direct_size) * static_cast<long long>(sizeof(direct_t)));
      exit(1);
    }
  
    neu0b = (struct neuron *)calloc(layer0_size, sizeof(struct neuron));
    neu1b = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
    neucb = (struct neuron *)calloc(layerc_size, sizeof(struct neuron));
    neu1b2 = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
    neu2b = (struct neuron *)calloc(layer2_size, sizeof(struct neuron));
  
    syn0b = (struct synapse *)calloc(layer0_size * layer1_size,
                                     sizeof(struct synapse));
    // syn1b = (struct synapse *)calloc(layer1_size*layer2_size,
    // sizeof(struct synapse));
    if (layerc_size == 0) {
      syn1b = (struct synapse *)calloc(layer1_size * layer2_size,
                                       sizeof(struct synapse));
    } else {
      syn1b = (struct synapse *)calloc(layer1_size * layerc_size,
                                       sizeof(struct synapse));
      syncb = (struct synapse *)calloc(layerc_size * layer2_size,
                                       sizeof(struct synapse));
    }
  
    if (syn1b == NULL) {
      printf("Memory allocation failed
  ");
      exit(1);
    }
  
    for (a = 0; a < layer0_size; a++) {
      neu0[a].ac = 0;
      neu0[a].er = 0;
    }
  
    for (a = 0; a < layer1_size; a++) {
      neu1[a].ac = 0;
      neu1[a].er = 0;
    }
  
    for (a = 0; a < layerc_size; a++) {
      neuc[a].ac = 0;
      neuc[a].er = 0;
    }
  
    for (a = 0; a < layer2_size; a++) {
      neu2[a].ac = 0;
      neu2[a].er = 0;
    }
  
    for (b = 0; b < layer1_size; b++) {
      for (a = 0; a < layer0_size; a++) {
        syn0[a + b * layer0_size].weight =
            random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
      }
    }
  
    if (layerc_size > 0) {
      for (b = 0; b < layerc_size; b++) {
        for (a = 0; a < layer1_size; a++) {
          syn1[a + b * layer1_size].weight =
              random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
        }
      }
  
      for (b = 0; b < layer2_size; b++) {
        for (a = 0; a < layerc_size; a++) {
          sync[a + b * layerc_size].weight =
              random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
        }
      }
    } else {
      for (b = 0; b < layer2_size; b++) {
        for (a = 0; a < layer1_size; a++) {
          syn1[a + b * layer1_size].weight =
              random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
        }
      }
    }
  
    long long aa;
    for (aa = 0; aa < direct_size; aa++) {
      syn_d[aa] = 0;
    }
  
    if (bptt > 0) {
      bptt_history = reinterpret_cast<int *>(calloc((bptt + bptt_block + 10),
                                                     sizeof(int)));
      for (a = 0; a < bptt + bptt_block; a++) {
        bptt_history[a] = -1;
      }
      bptt_hidden = reinterpret_cast<neuron *>(calloc(
                          (bptt + bptt_block + 1) * layer1_size, sizeof(neuron)));
      for (a = 0; a < (bptt + bptt_block) * layer1_size; a++) {
        bptt_hidden[a].ac = 0;
        bptt_hidden[a].er = 0;
      }
      bptt_syn0 = (struct synapse *)calloc(layer0_size * layer1_size,
                                           sizeof(struct synapse));
      if (bptt_syn0 == NULL) {
        printf("Memory allocation failed
  ");
        exit(1);
      }
    }
  
    saveWeights();
  
    double df, dd;
    int i;
  
    df = 0;
    dd = 0;
    a = 0;
    b = 0;
  
    if (old_classes) {    // old classes
      for (i = 0; i < vocab_size; i++) {
        b += vocab[i].cn;
      }
      for (i = 0; i < vocab_size; i++) {
        df += vocab[i].cn / static_cast<double>(b);
        if (df > 1) df = 1;
        if (df > (a + 1) / static_cast<double>(class_size)) {
          vocab[i].class_index = a;
          if (a < class_size - 1) a++;
        } else {
          vocab[i].class_index = a;
        }
      }
    } else {      // new classes
      for (i = 0; i < vocab_size; i++) {
        b += vocab[i].cn;
      }
      for (i = 0; i < vocab_size; i++) {
        dd += sqrt(vocab[i].cn / static_cast<double>(b));
      }
      for (i = 0; i < vocab_size; i++) {
        df += sqrt(vocab[i].cn / static_cast<double>(b)) / dd;
        if (df > 1) df = 1;
        if (df > (a + 1) / static_cast<double>(class_size)) {
          vocab[i].class_index = a;
          if (a < class_size - 1) a++;
        } else {
          vocab[i].class_index = a;
        }
      }
    }
  
    // allocate auxiliary class variables (for faster search when
    // normalizing probability at output layer)
  
    class_words = reinterpret_cast<int **>(calloc(class_size, sizeof(int *)));
    class_cn = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
    class_max_cn = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
  
    for (i = 0; i < class_size; i++) {
      class_cn[i] = 0;
      class_max_cn[i] = 10;
      class_words[i] = reinterpret_cast<int *>(calloc(class_max_cn[i], sizeof(int)));
    }
  
    for (i = 0; i < vocab_size; i++) {
      cl = vocab[i].class_index;
      class_words[cl][class_cn[cl]] = i;
      class_cn[cl]++;
      if (class_cn[cl] + 2 >= class_max_cn[cl]) {
        class_max_cn[cl] += 10;
        class_words[cl] = reinterpret_cast<int *>(realloc(class_words[cl],
                                         class_max_cn[cl] * sizeof(int)));
      }
    }
  }
  
  void CRnnLM::goToDelimiter(int delim, FILE *fi) {
    int ch = 0;
  
    while (ch != delim) {
      ch = fgetc(fi);
      if (feof(fi)) {
        printf("Unexpected end of file
  ");
        exit(1);
      }
    }
  }
  
  void CRnnLM::restoreNet() {   // will read whole network structure
    FILE *fi;
    int a, b, ver, unused_size;
    float fl;
    char str[MAX_STRING];
    double d;
  
    fi = fopen(rnnlm_file, "rb");
    if (fi == NULL) {
      printf("ERROR: model file '%s' not found!
  ", rnnlm_file);
      exit(1);
    }
  
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &ver);
    if ((ver == 4) && (version == 5)) {
      /* we will solve this later.. */
    } else {
      if (ver != version) {
        printf("Unknown version of file %s
  ", rnnlm_file);
        exit(1);
      }
    }
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &filetype);
    goToDelimiter(':', fi);
    if (train_file_set == 0) {
      unused_size = fscanf(fi, "%s", train_file);
    } else {
      unused_size = fscanf(fi, "%s", str);
    }
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%s", valid_file);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%lf", &llogp);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &iter);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &train_cur_pos);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%lf", &logp);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &anti_k);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &train_words);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &layer0_size);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &layer1_size);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &layerc_size);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &layer2_size);
    if (ver > 5) {
      goToDelimiter(':', fi);
      unused_size = fscanf(fi, "%lld", &direct_size);
    }
    if (ver > 6) {
      goToDelimiter(':', fi);
      unused_size = fscanf(fi, "%d", &direct_order);
    }
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &bptt);
    if (ver > 4) {
      goToDelimiter(':', fi);
      unused_size = fscanf(fi, "%d", &bptt_block);
    } else {
      bptt_block = 10;
    }
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &vocab_size);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &class_size);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &old_classes);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &independent);
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%lf", &d);
    starting_alpha = d;
    goToDelimiter(':', fi);
    if (alpha_set == 0) {
      unused_size = fscanf(fi, "%lf", &d);
      alpha = d;
    } else {
      unused_size = fscanf(fi, "%lf", &d);
    }
    goToDelimiter(':', fi);
    unused_size = fscanf(fi, "%d", &alpha_divide);
  
    // read normal vocabulary
    if (vocab_max_size < vocab_size) {
      if (vocab != NULL) free(vocab);
      vocab_max_size = vocab_size + 1000;
      // initialize memory for vocabulary
      vocab = (struct vocab_word *)calloc(vocab_max_size,
                                          sizeof(struct vocab_word));
    }
    goToDelimiter(':', fi);
    for (a = 0; a < vocab_size; a++) {
      // unused_size = fscanf(fi, "%d%d%s%d", &b, &vocab[a].cn,
      // vocab[a].word, &vocab[a].class_index);
      unused_size = fscanf(fi, "%d%d", &b, &vocab[a].cn);
      readWord(vocab[a].word, fi);
      unused_size = fscanf(fi, "%d", &vocab[a].class_index);
      // printf("%d  %d  %s  %d
  ", b, vocab[a].cn,
      // vocab[a].word, vocab[a].class_index);
    }
    if (neu0 == NULL) initNet();    // memory allocation here
  
    if (filetype == TEXT) {
      goToDelimiter(':', fi);
      for (a = 0; a < layer1_size; a++) {
        unused_size = fscanf(fi, "%lf", &d);
        neu1[a].ac = d;
      }
    }
    if (filetype == BINARY) {
      fgetc(fi);
      for (a = 0; a < layer1_size; a++) {
        unused_size = fread(&fl, 4, 1, fi);
        neu1[a].ac = fl;
      }
    }
    if (filetype == TEXT) {
      goToDelimiter(':', fi);
      for (b = 0; b < layer1_size; b++) {
        for (a = 0; a < layer0_size; a++) {
          unused_size = fscanf(fi, "%lf", &d);
          syn0[a + b * layer0_size].weight = d;
        }
      }
    }
    if (filetype == BINARY) {
      for (b = 0; b < layer1_size; b++) {
        for (a = 0; a < layer0_size; a++) {
          unused_size = fread(&fl, 4, 1, fi);
          syn0[a + b * layer0_size].weight = fl;
        }
      }
    }
    if (filetype == TEXT) {
      goToDelimiter(':', fi);
      if (layerc_size == 0) {  // no compress layer
        for (b = 0; b < layer2_size; b++) {
          for (a = 0; a < layer1_size; a++) {
            unused_size = fscanf(fi, "%lf", &d);
            syn1[a + b * layer1_size].weight = d;
          }
        }
      } else {        // with compress layer
        for (b = 0; b < layerc_size; b++) {
          for (a = 0; a < layer1_size; a++) {
            unused_size = fscanf(fi, "%lf", &d);
            syn1[a + b * layer1_size].weight = d;
          }
        }
  
        goToDelimiter(':', fi);
  
        for (b = 0; b < layer2_size; b++) {
          for (a = 0; a < layerc_size; a++) {
            unused_size = fscanf(fi, "%lf", &d);
            sync[a + b * layerc_size].weight = d;
          }
        }
      }
    }
    if (filetype == BINARY) {
      if (layerc_size == 0) {  // no compress layer
        for (b = 0; b < layer2_size; b++) {
          for (a = 0; a < layer1_size; a++) {
            unused_size = fread(&fl, 4, 1, fi);
            syn1[a + b * layer1_size].weight = fl;
          }
        }
      } else {        // with compress layer
        for (b = 0; b < layerc_size; b++) {
          for (a = 0; a < layer1_size; a++) {
            unused_size = fread(&fl, 4, 1, fi);
            syn1[a + b * layer1_size].weight = fl;
          }
        }
  
        for (b = 0; b < layer2_size; b++) {
          for (a = 0; a < layerc_size; a++) {
            unused_size = fread(&fl, 4, 1, fi);
            sync[a + b * layerc_size].weight = fl;
          }
        }
      }
    }
    if (filetype == TEXT) {
      goToDelimiter(':', fi);    // direct conenctions
      long long aa;
      for (aa = 0; aa < direct_size; aa++) {
        unused_size = fscanf(fi, "%lf", &d);
        syn_d[aa] = d;
      }
    }
    if (filetype == BINARY) {
      long long aa;
      for (aa = 0; aa < direct_size; aa++) {
        unused_size = fread(&fl, 4, 1, fi);
        syn_d[aa] = fl;
  
        /*unused_size = fread(&si, 2, 1, fi);
          fl = si/(float)(4*256);
          syn_d[aa] = fl;*/
      }
    }
  
    saveWeights();
  
    // idiom to "use" an unused variable
    (void) unused_size;
  
    fclose(fi);
  }
  
  void CRnnLM::netReset() {  // cleans hidden layer activation + bptt history
    int a, b;
  
    for (a = 0; a < layer1_size; a++) {
      neu1[a].ac = 1.0;
    }
  
    copyHiddenLayerToInput();
  
    if (bptt > 0) {
      for (a = 1; a < bptt + bptt_block; a++) {
        bptt_history[a] = 0;
      }
      for (a = bptt + bptt_block - 1; a > 1; a--) {
        for (b = 0; b < layer1_size; b++) {
          bptt_hidden[a * layer1_size + b].ac = 0;
          bptt_hidden[a * layer1_size + b].er = 0;
        }
      }
    }
  
    for (a = 0; a < MAX_NGRAM_ORDER; a++) {
      history[a] = 0;
    }
  }
  
  void CRnnLM::matrixXvector(struct neuron *dest, struct neuron *srcvec,
                             struct synapse *srcmatrix, int matrix_width,
                             int from, int to, int from2, int to2, int type) {
    int a, b;
    real val1, val2, val3, val4;
    real val5, val6, val7, val8;
  
    if (type == 0) {    // ac mod
      for (b = 0; b < (to - from) / 8; b++) {
        val1 = 0;
        val2 = 0;
        val3 = 0;
        val4 = 0;
  
        val5 = 0;
        val6 = 0;
        val7 = 0;
        val8 = 0;
  
        for (a = from2; a < to2; a++) {
          val1 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 0) * matrix_width].weight;
          val2 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 1) * matrix_width].weight;
          val3 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 2) * matrix_width].weight;
          val4 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 3) * matrix_width].weight;
  
          val5 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 4) * matrix_width].weight;
          val6 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 5) * matrix_width].weight;
          val7 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 6) * matrix_width].weight;
          val8 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 7) * matrix_width].weight;
        }
        dest[b * 8 + from + 0].ac += val1;
        dest[b * 8 + from + 1].ac += val2;
        dest[b * 8 + from + 2].ac += val3;
        dest[b * 8 + from + 3].ac += val4;
  
        dest[b * 8 + from + 4].ac += val5;
        dest[b * 8 + from + 5].ac += val6;
        dest[b * 8 + from + 6].ac += val7;
        dest[b * 8 + from + 7].ac += val8;
      }
  
      for (b = b * 8; b < to - from; b++) {
        for (a = from2; a < to2; a++) {
          dest[b+from].ac +=
              srcvec[a].ac * srcmatrix[a + (b + from) * matrix_width].weight;
        }
      }
    } else {    // er mod
      for (a = 0; a < (to2 - from2) / 8; a++) {
        val1 = 0;
        val2 = 0;
        val3 = 0;
        val4 = 0;
  
        val5 = 0;
        val6 = 0;
        val7 = 0;
        val8 = 0;
  
        for (b = from; b < to; b++) {
          val1 += srcvec[b].er * srcmatrix[a * 8 + from2 + 0 + b * matrix_width].weight;
          val2 += srcvec[b].er * srcmatrix[a * 8 + from2 + 1 + b * matrix_width].weight;
          val3 += srcvec[b].er * srcmatrix[a * 8 + from2 + 2 + b * matrix_width].weight;
          val4 += srcvec[b].er * srcmatrix[a * 8 + from2 + 3 + b * matrix_width].weight;
  
          val5 += srcvec[b].er * srcmatrix[a * 8 + from2 + 4 + b * matrix_width].weight;
          val6 += srcvec[b].er * srcmatrix[a * 8 + from2 + 5 + b * matrix_width].weight;
          val7 += srcvec[b].er * srcmatrix[a * 8 + from2 + 6 + b * matrix_width].weight;
          val8 += srcvec[b].er * srcmatrix[a * 8 + from2 + 7 + b * matrix_width].weight;
        }
        dest[a * 8 + from2 + 0].er += val1;
        dest[a * 8 + from2 + 1].er += val2;
        dest[a * 8 + from2 + 2].er += val3;
        dest[a * 8 + from2 + 3].er += val4;
  
        dest[a * 8 + from2 + 4].er += val5;
        dest[a * 8 + from2 + 5].er += val6;
        dest[a * 8 + from2 + 6].er += val7;
        dest[a * 8 + from2 + 7].er += val8;
      }
  
      for (a = a * 8; a < to2 - from2; a++) {
        for (b = from; b < to; b++) {
          dest[a + from2].er
              += srcvec[b].er * srcmatrix[a + from2 + b * matrix_width].weight;
        }
      }
  
      if (gradient_cutoff > 0)
        for (a = from2; a < to2; a++) {
          if (dest[a].er > gradient_cutoff) dest[a].er = gradient_cutoff;
          if (dest[a].er < -gradient_cutoff) dest[a].er = -gradient_cutoff;
        }
    }
  
    // this is normal implementation (about 3x slower):
  
    /*if (type == 0) {    //ac mod
      for (b = from; b < to; b++) {
      for (a = from2; a < to2; a++) {
      dest[b].ac += srcvec[a].ac * srcmatrix[a+b*matrix_width].weight;
      }
      }
      }
      else     //er mod
      if (type == 1) {
      for (a = from2; a < to2; a++) {
      for (b = from; b < to; b++) {
      dest[a].er += srcvec[b].er * srcmatrix[a+b*matrix_width].weight;
      }
      }
      }*/
  }
  
  void CRnnLM::computeNet(int last_word, int word) {
    int a, b, c;
    real val;
    double sum;   // sum is used for normalization: it's better to have larger
                  // precision as many numbers are summed together here
  
    if (last_word != -1) neu0[last_word].ac = 1;
  
    // propagate 0->1
    for (a = 0; a < layer1_size; a++) {
      neu1[a].ac = 0;
    }
    for (a = 0; a < layerc_size; a++) {
      neuc[a].ac = 0;
    }
  
    matrixXvector(neu1, neu0, syn0, layer0_size, 0, layer1_size,
                  layer0_size - layer1_size, layer0_size, 0);
  
    for (b = 0; b < layer1_size; b++) {
      a = last_word;
      if (a != -1) neu1[b].ac += neu0[a].ac * syn0[a + b * layer0_size].weight;
    }
  
    // activate 1      --sigmoid
    for (a = 0; a < layer1_size; a++) {
      if (neu1[a].ac > 50) neu1[a].ac = 50;    // for numerical stability
      if (neu1[a].ac < -50) neu1[a].ac = -50;  // for numerical stability
      val = -neu1[a].ac;
      neu1[a].ac = 1 / (1 + FAST_EXP(val));
    }
  
    if (layerc_size > 0) {
      matrixXvector(neuc, neu1, syn1, layer1_size,
                    0, layerc_size, 0, layer1_size, 0);
      // activate compression      --sigmoid
      for (a = 0; a < layerc_size; a++) {
        if (neuc[a].ac > 50) neuc[a].ac = 50;    // for numerical stability
        if (neuc[a].ac < -50) neuc[a].ac = -50;  // for numerical stability
        val = -neuc[a].ac;
        neuc[a].ac = 1 / (1 + FAST_EXP(val));
      }
    }
  
    // 1->2 class
    for (b = vocab_size; b < layer2_size; b++) {
      neu2[b].ac = 0;
    }
  
    if (layerc_size > 0) {
      matrixXvector(neu2, neuc, sync, layerc_size,
                    vocab_size, layer2_size, 0, layerc_size, 0);
    } else {
      matrixXvector(neu2, neu1, syn1, layer1_size,
                    vocab_size, layer2_size, 0, layer1_size, 0);
    }
  
    // apply direct connections to classes
    if (direct_size > 0) {
      unsigned long long hash[MAX_NGRAM_ORDER];
      // this will hold pointers to syn_d that contains hash parameters
  
      for (a = 0; a < direct_order; a++) {
        hash[a] = 0;
      }
  
      for (a = 0; a < direct_order; a++) {
        b = 0;
        if (a > 0) if (history[a - 1] == -1) break;
        // if OOV was in history, do not use this N-gram feature and higher orders
        hash[a] = PRIMES[0] * PRIMES[1];
  
        for (b = 1; b <= a; b++) {
          hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE]
              * static_cast<unsigned long long>(history[b - 1] + 1);
        }
        // update hash value based on words from the history
  
        hash[a] = hash[a] % (direct_size / 2);
        // make sure that starting hash index is in the first
        // half of syn_d (second part is reserved for history->words features)
      }
  
      for (a = vocab_size; a < layer2_size; a++) {
        for (b = 0; b < direct_order; b++) {
          if (hash[b]) {
            neu2[a].ac += syn_d[hash[b]];
            // apply current parameter and move to the next one
  
            hash[b]++;
          } else {
            break;
          }
        }
      }
    }
  
    // activation 2   --softmax on classes
    sum = 0;
    for (a = vocab_size; a < layer2_size; a++) {
      if (neu2[a].ac > 50) neu2[a].ac = 50;    // for numerical stability
      if (neu2[a].ac < -50) neu2[a].ac = -50;  // for numerical stability
      val = FAST_EXP(neu2[a].ac);
      sum+= val;
      neu2[a].ac = val;
    }
    for (a = vocab_size; a < layer2_size; a++) {
      neu2[a].ac /= sum;
    }
    // output layer activations now sum exactly to 1
  
    if (gen > 0) return;  // if we generate words, we don't know what current word
                          // is -> only classes are estimated and word is selected
                          // in testGen()
  
  
    // 1->2 word
    if (word != -1) {
      for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
        neu2[class_words[vocab[word].class_index][c]].ac = 0;
      }
      if (layerc_size > 0) {
        matrixXvector(neu2, neuc, sync, layerc_size,
                      class_words[vocab[word].class_index][0],
                      class_words[vocab[word].class_index][0]
                      + class_cn[vocab[word].class_index],
                      0, layerc_size, 0);
      } else {
        matrixXvector(neu2, neu1, syn1, layer1_size,
                      class_words[vocab[word].class_index][0],
                      class_words[vocab[word].class_index][0]
                      + class_cn[vocab[word].class_index],
                      0, layer1_size, 0);
      }
    }
  
    // apply direct connections to words
    if (word != -1) if (direct_size > 0) {
      unsigned long long  hash[MAX_NGRAM_ORDER];
  
      for (a = 0; a < direct_order; a++) {
        hash[a] = 0;
      }
  
      for (a = 0; a < direct_order; a++) {
        b = 0;
        if (a > 0) if (history[a - 1] == -1) break;
        hash[a] =
            PRIMES[0] * PRIMES[1] *
            static_cast<unsigned long long>(vocab[word].class_index + 1);
  
        for (b = 1; b <= a; b++) {
          hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE]
              * static_cast<unsigned long long>(history[b - 1] + 1);
        }
        hash[a] = (hash[a] % (direct_size / 2)) + (direct_size) / 2;
      }
  
      for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
        a = class_words[vocab[word].class_index][c];
  
        for (b = 0; b < direct_order; b++) if (hash[b]) {
          neu2[a].ac += syn_d[hash[b]];
          hash[b]++;
          hash[b] = hash[b] % direct_size;
        } else {
          break;
        }
      }
    }
  
    // activation 2   --softmax on words
    sum = 0;
    if (word != -1) {
      for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
        a = class_words[vocab[word].class_index][c];
        if (neu2[a].ac > 50) neu2[a].ac = 50;    // for numerical stability
        if (neu2[a].ac < -50) neu2[a].ac = -50;  // for numerical stability
        val = FAST_EXP(neu2[a].ac);
        sum+= val;
        neu2[a].ac = val;
      }
      for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
        neu2[class_words[vocab[word].class_index][c]].ac /= sum;
      }
    }
  }
  
  void CRnnLM::copyHiddenLayerToInput() {
    int a;
  
    for (a = 0; a < layer1_size; a++) {
      neu0[a + layer0_size - layer1_size].ac = neu1[a].ac;
    }
  }
  
  void CRnnLM::restoreContextFromVector(const std::vector <float> &context_in) {
    assert(context_in.size() == layer1_size);
    for (int i = 0; i  <  layer1_size; ++i) {
      neu1[i].ac = context_in[i];
    }
  }
  
  void CRnnLM::saveContextToVector(std::vector <float> *context_out) {
    assert(context_out != NULL);
    context_out->resize(layer1_size);
    for (int i = 0; i  <  layer1_size; ++i) {
      (*context_out)[i] = neu1[i].ac;
    }
  }
  
  float CRnnLM::computeConditionalLogprob(
      std::string current_word,
      const std::vector < std::string >  &history_words,
      const std::vector < float >  &context_in,
      std::vector < float >  *context_out) {
    // We assume the network has been restored.
    netReset();
    restoreContextFromVector(context_in);
    copyHiddenLayerToInput();
  
    // Maps unk to the unk symbol.
    std::vector <std::string>  history_words_nounk(history_words);
    std::string current_word_nounk = current_word;
    if (isUnk(current_word_nounk)) {
      current_word_nounk = unk_sym;
    }
    for (int i = 0; i < history_words_nounk.size(); ++i) {
      if (isUnk(history_words_nounk[i])) {
        history_words_nounk[i] = unk_sym;
      }
    }
  
    // Handles history for n-gram features.
    for (int i = 0; i < MAX_NGRAM_ORDER; i++) {
      history[i] = 0;
    }
    for (int i = 0; i < history_words_nounk.size() && i < MAX_NGRAM_ORDER; i++) {
      history[i] = searchVocab(
          history_words_nounk[history_words_nounk.size() - 1 - i].c_str());
    }
  
    int word = 0, last_word = 0;
    float logprob = 0;
    if (current_word_nounk == unk_sym) {
      logprob += getUnkPenalty(current_word);
    }
    word = searchVocab(current_word_nounk.c_str());
    if (history_words_nounk.size() > 0) {
      last_word = searchVocab(
          history_words_nounk[history_words_nounk.size() - 1].c_str());
    }
    computeNet(last_word, word);
  
    if (word != -1) {
      logprob +=
          log(neu2[vocab[word].class_index + vocab_size].ac * neu2[word].ac);
    } else {
      logprob += -16.118;
    }
  
    if (context_out != NULL) {
      saveContextToVector(context_out);
    }
  
    if (last_word != -1) {
      neu0[last_word].ac = 0;
    }
  
    return logprob;
  }
  
  bool CRnnLM::isUnk(const std::string &word) {
    int word_int = searchVocab(word.c_str());
    if (word_int == -1)
      return true;
    return false;
  }
  
  void CRnnLM::setUnkSym(const std::string &unk) {
    unk_sym = unk;
  }
  
  float CRnnLM::getUnkPenalty(const std::string &word) {
    unordered_map <std::string, float>::const_iterator iter  =
        unk_penalty.find(word);
    if (iter != unk_penalty.end())
      return iter->second;
    return -16.118;  // Fixed penalty.
  }
  
  void CRnnLM::setUnkPenalty(const std::string &filename) {
    if (filename.empty())
      return;
    kaldi::SequentialBaseFloatReader unk_reader(filename);
    for (; !unk_reader.Done(); unk_reader.Next()) {
      std::string key = unk_reader.Key();
      float prob = unk_reader.Value();
      unk_reader.FreeCurrent();
      unk_penalty[key] = log(prob);
    }
  }
  
  }  // namespace rnnlm