// nnet3/nnet-nnet.h // Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey) // 2016 Daniel Galvez // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_NNET3_NNET_NNET_H_ #define KALDI_NNET3_NNET_NNET_H_ #include "base/kaldi-common.h" #include "util/kaldi-io.h" #include "matrix/matrix-lib.h" #include "nnet3/nnet-common.h" #include "nnet3/nnet-component-itf.h" #include "nnet3/nnet-descriptor.h" #include #include #include #include namespace kaldi { namespace nnet3 { /// This enum is for a kind of annotation we associate with output nodes of the /// network; it's for the convenience of calling code so that if the objective /// is one of a few standard types, we can compute it directly and know how to /// interpret the supervision labels. However, the core of the framework never /// makes use of the objective types, other than making them available to /// calling code which then supplies the derivatives. /// - Objective type kLinear is intended for Neural nets where the final /// component is a LogSoftmaxComponent, so the log-prob (negative /// cross-entropy) objective is just a linear function of the input. /// - Objective type kQuadratic is used to mean the objective function /// f(x, y) = -0.5 (x-y).(x-y), which is to be maximized, as in the kLinear /// case. enum ObjectiveType { kLinear, kQuadratic }; enum NodeType { kInput, kDescriptor, kComponent, kDimRange, kNone }; /// NetworkNode is used to represent, three types of thing: either an input of the /// network (which pretty much just states the dimension of the input vector); /// a Component (e.g. an affine component or a sigmoid component); or a Descriptor. /// A Descriptor is basically an expression that can do things like append /// the outputs of other components (or inputs) together, add them together, and /// do various other things like shifting the time index. /// /// Each Component must have an input of type kDescriptor that is numbered /// Preceding to the Component, and that is not used elsewhere. This may seem /// unintuitive but it makes the implementation a lot easier; any apparent waste /// can be optimized out after compilation. And outputs must also be of type /// kDescriptor. /// /// Note: in the actual computation you can provide input not only to nodes of /// type kInput but also to nodes of type kComponent; this is useful in things /// like recurrent nets where you may want to split the computation up into /// pieces. /// /// Note that in the config-file format, there are three types of node: input, /// component and output. output maps to kDescriptor, but the nodes of type /// kDescriptor that represent the input to a component, are described in the /// same config-file line as the Component itself. struct NetworkNode { NodeType node_type; // "descriptor" is relevant only for nodes of type kDescriptor. Descriptor descriptor; union { // For kComponent, the index into Nnet::components_ int32 component_index; // for kDimRange, the node-index of the input node, which must be of // type kComponent or kInput. int32 node_index; // for nodes of type kDescriptor that are output nodes (i.e. not followed by // a node of type kComponents), the objective function associated with the // output. The core parts of the nnet code just ignore; it is required only // for the information of the calling code, which is perfectly free to // ignore it. View it as a kind of annotation. ObjectiveType objective_type; } u; // for kInput, the dimension of the input feature. For kDimRange, the dimension // of the output (i.e. the length of the range) int32 dim; // for kDimRange, the dimension of the offset into the input component's feature. int32 dim_offset; int32 Dim(const Nnet &nnet) const; // Dimension that this node outputs. NetworkNode(NodeType nt = kNone): node_type(nt), dim(-1), dim_offset(-1) { u.component_index = -1; } NetworkNode(const NetworkNode &other); // copy constructor. // use default assignment operator }; class Nnet { public: // This function can be used either to initialize a new Nnet from a config // file, or to add to an existing Nnet, possibly replacing certain parts of // it. It will die with error if something went wrong. // Also see the function ReadEditConfig() in nnet-utils.h (it's made a // non-member because it doesn't need special access). void ReadConfig(std::istream &config_file); int32 NumComponents() const { return components_.size(); } int32 NumNodes() const { return nodes_.size(); } /// Return component indexed c. Not a copy; not owned by caller. Component *GetComponent(int32 c); /// Return component indexed c (const version). Not a copy; not owned by /// caller. const Component *GetComponent(int32 c) const; /// Replace the component indexed by c with a new component. /// Frees previous component indexed by c. Takes ownership of /// the pointer 'component'. void SetComponent(int32 c, Component *component); /// Adds a new component with the given name, which should not be the same as /// any existing component name. Returns the new component index. Takes /// ownership of the pointer 'component'. int32 AddComponent(const std::string &name, Component *component); /// returns const reference to a particular numbered network node. const NetworkNode &GetNode(int32 node) const { KALDI_ASSERT(node >= 0 && node < nodes_.size()); return nodes_[node]; } /// Non-const accessor for the node... use with extreme caution. NetworkNode &GetNode(int32 node) { KALDI_ASSERT(node >= 0 && node < nodes_.size()); return nodes_[node]; } /// Returns true if this is a component node, meaning that it is of type /// kComponent. bool IsComponentNode(int32 node) const; /// Returns true if this is a dim-range node, meaning that it is of type /// kDimRange. bool IsDimRangeNode(int32 node) const; /// Returns true if this is an output node, meaning that it is of type /// kInput. bool IsInputNode(int32 node) const; /// Returns true if this is a descriptor node, meaning that it is of type /// kDescriptor. Exactly one of IsOutput or IsComponentInput will also /// apply. bool IsDescriptorNode(int32 node) const; /// Returns true if this is an output node, meaning that it is of type kDescriptor /// and is not directly followed by a node of type kComponent. bool IsOutputNode(int32 node) const; /// Returns true if this is component-input node, i.e. a node of type kDescriptor /// that immediately precedes a node of type kComponent. bool IsComponentInputNode(int32 node) const; /// returns vector of node names (needed by some parsing code, for instance). const std::vector &GetNodeNames() const; /// returns individual node name. const std::string &GetNodeName(int32 node_index) const; /// This can be used to modify invidual node names. Note, this does not /// affect the neural net structure at all, it just assigns a new /// name to an existing node while leaving all connections identical. void SetNodeName(int32 node_index, const std::string &new_name); /// returns vector of component names (needed by some parsing code, for instance). const std::vector &GetComponentNames() const; /// returns individual component name. const std::string &GetComponentName(int32 component_index) const; /// returns index associated with this node name, or -1 if no such index. int32 GetNodeIndex(const std::string &node_name) const; /// returns index associated with this component name, or -1 if no such index. int32 GetComponentIndex(const std::string &node_name) const; // This convenience function returns the dimension of the input with name // "input_name" (e.g. input_name="input" or "ivector"), or -1 if there is no // such input. int32 InputDim(const std::string &input_name) const; // This convenience function returns the dimension of the output with // name "input_name" (e.g. output_name="input"), or -1 if there is // no such input. int32 OutputDim(const std::string &output_name) const; void Read(std::istream &istream, bool binary); void Write(std::ostream &ostream, bool binary) const; /// Checks the neural network for validity (dimension matches and various /// other requirements). /// You can call this with warn_for_orphans = false to disable the warnings /// that are printed if orphan nodes or components exist. void Check(bool warn_for_orphans = true) const; /// returns some human-readable information about the network, mostly for /// debugging purposes. /// Also see function NnetInfo() in nnet-utils.h, which prints out more /// extensive infoformation. std::string Info() const; /// [Relevant for clockwork RNNs and similar]. Computes the smallest integer /// n >=1 such that the neural net's behavior will be the same if we shift the /// input and output's time indexes (t) by integer multiples of n. Does this /// by computing the lcm of all the moduli of the Descriptors in the network. int32 Modulus() const; ~Nnet() { Destroy(); } // Default constructor Nnet() { } // Copy constructor Nnet(const Nnet &nnet); Nnet *Copy() const { return new Nnet(*this); } void Swap(Nnet *other); // Assignment operator Nnet& operator =(const Nnet &nnet); // Removes nodes that are never needed to compute any output. void RemoveOrphanNodes(bool remove_orphan_inputs = false); // Removes components that are not used by any node. void RemoveOrphanComponents(); // Removes some nodes. This is not to be called without a lot of thought, // as it could ruin the graph structure if done carelessly. void RemoveSomeNodes(const std::vector &nodes_to_remove); void ResetGenerators(); // resets random-number generators for all // random components. You must call srand() prior to this call, for this to // be effective. // This function outputs to "config_lines" the lines of a config file. If you // provide include_dim=false, this will enable you to reconstruct the nodes in // the network (but not the components, which need to be written separately). // If you provide include_dim=true, it also adds extra information about // node dimensions which is useful for a human reader but won't be // accepted as the config-file format. void GetConfigLines(bool include_dim, std::vector *config_lines) const; private: void Destroy(); // This function returns as a string the contents of a line of a config-file // corresponding to the node indexed "node_index", which must not be of type // kComponentInput. If include_dim=false, it appears in the same format as it // would appear in a line of a config-file; if include_dim=true, we also // include dimension information that would not be provided in a config file. std::string GetAsConfigLine(int32 node_index, bool include_dim) const; // This function is used when reading config files; it exists in order to // handle replacement of existing nodes. The two input vectors have the same // size. Its job is to remove redundant lines that do not have "component" as // first_token, and where two lines have a configuration value name=xxx in the // config with the same name. In this case it removes the first of the two, // but that first one must have index less than num_lines_initial, else it is // an error. // This function also checks that all lines have a config name=xxx, that // IsValidName(xxx) is true, and that there are no two lines with "component" // as the first token and with the same config name=xxx. Note: here, "name" // means literally "name", but "xxx" stands in for the actual name, // e.g. "my-funky-component." static void RemoveRedundantConfigLines(int32 num_lines_initial, std::vector *config_lines); void ProcessComponentConfigLine(int32 initial_num_components, ConfigLine *config); void ProcessComponentNodeConfigLine(int32 pass, ConfigLine *config); void ProcessInputNodeConfigLine(ConfigLine *config); void ProcessOutputNodeConfigLine(int32 pass, ConfigLine *config); void ProcessDimRangeNodeConfigLine(int32 pass, ConfigLine *config); // This function output to "modified_node_names" a modified copy of // node_names_, in which all nodes which are not of type kComponent, kInput or // kDimRange are replaced with the string "***". This is useful when parsing // Descriptors, to avoid inadvertently accepting nodes of invalid types where // they are not allowed. void GetSomeNodeNames(std::vector *modified_node_names) const; // the names of the components of the network. Note, these may be distinct // from the network node names below (and live in a different namespace); the // same component may be used in multiple network nodes, to define parameter // sharing. std::vector component_names_; // the components of the nnet, in arbitrary order. The network topology is // defined separately, below; a given Component may appear more than once in // the network if necessary for parameter tying. std::vector components_; // names of network nodes, i.e. inputs, components and outputs, used only in // reading and writing code. Indexed by network-node index. Note, // components' names are always listed twice, once as foo-input and once as // foo, because the input to a component always gets its own NetworkNode index. std::vector node_names_; // the network nodes of the network. std::vector nodes_; }; } // namespace nnet3 } // namespace kaldi #endif