nnet-general-component.h 45 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005

1006

1007

1008

1009

1010

1011

1012

1013

1014

1015

1016

1017

1018

1019

1020

1021

1022

1023

1024

1025

1026

1027

1028

1029

1030

1031

1032

1033

1034

1035

1036

1037

1038

1039

1040

1041

1042

1043

1044

1045

1046

1047

1048

1049

1050

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063

1064

1065

1066

1067

1068

1069

1070

1071

1072

1073

1074

1075

1076

1077

1078

1079

1080

1081

1082

1083

1084

1085

1086

1087

1088

1089

1090

1091

1092

1093

1094

1095

1096

1097

1098

1099

1100

1101

1102

1103

1104

1105

1106

1107

1108

1109

1110


// nnet3/nnet-general-component.h

// Copyright      2015  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET3_NNET_GENERAL_COMPONENT_H_
#define KALDI_NNET3_NNET_GENERAL_COMPONENT_H_

#include "nnet3/nnet-common.h"
#include "nnet3/nnet-component-itf.h"
#include "nnet3/natural-gradient-online.h"
#include <iostream>

namespace kaldi {
namespace nnet3 {

/// @file  nnet-general-component.h
/// This file contains declarations of components that are not "simple",
///   meaning they care about the indexes they are operating on, don't return
///   the kSimpleComponent flag in their Properties(), and may return a different
///   number of outputs than inputs.
///   Also see nnet-convolutional-component.h, which contains a
///   number of convolution-related 'general' components.


/**
   This Component takes a larger input-dim than output-dim, where the input-dim
   must be a multiple of the output-dim, and distributes different blocks of the
   input dimension to different 'x' values.  In the normal case where the input
   is only valid at x=0, the first block of output goes to x=0, the second block
   at x=1, and so on.  It also supports a more general usage, so in general a
   value 'x' at the output will map to block 'x % n_blocks' of the dimension
   blocks of the input, and to an x value 'x / n_blocks' of the input.  For negative
   x values the % and / operations are always rounded down, not towards zero.

   The config line is of the form
     input-dim=xx output-dim=xx
   where input-dim must be a multiple of the output-dim, and n_blocks is
   set to input-dim / output-dim.
   */
class DistributeComponent: public Component {
 public:
  // Builds a component with the given dimensions; the work is delegated to
  // Init().
  DistributeComponent(int32 input_dim, int32 output_dim) {
    Init(input_dim, output_dim);
  }

  // Default constructor; leaves both dimensions at zero.
  DistributeComponent():
      input_dim_(0),
      output_dim_(0) { }

  virtual int32 InputDim() const {
    return input_dim_;
  }

  virtual int32 OutputDim() const {
    return output_dim_;
  }

  // Info() is not overridden here: the default implementation is used.
  virtual void InitFromConfig(ConfigLine *cfl);

  virtual std::string Type() const {
    return "DistributeComponent";
  }

  // This component sets no property flags.
  virtual int32 Properties() const {
    return 0;
  }

  virtual void* Propagate(
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in,
      CuMatrixBase<BaseFloat> *out) const;

  virtual void Backprop(
      const std::string &debug_info,
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in_value,
      const CuMatrixBase<BaseFloat> &out_value,
      const CuMatrixBase<BaseFloat> &out_deriv,
      void *memo,
      Component *,  // to_update,
      CuMatrixBase<BaseFloat> *in_deriv) const;

  // Reads the component from a stream.  The Component is required to already
  // have the correct type.
  virtual void Read(std::istream &is, bool binary);

  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;

  // Returns a newly allocated DistributeComponent constructed from this
  // object's input and output dimensions.
  virtual Component* Copy() const {
    DistributeComponent *duplicate =
        new DistributeComponent(input_dim_, output_dim_);
    return duplicate;
  }


  // The functions below only need to be reimplemented by GeneralComponents.
  virtual void GetInputIndexes(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      std::vector<Index> *desired_indexes) const;

  // Returns true if at least one of the input indexes that is used to compute
  // 'output_index' is computable.
  virtual bool IsComputable(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      const IndexSet &input_index_set,
      std::vector<Index> *used_inputs) const;

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

  // Functions specific to this class.
  void Init(int32 input_dim, int32 output_dim);

 private:
  // Works out which input index a given output index corresponds to.  When
  // 'block' is non-NULL, the block of the input that it corresponds to is
  // computed as well.
  inline void ComputeInputIndexAndBlock(
      const Index &output_index,
      Index *input_index,
      int32 *block) const;

  // Fills in *input_pointers for a const input matrix.
  // NOTE(review): presumably one pointer into 'in' per output row; the
  // definition is not in this header -- confirm against the .cc file.
  inline void ComputeInputPointers(
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in,
      int32 num_output_rows,
      std::vector<const BaseFloat*> *input_pointers) const;

  // Non-const counterpart of the function above (note the different argument
  // order: 'num_output_rows' comes before 'in').
  inline void ComputeInputPointers(
      const ComponentPrecomputedIndexes *indexes,
      int32 num_output_rows,
      CuMatrixBase<BaseFloat> *in,
      std::vector<BaseFloat*> *input_pointers) const;

  int32 input_dim_;
  int32 output_dim_;

};

class DistributeComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:

  // A list of (row, dim_offset) pairs.  For each pair, the expression
  //   input.Data() + row * input.Stride() + dim_offset
  // yields an address pointing at the correct input location.
  std::vector<std::pair<int32, int32> > pairs;

  // The destructor is virtual so that an object of this class can be deleted
  // through a pointer to ComponentPrecomputedIndexes.
  virtual ~DistributeComponentPrecomputedIndexes() { }

  // Returns a newly allocated copy of this object, made with the copy
  // constructor.
  virtual ComponentPrecomputedIndexes* Copy() const {
    DistributeComponentPrecomputedIndexes *duplicate =
        new DistributeComponentPrecomputedIndexes(*this);
    return duplicate;
  }

  virtual void Write(std::ostream &ostream, bool binary) const;

  virtual void Read(std::istream &istream, bool binary);

  virtual std::string Type() const {
    return "DistributeComponentPrecomputedIndexes";
  }
};

/*
  Class StatisticsExtractionComponent is used together with
  StatisticsPoolingComponent to extract moving-average mean and
  standard-deviation statistics.

  StatisticsExtractionComponent is designed to extract statistics-- 0th-order,
  1st-order and optionally diagonal 2nd-order stats-- from small groups of
  frames, such as 10 frames.  The statistics will then be further processed by
  StatisticsPoolingComponent to compute moving-average means and (if configured)
  standard deviations.  The reason for the two-component way of doing this is
  efficiency, particularly in the graph-compilation phase.  (Otherwise there
  would be too many dependencies to process).  The StatisticsExtractionComponent
  is designed to let you extract statistics from fixed-size groups of frames
  (e.g. 10 frames), and in StatisticsPoolingComponent you are only expected to
  compute the averages at the same fixed period (e.g. 10 frames), so it's more
  efficient than if you were to compute a moving average at every single frame;
  and the computation of the intermediate stats means that most of the
  computation that goes into extracting the means and standard deviations for
  nearby frames is shared.

  The config line in a typical setup will be something like:

    input-dim=250 input-period=1 output-period=10 include-variance=true

  input-dim is self-explanatory.  The inputs will be obtained at multiples of
  input-period (e.g. it might be 3 for chain models).  output-period must be a
  multiple of input period, and the requested output indexes will be expected to
  be multiples of output-period (which you can ensure through use of the Round
  descriptor).  For instance, if you request the output on frame 80, it will
  consist of stats from input frames 80 through 89.

  An output of this component will be 'computable' any time at least one of
  the corresponding inputs is computable.

  In all cases the first dimension of the output will be a count (between 1 and
  10 inclusive in this example).  If include-variance=false, then the output
  dimension will be input-dim + 1, and the output dimensions > 0 will be
  1st-order statistics (sums of the input).  If include-variance=true, then the
  output dimension will be input-dim * 2 + 1, where the raw diagonal 2nd-order
  statistics are appended to the 0 and 1st order statistics.

  The default configuration values are:
     input-dim=-1 input-period=1 output-period=1 include-variance=true
 */
class StatisticsExtractionComponent: public Component {
 public:
  // The default constructor sets defaults that would not pass Check(); real
  // initialization happens through InitFromConfig(), Read() or the copy
  // constructor.
  StatisticsExtractionComponent();

  // Copy constructor; Copy() relies on it.
  StatisticsExtractionComponent(const StatisticsExtractionComponent &other);

  virtual int32 InputDim() const {
    return input_dim_;
  }

  // The output consists of a count, then the sum stats, then (only when
  // variance is included) the sum-squared stats.
  virtual int32 OutputDim() const {
    int32 dim = 1 + input_dim_;
    if (include_variance_)
      dim += input_dim_;
    return dim;
  }

  virtual void InitFromConfig(ConfigLine *cfl);

  virtual std::string Type() const {
    return "StatisticsExtractionComponent";
  }

  // kPropagateAdds and kReordersIndexes are always set; kBackpropNeedsInput
  // is added only when variance stats are included.
  virtual int32 Properties() const {
    int32 flags = kPropagateAdds | kReordersIndexes;
    if (include_variance_)
      flags |= kBackpropNeedsInput;
    return flags;
  }

  virtual void* Propagate(
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in,
      CuMatrixBase<BaseFloat> *out) const;

  virtual void Backprop(
      const std::string &debug_info,
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in_value,
      const CuMatrixBase<BaseFloat> &out_value,
      const CuMatrixBase<BaseFloat> &out_deriv,
      void *memo,
      Component *,  // to_update,
      CuMatrixBase<BaseFloat> *in_deriv) const;

  // Reads the component from a stream.  The Component is required to already
  // have the correct type.
  virtual void Read(std::istream &is, bool binary);

  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;

  // Returns a newly allocated copy of this component, made with the copy
  // constructor.
  virtual Component* Copy() const {
    StatisticsExtractionComponent *duplicate =
        new StatisticsExtractionComponent(*this);
    return duplicate;
  }

  // The functions below only need to be reimplemented by GeneralComponents.
  virtual void GetInputIndexes(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      std::vector<Index> *desired_indexes) const;

  virtual bool IsComputable(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      const IndexSet &input_index_set,
      std::vector<Index> *used_inputs) const;

  // Reorders both the input and the output indexes so that they are sorted
  // on n first, then on x, then on t.
  virtual void ReorderIndexes(
      std::vector<Index> *input_indexes,
      std::vector<Index> *output_indexes) const;

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

 private:
  // Verifies that the parameters are valid.
  void Check() const;

  // Assignment is disallowed: the operator is declared private.
  StatisticsExtractionComponent &operator =(
      const StatisticsExtractionComponent &other);

  int32 input_dim_;
  int32 input_period_;
  int32 output_period_;
  bool include_variance_;
};

class StatisticsExtractionComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:
  // While creating the output we sum over row ranges of the input.
  // forward_indexes.Dim() equals the number of rows of the output, and each
  // element is a (start, end) range of inputs that is summed over.
  CuArray<Int32Pair> forward_indexes;

  // This vector stores the number of inputs for each output.  Normally this will be
  // the same as the component's output_period_ / input_period_, but could be less
  // due to edge effects at the utterance boundary.
  CuVector<BaseFloat> counts;

  // Each input row participates in exactly one output element, and
  // 'backward_indexes' identifies which row of the output each row
  // of the input is part of.  It's used in backprop.
  CuArray<int32> backward_indexes;

  // Returns a newly allocated copy of this object, made with the copy
  // constructor.
  ComponentPrecomputedIndexes *Copy() const {
    return new StatisticsExtractionComponentPrecomputedIndexes(*this);
  }

  virtual void Write(std::ostream &os, bool binary) const;

  virtual void Read(std::istream &is, bool binary);

  virtual std::string Type() const { return "StatisticsExtractionComponentPrecomputedIndexes"; }

  // The destructor is public and virtual, like that of the other
  // *PrecomputedIndexes classes in this file, so that objects of this class
  // can be destroyed directly (e.g. as locals or through a pointer to this
  // class) as well as through a pointer to ComponentPrecomputedIndexes.
  virtual ~StatisticsExtractionComponentPrecomputedIndexes() { }
};

/*
  Class StatisticsPoolingComponent is used together with
  StatisticsExtractionComponent to extract moving-average mean and
  standard-deviation statistics.

  StatisticsPoolingComponent pools the stats over a specified window and
  computes means and possibly log-count and stddevs from them for you.

 # In StatisticsPoolingComponent, the first element of the input is interpreted
 # as a count, which we divide by.
 # Optionally the log of the count can be output, and you can allow it to be
 # repeated several times if you want (useful for systems using the jesus-layer).
 # The output dimension is equal to num-log-count-features plus (input-dim - 1).

 # If num-log-count-features=0, the output dimension is the input dimension minus one.
 # If output-stddevs=true, then it expects the input-dim to be of the form 2n+1 where n is
 #  presumably the original feature dim, and it interprets the last n dimensions of the feature
 #  as a variance; it outputs the square root of the variance instead of the actual variance.

 configs and their defaults:  input-dim=-1, input-period=1, left-context=-1, right-context=-1,
    num-log-count-features=0, output-stddevs=true, variance-floor=1.0e-10

 You'd access the output of the StatisticsPoolingComponent using rounding, e.g.
  Round(component-name, 10)
 or whatever, instead of just component-name, because its output is only defined at multiples
 of its input-period.

 The output of StatisticsPoolingComponent will only be defined if at least one input was defined.
 */
class StatisticsPoolingComponent: public Component {
 public:
  // The default constructor sets defaults that would not pass Check(); real
  // initialization happens through InitFromConfig(), Read() or the copy
  // constructor.
  StatisticsPoolingComponent();

  // Copy constructor; Copy() relies on it.
  StatisticsPoolingComponent(const StatisticsPoolingComponent &other);

  virtual int32 InputDim() const {
    return input_dim_;
  }

  // The output dimension is the input dimension, plus the number of log-count
  // features, minus one.
  virtual int32 OutputDim() const {
    const int32 dim_with_log_counts = input_dim_ + num_log_count_features_;
    return dim_with_log_counts - 1;
  }

  virtual void InitFromConfig(ConfigLine *cfl);

  virtual std::string Type() const {
    return "StatisticsPoolingComponent";
  }

  // kReordersIndexes and kBackpropAdds are always set.  kBackpropNeedsOutput
  // is added when stddevs are output or any log-count features are requested;
  // kBackpropNeedsInput is added when there are no log-count features.
  virtual int32 Properties() const {
    int32 flags = kReordersIndexes | kBackpropAdds;
    if (output_stddevs_ || num_log_count_features_ > 0)
      flags |= kBackpropNeedsOutput;
    if (num_log_count_features_ == 0)
      flags |= kBackpropNeedsInput;
    return flags;
  }

  virtual void* Propagate(
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in,
      CuMatrixBase<BaseFloat> *out) const;

  virtual void Backprop(
      const std::string &debug_info,
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in_value,
      const CuMatrixBase<BaseFloat> &out_value,
      const CuMatrixBase<BaseFloat> &out_deriv,
      void *memo,
      Component *,  // to_update,
      CuMatrixBase<BaseFloat> *in_deriv) const;

  // Reads the component from a stream.  The Component is required to already
  // have the correct type.
  virtual void Read(std::istream &is, bool binary);

  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;

  // Returns a newly allocated copy of this component, made with the copy
  // constructor.
  virtual Component* Copy() const {
    StatisticsPoolingComponent *duplicate =
        new StatisticsPoolingComponent(*this);
    return duplicate;
  }

  // The functions below only need to be reimplemented by GeneralComponents.
  virtual void GetInputIndexes(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      std::vector<Index> *desired_indexes) const;

  // Returns true if at least one of its inputs is computable.
  virtual bool IsComputable(
      const MiscComputationInfo &misc_info,
      const Index &output_index,
      const IndexSet &input_index_set,
      std::vector<Index> *used_inputs) const;

  // Reorders both the input and the output indexes so that they are sorted
  // on n first, then on x, then on t.
  virtual void ReorderIndexes(
      std::vector<Index> *input_indexes,
      std::vector<Index> *output_indexes) const;

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

 private:
  // Verifies that the parameters are valid.
  void Check() const;

  // Assignment is disallowed: the operator is declared private.
  StatisticsPoolingComponent &operator =(
      const StatisticsPoolingComponent &other);

  int32 input_dim_;
  int32 input_period_;
  int32 left_context_;
  int32 right_context_;
  int32 num_log_count_features_;
  bool output_stddevs_;
  BaseFloat variance_floor_;
};

class StatisticsPoolingComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:

  // The first stage of creating the output sums over row ranges of the input.
  // forward_indexes.Dim() equals the number of output rows, and each element
  // is the (start, end) range of inputs that is summed over.
  CuArray<Int32Pair> forward_indexes;

  // Holds the same information as forward_indexes in a different format.
  // backward_indexes.Dim() equals the number of input rows, and each element
  // is the (start, end) range of outputs in whose sums this input index
  // appears.  Such a representation is possible because of the way the inputs
  // and outputs are ordered and because the elements that appear in each sum
  // are selected using a window.  This quantity is used in backprop.
  CuArray<Int32Pair> backward_indexes;

  virtual ~StatisticsPoolingComponentPrecomputedIndexes() { }

  // Returns a newly allocated copy of this object, made with the copy
  // constructor.
  ComponentPrecomputedIndexes *Copy() const {
    StatisticsPoolingComponentPrecomputedIndexes *duplicate =
        new StatisticsPoolingComponentPrecomputedIndexes(*this);
    return duplicate;
  }

  virtual void Write(std::ostream &os, bool binary) const;

  virtual void Read(std::istream &is, bool binary);

  virtual std::string Type() const {
    return "StatisticsPoolingComponentPrecomputedIndexes";
  }
};

// BackpropTruncationComponent zeroes out the gradients every certain number
// of frames, and also provides the same gradient-clipping functionality as
// ClipGradientComponent.
// This component is used to prevent the gradient explosion problem in
// recurrent neural networks.
class BackpropTruncationComponent: public Component {
 public:
  // Builds the component from explicit settings by forwarding all of them to
  // Init().
  BackpropTruncationComponent(int32 dim,
                              BaseFloat scale,
                              BaseFloat clipping_threshold,
                              BaseFloat zeroing_threshold,
                              int32 zeroing_interval,
                              int32 recurrence_interval) {
    Init(dim, scale, clipping_threshold, zeroing_threshold,
         zeroing_interval, recurrence_interval);
  }

  // Default constructor: zero dimension, unit scale, both thresholds at -1,
  // both intervals at zero, and all statistics set to zero.
  BackpropTruncationComponent():
      dim_(0),
      scale_(1.0),
      clipping_threshold_(-1),
      zeroing_threshold_(-1),
      zeroing_interval_(0),
      recurrence_interval_(0),
      num_clipped_(0),
      num_zeroed_(0),
      count_(0),
      count_zeroing_boundaries_(0) { }

  // Input and output share the same dimension.
  virtual int32 InputDim() const {
    return dim_;
  }

  virtual int32 OutputDim() const {
    return dim_;
  }

  virtual void InitFromConfig(ConfigLine *cfl);

  void Init(int32 dim,
            BaseFloat scale,
            BaseFloat clipping_threshold,
            BaseFloat zeroing_threshold,
            int32 zeroing_interval,
            int32 recurrence_interval);

  virtual std::string Type() const {
    return "BackpropTruncationComponent";
  }

  // Sets the kPropagateInPlace and kBackpropInPlace flags.
  virtual int32 Properties() const {
    return kPropagateInPlace | kBackpropInPlace;
  }

  virtual void ZeroStats();

  virtual Component* Copy() const;

  virtual void* Propagate(
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &in,
      CuMatrixBase<BaseFloat> *out) const;

  virtual void Backprop(
      const std::string &debug_info,
      const ComponentPrecomputedIndexes *indexes,
      const CuMatrixBase<BaseFloat> &,  // in_value,
      const CuMatrixBase<BaseFloat> &,  // out_value,
      const CuMatrixBase<BaseFloat> &out_deriv,
      void *memo,
      Component *to_update,
      CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

  virtual void Scale(BaseFloat scale);

  virtual void Add(BaseFloat alpha, const Component &other);

  // Reads the component from a stream.  The Component is required to already
  // have the correct type.
  virtual void Read(std::istream &is, bool binary);

  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;

  virtual std::string Info() const;

  virtual ~BackpropTruncationComponent() {
  }

 private:
  // The dimension shared by input and output.
  int32 dim_;

  // Scale applied in the forward propagation (and, to match, in the
  // backprop).  It is normally expected to be 1, but other values (e.g.
  // slightly less than 1) can be used to produce variants of LSTMs where the
  // activations are bounded.
  BaseFloat scale_;

  // Clipping threshold (e.g., 30); corresponds to max-row-norm.
  BaseFloat clipping_threshold_;

  // Zeroing threshold (e.g., 3); corresponds to max-row-norm.
  BaseFloat zeroing_threshold_;

  // Interval (e.g., 20, in number of frames) at which the gradient would be
  // zeroed if its norm is above zeroing_threshold_.
  int32 zeroing_interval_;

  // Should be the absolute recurrence offset used in RNNs (e.g., 3).  It is
  // used to tell whether the index the component is processing crosses a
  // boundary that is a multiple of zeroing_interval_ frames.
  int32 recurrence_interval_;

  // Component-node name, described as being used in the destructor to print
  // out stats of self-repair.
  // NOTE(review): the destructor declared in this class is empty, so that
  // description looks stale -- confirm where this member is actually used.
  std::string debug_info_;

  // Assignment is disallowed: the operator is declared private.
  BackpropTruncationComponent &operator =
      (const BackpropTruncationComponent &other);

 protected:
  // Statistics.  An "element" here corresponds to a row of the derivative
  // matrix.
  double num_clipped_;  // how many elements were clipped
  double num_zeroed_;   // how many elements were zeroed
  double count_;        // how many elements were processed
  double count_zeroing_boundaries_;  // how many zeroing boundaries there were
                                     // at which we had the opportunity to
                                     // zero the gradient

};

class BackpropTruncationComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:

  // 'zeroing' has one element per row of out-deriv.  An element is either
  // -1.0, meaning the corresponding frame is one whose gradient we need to
  // consider zeroing, or 0.0 otherwise.
  CuVector<BaseFloat> zeroing;

  // Cached negative of the sum of the elements of 'zeroing' (so the value is
  // positive).  It is computed on the CPU and stored here in order to make
  // fewer CUDA calls.
  BaseFloat zeroing_sum;

  BackpropTruncationComponentPrecomputedIndexes():
      zeroing_sum(0.0) {}

  // The destructor is virtual so that an object of this class can be deleted
  // through a pointer to ComponentPrecomputedIndexes.
  virtual ~BackpropTruncationComponentPrecomputedIndexes() { }

  // Returns a newly allocated copy of this object, made with the copy
  // constructor.
  virtual ComponentPrecomputedIndexes* Copy() const {
    BackpropTruncationComponentPrecomputedIndexes *duplicate =
        new BackpropTruncationComponentPrecomputedIndexes(*this);
    return duplicate;
  }

  virtual void Write(std::ostream &ostream, bool binary) const;

  virtual void Read(std::istream &istream, bool binary);

  virtual std::string Type() const {
    return "BackpropTruncationComponentPrecomputedIndexes";
  }
};


/*
   ConstantComponent returns a constant value for all requested
   indexes, and it has no dependencies on any input.
   It's like a ConstantFunctionComponent, but done the "right"
   way without requiring an unnecessary input.
   It is optionally trainable, and optionally you can use natural
   gradient.

   Configuration values accepted by this component, with defaults if
   applicable:

      output-dim              Dimension that this component outputs.
      is-updatable=true       True if you want this to be updatable.
      use-natural-gradient=true  True if you want the update to use natural gradient.
      output-mean=0.0         Mean of the parameters at initialization (the parameters
                              are what it outputs).
      output-stddev=0.0       Standard deviation of the parameters at initialization.


  Values inherited from UpdatableComponent (see its declaration in
  nnet-component-itf for details):
     learning-rate
     learning-rate-factor
     max-change
*/
class ConstantComponent: public UpdatableComponent {
 public:
  ConstantComponent();

  ConstantComponent(const ConstantComponent &other);

  // Name of this component type.
  virtual std::string Type() const { return "ConstantComponent"; }

  // This component needs no input, so the value returned here is really a
  // don't-care; it simply mirrors the output dimension.
  virtual int32 InputDim() const {
    return output_.Dim();
  }

  // The output dimension is the dimension of the stored output vector.
  virtual int32 OutputDim() const {
    return output_.Dim();
  }

  // Returns kUpdatableComponent if is_updatable_ is true, and otherwise zero
  // (no property flags at all).
  virtual int32 Properties() const {
    if (is_updatable_)
      return kUpdatableComponent;
    return 0;
  }

  virtual std::string Info() const;

  // Possible config values, with their defaults:
  //   is-updatable=true use-natural-gradient=true output-dim=-1
  //   output-mean=0 output-stddev=0
  virtual void InitFromConfig(ConfigLine *cfl);

  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;

  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &,  // in_value
                        const CuMatrixBase<BaseFloat> &,  // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        void *memo,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual void Read(std::istream &is, bool binary);

  virtual void Write(std::ostream &os, bool binary) const;

  virtual Component* Copy() const;

  // The next two functions only need to be reimplemented for
  // GeneralComponents.

  // No inputs are required for any output index, so the list of desired
  // indexes is always left empty.
  virtual void GetInputIndexes(const MiscComputationInfo &,  // misc_info
                               const Index &,  // output_index
                               std::vector<Index> *desired_indexes) const {
    desired_indexes->clear();
  }

  // Since this component requires no inputs, every output index is
  // computable: this always returns true, and 'used_inputs' (if it was
  // supplied) is emptied.
  virtual bool IsComputable(const MiscComputationInfo &,  // misc_info
                            const Index &,  // output_index
                            const IndexSet &,  // input_index_set
                            std::vector<Index> *used_inputs) const {
    if (used_inputs) {
      used_inputs->clear();
    }
    return true;
  }

  // Functions from the base-class UpdatableComponent.
  virtual void Scale(BaseFloat scale);
  virtual void Add(BaseFloat alpha, const Component &other);
  virtual void PerturbParams(BaseFloat stddev);
  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
  virtual int32 NumParameters() const;
  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
  virtual void UnVectorize(const VectorBase<BaseFloat> &params);

  virtual void ConsolidateMemory();

 private:

  // The value that this component outputs, stored as a vector; its dimension
  // is what InputDim() and OutputDim() report.
  CuVector<BaseFloat> output_;

  // True if this component is to be treated as updatable (see Properties()).
  bool is_updatable_;

  // If true, and if the component is updatable, the update uses natural
  // gradient.
  bool use_natural_gradient_;

  // NOTE(review): presumably the object that implements the natural-gradient
  // update enabled by use_natural_gradient_ -- confirm in the .cc file.
  OnlineNaturalGradient preconditioner_;

  // Assignment is disallowed; the operator is declared private for that
  // reason.
  const ConstantComponent &operator=(const ConstantComponent &other);
};


// DropoutMaskComponent outputs a random zero-or-one value for all dimensions of
// all requested indexes, and it has no dependencies on any input.  It's like a
// ConstantComponent, but with random output that has value zero
// a proportion (dropout_proportion) of the time, and otherwise one.
// This is not the normal way to implement dropout; you'd normally use a
// DropoutComponent (see nnet-simple-component.h).  This component is used while
// implementing per-frame dropout with the LstmNonlinearityComponent; we
// generate a two-dimensional output representing dropout
//
class DropoutMaskComponent: public RandomComponent {
 public:
  DropoutMaskComponent();

  DropoutMaskComponent(const DropoutMaskComponent &other);

  // Name of this component type.
  virtual std::string Type() const { return "DropoutMaskComponent"; }

  // This component needs no input, so the value returned here is really a
  // don't-care; it simply mirrors the output dimension.
  virtual int32 InputDim() const {
    return output_dim_;
  }

  virtual int32 OutputDim() const {
    return output_dim_;
  }

  // The only property flag of this component is kRandomComponent.
  virtual int32 Properties() const {
    return kRandomComponent;
  }

  virtual std::string Info() const;

  // Possible config values, with their defaults:
  //   dropout-proportion=0.5 output-dim=-1 continuous=false
  // With 'continuous=false' (the default), the output is 0 with probability
  // 'dropout-proportion' and 1 otherwise.
  // With 'continuous=true', the output is 1 plus dropout-proportion times a
  // value uniformly distributed on [-2, 2]  (e.g. if dropout-proportion is
  // 0.5, this amounts to a value uniformly distributed on [0,2]).
  virtual void InitFromConfig(ConfigLine *cfl);

  // Note: the matrix 'in' will be empty.
  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;

  // Backprop is a no-op: there is nothing to backprop to and nothing to
  // update.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &,  // in_value
                        const CuMatrixBase<BaseFloat> &,  // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        void *memo,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const {
  }

  virtual void Read(std::istream &is, bool binary);

  virtual void Write(std::ostream &os, bool binary) const;

  virtual Component* Copy() const;

  // The next two functions only need to be reimplemented for
  // GeneralComponents.

  // No inputs are required for any output index, so the list of desired
  // indexes is always left empty.
  virtual void GetInputIndexes(const MiscComputationInfo &,  // misc_info
                               const Index &,  // output_index
                               std::vector<Index> *desired_indexes) const {
    desired_indexes->clear();
  }

  // Since this component requires no inputs, every output index is
  // computable: this always returns true, and 'used_inputs' (if it was
  // supplied) is emptied.
  virtual bool IsComputable(const MiscComputationInfo &,  // misc_info
                            const Index &,  // output_index
                            const IndexSet &,  // input_index_set
                            std::vector<Index> *used_inputs) const {
    if (used_inputs) {
      used_inputs->clear();
    }
    return true;
  }

  // Overwrites the stored dropout proportion with 'p'; no range check is
  // done here.
  void SetDropoutProportion(BaseFloat p) {
    dropout_proportion_ = p;
  }

 private:

  // The output dimension (also reported as the input dimension).
  int32 output_dim_;

  // The dropout proportion; see the comment on InitFromConfig() for how it is
  // interpreted in the continuous and non-continuous cases.
  BaseFloat dropout_proportion_;

  // Corresponds to the 'continuous' config value described at
  // InitFromConfig().
  bool continuous_;

  // Assignment is disallowed; the operator is declared private for that
  // reason.
  const DropoutMaskComponent &operator=(const DropoutMaskComponent &other);
};


/**
   GeneralDropoutComponent implements dropout, including a continuous
   variant where the thing we multiply is not just zero or one, but may
   be a continuous value.  It is intended for the case where you want to
   either share the dropout mask across all of time, or across groups
   of 't' values (e.g. the first block of 10 values gets one dropout
   mask, the second block of 10 gets another one, and so on).

   It also has support for the frequency component of SpecAugment.

   Configuration values accepted on the command line, with defaults:

       dim        Dimension of the input and output of this component,
                  e.g. 512

       block-dim  Block size if you want the dropout mask to repeat,
                  e.g. if dim=512 and you set block-dim=128, there will
                  be a mask of dimension 128 repeated 4 times.  This can
                  be useful in convolutional setups.  If not specified,
                  block-dim defaults to 'dim'; if specified, it must be
                  a divisor of 'dim'.

       dropout-proportion=0.5   For conventional dropout, this is the proportion
                  of mask values that (in expectation) are zero; it would
                  normally be between 0 and 0.5.  The nonzero mask values
                  will be given values 1.0 / (1.0 - dropout_proportion), so
                  that the expected value is 1.0.  This behavior is different
                  from DropoutComponent and DropoutMaskComponent.

                  For continuous dropout (continuous==true), the dropout scales
                  will have values (1.0 + 2 * dropout-proportion *
                  Uniform[-1,1]).  This might seem like a strange choice, but it
                  means that dropout-proportion=0.5 gives us a kind of
                  'extremal' case where the dropout scales are distributed as
                  Uniform[0, 2] and we can pass in the dropout scale as if it
                  were a conventional dropout scale.

       time-period=0   This determines how the dropout mask interacts
                  with the time index (t).  In all cases, different sequences
                  (different 'n' values) get different dropout masks.
                  If time-period==0, then the dropout mask is shared across
                  all time values.  If you set time-period > 0, then the
                  dropout mask is shared across blocks of time values: for
                  instance if time-period==10, then we'll use one dropout
                  mask for t values 0 through 9, another for 10 through 19,
                  and so on.  In all cases, the dropout mask will be shared
                  across all 'x' values, although in most setups the x values
                  are just zero so this isn't very interesting.
                  If you set time-period==1 it would be similar to regular
                  dropout, and it would probably make more sense to just use the
                  normal DropoutComponent.

       specaugment-max-proportion=0  If nonzero, causes this component to
                 implement SpecAugment.  (Note: you probably would want this
                 after a batch-norm component so the average at input is
                 zero), and the input dim will be interpreted as some kind of
                 frequency space, e.g. linear or mel.  specaugment-max-proportion
                 will be the maximum proportion of the frequency
                 space that this component might zero out (so multiply this by
                 the input dim to get the maximum number of columns that might
                 be zeroed out);
                 the actual number of columns zeroed out for each sequence will
                 be randomly chosen between zero and the maximum.  Note: the
                 non-zeroed frequencies won't be multiplied by a constant more
                 than one as we would in the normal dropout mode.

       specaugment-max-regions=1  This can be set to a value greater than one
                 (e.g., 2) to implement a variant of SpecAugment where instead
                 of zeroing out a single region of the frequency spectrum
                 we zero out a randomly chosen number of regions, from one to
                 this number.  The maximum proportion of the frequency spectrum
                 that we remove is unaffected.

 */
class GeneralDropoutComponent: public RandomComponent {
 public:
  virtual int32 InputDim() const { return dim_; }

  virtual int32 OutputDim() const { return dim_; }

  virtual std::string Info() const;

  virtual void InitFromConfig(ConfigLine *cfl);

  GeneralDropoutComponent();

  GeneralDropoutComponent(const GeneralDropoutComponent &other);

  virtual std::string Type() const { return "GeneralDropoutComponent"; }
  virtual int32 Properties() const {
    return kRandomComponent|kPropagateInPlace|kBackpropInPlace|kUsesMemo|
        (block_dim_ != dim_ ? (kInputContiguous|kOutputContiguous) : 0);
  }

  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, // in_value
                        const CuMatrixBase<BaseFloat> &, // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        void *memo,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual void DeleteMemo(void *memo) const {
    delete static_cast<CuMatrix<BaseFloat>*>(memo);
  }

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

  virtual void Read(std::istream &is, bool binary);
  virtual void Write(std::ostream &os, bool binary) const;

  virtual Component* Copy() const;

  void SetDropoutProportion(BaseFloat p) { dropout_proportion_ = p; }

 private:

  // Returns a random matrix reflecting the masking we are applying.
  // In the normal case where we are doing a
  // of dimension 'num_mask_rows' by 'block_dim_'.  This
  // should not be called if test_mode_ is true or dropout_proportion_ is zero.
  CuMatrix<BaseFloat> *GetMemo(int32 num_mask_rows) const;


  // The input and output dimension
  int32 dim_;

  // block_dim_ must divide dim_.
  int32 block_dim_;

  // time_period_ can be zero if we want all 't' values to share the same
  // dropout mask, and a value more than zero if we want blocks of 't' values to
  // share the dropout mask.  For example, if time_period_ is 10, blocks of size
  // 10 frames will share the same dropout mask.
  int32 time_period_;

  BaseFloat dropout_proportion_;

  BaseFloat specaugment_max_proportion_;

  int32 specaugment_max_regions_;

  bool continuous_;

  const GeneralDropoutComponent &operator
  = (const GeneralDropoutComponent &other); // Disallow.
};

// This stores some precomputed indexes for GeneralDropoutComponent.
// This object is created for every instance of the Propagate()
// function in the compiled computation.
class GeneralDropoutComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:


  // num_mask_rows is the number of rows in the dropout-mask matrix, which will
  // in the normal case equal the number of sequences we are processing.  Its
  // num-cols is the block_dim_ of the component (e.g. might be the InputDim()
  // (which is the same as OutputDim()), or maybe less if the block-dim option
  // was specified.
  int32 num_mask_rows;

  // 'indexes' is of dimension (the number of rows in the matrix we're doing
  // Propagate() or Backprop() on) times the (dim_ / block_dim_) of the
  // GeneralDropoutComponent.  Each value is in the range [0, num_mask_rows-1],
  // and each value is repeated (dim_ / block_dim_) times.  This array is used
  // to multiply the reshaped values or derivatives by the appropriate rows of
  // the dropout matrix.
  CuArray<int32> indexes;

  virtual ~GeneralDropoutComponentPrecomputedIndexes() { }

  ComponentPrecomputedIndexes *Copy() const {
    return new GeneralDropoutComponentPrecomputedIndexes(*this);
  }

  virtual void Write(std::ostream &os, bool binary) const;

  virtual void Read(std::istream &is, bool binary);

  virtual std::string Type() const {
    return "GeneralDropoutComponentPrecomputedIndexes";
  }
};


// Forward declaration: SpecAugmentTimeMaskComponent::GetMemo() takes a
// reference to this type, whose definition only appears further below.
class SpecAugmentTimeMaskComponentPrecomputedIndexes;

/**
   SpecAugmentTimeMaskComponent implements the time part of SpecAugment.
   Instead of zeroing out a single time-region of the input, though,
   it zeroes out multiple smaller time-regions.

   Configuration values accepted on the command line, with defaults:

       dim        Dimension of the input and output of this component,
                  e.g. 512


       zeroed-proportion=0.25  Proportion of the input that is to be zeroed;
                  should be in the range (0, 1).

       time-mask-max-frames=10   The maximum time duration of the *zeroed*
                  regions.  The non-zeroed regions in between will have maximum
                 duration equal to this times (1-z)/z, where z
                 is zeroed-proportion.
 */
class SpecAugmentTimeMaskComponent: public RandomComponent {
 public:
  virtual int32 InputDim() const { return dim_; }

  virtual int32 OutputDim() const { return dim_; }

  virtual std::string Info() const;

  virtual void InitFromConfig(ConfigLine *cfl);

  SpecAugmentTimeMaskComponent();

  SpecAugmentTimeMaskComponent(const SpecAugmentTimeMaskComponent &other);

  virtual std::string Type() const { return "SpecAugmentTimeMaskComponent"; }
  virtual int32 Properties() const {
    return kRandomComponent|kPropagateInPlace|kBackpropInPlace|kUsesMemo;
  }

  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, // in_value
                        const CuMatrixBase<BaseFloat> &, // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        void *memo,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual void DeleteMemo(void *memo) const {
    delete static_cast<CuVector<BaseFloat>*>(memo);
  }

  virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
      const MiscComputationInfo &misc_info,
      const std::vector<Index> &input_indexes,
      const std::vector<Index> &output_indexes,
      bool need_backprop) const;

  virtual void Read(std::istream &is, bool binary);
  virtual void Write(std::ostream &os, bool binary) const;

  virtual Component* Copy() const;

 private:

  // Returns a random vector reflecting the masking we are applying.
  CuVector<BaseFloat> *GetMemo(
      const SpecAugmentTimeMaskComponentPrecomputedIndexes &indexes) const;


  // The input and output dimension
  int32 dim_;

  BaseFloat zeroed_proportion_;

  int32 time_mask_max_frames_;

  const SpecAugmentTimeMaskComponent &operator
  = (const SpecAugmentTimeMaskComponent &other); // Disallow.
};

// This stores some precomputed indexes for SpecAugmentTimeMaskComponent.
// This object is created for every instance of the Propagate()
// function in the compiled computation.
class SpecAugmentTimeMaskComponentPrecomputedIndexes:
      public ComponentPrecomputedIndexes {
 public:

  // 'indexes' is indexed first by sequence and then by time within that
  // sequence; each list indexes[s] is a consecutive list of the elements of
  // that sequence (e.g. t=0, t=1, and so on).  The int32 values inside these
  // lists are row-indexes into the matrix that is at the input and output of
  // this component.
  std::vector<std::vector<int32> > indexes;

  // 'tot_size' is the total number of elements in 'indexes', equal to the
  // num-rows of the matrix we're doing dropout on.
  int32 tot_size;

  virtual ~SpecAugmentTimeMaskComponentPrecomputedIndexes() { }

  ComponentPrecomputedIndexes *Copy() const {
    return new SpecAugmentTimeMaskComponentPrecomputedIndexes(*this);
  }

  virtual void Write(std::ostream &os, bool binary) const;

  virtual void Read(std::istream &is, bool binary);

  virtual std::string Type() const {
    return "SpecAugmentTimeMaskComponentPrecomputedIndexes";
  }
};


} // namespace nnet3
} // namespace kaldi


#endif