Blame view
egs/cifar/v1/image/matrix_to_image.py
3.37 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
#!/usr/bin/env python # Copyright 2017 Johns Hopkins University (author: Daniel Povey) # 2017 Yiwen Shao # 2017 Hossein Hadian # Apache 2.0 """ This script converts a Kaldi-format text matrix into a bitmap image. It reads the matrix from its stdin and writes the .bmp image to its stdout. For instance: cat <<EOF | image/matrix_to_image.py --color 3 > foo.png [ 0.0 0.5 1.0 0.0 0.0 0.0 ] EOF The image format is that the number of rows equals the width of the image, and the number of columns equals the height of the image times the number of channels (1 for black and white, 3 for color (RGB)), with the channel varying the fastest. The above example would produce a color image with width 2 and height 1. The first row corresponds to the left side of the image, and the first column corresponds to the top of the image. E.g. to see a (grayscale) line image from training feature files: imgid=0001_010006; copy-feats --binary=false $(grep $imgid data/train/feats.scp | cut -d' ' -f2) - | \ image/matrix_to_image.py --color=1 > $imgid.bmp """ from __future__ import division import argparse import sys from bmp_encoder import * parser = argparse.ArgumentParser(description="""Converts Kaldi-format text matrix representing an image on stdin into bmp image on stdout. See comments at top of script for more details.""") parser.add_argument('--color', type=int, choices=(1, 3), default=3, help='3 if the image is in RGB, 1 if the image is in grayscale.') args = parser.parse_args() matrix = [] num_rows = 0 num_cols = 0 while True: line = sys.stdin.readline().strip(' ').split() if line == []: break if line == ['[']: # deal with the case that the first row only contains "[" continue if line[0] == '[': # drop the "[" in the first row line = line[1:] if line[-1] == ']': # drop the "]" in the last row line = line[:-1] if num_cols == 0: num_cols = len(line) # initialize if len(line) != num_cols: raise Exception("All rows should be of the same length") line = [float(i) for i in line] # string to float if max(line) > 1: raise Excetion("Element value in the matrix should be normalized and no larger than 1") line = [int(x * 255) for x in line] # float to integer ranging from 0 to 255 matrix.append(line) num_rows += 1 if args.color == 3: if num_cols % 3 != 0: raise Exception("Number of columns should be a multiple of 3 in the color mode") width = num_rows height = num_cols/3 # reform the image matrix image_array = [[0 for i in range(width * 3)] for j in range(height)] for i in range(height): for j in range(width): image_array[i][3 * j] = matrix[j][3 * i] image_array[i][3 * j + 1] = matrix[j][3 * i + 1] image_array[i][3 * j + 2] = matrix[j][3 * i + 2] bmp_encoder(image_array, width, height) elif args.color == 1: width = num_rows height = num_cols # reform the image matrix image_array = [[0 for i in range(width * 3)] for j in range(height)] for i in range(height): for j in range(width): image_array[i][3 * j] = matrix[j][i] image_array[i][3 * j + 1] = matrix[j][i] image_array[i][3 * j + 2] = matrix[j][i] bmp_encoder(image_array, width, height) |