matrix_to_image.py
3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
# Copyright 2017 Johns Hopkins University (author: Daniel Povey)
# 2017 Yiwen Shao
# 2017 Hossein Hadian
# Apache 2.0
""" This script converts a Kaldi-format text matrix into a bitmap image.
It reads the matrix from its stdin and writes the .bmp image to its
stdout.
For instance:
cat <<EOF | image/matrix_to_image.py --color 3 > foo.bmp
[ 0.0 0.5 1.0
0.0 0.0 0.0 ]
EOF
The image format is that the number of rows equals the width of the image, and the
number of columns equals the height of the image times the number of channels
(1 for black and white, 3 for color (RGB)), with the channel varying the
fastest. The above example would produce a color image with width 2 and
height 1. The first row corresponds to the left side of the image, and the
first column corresponds to the top of the image.
E.g. to see a (grayscale) line image from training feature files:
imgid=0001_010006;
copy-feats --binary=false $(grep $imgid data/train/feats.scp | cut -d' ' -f2) - | \
image/matrix_to_image.py --color=1 > $imgid.bmp
"""
from __future__ import division
import argparse
import sys
from bmp_encoder import *
def read_matrix(stream):
    """Parse a Kaldi-format text matrix from `stream` into 8-bit integer rows.

    Reading stops at end of input or at the first blank line.  The enclosing
    "[" and "]" tokens are dropped wherever they appear at the start / end of
    a row; rows that contain nothing but brackets are skipped.

    Args:
        stream: a text file-like object providing `readline()`.

    Returns:
        A list of rows, each a list of ints obtained as int(value * 255).

    Raises:
        Exception: if rows differ in length, or if a value is larger than 1.
        ValueError: if a token cannot be parsed as a float.
    """
    matrix = []
    num_cols = None
    # iter(callable, sentinel) keeps the original line-at-a-time reads and
    # stops when readline() returns '' (end of input).
    for raw_line in iter(stream.readline, ''):
        tokens = raw_line.strip('\n').split()
        if not tokens:  # a blank line terminates the matrix
            break
        if tokens[0] == '[':  # drop the "[" in the first row
            tokens = tokens[1:]
        if tokens and tokens[-1] == ']':  # drop the "]" in the last row
            tokens = tokens[:-1]
        if not tokens:
            # The row held only "[" and/or "]": there is no data to record.
            continue
        if num_cols is None:
            num_cols = len(tokens)  # the first data row fixes the row length
        if len(tokens) != num_cols:
            raise Exception("All rows should be of the same length")
        values = [float(token) for token in tokens]  # string to float
        if max(values) > 1:
            raise Exception("Element value in the matrix should be normalized and no larger than 1")
        # float to integer ranging from 0 to 255
        matrix.append([int(value * 255) for value in values])
    return matrix


def build_image_array(matrix, num_channels):
    """Rearrange the parsed matrix into rows of RGB triples.

    Each matrix row is one image column (left to right); within a matrix row
    the entries run from the top of the image downwards, with the channel
    varying the fastest.  Grayscale values are replicated into all three
    channels, so the result always has 3 * width entries per image row.

    Args:
        matrix: list of equally long rows of ints, as returned by read_matrix.
        num_channels: 3 for RGB input, 1 for grayscale input.

    Returns:
        A tuple (image_array, width, height), where image_array is a list of
        `height` rows, each holding 3 * width ints.

    Raises:
        Exception: in color mode, if the number of columns is not a multiple
            of 3.
        ValueError: if num_channels is neither 1 nor 3.
    """
    if num_channels not in (1, 3):
        raise ValueError("num_channels must be 1 or 3")
    width = len(matrix)
    num_cols = len(matrix[0]) if matrix else 0
    if num_channels == 3:
        if num_cols % 3 != 0:
            raise Exception("Number of columns should be a multiple of 3 in the color mode")
        # Floor division: with `from __future__ import division` a plain "/"
        # yields a float, which range() rejects.
        height = num_cols // 3
    else:
        height = num_cols

    # reform the image matrix
    image_array = []
    for y in range(height):
        image_row = []
        for x in range(width):
            if num_channels == 3:
                image_row.extend(matrix[x][3 * y:3 * y + 3])
            else:
                image_row.extend([matrix[x][y]] * 3)
        image_array.append(image_row)
    return image_array, width, height


def main():
    """Read a text matrix from stdin and hand the image to bmp_encoder."""
    parser = argparse.ArgumentParser(description="""Converts Kaldi-format text matrix
representing an image on stdin into bmp image on stdout. See
comments at top of script for more details.""")
    parser.add_argument('--color', type=int, choices=(1, 3), default=3,
                        help='3 if the image is in RGB, 1 if the image is in grayscale.')
    args = parser.parse_args()

    matrix = read_matrix(sys.stdin)
    image_array, width, height = build_image_array(matrix, args.color)
    bmp_encoder(image_array, width, height)


if __name__ == "__main__":
    main()