kaldi-cygwin-io-inl.h
4.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// util/kaldi-cygwin-io-inl.h
// Copyright 2015 Smart Action Company LLC (author: Kirill Katsnelson)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
#define KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
#ifndef _MSC_VER
#error This is a Windows-compatibility file. Something went wery wrong.
#endif
#include <string>
// This file is included only into kaldi-io.cc, and only if
// KALDI_CYGWIN_COMPAT is enabled.
//
// The routines map unix-ey paths passed to Windows programs from shell
// scripts in egs. Since shell scripts run under cygwin, they use cygwin's
// own mount table and a mapping to the file system. It is quite possible to
// create an intricate mapping that only cygwin's own API would be able
// to untangle. Unfortunately, the API to map between filenames is not
// available to non-cygwin programs. Running cygpath for every file operation
// would be cumbersome as well. So this is only a simplistic path resolution,
// assuming that the default cygwin prefix /cygdrive is used, and that all
// resolved unix-style full paths end up prefixed with /cygdrive. This is
// quite a sensible approach. We'll also try to map /dev/null and /tmp/**,
// die on all other /dev/** and warn about all other rooted paths.
namespace kaldi {
// Prefix predicate: returns true when `str` begins with `pfx`. An empty
// `pfx` is a prefix of every string, including the empty one.
static bool prefixp(const std::string& pfx, const std::string& str) {
  // A candidate longer than the string itself can never be its prefix.
  if (str.size() < pfx.size())
    return false;
  // Compare only the leading pfx.size() characters of str against pfx.
  return str.compare(0, pfx.size(), pfx) == 0;
}
// Path prefix that is recognized as introducing a drive letter
// ("/cygdrive/c/..."); this is cygwin's default mount prefix.
static std::string cygprefix("/cygdrive/");

// Maps a unix-style path to a native Windows path, without any special
// handling of /tmp:
//   - paths starting with "//" and paths not starting with "/" (relative,
//     native Windows, empty) are returned unchanged;
//   - "/dev/null" becomes the NUL device, "\\.\nul";
//   - any other "/dev/..." path is reported through KALDI_ERR;
//   - "/cygdrive/X" becomes "X:/" and "/cygdrive/X/rest" becomes "X:/rest",
//     where X is a single alphabetic drive letter;
//   - any other rooted path is reported through KALDI_WARN and returned
//     unchanged.
static std::string MapCygwinPathNoTmp(const std::string &filename) {
  // UNC(?), relative, native Windows and empty paths are ok already.
  if (prefixp("//", filename) || !prefixp("/", filename))
    return filename;

  // /dev/...
  if (filename == "/dev/null")
    return "\\\\.\\nul";
  if (prefixp("/dev/", filename)) {
    KALDI_ERR << "Unable to resolve path '" << filename
              << "' - only have /dev/null here.";
    // NOTE(review): KALDI_ERR is presumably fatal (the file header says we
    // "die" on these paths); this return keeps every path returning a value.
    return "\\\\.\\invalid";
  }

  // /cygdrive/?[/....]
  // Use the string's own size type so the length comparisons below do not
  // mix signed and unsigned operands.
  const std::string::size_type preflen = cygprefix.size();
  if (prefixp(cygprefix, filename)
      && filename.size() >= preflen + 1
      // isalpha() is only defined for values representable as unsigned char
      // (or EOF); a plain char holding a byte >= 0x80 may be negative, so
      // convert before classifying.
      && isalpha(static_cast<unsigned char>(filename[preflen]))
      && (filename.size() == preflen + 1 || filename[preflen + 1] == '/')) {
    // "<drive>:" followed by the rest of the path, or "/" for a bare drive.
    return std::string() + filename[preflen] + ':' +
        (filename.size() > preflen + 1 ? filename.substr(preflen + 1) : "/");
  }

  KALDI_WARN << "Unable to resolve path '" << filename
             << "' - cannot map unix prefix. "
             << "Will go on, but breakage will likely ensue.";
  return filename;
}
// extern for unit testing.
// Maps a unix-style path to a native Windows path, including /tmp.
// "/tmp" and "/tmp/..." are rebased onto the directory named by the TMP
// environment variable (TEMP is consulted when TMP is not set); every other
// path is passed straight to MapCygwinPathNoTmp().
std::string MapCygwinPath(const std::string &filename) {
  // /tmp[/....]
  const bool under_tmp = (filename == "/tmp") || prefixp("/tmp/", filename);
  if (!under_tmp)
    return MapCygwinPathNoTmp(filename);

  const char *temp_root = std::getenv("TMP");
  if (temp_root == nullptr)
    temp_root = std::getenv("TEMP");
  if (temp_root == nullptr) {
    KALDI_ERR << "Unable to resolve path '" << filename
              << "' - unable to find temporary directory. Set TMP.";
    return filename;
  }

  // Replace the leading "/tmp" (4 characters) with the temporary directory.
  std::string rebased(temp_root);
  rebased.append(filename, 4, std::string::npos);

  // The environment value may itself be a unix-style path when set from
  // cygwin, so the combined path goes through the mapper once more.
  return MapCygwinPathNoTmp(rebased);
}
// A popen implementation that passes the command line through cygwin
// bash.exe. This is necessary since some piped commands are cygwin links
// (e. g. fgrep is a soft link to grep), and some are #!-files, such as
// gunzip which is a shell script that invokes gzip, or kaldi's own run.pl
// which is a perl script.
//
// _popen uses cmd.exe or whatever shell is specified via the COMSPEC
// variable. Unfortunately, it adds a hardcoded " /c " to it, so we cannot
// just substitute the environment variable COMSPEC to point to bash.exe.
// Instead, quote the command and pass it to bash via its -c switch.
// Opens a pipe to `command` by running it as `<bash> -c "<command>"` through
// _popen(). `mode` is handed to _popen() unchanged, and its return value is
// returned as is.
static FILE *CygwinCompatPopen(const char* command, const char* mode) {
  // To speed up command launch marginally, optionally accept full path
  // to bash.exe via the BASH_EXE environment variable. This will not work
  // if the path contains spaces.
  const char *shell = std::getenv("BASH_EXE");
  if (shell == nullptr)
    shell = "bash.exe";

  std::string quoted(shell);
  quoted.append(" -c \"");
  // Copy the command, doubling every embedded double quote so that it stays
  // inside the surrounding quoted argument.
  for (const char *p = command; *p != '\0'; ++p) {
    if (*p == '"')
      quoted.push_back('"');
    quoted.push_back(*p);
  }
  quoted.push_back('"');

  return _popen(quoted.c_str(), mode);
}
} // namespace kaldi
#endif // KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_