#!/usr/bin/env perl
# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0

# Line filter: reads lines from the files named on the command line (or
# from STDIN when none are given) and writes one line to STDOUT per input
# line.
#
# For every input line:
#   * the line is split on whitespace;
#   * the first field is printed unchanged, followed by a space
#     (presumably an utterance/record key -- confirm against callers);
#   * every remaining token is printed followed by a space, after being
#     transformed as follows:
#       - tokens containing "[...]", "<...>" or "!SIL" are kept intact and
#         prefixed with one extra space;
#       - in all other tokens a space is inserted in front of every
#         character belonging to the Unicode block CJK Unified Ideographs;
#         other characters are left as they are.
#
# The exact whitespace layout of the output (doubled and trailing spaces)
# is kept deliberately, so consumers of the output see no difference.

use strict;
use warnings;
use utf8;
use open qw(:encoding(utf8));

binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# Tokens matching this pattern are passed through without per-character
# processing.
my $passthrough_re = qr/\[.*\]|\<.*\>|!SIL/;

# A single character from the "CJK Unified Ideographs" Unicode block.
# NOTE(review): this is the basic block only; extension blocks are not
# matched -- confirm that this is intended.
my $cjk_re = qr/\p{InCJK_Unified_Ideographs}/;

# Returns the output form of one token (without the trailing separator
# space, which the caller appends).
sub segment_token {
    my ($token) = @_;

    # Pass-through tokens are emitted intact with one leading space.
    return " $token" if $token =~ $passthrough_re;

    # Put a space in front of each CJK ideograph; leave the rest untouched.
    (my $segmented = $token) =~ s/($cjk_re)/ $1/g;
    return $segmented;
}

while (my $line = <>) {
    my @fields = split ' ', $line;

    # A blank line has no fields; fall back to an empty first field so the
    # output for such a line is still a single space plus newline.
    my $key = @fields ? shift @fields : '';

    print "$key ", (map { segment_token($_) . ' ' } @fields), "\n";
}