# tagLem2xml.pl (1022 bytes)
#!/usr/bin/perl 

use strict;
use warnings;

# Convert a tagged/lemmatised token file into a "Segmentation" XML document.
#
# Usage: tagLemm2xml <fichier.tagLem> <fichier.doc.xml>
#
# Each input line is split on single spaces; the first three fields are
# called word, tag and lemma below. Those names are inferred from how the
# fields are used here -- TODO confirm against the producer of the .tagLem
# file.
#
#   * A line whose third field contains '#' starts a new <phr>/<wd> pair and
#     flushes the <lem> entries queued since the previous one.
#   * Any other line appends its first field to the current <wd> text and,
#     unless the tag contains MOTINC, queues a <lem> element whose weight
#     ("poids") is 10 when the tag starts with 'X' and 1 otherwise.
#
# Either file name may be '-' to use STDIN / STDOUT, which the two-argument
# open() this script previously relied on also accepted.
#
# NOTE(review): field values are written verbatim (no XML escaping) and no
# I/O encoding layer is applied, although the prolog declares ISO-8859-1.

if (@ARGV != 2) {
    print STDERR "usage : ./tagLemm2xml <fichier.tagLem> <fichier.doc.xml>\n";
    exit(1);
}

my ($in_path, $out_path) = @ARGV;

# Three-argument open keeps characters such as '>' or '|' in a file name from
# being interpreted as an open mode; failures abort instead of being ignored.
my $tag_fh;
if ($in_path eq '-') {
    $tag_fh = \*STDIN;
}
else {
    open($tag_fh, '<', $in_path)
        or die "Cannot open '$in_path' for reading: $!\n";
}

my $xml_fh;
if ($out_path eq '-') {
    $xml_fh = \*STDOUT;
}
else {
    open($xml_fh, '>', $out_path)
        or die "Cannot open '$out_path' for writing: $!\n";
}

print {$xml_fh} "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE Segmentation>\n<Segmentation>\n<seg>\n";

# <lem> elements (or a single space for MOTINC tokens) queued for the phrase
# currently being written; emitted when the next marker line or EOF is reached.
my $pending_lems = "";

# Set once a marker line has been seen while nothing was pending; from then on
# every marker line first closes the previous <wd>/<phr>.
my $phrase_started = 0;

while (my $line = <$tag_fh>) {
    chomp($line);
    my @fields = split(/ /, $line);

    # Short or blank lines leave fields undefined; treat them as empty strings
    # so they behave as before without triggering "uninitialized" warnings.
    my ($word, $tag, $lemma) = map { defined $_ ? $_ : "" } @fields[0 .. 2];

    # '[0-9]*' may match zero digits, so this tests for any '#' in the field.
    if ($lemma !~ m/#[0-9]*/) {
        print {$xml_fh} "$word ";

        if ($tag !~ m/MOTINC/) {
            my $poids = ($tag =~ m/^X.*/) ? 10 : 1;
            $pending_lems .= "\t\t<lem poids=\"" . $poids . "\"> " . $lemma . " </lem>\n";
        }
        else {
            # Keeps $pending_lems non-empty so the next marker line still
            # closes this phrase even though no <lem> was produced.
            $pending_lems .= " ";
        }
    }
    else {
        if ($pending_lems ne "" || $phrase_started == 1) {
            print {$xml_fh} ".\n\t</wd>\n$pending_lems</phr>\n<phr>\n\t<wd>\n\t\t$lemma ";
        }
        else {
            # Nothing to close yet: $pending_lems is empty here, so only the
            # opening tags are written.
            print {$xml_fh} "<phr>\n\t<wd>\n\t\t$lemma ";
            $phrase_started = 1;
        }
        $pending_lems = "";
    }
}

print {$xml_fh} "\n\t</wd>\n$pending_lems</phr>\n</seg>\n</Segmentation>";

# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close($xml_fh) or die "Cannot close '$out_path': $!\n";
close($tag_fh);