Blame view
tools/scripts/UrlConverter.pl
4.85 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
#!/usr/bin/perl while(<STDIN>){ print url($_); } sub url { my $entree = shift; my @sortie; my @tab_lignes = split(/ /,$entree); foreach my $text (@tab_lignes) { my @nv_ligne = (); # trime les blancs $text =~ s/\s+/ /g; if (($text =~ /http/) || ($text =~ /www/) || ($text =~ /ftp:/) || ($text =~ /\.html/) || ($text =~ /\.htm/) || ($text =~ /\@/)|| ($text =~ /\.org/) || ($text =~ /\.net/) || ($text =~ /\.com/) || ($text =~ /\.fr/) || ($text =~ /\.uk/) || ($text =~ /\.be/)) { $text =~ s/http\s*:\s*\/\/\s*/ http:\/\//g; $text =~ s/http\s*:\/\/\s*/ http:\/\//g; $text =~ s/htt\s*:\s*\/\/\s*/ http:\/\//g; $text =~ s/http:\/\s+/http:\/\//g; $text =~ s/\.www\./\. www\./g; $text =~ s/www\.\s+/www\./g; $text =~ s/www\s+\./www\./g; $text =~ s/ftp\.\s+/ftp\./g; $text =~ s/WWW\.\s+/www\./g; $text =~ s/\s+\//\//g; $text =~ s/\s*~\s*/~/g; $text =~ s/\/\s+/\//g; $text =~ s/\.\s+com/\.com/g; $text =~ s/\.\s+fr/\.fr/g; $text =~ s/\.\s+org/\.org/g; $text =~ s/\.\s+doc/\.doc/g; $text =~ s/\.\s+ppt/\.ppt/g; $text =~ s/\.\s+jpg/\.jpg/g; $text =~ s/\.\s+pdf/\.pdf/g; $text =~ s/\s+\.org/\.org/g; $text =~ s/\.\s+gov/\.gov/g; $text =~ s/\.\s+gouv\.\s+fr/\.gouv\.fr/g; $text =~ s/\s+\.gouv\.\s+fr/\.gouv\.fr/g; $text =~ s/\.\s+co\./\.co \./g; $text =~ s/\.\s+uk/\.uk/g; $text =~ s/\.\s+asso\.\s+fr/\.asso\.fr/g; $text =~ s/\.\s+qc/\.qc/g; $text =~ s/\.\s+de/\.de/g; $text =~ s/\.\s+uk/\.uk/g; $text =~ s/\.\s+ca/\.ca/g; $text =~ s/\.\s+ch/\.ch/g; $text =~ s/\.\s+net/\.net/g; $text =~ s/\.\s+th/\.th/g; $text =~ s/\.\s+nasa/\.nasa/g; $text =~ s/\.\s+ibm\.com/\.ibm\.com/g; $text =~ s/\.\s+club\-internet/\.club\-internet/g; $text =~ s/\.\s+yahoo/\.yahoo/g; $text =~ s/\.\s+oleane/\.oleane/g; $text =~ s/\.\s+html/\.html/g; $text =~ s/\.\s+asp/\.asp/g; $text =~ s/\.\s+php/\.php/g; $text =~ s/\.\s+htm/\.htm /g; $text =~ s/\s+\.html/\.html/g; $text =~ s/\s+\.htm/\.htm /g; $text =~ s/\.\s+HTM/\.htm /g; $text =~ s/\s+\[at\]\s+/\@/g; $text =~ s/http:\/\/([0-9]*[0-9]) ([0-9]*[0-9]) ([0-9]*[0-9]) ([0-9]*[0-9])/http:\/\/\1\.\2\.\3\.\4/g; $text =~ s/www\.([a-zA-Z]+)\- ([a-zA-Z]+)/www\.\1\-\2/g; $text =~ s/\.\s+com\./\.com \./g; $text =~ s/\.\s+fr\./\.fr \./g; $text =~ s/\.\s+org\./\.org \./g; $text =~ s/\.\s+pdf\./\.pdf \./g; $text =~ s/\.\s+de/\.de/g; $text =~ s/\s+\.org\./\.org \./g; $text =~ s/\.\s+gov\./\.gov \./g; $text =~ s/\.\s+uk\./\.uk \./g; $text =~ s/\.\s+qc\./\.qc \./g; $text =~ s/\.\s+ca\./\.ca \./g; $text =~ s/\.\s+ch\./\.ch \./g; $text =~ s/\.\s+net\./\.net \./g; $text =~ s/\.\s+th\./\.th \./g; $text =~ s/\.\s+asp\./\.asp \./g; $text =~ s/\.\s+php\./\.php \./g; $text =~ s/\.\s+htm\./\.htm \./g; $text =~ s/\s+\.html\./\.html \./g; $text =~ s/\s+\.htm\./\.htm \./g; $text =~ s/\.\s+HTM\./\.htm \./g; $text =~ s/\s+/ /g; my @line = split(/\s+/, $text); for(my $i = 0; $i < scalar(@line); $i++) { if (($line[$i] =~ /http/) || ($line[$i] =~ /www/)|| ($line[$i] =~ /ftp/)|| ($line[$i] =~ /@/) || ($line[$i] =~ /\.org/) || ($line[$i] =~ /\.net/) || ($line[$i] =~ /\.com/) || ($line[$i] =~ /\.fr/) || ($line[$i] =~ /\.uk/) || ($line[$i] =~ /\.gov/) || ($line[$i] =~ /\.pdf/) || ($line[$i] =~ /\.php3/) || ($line[$i] =~ /\.co/) || ($line[$i] =~ /\.be/)|| ($line[$i] =~ /\.de/)|| ($line[$i] =~ /\.doc/) || ($line[$i] =~ /\.ppt/)) { $line[$i] =~ s/wwww/ www /g; $line[$i] =~ s/\./ point /g; $line[$i] =~ s/\// slash /g; $line[$i] =~ s/:/ deux points /g; $line[$i] =~ s/-/ tiret /g; $line[$i] =~ s/_/ tiret bas /g; $line[$i] =~ s/~/ tilde /g; $line[$i] =~ s/@/ arobase /g; $line[$i] =~ s/#/ die2se /g; $line[$i] =~ s/\?/ point d' interrogation /g; $line[$i] =~ s/=/ e1gale /g; $line[$i] =~ s/([0-9]+)/ \1 /g; push(@nv_ligne, $line[$i]); } else { push(@nv_ligne, $line[$i]); } } push(@sortie, join(" ", @nv_ligne)); } else { push(@sortie, "$text"); } } return (join(" ", @sortie))." "; } |