use strict;
use warnings;
use locale;

# Inflected-form generator driven by a reference lexicon.
#
# Usage: SCRIPT LEXIQUE_REFERENCE < queries
#
# The reference lexicon holds one entry per line, with three tab-separated
# fields: inflected form, lemma, category tag.  The first character of the
# tag is used as the coarse category.
#
# Each line on standard input must look like "lemma/X", where X is a single
# uppercase letter naming a coarse category.  For the longest ending of the
# lemma that was seen in the lexicon for that category, one line
# "form<TAB>lemma<TAB>tag" is printed per tag known for that ending, using
# the form ending most frequently associated with it.
#
# NOTE(review): no I/O layer is set, so strings are handled as the bytes
# read from the files; confirm this matches the encoding of the lexicon.

if (@ARGV != 1) {
    die "Usage : ", $0, " LEXIQUE_REFERENCE\n";
}

my $lexicon_path = $ARGV[0];
open(my $lex, "<", $lexicon_path)
    or die "impossible d'ouvrir ", $lexicon_path, " : ", $!;

# %cats: coarse category => { full tag => 1 }
my %cats;

# %flex: coarse category => lemma ending => full tag => form ending => count
my %flex;

ENTREE:
while (my $ligne = <$lex>) {
    chomp($ligne);
    my ($forme, $lemme, $cat) = split(/\t/, $ligne);

    # An entry without a tag can never be matched by a query (queries need
    # an uppercase category letter), so skip it instead of storing junk.
    if (!defined($cat) || $cat eq '') {
        warn $ligne, " format incorrect";
        next ENTREE;
    }

    my $c = substr($cat, 0, 1);
    $cats{$c}{$cat} = 1;

    my $ll = length($lemme);
    my $lf = length($forme);
    my $max_prefix = $ll < $lf ? $ll : $lf;

    # Record one (lemma ending -> form ending) rule for every common prefix
    # of the lemma and the form, including the empty prefix.
    for (my $i = 0; $i <= $max_prefix; $i++) {
        # Once the prefixes differ, every longer prefix differs as well.
        last if substr($lemme, 0, $i) ne substr($forme, 0, $i);
        $flex{$c}{ substr($lemme, $i) }{$cat}{ substr($forme, $i) }++;
    }
}
close($lex);

REQUETE:
while (my $ligne = <STDIN>) {
    chomp($ligne);

    if ($ligne =~ /^(.*)\/(\p{Lu})$/) {
        my ($lemme, $c) = ($1, $2);

        # Unknown coarse category: nothing can be generated.
        my $endings = $flex{$c};
        next REQUETE unless $endings;

        # Try the longest ending first (the whole lemma), then shorter ones,
        # and stop at the first ending known for this category.
        SUFFIXE:
        for my $i (0 .. length($lemme)) {
            my $finale = substr($lemme, $i);
            my $by_cat = $endings->{$finale};
            next SUFFIXE unless $by_cat;

            my $radical = substr($lemme, 0, $i);

            # Sorted so that the output order does not depend on hash order.
            foreach my $cat (sort keys %{ $cats{$c} }) {
                my $count = $by_cat->{$cat};
                next unless $count;

                # Most frequent form ending first; ties are broken by string
                # order so that the chosen form is reproducible.
                my @flexion = sort {
                    $count->{$b} <=> $count->{$a} || $a cmp $b
                } keys %{$count};

                print $radical, $flexion[0], "\t", $lemme, "\t", $cat, "\n";
            }
            last SUFFIXE;
        }
    }
    else {
        warn $ligne, " format incorrect";
    }
}