# Lists every open-class word form of a tagged text that is absent from a
# reference lexicon, one form per line on standard output.
#
# Usage: SCRIPT LEXIQUE_REFERENCE TEXTE_ETIQUETE
#   LEXIQUE_REFERENCE  tab-separated file; only the first column (the word
#                      form) is used.
#   TEXTE_ETIQUETE     tab-separated file whose columns are form, category
#                      and lemma.
#
# Dies with a usage message when the argument count is wrong, and with the
# system error when either file cannot be opened.
use strict;
use warnings;
use locale;

if (@ARGV != 2) {
    die "Usage : ", $0, " LEXIQUE_REFERENCE TEXTE_ETIQUETE\n";
}
my ($lexicon_path, $text_path) = @ARGV;

# --- Load the reference lexicon: first column of each line is a known form.
open(my $lex_fh, "<", $lexicon_path)
    or die "impossible d'ouvrir ", $lexicon_path, " : $!";
my %lex;
while (my $ligne = <$lex_fh>) {
    chomp($ligne);
    my ($forme) = split(/\t/, $ligne);
    # An empty line yields no fields; record it under the empty key, which
    # is what an undefined hash key stringifies to anyway.
    $forme = '' unless defined $forme;
    $lex{$forme} = 1;
}
close($lex_fh);

# --- Collect the candidate forms from the tagged text.
open(my $text_fh, "<", $text_path)
    or die "impossible d'ouvrir ", $text_path, " : $!";

# Categories considered open-class; any other category is ignored.
my %ouverte = ("NOM" => 1, "VER" => 1, "ADJ" => 1, "ADV" => 1);
my %text;
while (my $ligne = <$text_fh>) {
    chomp($ligne);
    my ($forme, $cat, $lemme) = split(/\t/, $ligne);

    # Lines with fewer than three columns can never pass the tests below.
    next unless defined $cat and defined $lemme;

    # Keep only the main category: drop everything from the first ':' on.
    $cat =~ s/:.*$//;

    # Keep the form only when its lemma is made purely of lowercase letters.
    # NOTE(review): no encoding layer is set on the handles, so how \p{Ll}
    # treats accented letters depends on the input encoding - confirm.
    if (exists $ouverte{$cat} and $lemme =~ /^\p{Ll}+$/) {
        $text{lc($forme)} = 1;
    }
}
close($text_fh);

# --- Report lower-cased text forms missing from the lexicon. The lexicon
# forms are compared as written (they are not lower-cased). Output is sorted
# so that successive runs give the same order.
foreach my $f (sort keys %text) {
    if (not exists $lex{$f}) {
        print $f, "\n";
    }
}