# Interactive co-occurrence explorer for a tagged corpus.
#
# Input: one token per line, three tab-separated fields (form, tag, lemma).
# A line whose tag is "SENT" closes the current sentence. For every pair of
# lemmas found within $fenetre positions of each other in the same sentence,
# the script computes the pointwise mutual information (in bits):
#
#     log2( N * cooc(pivot, other) / ( freq(pivot) * freq(other) ) )
#
# where N is the number of non-SENT tokens. It then prompts for pivots on
# STDIN and lists each pivot's co-occurrents by decreasing score.
use strict;
use warnings;
use locale;

# Default half-width of the co-occurrence window (positions on each side).
my $fenetre = 3;

# Adds the co-occurrence counts of one sentence to %$cooc.
#   $phrase  - array ref of the sentence's lemmas, in order
#   $cooc    - hash ref updated in place: $cooc->{pivot}{other} = count
#   $largeur - window half-width
sub compter_phrase {
    my ( $phrase, $cooc, $largeur ) = @_;

    # Lowercase once so pivots and co-occurrents share the keys used in %freq.
    my @lemmes = map { lc $_ } @{$phrase};

    for my $i ( 0 .. $#lemmes ) {
        # Clamp the window to the sentence boundaries.
        my $debut = $i - $largeur;
        $debut = 0 if $debut < 0;
        my $fin = $i + $largeur;
        $fin = $#lemmes if $fin > $#lemmes;

        for my $j ( $debut .. $fin ) {
            next if $j == $i;    # a token does not co-occur with itself
            $cooc->{ $lemmes[$i] }{ $lemmes[$j] }++;
        }
    }
    return;
}

# Reads the tagged corpus from an open filehandle.
#   $fh      - input filehandle
#   $largeur - window half-width (defaults to $fenetre)
# Returns ( $n_forme, \%freq, \%cooc ): the number of non-SENT tokens, the
# frequency of each lowercased lemma, and the co-occurrence counts.
sub lire_corpus {
    my ( $fh, $largeur ) = @_;
    $largeur = $fenetre unless defined $largeur;

    my $n_forme = 0;
    my ( %freq, %cooc, @phrase );

    while ( my $ligne = <$fh> ) {
        chomp $ligne;
        $ligne =~ s/\r\z//;    # tolerate CRLF line endings

        my ( undef, $etiquette, $lemme ) = split /\t/, $ligne;
        next unless defined $etiquette;    # blank or malformed line

        if ( $etiquette eq "SENT" ) {
            compter_phrase( \@phrase, \%cooc, $largeur );
            @phrase = ();
            next;
        }

        next unless defined $lemme;        # token line without a lemma field
        push @phrase, $lemme;
        $freq{ lc $lemme }++;
        $n_forme++;
    }

    # A final sentence without a closing SENT line is already counted in
    # %freq and $n_forme, so its co-occurrences must be counted as well.
    compter_phrase( \@phrase, \%cooc, $largeur ) if @phrase;

    return ( $n_forme, \%freq, \%cooc );
}

# Computes the mutual information (base-2 log) of every co-occurring pair.
#   $n_forme - total number of tokens
#   $freq    - hash ref: lemma => frequency
#   $cooc    - hash ref: pivot => { other => co-occurrence count }
# Returns a hash ref with the same shape as $cooc holding the scores.
sub calculer_infomut {
    my ( $n_forme, $freq, $cooc ) = @_;

    my $log2 = log 2;
    my %infomut;
    for my $pivot ( keys %{$cooc} ) {
        my $paires = $cooc->{$pivot};
        for my $cooccurrent ( keys %{$paires} ) {
            $infomut{$pivot}{$cooccurrent} =
                log( ( $n_forme * $paires->{$cooccurrent} )
                    / ( $freq->{$pivot} * $freq->{$cooccurrent} ) ) / $log2;
        }
    }
    return \%infomut;
}

# Returns the keys of %$scores by decreasing score, ties broken by string
# comparison of the keys.
sub trier_cooccurrents {
    my ($scores) = @_;
    return sort { $scores->{$b} <=> $scores->{$a} or $a cmp $b }
        keys %{$scores};
}

# Entry point: expects exactly one argument, the path of the tagged text.
# Dies with a usage message otherwise, or if the file cannot be opened.
sub main {
    my @args = @_;

    if ( @args != 1 ) {
        die "Usage : ", $0, " texte_étiqueté\n";
    }

    open( my $texte, "<", $args[0] )
        or die "Impossible d'ouvrir ", $args[0], " : ", $!, "\n";
    my ( $n_forme, $freq, $cooc ) = lire_corpus( $texte, $fenetre );
    close $texte;

    my $infomut = calculer_infomut( $n_forme, $freq, $cooc );

    print "Quel pivot ? (X pour finir) : ";

    # Stop on "X" or on end of input; without the defined() test an
    # exhausted STDIN would make the loop prompt forever.
    while ( defined( my $pivot = <STDIN> ) ) {
        chomp $pivot;
        last if $pivot eq "X";

        # Keys are stored lowercased, so look the pivot up the same way.
        my $scores = $infomut->{ lc $pivot };
        if ( defined $scores ) {
            for my $cooccurrent ( trier_cooccurrents($scores) ) {
                print $cooccurrent, "\t",
                    sprintf( "%.2f", $scores->{$cooccurrent} ), "\n";
            }
        }
        else {
            print "\"", $pivot, "\" n'a pas de cooccurrents !\n";
        }
        print "Quel pivot ? (X pour finir) : ";
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main(@ARGV) unless caller;

1;