#!/usr/bin/perl # Jussi Karlgren, SICS, 2003. jussi@sics.se # Use this script as you see fit. # No thanks necessary; bug reports gratefully accepted; # complaints disregarded. #--------------------------------------------------------------------- # Put data in a file with items on separate lines, # categorial information in one column and values in another; # sort file by value. # # Usage: kruskalwallis.perl -c -v # with columns numbered from 0 up. # require "getopts.pl"; &Getopts('c:v:d'); $critpos = $opt_c; $valpos = $opt_v; $debug = $opt_d; $n = 0; $k = 0; while(<>) { split; if (!($dir) && $prev && $_[$valpos] > $prev) { $dir = 1; }; if (!($dir) && $prev && $_[$valpos] < $prev) { $dir = -1; }; if ($dir < 0 && $_[$valpos] > $prev) { die "kruskal-wallis: not sorted at $n ($_[$valpos] > $prev)\n"; } elsif ($dir > 0 && $_[$valpos] < $prev) { die "kruskal-wallis: not sorted at $n ($_[$valpos] < $prev)\n"; }; $crit = $_[$critpos]; if ($_[$valpos] ne $prev) { $k = $an / $a if $a; $an = 0; $a = 0; foreach $c (keys %a) { $n{$c} += $a{$c}; $r{$c} += $k * $a{$c}; $a{$c} = 0; }; if ($debug && $n > 0) { print "$n $prev\t"; foreach $x (keys %n) { print " $x: $n{$x} $s{$x}/$n{$x} $r{$x}\t"; }; print "\n"; }; $prev = $_[$valpos]; }; $n++; $an += $n; $a++; $a{$crit}++; $s{$crit} += $_[$valpos]; $s2{$crit} += $_[$valpos]*$_[$valpos]; $sn += $_[$valpos]; $sn2 += $_[$valpos] * $_[$valpos]; }; $k = $an / $a if $a; foreach $c (keys %a) { $n{$c} += $a{$c}; $r{$c} += $k * $a{$c}; $a{$c} = 0; }; close(FN); if ($debug) { print "$n $prev\t"; foreach $x (keys %n) { print " $x: $n{$x} $s{$x}/$n{$x} $r{$x}\t"; }; print "\n\n"; }; print "Kruskal Wallis: "; print "$n cases "; if ($n) { print "(",$sn/$n," average)"; }; foreach $x (keys %n) { print ", $x: $n{$x} items "; if ($n{$x}) { print "(average ",$s{$x}/$n{$x},"; rank sum ",$r{$x},")"; }; }; print ".\n"; $df = 0; $hl = 0; foreach $x (keys %r) { if ($n{$x}) { $hl += $r{$x}*$r{$x}/$n{$x}; }; $df++; }; $H = 12*$hl/($n*($n+1))-3*($n+1) if $n; $df--; print "Kruskal-Wallis test statistic: $H - refer to khi2 tables with $df degrees of freedom.\n"; exit(0);