#!/usr/bin/perl
# Jussi Karlgren, SICS, 2003. jussi@sics.se
# Use this script as you see fit.
# No thanks necessary; bug reports gratefully accepted;
# complaints disregarded.
#---------------------------------------------------------------------
# Put data in a file with items on separate lines,
# categorial information in one column and values in another;
# sort file by value.
#
# Usage: kruskalwallis.perl -c -v
# with columns numbered from 0 up.
#
require "getopts.pl";
&Getopts('c:v:d');
$critpos = $opt_c;
$valpos = $opt_v;
$debug = $opt_d;
$n = 0;
$k = 0;
while(<>) {
split;
if (!($dir) && $prev && $_[$valpos] > $prev) {
$dir = 1;
};
if (!($dir) && $prev && $_[$valpos] < $prev) {
$dir = -1;
};
if ($dir < 0 && $_[$valpos] > $prev) {
die "kruskal-wallis: not sorted at $n ($_[$valpos] > $prev)\n";
} elsif ($dir > 0 && $_[$valpos] < $prev) {
die "kruskal-wallis: not sorted at $n ($_[$valpos] < $prev)\n";
};
$crit = $_[$critpos];
if ($_[$valpos] ne $prev) {
$k = $an / $a if $a;
$an = 0;
$a = 0;
foreach $c (keys %a) {
$n{$c} += $a{$c};
$r{$c} += $k * $a{$c};
$a{$c} = 0;
};
if ($debug && $n > 0) {
print "$n $prev\t";
foreach $x (keys %n) {
print " $x: $n{$x} $s{$x}/$n{$x} $r{$x}\t";
};
print "\n";
};
$prev = $_[$valpos];
};
$n++;
$an += $n;
$a++;
$a{$crit}++;
$s{$crit} += $_[$valpos];
$s2{$crit} += $_[$valpos]*$_[$valpos];
$sn += $_[$valpos];
$sn2 += $_[$valpos] * $_[$valpos];
};
$k = $an / $a if $a;
foreach $c (keys %a) {
$n{$c} += $a{$c};
$r{$c} += $k * $a{$c};
$a{$c} = 0;
};
close(FN);
if ($debug) {
print "$n $prev\t";
foreach $x (keys %n) {
print " $x: $n{$x} $s{$x}/$n{$x} $r{$x}\t";
};
print "\n\n";
};
print "Kruskal Wallis: ";
print "$n cases ";
if ($n) {
print "(",$sn/$n," average)";
};
foreach $x (keys %n) {
print ", $x: $n{$x} items ";
if ($n{$x}) {
print "(average ",$s{$x}/$n{$x},"; rank sum ",$r{$x},")";
};
};
print ".\n";
$df = 0;
$hl = 0;
foreach $x (keys %r) {
if ($n{$x}) {
$hl += $r{$x}*$r{$x}/$n{$x};
};
$df++;
};
$H = 12*$hl/($n*($n+1))-3*($n+1) if $n;
$df--;
print "Kruskal-Wallis test statistic: $H - refer to khi2 tables with $df degrees of freedom.\n";
exit(0);