#!/usr/bin/perl

# Public Domain filter by Eric Auer 6/2005: Read histogram data and
# create pooled versions for each given "pool definition"

# Suggested log filter calling style:
# perl histology.pl abcd efgh < fixation-histograms.log
# creates pooled-abcd.log and pooled-efgh.log

# Recommended groups:
# abcd efgh   aceg bdfh   abgh cdef   ac bd eg fh   ab cd gh ef   ag ce bh df

# Every command line argument is one pool definition: a string of type
# letters (a..h) whose data gets pooled into one output file.
my @cats = @ARGV;

# Parsed input, indexed by type number (a => 0 ... h => 7). Each slot
# becomes a hash (base key => space-joined values) as soon as data for
# that type is read, so the array simply starts out empty.
my @data = ();

# Set of base keys for which pooled output lines are written.
my %allkeys;

# Start with fixation N+1, i.e. exclude the first N fixations.
my $startwith = 0;

# Nothing to do without at least one pool definition: explain and quit.
unless (@ARGV) {
  print "You have to specify at least one group definition on the command\n",
        "line. Example:    $0 abcd efgh < all-histograms.log\n";
  exit 0;
}

# Read the histogram log from STDIN one line at a time, so the whole log
# never has to be held in memory. A usable line has the form
#   <type>.<base key> <value> <value> ...
# with fields separated by spaces and/or tabs and <type> being a single
# letter a..h. The values of each such line are stored as one space-joined
# string in $data[<type number>]{<base key>}, and the base key is
# remembered in %allkeys unless it is one of the excluded keys.
while (my $line = <STDIN>) {
  chomp($line);
  $line =~ tr/\t/ /;	# treat tabs like spaces
  my ($key, @values) = split(/[ ]+/, $line);
  my ($type, @base) = split(/[.]/, $key);
  my $basekey = join('.', @base);

  # discard lines without an a..h type prefix (e.g. percent style lines)
  next unless $type =~ /^[a-h]$/;

  my $index = ord($type) - ord('a');	# a => 0 ... h => 7
  $data[$index]{$basekey} = join(' ', @values);
  # print STDERR "$basekey / $index / " . join(" ",@values) . "\n";

  # any.any is boring and we use EITHER image.2b OR image.2
  next if $basekey eq "any.any";
  next if $basekey eq "image.2b";
  $allkeys{$basekey} = 1;
}
  
# Write one "pooled-<group>.log" per pool definition. Each line of such a
# file holds "<group>.<key>", a tab, and then one entry per column:
#   -1      for the first $startwith columns (excluded on purpose),
#   a value 100 * (sum of the key's values) / (sum of the "any.any" values),
#           both summed over the types in the group, or
#   -1      as the final entry once a column has too little data (the "any"
#           sum is not above 4); the line stops there.
# The name of every line written is also echoed to STDERR as progress.

# The "any.any" rows do not depend on the group or the key, so split them
# into columns once instead of once per key per group.
my @anycolumns = ();
foreach my $type (0 .. 7) {
  my $row = $data[$type]{"any.any"};
  $anycolumns[$type] = [ defined($row) ? split(/[ ]/, $row) : () ];
}

# Column limit, unchanged from the previous behaviour: the LAST INDEX of
# the "any.any" row of type h (index 7), used with "<" in the loop below.
# NOTE(review): this means the final column is never written, and nothing
# is written at all when there is no h data - confirm whether either is
# intended before changing it.
my $maxcolumn = $#{ $anycolumns[7] };

foreach my $group (@cats) {
  my $logname = "pooled-$group.log";
  # three-argument open: the group text can never be read as an open mode
  open(my $log, '>', $logname) or die "Cannot write $logname: $!";

  # Convert the letters of the group to type numbers (a => 0 ... h => 7).
  # Other characters have no data and therefore never added anything to
  # the sums; drop them, but say so instead of staying silent.
  my @requested = map { ord($_) - ord('a') } split(//, $group);
  my @elements = grep { $_ >= 0 && $_ <= 7 } @requested;
  if (@elements < @requested) {
    warn "Group \"$group\" contains characters outside a-h; they are ignored\n";
  }

  foreach my $key (sort keys %allkeys) {
    print {$log} "$group.$key\t";
    print STDERR "$group.$key\n";

    # Columns of this key for every type in the group. A type without a
    # row for this key gets an empty list and so contributes 0.
    my %keycolumns = ();
    foreach my $e (@elements) {
      my $row = $data[$e]{$key};
      $keycolumns{$e} = [ defined($row) ? split(/[ ]/, $row) : () ];
    }

    for (my $n = 0; $n < $maxcolumn; $n++) {
      my $size = 0;	# sum of any-values in this column for this group
      my $sum = 0;	# sum of values in this column for this group for this key
      foreach my $e (@elements) {
        $sum  += $keycolumns{$e}[$n] || 0;	# missing column counts as 0
        $size += $anycolumns[$e][$n] || 0;
      }

      if ($n < $startwith) {	# zap first N columns
        print {$log} "-1 ";
      } elsif ($size > 4) {	# diagram only makes sense if there is enough data
        printf {$log} "%2.1f ", (100 * $sum / $size);
      } else {
        print {$log} "-1";
        last;	# stop here
      }
    } # columns
    print {$log} "\n";
  } # keys

  # buffered write errors (e.g. disk full) only show up at close time
  close($log) or die "Cannot finish writing $logname: $!";
} # groups

