#!/usr/bin/perl

# Public Domain pipe filter by Eric Auer 6/2005 - 7/2005:
# Read DPIFILTER output and squeeze all sorts of statistics out
# of the data and pretty-print them. Write several log files.

# Suggested log filter calling style:
# cat *.flt | perl analyze-dpi-trials.pl

# DPIFILTER output format is a bit exotic:
# Line format 1: MSG\ttimestamp " "s tag " "s 0 or more " "s-sep. value(s).
# Line format 2: EFIX\ttimestamp " "s-separated values.



# Maybe add more statistical things? For fixations, only duration is
# usually interesting. Regression path completely ignores non-text
# fixations so far. First pass for non-text regions is calculated as
# first inspection for now. Only text fix can stop text first pass.



my $SORTBY = "region";		# "region" or "type", you can re-sort by type
				# later simply by using the sort textutil...

my $SUMMARIZE = 1;		# create summary description of each trial
	# in a separate log file - includes mini-lists-of-fixations

my $LOGTRIALS = 0;		# SHOW answer and fixation counts per trial
		# 0		# includes verbose "trial id" and button/time

my $SPSSOUTPUT = 1;		# write per-trial values of each measure to
				# a separate log file in SPSSish format...
	# *** Question: Is this also okay for Openstat and R-Project?
	# *** Hint: use sort -n to sort the log file by subj. / trial number

my $HISTOGRAM = 1;		# create histogram: how many Nth fixations
	# were on region ...? One long line output per region... extra file.
my $HISTDEPTH = 99;		# output histogram for at most first N fix

my $CHECKEFFECTS = 2;		# SHOW list of significant effects within
		# 2		# all useful combinations of measures
	# 0 is "not at all", 1 "along binary split", 2 "along 4-way split",
	# 3 "along 8-way split" (recommended setting: 1 or 2)...

my $LATEXEFFECTS = 1;		# generate LaTeX code for significant
				# effects along "more interesting" contrasts
	# LaTeX: new 7/2005. Only enabled if CHECKEFFECTS is on. Verbosity
	# level is selected by CHECKEFFECTS, but level 3 is ignored here...

my $SEMIMAGE = 2;		# use agent / ambiguous / patient image
	# "numbers" instead of 1/2/3 left-to-right for first-pass analysis
        # and for summary log. agt / 2 / pat can be either 1/2/3 or 3/2/1.
        # Even if this is set to 1, most measures are still available for
        # image.1/2/3 in addition to the image.agt/.pat measures :-).
	# Set to 2 to additionally calculate the BASIC measures for the
	# "text agent" and "text patient" and "not mentioned" IMAGE regions.

my $KILLWRONG = 2;		# separate trials with wrong button answer
	# common setting: 1. if set to 2: check for further differences
	# between right- and wrong-answered trials...

my $KILLZERO = 0;		# exclude "0 fixations to ... this trial"
				# from statistics. common setting  is 1 (?).

my $KILLBROKEN = 1;		# remove trials with too much track loss
				# common setting: 0, and redo such trials
				# at once during the experiment instead.

my $lossthreshold = 250;	# treat fixation gaps of e.g. 150+ msec as loss
my $endlossthreshold = 250;	# treat fixation gaps of e.g. 250+ msec as loss
my $brokenfraction = 0.25;	# serious track loss "percentage" threshold

# e.g. 100: 536 at end, 188 in between (longer timespans w/o fixations)
#      125: 478          93	#      150: 414          88
#      200: 318          69	#      250: 231          61
#      300: 168          58	#      400: 106          56
#      500:  76          47	#      600:  61          41
#      800:  32          29	#     1600:  12          17
# recommended value: 150 msec, enough to accept a "hole" created by a short
# (less than 60 msec) removed fixation and the saccades around it, and 250
# msec for endlossthreshold (extra time allowed to press the button)...
# recommended brokenfraction is 0.25, but other values 0.15 ... 0.35 can
# make sense as well. 
# people normally blink ca. 15 times per minute for typ. 50-100 msec, 2-3x
# more if they are sleepy... blinks reflexes to airpuffs are e.g. 1/4-1/2
# (typ. 1/3) sec long. short blinks are just merged with "their" fixations
# in earlier steps of the analysis anyway.
# saccades are typically less than 100 msec in static-stimulus context...

my $removeshortfix  =  0;	# remove too short fixations (below ??? msec)
	# *** preferred method is to use a pool-or-remove system which
	# *** merges short fix with longer fix at similar coordinates...
	# *** however, setting removeshortfix to e.g. 333, 375 or 400 can
	# *** be useful to "wash the data further" / find effects better.
# open(FOO,">foolog");



my %firstinspection = ();	# flags for first inspection detection
my $inspectwhat = "none";	# for first inspection detection
my @regressionpath = (-1, -1, -1, -1);	# for regression path measurement

my %localstats = ();		# collects data for 1 trial
my %localhisto = ();		# preliminary fixation count: ignored if
		# a trial is wrong-answer or track-loss, else moved to histo

my $ttype = "";			# global var: type of current trial: "a" ... "h"

# categories for fixation target regions. fixation duration / count for
# non-fixation categories: button.any (timestamps)
# each region category are flushed to the global stats after each trial.
	# 93% of 1st fixations are on image.2 due to experiment design.
	# so image.2b is "image.2 but ignoring 1st fixation of trial"...
my @regions;
if ($SEMIMAGE > 0) {
  if ($SEMIMAGE > 1) {
    @regions = ("black.any", "image.any", "image.agt", "image.2", "image.pat",
      "text.any", "text.1np1", "text.2verb", "text.3adv", "text.4np2",
      "any.any", "image.1", "image.2b", "image.3",
      "image.ta", "image.tp", "image.tx");
  } else {
    @regions = ("black.any", "image.any", "image.agt", "image.2", "image.pat",
      "text.any", "text.1np1", "text.2verb", "text.3adv", "text.4np2",
      "any.any", "image.1", "image.2b", "image.3");
  }
} else {
  @regions = ("black.any", "image.any", "image.1", "image.2", "image.3",
    "text.any", "text.1np1", "text.2verb", "text.3adv", "text.4np2",
    "any.any", "image.2b");
}

my @measures = ("t", "n", "1st", "pass1t", "pass1n", "rpatht", "rpathn");
	# measures for each region: fixation count, fixation duration sum,
	# first fix duration, first pass duration, first pass fix count,
	# regression path duration / fix count (r.p. only for text regions).

	# region- and measure-names for prettyprinting in LaTeX (7/2005)
my %prettyregions = ("button", "button", "any", "anywhere",
  "black", "background", "image", "any picture", "text", "any word",
  "1", "left pic.", "2", "middle pic.", "3", "right pic.",
  "agt", "agent pic.", "2b", "middle' pic.", "pat", "patient pic.",
  "ta", "subject pic.", "tp", "object pic.", "tx", "other pic.",
  "1np1", "first NP", "2verb", "verb",
  "3adv", "adverb", "4np2", "second NP");

my %prettymeasures = ("t", "time", "n", "\\# of fix.", "1st", "1st fix. time",
  "pass1t", "1st pass time", "pass1n", "1st pass \\#",
  "insp1t", "1st ins. time", "insp1n", "1st ins. \\#",
  "rpatht", "r-path time", "rpathn", "r-path \\#");



sub collectregressionpath {	# collect regression path data for key, value
  my ($key,$value) = @_;
  my $n = $key;
  $n =~ s/text.//;
  $n =~ s/[a-z].*//;		# $n is now only the number: 1 .. 4
  $n -= 1;			# make 0-based value

  my $m = $n - 1;
  while ($m > -1) {		# stop count for all regions to the left
    $regressionpath[$m] = 0;
    $m--;
  }

  if ($regressionpath[$n] == 0) {	# after end of regression path?
    return;
  }

  if ($regressionpath[$n] == -1) {	# first visit?
    $regressionpath[$n] = 1;		# start running
  }

  my @rpathkeys = ("1np1", "2verb", "3adv", "4np2");
  for $m (0 .. 3) {
    if ($regressionpath[$m] == 1) {	# do all (!) "running" regions
      $localstats{"text." . $rpathkeys[$m] . ".rpathn"}++;
      $localstats{"text." . $rpathkeys[$m] . ".rpatht"} += ($value + 0);
    }
  } # update all active regression path regions
} # collectregressionpath



sub collectfirstpass {	# collect first pass data for key, value
  my ($key,$value) = @_;

  if ($inspectwhat eq $key) {	# accumulate first pass data

    if (defined($firstinspection{$key}) &&
      ($firstinspection{$key} == 1) ) {	# accumulate data
      $localstats{"" . $key . ".pass1n"}++;
      $localstats{"" . $key . ".pass1t"} += ($value + 0);
    } else {
      # if firstinspection not defined:
      # just first inspection, usually less interesting than first pass
      # if firstinspection not 1:
      # this is part of the Nth inspection
    }

  } else {			# possible start of a first pass inspection

    $inspectwhat = $key;
    if (defined($firstinspection{$key})) {	# "second pass", ignore
      # already had a 1st inspection on that one. this is a 2nd inspection!
      $firstinspection{$key}++;		# FLAG as second inspection start!
					# to avoid re-entering "FIRST" mode
      # we could start collecting data about Nth inspection here
    } else {
      # "first pass" is "first inspection but only if not already skipped",
      # which we only test for for text regions
      my $isfirst = 1;
      if ( ($key eq "text.1np1") && ( defined($firstinspection{"text.2verb"})
        || defined($firstinspection{"text.3adv"})
        || defined($firstinspection{"text.4np2"}) ) ) {
        $isfirst = 0; # 1st region already skipped: 2nd, 3rd or 4th touched
      }
      if ( ($key eq "text.2verb") && ( defined($firstinspection{"text.3adv"})
        || defined($firstinspection{"text.4np2"}) ) ) {
        $isfirst = 0; # 2nd region already skipped: 3rd or 4th touched
      }
      if ( ($key eq "text.3adv") && defined($firstinspection{"text.4np2"}) ) {
          $isfirst = 0; # 3rd region already skipped: 4th touched
      }
      if ($isfirst > 0) {
        $firstinspection{$key} = 1;	# actually starting a first pass
        $localstats{"" . $key . ".pass1n"} = 1;
        $localstats{"" . $key . ".pass1t"} = ($value + 0);
      } else {
        # we could collect first inspection data here...
        # usually less interesting than first pass
      }
    } # potential first pass

  } # fixation target changed
} # collectfirstpass



sub localpoint { # add fixation target , duration to in-trial local "statistics"
		# never called with value 0
  my ($key,$value,$idx) = @_;

  return if ($value < $removeshortfix);	# optionally REMOVE SHORT FIXATIONS

  if (($key =~ /[.]any/) || ($key eq "image.2") ||
    (($SEMIMAGE > 0) && (($key eq "image.1") || ($key eq "image.3"))) ) {
    # collect any.any image.any and text.any "first pass" separately,
    # but better do not collect it at all...
    # we must separate image.2, too, as it would get in the way for
    # the image.2b analysis otherwise :-(.
    # if SEMIMAGE (image.agt, image.pat) is in use, first pass data
    # is collected for those but not for image.1 and image.3...
  } else {
    if ($key =~ /^image[.]t.$/) {
      # NEVER collect "first pass" info for 2ND LEVEL "semimage" regions
      # (image.ta image.tp and image.tx) because that
      # would interfere with 1st level "semimage" region processing...
    } else {
      collectfirstpass($key, $value);	# collect first pass / first insp data
    }
  }

  if ($key =~ /^text.[1-4]/) {			# any specific text region?
    collectregressionpath($key, $value);	# do regression path measure
  }

  if ($localstats{"" . $key . ".n"} == 0) {	# 1st fixation for this region
    $localstats{"" . $key . ".1st"} = ($value + 0);
  }
  $localstats{"" . $key . ".n"}++;
  $localstats{"" . $key . ".t"} += ($value + 0);
  # print FOO "(" . $key . ")+=" . $value . "\n";

  return if ($HISTOGRAM < 1);	# we are done unless we want a histogram

  my $y1 = "*." . $key . "." . $idx; # data from any trial
  my $y2 = "" . $ttype . "." . $key . "." . $idx; # for per-region per-category...
  # localhisto keys are unique, so the following test would never trigger:
  # if (defined($localhisto{$y1}) || defined($localhisto{$y2})) { die "2?"; }
  $localhisto{$y1} = 1;	# count all $idx-th fixations on this region
  $localhisto{$y2} = 1;	# count the $key category/region $idx-th fix
} # localpoint



my %counts = ();		# data point count for statistics
my %sums = ();			# data point sum for statistics
my %squares = ();		# data point square sum for statistics
my %variances = ();		# for further analysis
my %averages = ();		# for further analysis
my %histo = ();			# counts of Nth fixations per region



sub addpoint {	# add key , value to average / standard deviation storage
  my ($x, $key,$value) = @_;	# is is "" for right or "x" for wrong answer
  $value = $value + 0;	# force to numerical
  my $fullkey = "" . $x . $ttype . "." . $key;

  return if (($KILLZERO > 0) && ($value < 1));	# whether to use empty items
	# empty items can be "0 fixations" or "0 msec of fixations".


  if (!defined($counts{$fullkey})) {
    $counts{$fullkey} = 0;
    $sums{$fullkey} = 0;
    $squares{$fullkey} = 0;
  }
  $counts{$fullkey} = $counts{$fullkey} + 1;
  $sums{$fullkey} = $sums{$fullkey} + $value;
  $squares{$fullkey} = $squares{$fullkey} + ($value * $value);
  # print FOO "[" . $key . "]<-" . $value. "\n";

  addpointgroup($x . "*." . $key, $value);	# group "anything"

  if ($ttype =~ /[abcd]/) {
    addpointgroup($x . "2yes." . $key, $value);	# group "a-d": YES, match
    # in x case: user hit "no" while correct answer would have been "yes"...
  } else {
    addpointgroup($x . "3non." . $key, $value);	# group "e-h": NO, nomatch
  }

  if ($ttype =~ /[aceg]/) {
    addpointgroup($x . "0svo." . $key, $value);	# group "aceg": SVO
  } else {
    addpointgroup($x . "1ovs." . $key, $value);	# group "bdfh": OVS
  }

  if ($ttype =~ /[cdef]/) {
    addpointgroup($x . "4toR." . $key, $value);	# group "cdef": to the right
  } else {
    addpointgroup($x . "5toL." . $key, $value);	# group "abgh": to the left
  }

  # maybe useful others: all 3 "split into 2 planes with 4 cases each"...
  # ltr-yes, ltr-non, rtl-yes, rtl-non,
  # svo-ltr, svo-rtl, ovs-ltr, ovs-rtl,
  # ovs-yes, ovs-non, svo-yes, svo-non.
  # a: left svo yes   b: left ovs yes   c: right svo yes   d: right ovs yes
  # e: right svo no   f: right ovs no   g: left svo no     h: left ovs no
  # svo/ovs and yes/no:
  if ($ttype =~ /[ac]/) { addpointgroup($x . "p1sy." . $key, $value); }
  if ($ttype =~ /[bd]/) { addpointgroup($x . "p2oy." . $key, $value); }
  if ($ttype =~ /[eg]/) { addpointgroup($x . "p3sn." . $key, $value); }
  if ($ttype =~ /[fh]/) { addpointgroup($x . "p4on." . $key, $value); }

  # to-left/to-right and yes/no:
  if ($ttype =~ /[cd]/) { addpointgroup($x . "q1ry." . $key, $value); }
  if ($ttype =~ /[ab]/) { addpointgroup($x . "q2ly." . $key, $value); }
  if ($ttype =~ /[ef]/) { addpointgroup($x . "q3rn." . $key, $value); }
  if ($ttype =~ /[gh]/) { addpointgroup($x . "q4ln." . $key, $value); }

  # to-left/to-right and svo/ovs:
  if ($ttype =~ /[ce]/) { addpointgroup($x . "r1rs." . $key, $value); }
  if ($ttype =~ /[ag]/) { addpointgroup($x . "r2ls." . $key, $value); }
  if ($ttype =~ /[df]/) { addpointgroup($x . "r3ro." . $key, $value); }
  if ($ttype =~ /[bh]/) { addpointgroup($x . "r4lo." . $key, $value); }
} # addpoint



sub addpointgroup {	# add key , value to average / standard deviation
			# only called with value 0 if addpoint allowed it
  my ($key,$value) = @_;
  $value = $value + 0;	# force to numerical
  if (!defined($counts{$key})) {
    $counts{$key} = 0;
    $sums{$key} = 0;
    $squares{$key} = 0;
  }
  $counts{$key} = $counts{$key} + 1;
  $sums{$key} = $sums{$key} + $value;
  $squares{$key} = $squares{$key} + ($value * $value);
  # print FOO "[" . $key . "]<-" . $value. "\n";
} # addpointgroup



sub sortby {	# by region: as opposed to sort by trial type which is the
		# effect of a default "sort by key string" sort
  return ($a cmp $b) if ($SORTBY ne "region");
  ($key1t, @key1) = split(/[.]/, $a);	# type.regiongroup.region.measure
  $key1t = (join('.',@key1) . "." . $key1t);
  ($key2t, @key2) = split(/[.]/, $b);	# must use /[.]/ syntax, not '.' ...
  $key2t = (join('.',@key2) . "." . $key2t);
  # print "COMPARE: " . $key1t . " / " . $key2t . "\n";
  return ($key1t cmp $key2t);
} # sortby



sub tcrit05 { # find critical t value for given degrees of freedom
  my ($df) = @_;	# 0.05 significance version
  my @tcritlist05 = (
     999, 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262,
    2.228, 2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101, 2.093,
    2.086, 2.080, 2.074, 2.069, 2.064, 2.060, 2.056, 2.052, 2.048, 2.045,
    2.042, 2.021, 2.000, 1.980, 1.960 );
  if ($df < 35) { return $tcritlist05[$df]; }
  return $tcritlist05[34];
} # tcrit05



sub tcrit01 { # find critical t value for given degrees of freedom
  my ($df) = @_;	# 0.01 significance version
  my @tcritlist01 = (
     999, 63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.250,
    3.169, 3.106, 3.055, 3.012, 2.997, 2.947, 2.921, 2.898, 2.878, 2.861,
    2.845, 2.831, 2.819, 2.807, 2.797, 2.787, 2.779, 2.771, 2.763, 2.756,
    2.750, 2.704, 2.660, 2.617, 2.576 );
  if ($df < 35) { return $tcritlist01[$df]; }
  return $tcritlist01[34];
} # tcrit01



sub tcrit001 { # find critical t value for given degrees of freedom
  my ($df) = @_;	# 0.001 significance version
  my @tcritlist001 = (
    999, 639.619, 31,598, 12.941, 8.610, 6.859, 5.595, 5.405, 5.041, 4.781,
    4.587, 4.537, 4.318, 4.221, 4.140, 4.073, 4.015, 3.965, 3.992, 3.773,
    3.850, 3.819, 3.792, 3.767, 3.745, 3.725, 3.707, 3.690, 3.674, 3,659,
    3.646, 3.551, 3.460, 3.373, 3.291 );
  if ($df < 35) { return $tcritlist001[$df]; }
  return $tcritlist001[34]; # tcrit for significance 0.001 for df=34
} # tcrit001



sub teststats {	# check a pair measures for significant differences, t-test.
	# Arguments: type1, type2, full measure name, right-or-wrong, show
        # show can be 0 (return significance and sign only), 1 (display) or
        # -1 (return abs(t value) only)

	# *** might be nice to do some ANOVA to compare 4- and 8-type
	# groups: http://www.psychstat.smsu.edu/introbook/sbk27m.htm
	# --> from http://www.psychstat.smsu.edu/introbook/sbk00.htm -->
	# "Introductory statistics: Concepts, models, and applications"
	# 1996-1998 by David W Stockburger, SW Missoury State University

  my ($t1, $t2, $measurename, $right, $show) = @_;
  my $k1;
  my $k2;

  if ($right eq "both") {
    $k1 = "" . $t1 . "." . $measurename;
    $k2 = "x" . $t2 . "." . $measurename;
  } else {
    $k1 = "$right" . $t1 . "." . $measurename;
    $k2 = "$right" . $t2 . "." . $measurename;
  }

  if (defined($variances{$k1}) && defined($variances{$k2})) {
    # do two-tailed t test on the two statistics:
    my $n1 = $counts{$k1};
    my $n2 = $counts{$k2};
    my $standarderror = ( ( ($variances{$k1}*($n1-1)) +
      ($variances{$k2}*($n2-1)) ) / ($n1+$n2-2) ) * ( (1/$n1) + (1/$n2) );
    $standarderror = sqrt($standarderror);
    my $tobserved = 0;
    if ($standarderror > 0) {
      $tobserved = ($averages{$k1} - $averages{$k2}) / $standarderror;
    }
    if (abs($tobserved) > tcrit05($n1+$n2-2)) {
      # we just show the abs(tobserved) as the sign is boring to know...
      my $sig = 0.05;	# the weakest significance that we check for
      if (abs($tobserved) > tcrit01($n1+$n2-2)) {
        $sig = 0.01;	# nice, we found a BETTER significance :-)
        if (abs($tobserved) > tcrit001($n1+$n2-2)) {
          $sig = 0.001;	# this effect is really blatantly big...
        }
      }
      if ($show > 0) {
        if ($sig == 0.001) {
          printf  "** TWO, t=%5.3f (sig. %1.3f) ", abs($tobserved), $sig;
        } else {
          if ($sig == 0.01) {
	    print "*. ";
	  } else {
	    print ".. ";
	  }
          printf "TWO, t=%5.3f (sig. %1.2f) ", abs($tobserved), $sig;
        }
        print "$right/$measurename: $t1=";
	my $dir = "-->";
	$dir = "<--" if ($averages{$k1} < $averages{$k2});
        printf "%5.3f %s %s=%5.3f\n", $averages{$k1}, $dir, $t2, $averages{$k2};
        print "  n=$n1 / $n2, stddev=";
        printf "%5.3f / %5.3f\n", sqrt($variances{$k1}), sqrt($variances{$k2});
      }
      if ($show < 0) {
        return (abs($tobserved));
      }
      if ($averages{$k1} > $averages{$k2}) {
        return $sig;
      } else {
        return (0 - $sig);
      }
    } # if significant
    else {
      # printf STDERR "NO EFFECT: %s vs. %s, t=%5.3f\n", $k1, $k2, $tobserved;
    }
  } # if data present
  return 0;
} # teststats



sub latex2way {	# output significance of 2-way difference as custom TeX code
	# arguments: measure name, right-or-wrong, 2 types
  my ($measure, $right, $type1, $type2) = @_;
  my $significance = -teststats($type1, $type2, $measure, $right, 0);
	# we expect our value to be smaller for type1
  my $dir1 = "";
  my $dir2 = "";
  if ($significance < 0) {	# unexpected direction?
    $dir1 = "\\underline{";	# then emphasize (no space for prefix "-")
    $dir2 = "}";
  }
  $significance = abs($significance);
  if ($significance == 0) {
    return "\\nNUL";
  }
  if ($significance < 0.01) {	# is it 0.001?
    return "$dir1" . "\\nMAX$dir2";
  }
  if ($significance < 0.05) {	# is it 0.01?
    return "$dir1" . "\\nMED$dir2";
  }
  return "$dir1" . "\\nMIN$dir2";	# else it is 0.05
} # latex2way (new 7/2005)



sub teststats4way { # compare 4 statistics items in various ways
		# arguments: measure name, right-or-wrong, 4 types
  my ($measure, $right, @types4) = @_;
  my @results4 = (0, 0, 0, 0, 0, 0);
  my @left4  = (0, 0, 0, 1, 1, 2);	# left hand comparison partner
  my @right4 = (1, 2, 3, 2, 3, 3);	# right hand comparison partner
  my $verbose4 = ($CHECKEFFECTS - 2);	# go verbose if even 8-way is on
  my $y = 0;

  for $x (0 .. 3) {
    if (!defined($averages{"$right" . $types4[$x] . "." . $measure})) {
      $y++;
      # print "INCOMPLETE: $right" . $types4[$x] . "." . $measure . "\n";
      # usually happens only if all 4 values are not available anyway...
    }
  }
  if (($y > 0) && ($y < 4)) {
    print "FOURWAY: $y of 4 items missing for $right" . "/$measure\n";
    # should never happen. could even use "die" here.
  }
  return if ($y > 0);	# cannot do the full 4 way test

  my @sort4 = sort { ($averages{"$right" . $a . "." . $measure} <=>
    $averages{"$right" . $b . "." . $measure}) } @types4;
  # *** ALTERNATIVELY: skip the sort. "my @sort4 = @types4;"...
  # *** Results are easier to read for humans after sorting, because only
  # *** ONE direction significant differences are in results4 with sort.

  my @names4 = ("00", "01", "10", "11");
  for $x (0 .. 3) {
    $names4[$x] = $sort4[$x];
    $names4[$x] =~ s/^..//g;	# remove first 2 chars which are only log sort
				# hints, see the list of addpointgroup calls!
  }

  $y = 0;
  my $z = 0;	# check how strong the strongest effect is
  for $x (0 .. 5) {
    $results4[$x] = -teststats($sort4[$left4[$x]], $sort4[$right4[$x]],
      $measure, $right, $verbose4);
    $y++ if ($results4[$x] ne 0);
    $z=1 if (($results4[$x] == 0.01) && ($z < 1));
    $z=2 if ($results4[$x] == 0.001);
  }
  return if ($y == 0);	# no effects anywhere

  my @fulldiff4 = (0, 0, 0);
  $fulldiff4[0] = $results4[0] * $results4[1] * $results4[2]; # 0 < 123
  $fulldiff4[1] = $results4[1] * $results4[3] * $results4[4]; # 01 < 23
  $fulldiff4[2] = $results4[2] * $results4[4] * $results4[5]; # 012 < 3
  my @directdiff4 = ($results4[0], $results4[3], $results4[5]);
    # 1/2 2/3 and 3/4

  if ($z == 2) {	# show hint about effect strength
    print   "** ";
  } else {
    if ($z == 1) {
      print "*. ";
    } else {
      print ".. ";
    }
  }

  print "FOUR " . $right . "/" . $measure . ":";
  for $x (0 .. 3) {
    print " " . $names4[$x] . "=";
    printf "%5.3f", $averages{"$right" . $sort4[$x] . "." . $measure};
    if ($x < 3) {
      if ($fulldiff4[$x] ne 0) {
        print " <<<";	# all previous values differ from all next values
      } else {
        if ($directdiff4[$x] ne 0) {
          print " -/-";	# value differs from next value
          # could somehow show "short-distance but no long-distance diff"
          # (which can happen if the long-distance item varies a lot)
        } else {
          print " ---";	# no local differences
          # could somehow show "long-distance but no short-distance diff"
        }
      }
    } # show in-between analyisis
  } # list values

  print "\n  how: [";
  $z = 0;	# re-use variable to create effect bit mask
  for $x (0 .. 5) {
    if ($results4[$x] == 0) {
      print "0";
    } else {
      $z += (1 << $x);
      if ($results4[$x] > 0) {
        print "+";
      } else {
        print "-";
      }
    }
  } # short sign list (for comparisons 12 13 14 23 24 34)
  print "/$z] ";

  for $x (0 .. 5) {
    print " " . $names4[$left4[$x]] . "" . $names4[$right4[$x]] . "=";
    if ($results4[$x] == 0) {
      print "0";
    } else {
      if ($results4[$x] < 0.01) {
        printf "%1.3f", $results4[$x];
      } else {
        printf "%1.2f", $results4[$x];
      }
    }
  } # longer significance list

  # list the long-distance effects, unless already
  # implied by short-distance or obvious effects:
  my $long4 = 0;
  if ( ($fulldiff4[0] == 0) && ($fulldiff4[1] == 0) ) {
    if ($results4[1] ne 0) { $long4 += 1; } # 1st/3rd
    if (($fulldiff4[2] == 0) && ($results4[2] ne 0)) { $long4 += 2; } # 1st/4th
  }
  if ( ($fulldiff4[1] == 0) && ($fulldiff4[2] == 0) && ($results4[4] ne 0) ) {
    $long4 += 4; # 2nd/4th
  }
  my @long4texts = ("", "  1st/3rd", "  1st/4th", "  1st/3rd..4th",
    "  2nd/4th", "  1st/3rd 2nd/4th", "  1st..2nd/4th", "  1st 2nd 3rd 4th");
  print "" . $long4texts[$long4];
  print "\n";

  if ($verbose4 < 1) {		# unless we already showed all full 6 t tests
    print "  tvalue: ";
    for $x (0 .. 5) {
      my $t = teststats($sort4[$left4[$x]], $sort4[$right4[$x]],
        $measure, $right, -1);
      if ($t ne 0) {
	# show t value for significant contrast pair
        printf " %s/%s=%3.3f", $names4[$left4[$x]], $names4[$right4[$x]], $t;
      } else {
	# do not show t values for insignificant pairs at all
        # printf " %s/%s=0", $names4[$left4[$x]], $names4[$right4[$x]];
      }
    } # loop over comparisons
    print "\n";
  } # show t values and pairs for each t test

  print "  count/stddev: ";
  for $x (0 .. 3) {
    print "" . $names4[$x] . "=";
    printf "%i/%5.3f", $counts{"$right" . $sort4[$x] . "." . $measure},
      sqrt($variances{"$right" . $sort4[$x] . "." . $measure});
    print ", " if ($x < 3);
  } # show counts and standard deviations for all 4 items
  print "\n";

} # teststats4way



sub latex4way { # compare 4 statistics items in various ways
		# arguments: measure name, right-or-wrong, 4 types
  my ($measure, $right, @types4) = @_;
  my @results4 = (0, 0, 0, 0, 0, 0);
  my @left4  = (0, 0, 0, 1, 1, 2);	# left hand comparison partner
  my @right4 = (1, 2, 3, 2, 3, 3);	# right hand comparison partner
  my $y = 0;
  my $rets = "";			# to-be-returned string

  for $x (0 .. 3) {
    if (!defined($averages{"$right" . $types4[$x] . "." . $measure})) {
      $y++;
    }
  }
  if (($y > 0) && ($y < 4)) {
    print "FOURWAY: $y of 4 items missing for $right" . "/$measure\n";
    # should never happen. could even use "die" here.
  }
  return "n/a" if ($y > 0);	# cannot do the full 4 way test

  my @sort4 = sort { ($averages{"$right" . $a . "." . $measure} <=>
    $averages{"$right" . $b . "." . $measure}) } @types4;

  my @names4 = ("00", "01", "10", "11");
  for $x (0 .. 3) {
    $names4[$x] = $sort4[$x];
    $names4[$x] =~ s/^..//g;	# remove first 2 chars: e.g. "sy" now
    $names4[$x] =~ tr/a-z/A-Z/;
    substr $names4[$x], 1, 0, "/";	# e.g. "S/Y" now
  }

  # $y = 0;
  for $x (0 .. 5) {
    $results4[$x] = abs(teststats($sort4[$left4[$x]], $sort4[$right4[$x]],
      $measure, $right, 0));
    # $y++ if ($results4[$x] > 0);
  }
  # return "nothing significant" if ($y == 0);	# just boring data, BUT
  # TWO-way results for the same data can STILL have significant effects!

  my @fulldiff4 = (0, 0, 0);	# each nonzero if three tests are all nonzero
  $fulldiff4[0] = $results4[0] * $results4[1] * $results4[2]; # 0 < 123
  $fulldiff4[1] = $results4[1] * $results4[3] * $results4[4]; # 01 < 23
  $fulldiff4[2] = $results4[2] * $results4[4] * $results4[5]; # 012 < 3
  my @directdiff4 = ($results4[0], $results4[3], $results4[5]);
    # 1/2 2/3 and 3/4

  $rets = "\$";
  for $x (0 .. 3) {
    $rets .= " {\\mathrm{" . $names4[$x] . "} \\atop ";
    $y = $averages{"$right" . $sort4[$x] . "." . $measure};
    if ($y < 9.9995) {
      $rets .= sprintf "%5.3f", $y;	# 0.001 to 9.999
    } else {
      if ($y < 99.995) {
        $rets .= sprintf "%5.2f", $y;	# 10.00 to 99.99
      } else {
        if ($y < 999.95) {
          $rets .= sprintf "%5.1f", $y;   # 100.0 to 999.9
        } else {
          $rets .= sprintf "%5.0f", $y;   # 1000 to 99999
        }
      }
    }
    $rets .= "} ";

    if ($x < 3) {	# insert difference-markers
      $rets .= "\n\t" if ($x == 1);	# linebreak after 2nd of 4 values
      if ($fulldiff4[$x] ne 0) {
        # all previous values differ from all next values
        $rets .= "\\prec";		# "boundary" between values
      } else {
        if ($directdiff4[$x] ne 0) {
          $rets .= "\\triangleleft";	# local value difference (rare case)
        } else {
          $rets .= "\\approx";		# values are the approx. the same
        }
      }
    } # show in-between analyisis
  } # list values

  return $rets . "\$";
} # latex4way (new 7/2005)



		# *** MAIN PROGRAM FOLLOWS ***
		# ----------------------------



my %missingmeasures = ();	# measures which we failed to calculate

foreach $s (@regions) {  # ...  image.2 ... text.np1 text.verb ...
   foreach $m (@measures) { # n t 1st ...
     my $key = "" . $s . "." . $m;
     if ($m =~ /rpath/) {
       if ($s =~ /^text.[1-4]/) {
         # regression path exists for specific text regions ONLY
       } else {
         $missingmeasures{$key} = 1;
       }
     } # rpath
     if ($m =~ /pass1/) {
       if ( ($s =~ /[.]any/) || ($s eq "image.2") ) {
         # we have NO 1st pass for image.any, text.any, any.any, black.any
         # image.2 (but for image.2b) for first-pass duration / fixcount
         $missingmeasures{$key} = 1;
       }
       if ( ($SEMIMAGE > 0) &&
         ( ($s eq "image.1") || ($s eq "image.3") ) ) {
         # for SEMIMAGE, we have image.agt/.pat but NOT image.1/3 1st pass
         $missingmeasures{$key} = 1;
       }
       if ($s =~ /^image[.]t.$/) {
         # the image.ta/.tp/.tx semimage 2 regions all have NO 1st pass info!
         $missingmeasures{$key} = 1;
       }
     } # pass1
   } # measures
} # regions



if ($SUMMARIZE > 0) {
  open(SUMMARY,">trial-summaries.log")
    || die "Cannot write trial summaries\n";
}

if ($SPSSOUTPUT > 0) {
  open(SPSSFILE,">per-trial-measures.log")
    || die "Cannot export per trial measures to file\n";
}

print STDERR "Reading eyetracker data from stdin...\n";
if ($removeshortfix > 9) {
  print STDERR "Fixations below $removeshortfix msec are skipped\n"
}



{	# +++ start of variable scope for file read related things
# keeping track of the context:
my $who = "nobody 0 0";		# current subject and set / script numbers
my $basetime = 0;		# last synctime, for fixations and button
my $lastfix = -1;			# END time of previous fixation
my $findex = -1;			# number of a fixation in a trial
my $brokentrial = 0;		# error sum
my $brokentrials = 0;		# count of removed trials

# from trialid lines:
my $where = "notrial";		# current trial id (for INFO, TRIAL, ENDBUTTON)
my $tnumber = 0;		# item number, 0 ... 24 (used in ENDBUTTON)
my $tmatch = "y/n";		# m or n for match or nomatch (used in ENDBUTTON)

# from info word lines:
my $wmax = -1;			# highest word number for current trial

# statistics about the where of the fixations:
my $fixcounti = 0;		# image fixation count for current trial
my $fixcountw = 0;		# word fixation count for current trial
my $fixcount0 = 0;		# background fixation count for current trial



foreach (<STDIN>) {
  my $line = $_;
  chomp($line);
  ($category, $timestamp, $tag, @values) = split(' ',$line);
  $timestamp = $timestamp + 0;		# make numeric

  if ($category eq "EFIX") {		# category: fixation

    my $fstart;				# start time
    my $fend;				# end time
    my $fduration;			# duration

					# data about the where of a fixation:
    my $fwnum = -1;			# word number, -1 if nowhere
    my $finum = -1;			# image object number, -1 if nowhere

    $findex++;				# number of this fixation in trial
    $fstart = $timestamp - $basetime;	# synctime-relative
    $fend = $tag + 0;			# make numeric: end time (no real tag)
    $fend = $fend - $basetime;		# synctime-relative
    $fduration = $values[0] + 0;	# make numeric
    # ignoring values 1 and 2, which are the X and Y coordinate

    my $gaplen = $fstart - $lastfix;
    if ($gaplen > $lossthreshold) {	# too long between fix
      if ( ($SUMMARIZE > 0) && ($gaplen > (3.0 * $lossthreshold)) ) {
        print SUMMARY "+++";
      }
      $brokentrial += $gaplen;
      if ($SUMMARIZE > 0) {
        print SUMMARY "GAP1=" . ($fstart - $lastfix);
        print SUMMARY "_(" . $lastfix . ") ";	# ... . " to " . $fstart;
      }
    }

    my $fichr = "";
    if (($values[3] eq 0) && ($values[4] eq "black")) {
      $fwnum = -1;
      $finum = -1;
      $fixcount0++;
    } else {
      if ($values[4] =~ /^[0-9]*$/) {	# numerical: a word length
        $fwnum = $values[3];		# word number
        $finum = -1;
        $fixcountw++;
      } else {
        $finum = $values[3];		# color code: 12, 48 or 60 for now!
        $fichr = "?";
        $fixcounti++;
        if ($finum eq 48) {
          $finum = 1;	# blue, left
          $fichr = "<";
        } else { if ($finum eq 12) {
          $finum = 2;	# green, center
          $fichr = "x";
        } else { if ($finum eq 60) {
          $finum = 3;	# turquoise, right
          $fichr = ">";
        } } }
	$fwnum = -1;
      }
    }
    if (($finum eq -1) && ($fwnum eq -1)) {
      localpoint("black.any", $fduration, $findex); # <<<
      print SUMMARY "- " if ($SUMMARIZE > 0);	# nowhere fixation
    } else { if ($finum eq -1) {
      my $fwclass = "3adv";			# everything else: ADV
      $fwclass = "1np1" if ($fwnum < 2);	# first 2 words: NP1
      $fwclass = "2verb" if ($fwnum eq 2);	# next word: VERB
      $fwclass = "4np2" if ($fwnum > ($wmax-2)); # last 2 words: NP2
      localpoint("text." . $fwclass, $fduration, $findex); # <<<
      localpoint("text.any", $fduration, $findex); # <<<
      print SUMMARY "" . $fwnum . " " if ($SUMMARIZE > 0);	# word fix
    } else {
      my $fx = $finum;	# for image-agent/patient decision
      my $fz = "";	# stores text-agent/patient/third decision

      if (($SEMIMAGE > 0) && ($finum != 2)) {
        if ($ttype =~ /[abgh]/) {	# right to left case?
          $fx = 4 - $fx;		# normalize to left to right order
        }
        if ($fx < 2) {			# "on the normalized-left": agent
          localpoint("image.agt", $fduration, $findex); # <<<
        } else {			# "on the normalized-right": patient
          localpoint("image.pat", $fduration, $findex); # <<<
        }
      } # do image-semantically-labelled image regions

      if ($SEMIMAGE > 1) {	# text-semantical image labelling wanted?
        if ($finum == 2) {
          if ($findex > 1) {
            # 2b-ish: center is text-agent for svo, else text-patient
            if ($ttype =~ /[aceg]/) {	# svo?
              localpoint("image.ta", $fduration, $findex); # <<<
              $fz = "A";
            } else {
              localpoint("image.tp", $fduration, $findex); # <<<
              $fz = "P";
            }
          } # image-ambiguous
        } else {
          if ($fx < 2) {
            if ($ttype =~ /[bd]/) { # ovs match agent: text-agent
              localpoint("image.ta", $fduration, $findex); # <<<
              $fz = "A";
            }
            if ($ttype =~ /[eg]/) { # svo nomatch agent: text-patient
              localpoint("image.tp", $fduration, $findex); # <<<
              $fz = "P";
            }
            # ovs nomatch agent is just "third person"
            # svo match agent is just "third person"
            if ($ttype =~ /[acfh]/) {
              localpoint("image.tx", $fduration, $findex); # <<<
              $fz = "X";
            }
          } # image-agent
          if ($fx > 2) {
            if ($ttype =~ /[fh]/) { # ovs nomatch patient: text-agent
              localpoint("image.ta", $fduration, $findex); # <<<
              $fz = "A";
            } # (ovs match patient is just "third person", not logged yet)
            if ($ttype =~ /[ac]/) { # svo match patient: text-patient
              localpoint("image.tp", $fduration, $findex); # <<<
              $fz = "P";
            }
            # ovs match patient is just "third person"
            # svo nomatch patient is just "third person"
            if ($ttype =~ /[bdeg]/) {
              localpoint("image.tx", $fduration, $findex); # <<<
              $fz = "X";
            }
          } # image-patient
        } # image-agent or image-patient
      } # do TEXT-semantically-labelled image regions

      localpoint("image." . $finum, $fduration, $findex); # <<<
      localpoint("image.any", $fduration, $findex); # <<<

      if ( ($finum == 2) && ($findex > 1) ) {	# special image.2b region:
        # image.2 but NOT the 1st fixation. Might be useful because 93% of
        # the 1st fix of all trials are to image.2, gaze target induced...
        localpoint("image.2b", $fduration, $findex); # <<<
        # "image.any" DOES include the 1st fixation - could add "image.anyb"
      }

      if ($SUMMARIZE > 0) {
        if ($fichr eq "?") {
          print SUMMARY "? ";		# image fix but unknown color code
        } else {
          if ($SEMIMAGE > 0) {		# encode as a x p ROLE?
            if ($fz eq "") {
              my @rolechrs = ("?", "a", "x", "p");	# role counts here
              print SUMMARY "" . $rolechrs[$fx] . " ";	# image fix
            } else {
              # text role counts here and is already stored in fz:
              print SUMMARY "" . $fz . " ";		# image fix
            }
          } else {			# else encode as < x > LOCATION
            print SUMMARY "" . $fichr . " ";	# image fix
          }
        } # pretty-print image classification
      } # summarize
    
    } }
    localpoint("any.any", $fduration, $findex); # <<<
    $lastfix = $fend;

  } else {			# non-EFIX category is assumed to be MSG

  if ($tag eq "PARAMS") {	# new subject (ignore screen info)
    # parse subject number, subject nickname and subject group number
    $values[1] =~ s/set//g;
    $values[1] =~ s/script//g;
    $values[1] =~ s/[.]txt//g;
    $values[1] =~ tr/0-9/ /c;	# turn non-numbers to spaces
    $who = sprintf "%-8s %s", $values[0], $values[1];
  } # PARAMS

  # DISPLAY_COORDS x1 y1 x2 y2: ignored

  if ($tag eq "TRIALID") {	# store trial ID
				# tdir / torder are redundant, ttype encodes
				# them (and also tmatch) anyway...
    my $tdir;	# = "<->";	# <-- or -->
    my $torder;	# = "?V?";	# SVO or OVS
    $where = $values[0];	# keep trial ID for later
    ($tnumber, $ttype, $tdir, $torder, $tmatch, undef) = split('-',$where);
    # number, a-h, image (direction), style (svo / ovs), match (n / m), a-d
    # type1 "a" is svo/_1/m, "b" is ovs/_1/m, "c" is svo/_2/m,
    # "d" is ovs/_2/m, type1 e-h are as a-d but .../.../n, nomatch,
    # so they use the "opposite image": _2 _2 _1 _1...
    # a: left svo yes   b: left ovs yes   c: right svo yes   d: right ovs yes
    # e: right svo no   f: right ovs no   g: left svo no     h: left ovs no
    $torder =~ s/SVO/svo/g;	# the less exotic case
    $tdir =~ s/.*_//g;	# image name, keep only 2nd part: 1 or 2
    $tdir =~ s/1/<--/g;	# _1 is "actions are done right to left"
    $tdir =~ s/2/-->/g;	# _2 is "actions are done left to right"
    $tmatch =~ s/m/YES/g;
    $tmatch =~ s/n/non/g;

    $lastfix = 0;		# initialize fixation time check
    $basetime = 0;		# no synctime set yet
    $wmax = -1;			# reset highest word number
    $findex = 0;		# reset fixation number

    $fixcounti = 0;		# reset image fixation count
    $fixcountw = 0;		# reset word fixation count
    $fixcount0 = 0;		# reset background fixation count
    $brokentrial = 0;		# reset error flag

    %localstats = ();		# initialize per-trial fixation measures
    %localhisto = ();		# initialize per-trial fixation counts
    %firstinspection = ();	# initialize per-trial first pass flags
    $inspectwhat = "none";	# ...
    @regressionpath = (-1, -1, -1, -1);	# initialize regression path flags

    foreach $s (@regions) {  # ...  image.2 ... text.np1 text.verb ...
       foreach $m (@measures) { # n t 1st ...
         my $key = "" . $s . "." . $m;
         if (!defined($missingmeasures{$key})) {
           $localstats{$key} = 0;
         }
       } # measures
    } # regions

    if ($LOGTRIALS > 0) {
      print STDOUT "" . $who . " " . $torder . " " . $tdir . " ";
      printf STDOUT "%s %s %2d ", $tmatch, $ttype, $tnumber;
    }
  } # TRIALID

  # CORRECTION x y radius: ignored for now

  if ($tag eq "INFO") {		# info word assumed
    my $wnum = $values[1];	# 0 based word number
    my $wname = $values[6];	# the word as a string
    $wmax = $wnum if ($wnum > $wmax);
    if ($SUMMARIZE > 0) {
      print SUMMARY "" . $who . " " . $where . "   " if ($wnum eq 0);
      print SUMMARY "" . $wnum . "." . $wname . " ";
    }
  } # INFO

  # SYNCTIME: timestamps are all in 92 ... 106 msec range.
  # DISPLAY ON: ignored, duplicates SYNCTIME information.
  # histogram: 11 38 55 59 59 64 81 57 51 54 70 71 56 34 7 times 92 ... 106
  if ($tag eq "SYNCTIME") {
    $basetime = $timestamp;
    $lastfix = 0;
    print SUMMARY "\n" if ($SUMMARIZE > 0);
  } # SYNCTIME

  if ($tag eq "TRIGGER") {	# trigger main assumed
    # *** printf STDOUT "%5d ", $timestamp; # not basetime-relative
  } # TRIGGER

  if ($tag eq "TRIAL") {	# only OK trials should be encountered here
    if ($values[0] eq "ABORTED") {
      print STDERR "Found aborted trial $where.\n";
    }
    if ($values[0] eq "REPEATED") {
      print STDERR "Found repeated trial $where.\n";
    }
  } # TRIAL

  if ($tag eq "ENDBUTTON") {	# TRIAL_RESULT has the same values...
    my $b = $values[0];		# button number, 1 or 2...
    my $yesno = $values[0];
    $yesno =~ s/1/YES/g;
    $yesno =~ s/2/non/g;
    my $rightwrong = "";
    if ($yesno eq $tmatch) {
      $rightwrong = "right";
    } else {
      $rightwrong = "WRONG";
    }
    $timestamp = $timestamp - $basetime;

    my $gaplen = $timestamp - $lastfix;
    if ($gaplen > $endlossthreshold) {	# too long before answer
      if ( ($SUMMARIZE > 0) && ($gaplen > (3.0 * $endlossthreshold)) ) {
        print SUMMARY "+++";
      }
      $brokentrial += $gaplen;
      if ($SUMMARIZE > 0) {
        print SUMMARY "GAP2=" . ($timestamp - $lastfix) . " ";
        # print SUMMARY "_(" . $lastfix . " to " . $timestamp . ") ";
      }
    }

    if ( ($KILLBROKEN > 0) &&
         ($brokentrial > ($timestamp * $brokenfraction)) ) {
      # DO NOT STORE FIXATION STATS if too many gaps in trial
      # %localstats = (); %localhisto = ();
      $brokentrials++;

      goto brokentrial;	# skip all logging for removed trials!

    }

    my $x = "";		# "" for right, "x" for wrong answer trials

    if ( ($KILLWRONG > 0) && ($tmatch ne $yesno) ) {
      # Store fixation stats for WRONG answers separately...
      $x = "x";
      # ... and EXCLUDE those stats from the histogram!
      # %localhisto = ();
    } else {
      foreach $k (keys %localhisto) {	# values in localhisto are all just 1!
        $histo{$k} = 0 if (!defined($histo{$k}));
        $histo{$k}++;			# <<< transfer to global histogram
      }
    }

    if (($x eq "") && ($SPSSOUTPUT > 0)) {
      my $spsshead;
      my $measurenumber = 1;
      @whoparts = split(' ',$who);	# name, set, script
      $spsshead = "" . ( (($whoparts[1]-1)*8) + ($whoparts[2]-1) );
        # convert "name set# script#" to a "subject number" in 0..31 range
        # ... - we have set 1..4, each with script 1..8.
      $spsshead .= "\t" . $tnumber . "\t";
      $spsshead .= sprintf("%-18s",$where) . "\t";
      $spsshead .= "" . (1 + ord($ttype) - ord("a"));	# convert a-h to 1-8
      foreach $m (@measures) {
        # measures can be: .n .t .1st ...
        my $m2 = $m;
        $m2 =~ s/pass1/p1_/;	# shorten a bit
        $m2 =~ s/rpath/rp_/;	# shorten a bit
        $m2 =~ s/1st/1st_t/;	# use more verbose name ;-)
        print SPSSFILE "" . $spsshead . "\t" . $measurenumber . ":" . $m2;
	print SPSSFILE "\t" . $measurenumber;
        foreach $s (@regions) { # a category can be e.g. "text.np1"
          # format: subj# item# coding condition np1time verbtime ...
          if (defined($localstats{"" . $s . "." . $m})) {
            print SPSSFILE "\t" . $localstats{"" . $s . "." . $m};
          } else {
            print SPSSFILE "\t-1";	# no data available for this measure
          }
        } # regions
        print SPSSFILE "\n";
        $measurenumber++;
      } # measures
      print SPSSFILE "" . $spsshead . "\t" . $measurenumber . ":end";
      print SPSSFILE "\t" . $measurenumber . "\t" . $timestamp . "\n";
    }

    foreach $k (keys %localstats) {
      addpoint($x, $k, $localstats{$k});	# <<< transfer to global stats
    }

    # store button timing stats (could even do that for broken trials)...:
    addpoint($x, "button.any.t", $timestamp);		# <<<
    # *** addpoint($x, "button." . $b . ".t", $timestamp);	# <<<

    brokentrial:				# a GOTO LABEL...

    %localstats = ();
    %localhisto = ();

    if ($SUMMARIZE > 0) {
      print SUMMARY "  " . $yesno . "/" . $rightwrong . "=" . $timestamp . " ";
      print SUMMARY "w=" . $fixcountw . " i=" . $fixcounti . " ";
      print SUMMARY "?=" . $fixcount0;
      if ( $brokentrial > ($timestamp * $brokenfraction) ) {
        if ($KILLBROKEN > 0) {
          print SUMMARY " ***REMOVED***";
        } else {
          print SUMMARY " ***";
        }
      } else {
        if ( ( $brokentrial > ($timestamp * $brokenfraction / 2) ) ||
             ($fixcounti < 2) || ($fixcountw < 3) ||
             ( ($fixcount0 > $fixcountw) && ($fixcount0 > $fixcounti) ) ) {
	  print SUMMARY " ???";
	} # almost broken
      } # broken / not broken
      # trials with less than 3 word fixations or e.g. only 1 image fixation
      # are odd but not necessarily broken. maybe the subjects just did not
      # pay enough attention. having many background fixations is odd, too.
      print SUMMARY "\n";
    }

    if ($LOGTRIALS > 0) {
      printf STDOUT "%s %s %5d", $yesno, $rightwrong, $timestamp;
      printf STDOUT "%2d %2d %2d", $fixcountw, $fixcounti, $fixcount0;
      if ( ($KILLBROKEN > 0) &&
           ( $brokentrial > ($timestamp * $brokenfraction) ) ) {
          print STDOUT " REMOVED";
      }
      print STDOUT "\n";
    }
  } # ENDBUTTON

  # TRIAL_RESULT value: ignored, duplicates ENDBUTTON value...

  } # end of category-is-MSG part

} # end of foreach file read loop

if ($brokentrials > 0) {
  print STDERR "Removed " . $brokentrials . " trials with excessive track";
  print STDERR " loss (more than $brokenfraction of the trial\n";
  print STDERR "  duration in track losses longer than $lossthreshold";
  print STDERR " (at end: $endlossthreshold) msec\n";
}
} # +++ end of the variable scope for file read related things



print STDERR "(No data collected for:\n";
my $n = 0;
foreach $key (sort keys %missingmeasures) {
  print STDERR " $key";
  $n++;
  print STDERR "\n" if (($n % 4) == 0);
}
print STDERR ")\n";



print STDERR "Writing fixation statistics...\n";
open(STATFILE,">fixation-stats.log") || die "cannot write fixation stats file\n";

# some sample data for testing:
# addpoint("test.14159",1); addpoint("test.14159",4); addpoint("test.14159",1);
# addpoint("test.14159",5); addpoint("test.14159",9);
# "unbiased stddev" of the following is 2.88 ("/ (n-1)" in the formula)...
# addpoint("foo",65); addpoint("foo",69); addpoint("foo",70);
# addpoint("foo",63); addpoint("foo",70); addpoint("foo",68);

my $stddevsum = 0;
my $nstatitems = 0;

foreach $statitem (sort sortby keys %counts) {
  my $howmany = $counts{$statitem};
  my $average = -1;
  my $variance = -1; 
  my $rms = -1;

  if ($howmany > 0) {
    $average = $sums{$statitem} / $howmany;
    $averages{$statitem} = $average;
  }

  # standard deviation is square root of variance...
  # variance is "sum of squares of distance to average" divided by X
  # where X is either howmany (simple) or howmany-1 (unbiased).

  # useful formula: "sum of squares of distance to average" can also
  # be calculated without having to know the average in advance, as:
  # (sum of squares) minus ((square of sum) / howmany).

  # rms is the square root of the average of the squares
  # rms of the deviations from the mean (average) is the standard deviation
  # rms^2 = mean^2 + stddev^2   (stddev in "x is howmany" style here...)

  if ($howmany > 1) {
    $variance = ( $squares{$statitem} -
      ($sums{$statitem} * $sums{$statitem} / $howmany) ) / ($howmany - 1);
    $variances{$statitem} = $variance;
  }

  if ($howmany > 0) {
    $rms = sqrt($squares{$statitem} / $howmany);
  }

  printf STATFILE "%-20s", $statitem;		# e.g. "c.image.2" or "d.text.any"
  printf STATFILE "\t%3u", $howmany;		# number of points
  printf STATFILE "\t%5.6f", $average;		# average
  if ($variance >= 0) {
    my $stddev = sqrt($variance);
    $stddevsum += $stddev;
    printf STATFILE "\t%5.6f", $stddev;	# standard deviation
  } else {
    printf STATFILE "\t%5.6f", -1;		# n/a
  }
  printf STATFILE "\t%5.6f", ($rms);		# root mean squared
  print STATFILE "\n";
  $nstatitems++;
} # end of foreach stats loop

print STDERR "Average standard deviation in " . $nstatitems . " items: ";
printf STDERR "%5.6f\n", ($stddevsum / $nstatitems);
close(STATFILE);



if ($HISTOGRAM > 0) {
  print STDERR "Writing fixation histogram...\n";
  open(HISTFILE,">fixation-histograms.log")
    || die "Cannot write fixation histograms to file\n";

  my $imax = 1;
  my $i = 1;
  while ($i <= $HISTDEPTH) {
    if (defined($histo{"*.any.any." . $imax})) {
      $imax = $i;
    }
    $i++;
  }
  # imax is now highest used fixation number but at most HISTDEPTH
  print STDERR "Histogram shows first " . $imax . " fixations.\n";

  # create histogram log, using absolute counts
  foreach $s (@regions) { # e.g. image.2, text.np1 ...
    foreach $type ("*", "a", "b", "c", "d", "e", "f", "g", "h") {
      my $key = "" . $type . "." . $s;
      print HISTFILE "" . $key . "\t";
      $i = 1;
      while ($i <= $imax) {
        if (defined($histo{$key . "." . $i})) {
          printf HISTFILE "%3i", $histo{$key . "." . $i};
        } else {
          print HISTFILE "  0";
        }
        print HISTFILE " " if ($i < $HISTDEPTH);
        $i++;
      } # loop: fixation numbers
      print HISTFILE "\n";
    } # type *, a-h
  } # category: fixation targets

  # repeat histogram log, this time writing percentages
  foreach $type ("*", "a", "b", "c", "d", "e", "f", "g", "h") {
    foreach $s (@regions) { # e.g. image.2, text.np1 ...
      if ($s ne "any.any") {
        my $key = "" . $type . "." . $s;
        print HISTFILE "%" . $key . "\t";
        $i = 1;
        while ($i <= $imax) {
          if (defined($histo{$key . "." . $i})) {
            printf HISTFILE "%.1f",
              (100.0 * $histo{$key . "." . $i} /
               $histo{$type . ".any.any." . $i});
          } else {
            print HISTFILE "0";
          }
          print HISTFILE " " if ($i < $HISTDEPTH);
          $i++;
        } # loop: fixation numbers
        print HISTFILE "\n";
      } # if not "any" case
    } # category: fixation targets
  } # type *, a-h

  close(HISTFILE);
} # histogram



# print "FOUR-way: doing 6 t-tests (12 13 14 23 24 34) for each quadruple:\n";

if ($CHECKEFFECTS > 0) {

  print STDERR "Searching for significant differences";
  if ($LATEXEFFECTS > 0) {
    print STDERR " and generating LaTeX table data";
  }
  print STDERR "...\n";
  print "\nEffects in right-answered trials:\n\n";

  my @checkwhat = ("button.any.t");
	# button.1.t and button.2.t would be quite boring / implicit
	# because we have split into right and wrong answers and
	# answer-should-be-yes and answer-should-be-no anyway.
  my $nchecks = 1;

  foreach $r (@regions) {	# e.g. "text.4np2", "image.2"...
    $checkwhat[$nchecks] = "---";	# start new paragraph
    $nchecks++;
    foreach $m (@measures) {	# e.g. "n", "pass1n"...
      $checkwhat[$nchecks] = "" . $r . "." . $m;
      $nchecks++;
    } # regions
  } # measures


  my @lasttexregion = ("none", "none", "none");

  foreach $x ("", "x") {	# right and wrong answered trials

    if ($LATEXEFFECTS > 0) {
      if ($x eq "") {
        open(TEXFILE1,">effects-so-yn.tex")
          || die "Cannot write SVO / OVS yes / no TeX to file\n";
        open(TEXFILE2,">effects-lr-yn.tex")
          || die "Cannot write left / right yes / no TeX to file\n";
        open(TEXFILE3,">effects-lr-so.tex")
          || die "Cannot write left / right SVO / OVS TeX to file\n";
      } else {
        open(TEXFILE1,">effects-so-yn-wrong.tex")
          || die "Cannot write SVO / OVS yes / no (wrong-answered) TeX to file\n";
        open(TEXFILE2,">effects-lr-yn-wrong.tex")
          || die "Cannot write left / right yes / no (wrong-answered) TeX to file\n";
        open(TEXFILE3,">effects-lr-so-wrong.tex")
          || die "Cannot write left / right SVO / OVS (wrong-answered) TeX to file\n";
      }
      my $rightwrong = "wrong";
      $rightwrong = "right" if ($x eq "");
      print TEXFILE1 "% $rightwrong: what & svo/ovs & yes(svo/ovs) & no(svo/ovs) &\n";
      print TEXFILE1 "%   yes/no & svo(yes/no) & ovs(yes/no) & values \\\\\n";
      print TEXFILE2 "% $rightwrong: what & toR/toL & yes(toR/toL) & no(toR/toL) &\n";
      print TEXFILE2 "%   toR(yes/no) & toL(yes/no) & values \\\\\n";
      print TEXFILE3 "% $rightwrong: what & toR/toL & svo(toR/toL) & ovs(toR/toL) &\n";
      print TEXFILE3 "%   toR(svo/ovs) & toL(svo/ovs) & values \\\\\n";
    } # if LATEXEFFECTS

    if (($KILLWRONG < 1) && ($x eq "x")) { goto nowrong; }

    print "\n\n\nEffects in WRONG-ANSWERED trials:\n" if ($x eq "x");

    foreach $key (@checkwhat) {		# large obvious contrasts first
      if ($key eq "---") {
        print "\n";
        if ($LATEXEFFECTS > 0) {
          print TEXFILE1 "%\n";
          print TEXFILE2 "%\n";
          print TEXFILE3 "%\n";
        }
      } else {				# two-way checks
        teststats("0svo", "1ovs", $key, $x, 1);
        teststats("2yes", "3non", $key, $x, 1);
        teststats("4toR", "5toL", $key, $x, 1);

        if ($CHECKEFFECTS > 1) {	# four-way checks
          teststats4way($key, $x, "p1sy", "p2oy", "p3sn", "p4on");
          teststats4way($key, $x, "q1ry", "q2ly", "q3rn", "q4ln");
          teststats4way($key, $x, "r1rs", "r2ls", "r3ro", "r4lo");
          print "\n";
        } # verbosity level 2 or higher

	if ($LATEXEFFECTS > 0) {	# pretty-print the effects

	  my ($ka, $kb, $kc) = split(/[.]/, $key);	# LaTeX things are new 7/2005
		# no first pass for image.t? collected, only for image.???,
		# or - only if no image.??? present, for image.?.
		# no first pass for *.any, and first pass is actually
		# first inspection if no text region.
	  if (($ka ne "text") && ($kc =~ /pass1/)) {
	    $kc =~ s/pass1/insp1/;
	  }
	  if ($kb eq "any") {
	    $kb = $ka;
	  }
	  die "Unknown region $kb" if (!defined($prettyregions{$kb}));
	  die "Unknown measure $kc" if (!defined($prettymeasures{$kc}));
	  my @texline = ("", "", "");
	  my @texinteresting = (0, 0, 0);
	  for $i (0 .. 2) {
	    $texline[$i] = "";
	    if ($lasttexregion[$i] ne $kb) {	# fresh region coming up
              $texline[$i] .= "\\underline{\\textbf{" . $prettyregions{$kb};
	      if ($i > 0) {			# left / right extra tables:
	        $texline[$i] .= "}}\t& & & & & & \\\\\n";	# 7 columns
	      } else {				# main SVO OVS yes no table:
	        $texline[$i] .= "}}\t& & & & & & & \\\\\n";	# 8 columns
	      }
	    }
	    $texline[$i] .= "" . $prettymeasures{$kc};
	  } # for all 3 simultaneously generated TeX lines

	  $texline[0] .= "\t& " . latex2way($key, $x, "0svo", "1ovs");
	  $texline[0] .= "\t& " . latex2way($key, $x, "p1sy", "p2oy"); # yes(SVO/OVS)
	  $texline[0] .= "\t& " . latex2way($key, $x, "p3sn", "p4on"); # no(SVO/OVS)
		# p1/p4 and p2/p3 ignored for now!

	  $texline[0] .= "\t& " . latex2way($key, $x, "2yes", "3non");
	  $texline[0] .= "\t& " . latex2way($key, $x, "p1sy", "p3sn"); # SVO(yes/no)
	  $texline[0] .= "\t& " . latex2way($key, $x, "p2oy", "p4on"); # OVS(yes/no)
		# p1/p4 and p2/p3 ignored for now!
	  if ($texline[0] =~ /nM[IAE][NXD]/) {
	    $texinteresting[0] = 1; # at least one \\nMIN \\nMED or \\nMAX
	  }

	  if (defined($averages{"$x" . "0svo." . $key})) {
	    $texline[0] .= sprintf "\n\t%% svo=%5.3f ovs=%5.3f yes=%5.3f no=%5.3f\n",
	      $averages{"$x" . "0svo." . $key}, $averages{"$x" . "1ovs." . $key},
	      $averages{"$x" . "2yes." . $key}, $averages{"$x" . "3non." . $key};
	    # add averages - but only as a comment and only to this file...
	    # key can be for example "button.any.t"
	  } else {
	    $texline[0] .= "\n";
	  }

	  $texline[1] .= "\t& " . latex2way($key, $x, "4toR", "5toL");
	  $texline[1] .= "\t& " . latex2way($key, $x, "q1ry", "q2ly"); # yes(toR/toL)
	  $texline[1] .= "\t& " . latex2way($key, $x, "q3rn", "q4ln"); # no(toR/toL)
		# q1/q4 and q2/q3 ignored for now!
	  if ($texline[1] =~ /nM[IAE][NXD]/) {
	    $texinteresting[1] = 1; # at least one \\nMIN \\nMED or \\nMAX
	  }

	  # $texline[1] .= "\t& " . latex2way($key, $x, "2yes", "3non");
	  # comparing plain yes to no would just duplicate info from another table
	  my $tlright = latex2way($key, $x, "q1ry", "q3rn"); # toR(yes/no)
	  my $tlleft  = latex2way($key, $x, "q2ly", "q4ln"); # toL(yes/no)
		# q1/q4 and q2/q3 ignored for now!
	  if ($tlright eq $tlleft) {	# boringly symmetrical effect?
	    $texline[1] .= "\t& $tlright\t& $tlleft";
	  } else {
	    $texline[1] .= "\t& $tlright\t& $tlleft";
	    if ($texline[1] =~ /nM[IAE][NXD]/) {
	      $texinteresting[1] = 1; # at least one \\nMIN \\nMED or \\nMAX
	    }
	  }

	  $texline[2] .= "\t& " . latex2way($key, $x, "4toR", "5toL");
	  $texline[2] .= "\t& " . latex2way($key, $x, "r1rs", "r2ls"); # SVO(toR/toL)
	  $texline[2] .= "\t& " . latex2way($key, $x, "r3ro", "r4lo"); # OVS(toR/toL)
		# r1/r4 and r2/r3 ignored for now!
	  if ($texline[2] =~ /nM[IAE][NXD]/) {
	    $texinteresting[2] = 1; # at least one \\nMIN \\nMED or \\nMAX
	  }

	  # $texline[2] .= "\t& " . latex2way($key, $x, "0svo", "1ovs");
	  # comparing plain SVO to OVS would just duplicate info from another table
	  $tlright = latex2way($key, $x, "r1rs", "r3ro"); # toR(SVO/OVS)
	  $tlleft  = latex2way($key, $x, "r2ls", "r4lo"); # toL(SVO/OVS)
		# r1/r4 and r2/r3 ignored for now!
	  if ($tlright eq $tlleft) {	# boringly symmetrical effect?
	    $texline[2] .= "\t& $tlright\t& $tlleft";
	  } else {
	    $texline[2] .= "\t& $tlright\t& $tlleft";
	    if ($texline[2] =~ /nM[IAE][NXD]/) {
	      $texinteresting[2] = 1; # at least one \\nMIN \\nMED or \\nMAX
	    }
	  }


	  if ($CHECKEFFECTS > 1) {	# show ordered values and where they differ
            $texline[0] .= "\t& " . latex4way($key, $x, "p1sy", "p2oy", "p3sn", "p4on");
            $texline[1] .= "\n\t& " . latex4way($key, $x, "q1ry", "q2ly", "q3rn", "q4ln");
            $texline[2] .= "\n\t& " . latex4way($key, $x, "r1rs", "r2ls", "r3ro", "r4lo");
          } # verbosity level 2 or higher

	  for $i (0 .. 2) {
	    $texline[$i] .= " \\\\\n";
	    if ($texinteresting[$i] > 0) {
	      if ($i == 0) {
	        print TEXFILE1 $texline[$i];	# store that TeX
	      }
	      if ($i == 1) {
	        print TEXFILE2 $texline[$i];	# store that TeX
	      }
	      if ($i == 2) {
	        print TEXFILE3 $texline[$i];	# store that TeX
	      }
	      $lasttexregion[$i] = $kb;
	    } # print only non-boring lines to LaTeX file
	  }

	} # if LATEXEFFECTS

      } # non-separator key
    } # for each region/measure
    print "\n";

    if ($CHECKEFFECTS > 2) {
      foreach $key (@checkwhat) { # smallest "groups" last, separately
        # finally, do all [a-h] [a-h] pairs but avoid mirrored and same,same:
        if ($key eq "---") {
          print "\n";
        } else {
          foreach $t1 (0 .. 7) {
            foreach $t2 (0 .. 7) {
              if ($t1 > $t2) { teststats("" . chr($t1 + ord('a')),
                "" . chr($t2 + ord('a')), $key, $x, 1); }
            } # t2
          } # t1
        }
      } # things to check
      print "\n";
    } # verbosity level 3 or higher

    nowrong:					# a GOTO LABEL...

  } # right and wrong

  if ($LATEXEFFECTS > 0) {
    close(TEXFILE1);
    close(TEXFILE2);
    close(TEXFILE3);
  }


  if ($KILLWRONG > 1) {	# check for effects of "wrong" itself
    print "\n\n\nEffects related to WRONG-ANSWERING itself:\n";

    foreach $key (@checkwhat) { # large obvious groups first
      if ($key eq "---") {
        print "\n";
      } else {
        teststats("2yes", "2yes", $key, "both", 1);
        teststats("3non", "3non", $key, "both", 1);
        teststats("0svo", "0svo", $key, "both", 1);
        teststats("1ovs", "1ovs", $key, "both", 1);
        teststats("4toR", "4toR", $key, "both", 1);
        teststats("5toL", "5toL", $key, "both", 1);
      }
    }
    print "\n\n";

    # *** COULD also check in FOUR-WAY style for (right/wrong * (2-way...)).

    if ($CHECKEFFECTS > 1) {
      foreach $key (@checkwhat) { # smallest "groups" last
        if ($key eq "---") {
          print "\n";
        } else {
          foreach $t1 (0 .. 7) {
            teststats("" . chr($t1 + ord('a')),
                "" . chr($t1 + ord('a')), $key, "both", 1);
          } # t1
        }
      } # things to check
      print "\n";
    } # verbosity level 2 (!) or higher

    print "\n";

  } # check if right / wrong itself has effects

} # check effects


# done :-)
print STDERR "Done!\n";



  # Four-way differences - comparisons 12 13 14 23 24 34:
  # encountered combinations: +++0+0 0+++++ 0++0++ 0+0000 000++0 000+00
  # 0+0+00 ++++++ +++000 00+0++ (frequency 1..5 so far)
  # 0++000 0++0+0 0000+0 +++++0 (frequency 9..13)
  # 22 times 0++++0, 23 times 00+0+0, 25 times 00+000.

  # Descriptions, "x:" means "only shows up in wrong-answered trials":
  # 4!  (1)330(1)0, "1 differs from 34, 2 does hardly"
  # 5!  033332, "1 is like 2, but 3 is not like 4. 1/2 differ from 3/4"
  # 4?  0(1)302(1), "almost 1=2=3, 1 not 4, 2 not 4, 3 almost 4"
  # 1x: 0(1)0000, "only a small difference only between 1 and 3"
  # 2x: 000(1)(1)0, "only 2 differs a bit only from 3/4"
  # 1x: 000(1)00, "all similar, but small 2/3 difference"
  # 2x: 0(1)0(1)00, "all similar, but small 1/3 and 2/3 differences"
  # 6!  233333 or (1)33333, "all different (but 1/2 less so)"
  # 3!  (1)23000, "only 1 differs"
  # 3!  00302(1), 0020(1)2, 003032, 0030(1)(1), 003033, "only 4 differs..."

  # more frequent:
  # 2!  0(1)(1)000, 0(1)2000, 022000, "only 1 differs from 3/4"
  # 3!  0230(1)0, 0(1)20(1)0 and some 0(1)3020, 0220(1)0, 0(1)3020,
  #       "1 differs from 3/4, 2 only (a bit) from 4"
  # 1?  0000(1)0, few 000020 or 000030, mostly x: "some 2/4 differences only"
  # 5!  233230, **233220, *(1)33(1)20, (1)33220, *233(1)20, **(1)33330,
  #       "3 like 4 but others different from each other" (*/** = frequent)

  # very frequent:
  # 4!  *033330, *033220, 0332(1)0, *0(1)2(1)20, 023(1)30, *033230,
  #       033(1)(1)0, 022(1)20, 0(1)(1)(1)(1)0, "1/2 differ from 3/4"
  # 2!  003030, 003020, 0030(1)0, 00(1)030, 00(1)020, *002020, **00(1)0(1)0,
  #       **0020(1)0, "1 and 2 differ from 4 (but no 3/4 or 1/2 difference)"
  # 1?  003000, *002000, **00(1)000, "no big differences at all, only 1/4"
