#!/usr/bin/perl -w

package htrosbif;

use strict;

require 5.010; # state variables! yay!

use File::Basename;
use Getopt::Long qw(:config no_ignore_case);
use IO::Socket;
use JSON::XS;
use List::MoreUtils qw(uniq);
use Pod::Usage;
use POSIX qw(floor);
use Socket;
use Storable qw(dclone);
use Test::Deep::NoTest;
use URI;

use constant VERSION => "alpha 5 - infowar" ;
use feature qw(state);

=encoding utf8

=head1 NAME

htrosbif - Active HTTP server fingerprinting and recon tool

=head1 SYNOPSIS

htrosbif [OPTIONS] <url>

=head1 DESCRIPTION

Does a bit of Recon by Fire, if you will. Prods the web server in all sorts of old, new, basic, fancy, spec-compliant and spec-breaking ways. Tries to characterise both the well-spoken educated responses and the seriously deviant babble it receives in return. Signatures contain no user data, only header names and http-level quirks.

As a useful side effect, might detect reverse proxies, http load balancers, intrusion prevention systems and web application firewalls. Signature generation is mostly manual for these overlays.

=head1 OPTIONS SUMMARY

=head2 Signature matching (default mode)

htrosbif [-v|--verbose] [-d|--debug <num>] [--timeout <seconds>] [--eccm] [-m|--matchsigs <signature dir>] [-O|--overlays <overlay dir>] [-a|--allmatches] (<url> || [-r|--readsig <captured signature>])

By default prods <url> and lists top ten matches from ./sigs/*.sig

=head2 Signature generation

htrosbif [-v|--verbose] [-d|--debug <num>] [--timeout <seconds>] [--eccm] [-w|--writesig <signature file>] <url>

=head1 LIMITATIONS

<url> should be a valid url returning a 200 series status code to a GET request - no redirect or authentication. This restriction may be lifted in the future.

 todo -
   stacked overlay matching
   refactoring - how about a tools/apply_overlay.pl ?
   alternates - what if the 400/500 is replaced by a happy 302 ?

=head1 AUTHOR

Erik Inge Bolsø <knan-rosbif@anduin.net> originally wrote this tool.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2009,2010 Erik Inge Bolsø.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 SEE ALSO

 httprecon	<http://www.computec.ch/projekte/httprecon/>
 hmap		<http://ujeni.murkyroc.com/hmap/>

=cut

my $version;
my $verbose;
my $debug;
my $resptimeout = 2;
my $mediumtimeout = 14;
my $help = 0;
my $result;
my $readsig;
my $writesig;
my $allmatches;
my $eccm;
my $matchsigs = "sigs";
my $overlaydir = "overlays";

# blandest possible but still plausible useragent
my $bland_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ;

## sanitizing helpers
########################

sub print_sane ($)
{
  # in case of evil servers:
  # sanitize \0, backspace, delete, vt, ff, ansi terminal escapes, etc.
  # show them as periods, hexdump-style
  my $unclean = $_[0];
  $unclean =~ s/[^[:alnum:][:punct:] \t\r\n]/./g ;
  print $unclean ;
}

sub debug_dump
{
  my $hashref = $_[0];
  print JSON::XS->new->pretty(1)->canonical(1)->encode($hashref);
}

########################

$result = GetOptions (	'writesig|w=s' => \$writesig,
			'readsig|r=s'  => \$readsig,
			'matchsigs|m=s'=> \$matchsigs,
			'overlays|O=s' => \$overlaydir,
			'allmatches|a' => \$allmatches,
			'verbose|v'    => \$verbose,
			'version|V'    => \$version,
			'timeout=i'    => \$resptimeout,
			'eccm'         => \$eccm,
			'debug|d=i'    => \$debug,
                      	'help|h|?'     => \$help);

pod2usage(-exitstatus=>0, -verbose=>2) if $help;

if ($version)
{
  print "htrosbif " . VERSION . "\n";
  exit 0;
}

PARSEAGAIN:

my $url = $ARGV[0];
die "No url given, nothing to do. -h for help.\n" unless ($readsig || $url) ;
my $uri;
my $host;
my $port;
my $path;
my $scheme;

if ($url)
{
  $url =~ s/^/http:\/\// if not $url =~ /^http:\/\//; # prepend http:// for now if missing, revisit for https support.
  $uri = URI->new($url);
  $host = $uri->host;
  $port = $uri->port;
  $path = $uri->path;
  $scheme = $uri->scheme;

  # empty $path just isn't going to work. We need at least a / for proper operation.
  unless ($path) { $path = "/"; }
}

my %testresults;
my $tmpresult;

my %matchresults;
my %overlays;

my %firstclasstests;
my %secondclasstests;
my %thirdclasstests;

my $OK_CHARS='-a-zA-Z0-9_./';
if ($writesig)
{
  if ($writesig =~ m|[^$OK_CHARS]|)
  {
    warn ("Unacceptable characters in -w filename, ignoring option\n");
    undef $writesig;
  } else {
    $writesig =~ /(.*)/;
    $writesig = $1;
    undef $matchsigs; # writesig overrides matchsigs
  }
}

if ($matchsigs)
{
  if ($matchsigs =~ m|[^$OK_CHARS]|)
  {
    warn ("Unacceptable characters in -m dirname, ignoring option\n");
    undef $matchsigs;
  } else {
    $matchsigs =~ /(.*)/;
    $matchsigs = $1;
  }
  if (! (-e $matchsigs and -d $matchsigs and -r $matchsigs) )
  {
    warn ("Signature directory '$matchsigs' not readable, ignoring -m option\n");
    undef $matchsigs;
  }
}

if ($readsig)
{
  if ($readsig =~ m|[^$OK_CHARS]|)
  {
    warn ("Unacceptable characters in -r filename, ignoring option\n");
    undef $readsig;
  } else {
    $readsig =~ /(.*)/;
    $readsig = $1;
  }
  if (! (-e $readsig and -f $readsig and -r $readsig) )
  {
    warn ("Signature '$readsig' not readable, ignoring -r option\n");
    undef $readsig;
    goto PARSEAGAIN;
  } else {
    undef $uri; # readsig overrides url and writesig
    undef $url;
    undef $writesig;
  }
}

if ($overlaydir)
{
  if ($overlaydir =~ m|[^$OK_CHARS]|)
  {
    warn ("Unacceptable characters in -O dirname, ignoring option\n");
    undef $overlaydir;
  } else {
    $overlaydir =~ /(.*)/;
    $overlaydir = $1;
  }
  if (! (-e $overlaydir and -d $overlaydir and -r $overlaydir) )
  {
    warn ("Overlay directory '$overlaydir' not readable, ignoring -O option\n");
    undef $overlaydir;
  }
}

# XXX: actually, no https yet.
unless ($readsig or $scheme eq "http" or $scheme eq "https") { die "only supports http and https URI schemes, for now.\n" ; }

# load signatures for matching
if ($matchsigs)
{
  my @sigs = <$matchsigs/*.sig> ;
  SIG: foreach my $signame (@sigs)
  {
    open SIGNAME, "<" . $signame or next SIG;
    my $sig = join '', <SIGNAME> ;
    close SIGNAME ;
    $matchresults{ basename $signame } = eval { JSON::XS->new->utf8->relaxed(1)->decode($sig); };
    warn ("Error parsing signature \"$signame\", skipped\n") if ($@);
    warn ($@) if ($@);
    delete $matchresults{ basename $signame } if ($@);
  }
  if ($verbose) { print_sane keys (%matchresults) . " signatures loaded.\n" ; }
}

# load overlay signatures
if ($overlaydir)
{
  my @overlayfiles = <$overlaydir/*.sig> ;
  OVER: foreach my $overlayname (@overlayfiles)
  {
    open OVERLAY, "<" . $overlayname or next OVER;
    my $over = join '', <OVERLAY> ;
    close OVERLAY ;
    $overlays{ basename $overlayname } = eval { JSON::XS->new->utf8->relaxed(1)->decode($over); };
    warn ("Error parsing overlay \"$overlayname\", skipped\n") if ($@);
    warn ($@) if ($@);
    delete $overlays{ basename $overlayname } if ($@);
  }
  if ($verbose) { print_sane keys (%overlays) . " overlays loaded.\n" ; }
}

classify_test('get_10_knowngood', 1);
classify_test('get_11_knowngood', 1);
classify_test('get_09_knowngood', 2);
classify_test('options_11_star', 1);
classify_test('options_11_knowngood', 1);
classify_test('get_12_knowngood', 1);
classify_test('get_10_nlonly', 1);
classify_test('get_11_headercontinuation_kind', 1);
classify_test('get_11_headercontinuation_kind_unfold_sst', 2);
classify_test('get_11_headercontinuation_kind_unfold_sss', 2);
classify_test('get_11_headercontinuation', 1);
classify_test('get_11_headercontinuation_unfold_sst', 2);
classify_test('get_11_headercontinuation_unfold_sss', 2);
classify_test('get_20_knowngood', 1);
classify_test('get_10_cronly', 1);
classify_test('getlc_10_knowngood', 1);
classify_test('get_11_duplicate_contentlength', 1);
classify_test('hairy_method_11_knowngood', 2);
classify_test('doesnotexist_11_knowngood', 1);
classify_test('get_11_knowngood_percentzero', 1);

unless ($readsig)
{
  print_sane "actively fingerprinting...\n" if ($verbose) ;

  # is_namevhost();

  # friendly requests, should mostly work
  $tmpresult = &test_get_10_knowngood(); add_testresult('get_10_knowngood', $tmpresult);
  $tmpresult = &test_get_11_knowngood(); add_testresult('get_11_knowngood', $tmpresult);
  $tmpresult = &test_get_09_knowngood(); add_testresult('get_09_knowngood', $tmpresult);
  $tmpresult = &test_options_11_star(); add_testresult('options_11_star', $tmpresult);
  $tmpresult = &test_options_11_knowngood(); add_testresult('options_11_knowngood', $tmpresult);

  # borderline requests
  $tmpresult = &test_get_12_knowngood(); add_testresult('get_12_knowngood', $tmpresult);
  $tmpresult = &test_get_10_nlonly(); add_testresult('get_10_nlonly', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation_kind(); add_testresult('get_11_headercontinuation_kind', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation_kind_unfold_sst(); add_testresult('get_11_headercontinuation_kind_unfold_sst', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation_kind_unfold_sss(); add_testresult('get_11_headercontinuation_kind_unfold_sss', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation(); add_testresult('get_11_headercontinuation', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation_unfold_sst(); add_testresult('get_11_headercontinuation_unfold_sst', $tmpresult);
  $tmpresult = &test_get_11_headercontinuation_unfold_sss(); add_testresult('get_11_headercontinuation_unfold_sss', $tmpresult);

  # error-provoking requests
  $tmpresult = &test_get_20_knowngood(); add_testresult('get_20_knowngood', $tmpresult);
  $tmpresult = &test_get_10_cronly(); add_testresult('get_10_cronly', $tmpresult);
  $tmpresult = &test_getlc_10_knowngood(); add_testresult('getlc_10_knowngood', $tmpresult);
  $tmpresult = &test_get_11_duplicate_contentlength(); add_testresult('get_11_duplicate_contentlength', $tmpresult);

  $tmpresult = &test_hairy_method_11_knowngood(); add_testresult('hairy_method_11_knowngood', $tmpresult);
  $tmpresult = &test_doesnotexist_11_knowngood(); add_testresult('doesnotexist_11_knowngood', $tmpresult);

  # evil requests
  $tmpresult = &test_get_11_knowngood_percentzero(); add_testresult('get_11_knowngood_percentzero', $tmpresult);

  # dump results
  if ($verbose and %testresults) {
    print_sane "\n";
    print_sane "test results:\n" ;
    foreach (sort keys %testresults) {
      print_sane "$_:\n\n";
      my $tmphashref = $testresults{$_} ;
      foreach my $sigkey (sort keys %$tmphashref) {
         print_sane " $sigkey: $tmphashref->{ $sigkey }\n";
      }
    print_sane "\n";
    }
  }
}

if ($readsig)
{
  open SIGNAME, "<" . $readsig or die "Can't open signature $readsig after all, strange. Aborting.\n";
  my $sig = join '', <SIGNAME> ;
  close SIGNAME ;
  my $tmp_testresults = eval { JSON::XS->new->utf8->relaxed(1)->decode($sig); };
  warn ("Error parsing signature \"$readsig\", skipped\n") if ($@);
  warn ($@) if ($@);
  die "Parse error.\n" if ($@);
  %testresults = %$tmp_testresults;
}

# signature matching
if ($matchsigs)
{
  print_sane "doing signature matching...\n" if ($verbose) ;

  my %matchscores;
  my %maxscores;

  my %overlayscores;
  my %overlaymaxes;

  match_sigtree(\%testresults, \%matchresults, \%matchscores, \%maxscores, undef, \%firstclasstests, \%secondclasstests);

  if ($overlaydir)
  {
    print_sane "doing overlay matching...\n" if ($debug) ;

    OVERLAY: foreach my $overlaytomatch (keys %overlays)
    {
      my @opaquetests;
      my %olfirstclass = %firstclasstests;
      my %olsecondclass = %secondclasstests;

      print_sane "Overlay matching, trying: $overlaytomatch\n" if $debug;

      # matching phase 1: check overlay->{'fixed_responses'}
      my $overlay = $overlays{$overlaytomatch};
      if (defined ($overlay->{'fixed_responses'}))
      {
        my $fixed = $overlay->{'fixed_responses'};
        $overlaymaxes{$overlaytomatch} += 100 * scalar keys %$fixed;
        foreach my $sigpart (keys %$fixed)
        {
          push @opaquetests, $sigpart;
          my $sig1 = deref_sig ($sigpart, \%testresults);
          my $sig2 = $fixed->{$sigpart};
          if (($sig2 eq "delete") and (keys (%$sig1) > 0))
          {
            print_sane "$sigpart found with $sigpart == delete, negative match for $overlaytomatch\n" if ($debug and $debug >= 2);
            $overlayscores{$overlaytomatch} -= 100;
            $overlaymaxes{$overlaytomatch} -= 100;
            next;
          }
          elsif ($sig2 eq "delete")
          {
            $overlaymaxes{$overlaytomatch} -= 100;
            next;
          }
          $overlayscores{$overlaytomatch} += compare_sigs ($sig1, $sig2);
        }
      }

      # matching phase 1.1: check $overlay->{'optional_responses'}

      if (defined ($overlay->{'optional_responses'}))
      {
        my $optional = $overlay->{'optional_responses'};
        foreach my $sigpart (keys %$optional)
        {
          my $sig1 = deref_sig ($sigpart, \%testresults);
          my $sig2 = $optional->{$sigpart};
          if (($sig2 eq "delete") and (keys (%$sig1) > 0))
          {
            print_sane "$sigpart found with $sigpart == delete, negative optional match for $overlaytomatch\n" if ($debug and $debug >= 2);
            next;
          }
          elsif ($sig2 eq "delete")
          {
            $overlayscores{$overlaytomatch} += 100;
            $overlaymaxes{$overlaytomatch} += 100;
            push @opaquetests, $sigpart;
            next;
          }
          my $score = compare_sigs ($sig1, $sig2);
          if ($score == 100)
          {
            $overlayscores{$overlaytomatch} += 100;
            $overlaymaxes{$overlaytomatch} += 100;
            push @opaquetests, $sigpart;
          }
        }
      }

      # matching phase 2: check $overlay->{'cleaned_requests'}
      # if we get past phase 2, the match is good enough to pursue further.

      if (defined ($overlay->{'cleaned_requests'}))
      {
        my $cleaned = $overlay->{'cleaned_requests'};
        $overlaymaxes{$overlaytomatch} += 100 * scalar keys %$cleaned;
        foreach my $sigpart (keys %$cleaned)
        {
          push @opaquetests, $sigpart;
          my $sig1 = deref_sig ($sigpart, \%testresults);
          my $sig2;
          if (ref($cleaned->{$sigpart}) eq 'HASH')
          {
            promote_test($cleaned->{$sigpart}->{'like'}, \%olfirstclass, \%olsecondclass); # promote referred test
            # merge and compare
            $sig2 = deref_sig ($cleaned->{$sigpart}->{'like'}, \%testresults);
            # skip if non-existent
            unless (keys %$sig2)
            {
              $overlaymaxes{$overlaytomatch} -= 100;
              next;
            }
            my $likehash = $cleaned->{$sigpart};
            my %sig3 = %$sig2 ;
            foreach my $key (keys %$likehash)
            {
              next if ($key eq "like");
              $sig3{$key} = $likehash->{$key};
            }
            print_sane ("$sigpart ") if ($verbose && $debug) ;
            print_sane ("likeness: " . compare_sigs ($sig1, \%sig3) . "\n") if ($verbose && $debug) ;
            $overlayscores{$overlaytomatch} += compare_sigs ($sig1, \%sig3);
          }
          else
          {
            promote_test($cleaned->{$sigpart}, \%olfirstclass, \%olsecondclass); # promote referred test
            $sig2 = deref_sig ($cleaned->{$sigpart}, \%testresults);
            # skip if non-existent
            unless (keys %$sig2)
            {
              $overlaymaxes{$overlaytomatch} -= 100;
              next;
            }
            print_sane ("$sigpart ") if ($verbose && $debug) ;
            print_sane ("likeness: " . compare_sigs ($sig1, $sig2) . "\n") if ($verbose && $debug) ;
            $overlayscores{$overlaytomatch} += compare_sigs ($sig1, $sig2);
          }
        }
      }
      if ($overlayscores{$overlaytomatch} >= ( $overlaymaxes{$overlaytomatch} * 0.79 ) )
      {
        print_sane "Overlay match, phase 2, 79%: $overlaytomatch ($overlayscores{$overlaytomatch}/$overlaymaxes{$overlaytomatch})\n" if $verbose;
      }
      else
      {
        next OVERLAY;
      }

      # matching phase 2.5: check $overlay->{'unreliable_requests'}
      # definition: requests that may be satisfied from cache, or pass through and trigger a native response, semi-randomly
      if (defined ($overlay->{'unreliable_requests'}))
      {
        my $unreliable = $overlay->{'unreliable_requests'};
        foreach my $sigpart (keys %$unreliable)
        {
          my $sig1 = deref_sig ($sigpart, \%testresults);
          my $sig2 = deref_sig ($unreliable->{$sigpart}, \%testresults);
          if ( eq_deeply ($sig1, $sig2) )
          {
            print_sane "Unreliable request - hit $sigpart\n" if $debug and $debug >= 2;
            push @opaquetests, $sigpart;
            $overlayscores{$overlaytomatch} += 100;
            $overlaymaxes{$overlaytomatch} += 100;
          }
        }
      }

      # matching phase 3:
      # - copy %testresults, delete the keys we've already matched
      my %visibletest = %testresults ;
      foreach my $key (@opaquetests)
      {
        del_testresult ($key, \%visibletest);

	# hide opaque tests
	delete $olfirstclass{$key};
	delete $olsecondclass{$key};
      }

      while ((keys %olsecondclass > 0) and (keys %olfirstclass < keys %firstclasstests )) {
	# promote a test for each hidden by the overlay and not replaced
	promote_first_test (\%olfirstclass, \%olsecondclass);
      }

      # - copy %matchresults, transform according to $overlay->{'fixed_backend_conditional'} and $overlay->{'modified_responses'}
      my $visibleref = dclone \%matchresults;
      my %visiblematch = %$visibleref;

      if (defined ($overlay->{'fixed_backend_conditional'}))
      {
        my $backendcondarr = $overlay->{'fixed_backend_conditional'};
        foreach my $backendcond (@$backendcondarr)
        {
          my $condition           = $backendcond->{'condition'};
          my $replacementresponse = $backendcond->{'response'};
          my $optionalresponse    = $backendcond->{'response_optional'};
          # XXX Hack - redo when we need >1 condition
          my ($condkey,) = keys %$condition;
          my $condval = $condition->{$condkey};

          foreach my $matchresult (keys %visiblematch)
          {
            my $visiblesig = $visiblematch{$matchresult};
            foreach my $sigpart (keys %$visiblesig)
            {
              my $tempsig = deref_sig ($sigpart, $visiblesig);
              if ($tempsig->{$condkey} and $tempsig->{$condkey} eq $condval)
              {
		# fresh copy every time, please
		del_testresult($sigpart, $visiblesig);
		$visiblesig->{$sigpart} = hashem (%$replacementresponse);
                # add response_optional bit, if it looks convenient
                if (compare_sigs( deref_sig ($sigpart, \%visibletest), $optionalresponse) > 0)
                {
		  print_sane "Yup, convenient - adding $matchresult: $sigpart\n" if ($debug && $debug >= 2);
                  $tempsig = deref_sig ($sigpart, $visiblesig);
		  $tempsig = hashem (%$tempsig);
		  del_testresult($sigpart, $visiblesig);
                  foreach my $key (keys %$optionalresponse)
                  {
                    $tempsig->{$key} = $optionalresponse->{$key};
                  }
		  $visiblesig->{$sigpart} = $tempsig;
                }
              } # if match! transform!
            } # foreach loop through signature bits
          } # foreach loop through %visiblematch
	  debug_dump (\%visiblematch) if ($debug && $debug >= 2);
        } # foreach loop through fixed_backend_conditional
      }

      if (defined ($overlay->{'modified_responses'}))
      {
        my $responsehash = $overlay->{'modified_responses'};
        foreach my $responsehdr (keys %$responsehash)
        {
          print_sane "Modified response - $responsehdr\n" if $debug and $debug >= 2;
          foreach my $matchresult (keys %visiblematch)
          {
            my $visiblesig = $visiblematch{$matchresult};
            foreach my $sigpart (keys %$visiblesig)
            {
              print_sane "Modified response - $matchresult : $sigpart\n" if $debug and $debug >= 2;
              my $tempsig = deref_sig ($sigpart, $visiblesig);
              # hides a quirk flag?
              if ($responsehash->{$responsehdr} eq 'delete')
              {
                if (exists $tempsig->{$responsehdr}) {
                  print_sane "Modified response - deleting $responsehdr from $matchresult : $sigpart\n" if $debug and $debug >= 2;
                  delete $tempsig->{$responsehdr};
                }
              }
              # may add a quirk flag?
              elsif ($responsehash->{$responsehdr} eq 'may-add')
              {
                if (compare_sigs( deref_sig ($sigpart, \%visibletest), { $responsehdr => 1 } ) > 0)
                {
                  print_sane "Modified response - adding $responsehdr to $matchresult : $sigpart\n" if $debug and $debug >= 2;
                  $tempsig->{$responsehdr} = 1;
                } else {
                  print_sane "Modified response - not adding $responsehdr to $matchresult : $sigpart\n" if $debug and $debug >= 2;
                }
              }
              # may delete a quirk flag?
              elsif ($responsehash->{$responsehdr} eq 'may-delete')
              {
                if (compare_sigs( deref_sig ($sigpart, \%visibletest), { $responsehdr => 1 } ) > 0)
                {
                  print_sane "Modified response - not deleting $responsehdr to $matchresult : $sigpart\n" if $debug and $debug >= 2;
                } else {
                  print_sane "Modified response - deleting $responsehdr to $matchresult : $sigpart\n" if $debug and $debug >= 2;
                  delete $tempsig->{$responsehdr};
                }
              }
              else
              {
                my $responsemodifier = $responsehash->{$responsehdr};
                # strips tokens from the string?
                if ($responsemodifier->{'strips'})
                {
                  if ($tempsig->{$responsehdr})
                  {
                    my %taboo;
                    foreach my $elem (split(',', $responsemodifier->{'strips'}))
                    {
                      $taboo{$elem} = 1;
                    }
                    print_sane "Tokenlist - pre: $tempsig->{$responsehdr} \n" if $debug and $debug >= 2;
                    print_sane "Tokenlist - filtered by: $responsemodifier->{'strips'} \n" if $debug and $debug >= 2;
                    my @tokenlist = split(',', $tempsig->{$responsehdr});
                    my @filteredtokenlist;

                    while (my $token = shift @tokenlist)
                    {
                      unless (exists $taboo{$token})
                      {
                        push @filteredtokenlist,$token ;
                      }
                    }
                    my $joined = join(',', @filteredtokenlist);
                    print_sane "Tokenlist - post: $joined \n" if $debug and $debug >= 2;
                    $tempsig->{$responsehdr} = $joined;
                  }
                } # tokenstripper
                # appends tokens, unconditionally ?
                if ($responsemodifier->{'appends'})
                {
                  if ($tempsig->{$responsehdr})
                  {
                    print_sane "Tokenlist - appending $responsemodifier->{'appends'} to $tempsig->{$responsehdr} \n" if $debug and $debug >= 2;
                    $tempsig->{$responsehdr} = $tempsig->{$responsehdr} . "," . $responsemodifier->{'appends'};
                  }
		  elsif ($tempsig->{'http_version'})
		  {
                    print_sane "Tokenlist - post: $responsemodifier->{'appends'} \n" if $debug and $debug >= 2;
		    $tempsig->{$responsehdr} = $responsemodifier->{'appends'};
		  }
                } # tokenappender
                # appends tokens, conditionally ?
                if ($responsemodifier->{'appendsifmissing'})
                {
                  if ($tempsig->{$responsehdr})
                  {
                    my %seen;

                    print_sane "Tokenlist - pre: $tempsig->{$responsehdr} \n" if $debug and $debug >= 2;
                    print_sane "Tokenlist - appendsifmissing: $responsemodifier->{'appendsifmissing'} \n" if $debug and $debug >= 2;

                    my @tokenlist = split(',', $tempsig->{$responsehdr} . "," . $responsemodifier->{'appendsifmissing'});
                    my @filteredtokenlist;

                    while (my $token = shift @tokenlist)
                    {
                      unless (exists $seen{$token})
                      {
                        push @filteredtokenlist,$token ;
                        $seen{$token} = 1;
                      }
                    }
                    my $joined = join(',', @filteredtokenlist);
                    print_sane "Tokenlist - post: $joined \n" if $debug and $debug >= 2;
                    $tempsig->{$responsehdr} = $joined;
                  }
		  elsif ($tempsig->{'http_version'})
		  {
                    print_sane "Tokenlist - post: $responsemodifier->{'appendsifmissing'} \n" if $debug and $debug >= 2;
		    $tempsig->{$responsehdr} = $responsemodifier->{'appendsifmissing'};
		  }
                } # conditional tokenappender
                # replacement ?
                if ($responsemodifier->{'replaced_by'})
                {
                  if (exists $tempsig->{$responsehdr}) {
                    print_sane "Modified response - replacing $responsehdr from $matchresult : $sigpart\n" if $debug and $debug >= 2;
                    delete $tempsig->{$responsehdr};
		    my $replacements = $responsemodifier->{'replaced_by'};
		    foreach my $key (keys %$replacements)
		    {
                      print_sane "Modified response - replacing - adding $key = $replacements->{$key}\n" if $debug and $debug >= 2;
		      $tempsig->{$key} = $replacements->{$key};
		    }
                  }
                } # replacement
              }
              $visiblesig->{$sigpart} = $tempsig ;
            } # foreach loop through signature bits
          } # foreach loop through %visiblematch
        } # foreach loop through modified_responses
      }

      # requests/responses that are passed through unmolested ( varnish vcl pipe, et al )
      if (defined ($overlay->{'passthrough_requests'}))
      {
        my $requesthash = $overlay->{'passthrough_requests'};
        foreach my $request (keys %$requesthash)
        {
          print_sane "Passthrough request - $request\n" if $debug and $debug >= 2;

          foreach my $matchresult (keys %visiblematch)
          {
            my $visiblesig = $visiblematch{$matchresult};
	    my $standardsig = $matchresults{$matchresult};
	    my $standardreq = deref_sig ($request, $standardsig) ;
	    $visiblesig->{$request} = $standardreq;
          }
        }
      }

      # - run the matching code on our temporary copies
      match_sigtree(\%visibletest, \%visiblematch, \%matchscores, \%maxscores, [ $overlayscores{$overlaytomatch}, $overlaymaxes{$overlaytomatch}, $overlaytomatch ],
                    \%olfirstclass, \%olsecondclass);

      # * phase 99, in the future: stacked overlays - we're not too far off now, actually.

    } # foreach overlay
  }

  # the payoff!
  # time to show the results...
  #################################
  my $numprinted = 0;
  foreach my $match (sort { ($matchscores{$b}/$maxscores{$b}) <=> ($matchscores{$a}/$maxscores{$a}) } keys %matchscores)
  {
    print_sane "Match ($matchscores{$match}/$maxscores{$match}): $match\n";
    $numprinted++;
    if ($numprinted eq 10) { last unless $allmatches; }
  }

  if ($numprinted eq 0)
  {
    print STDERR "No matches! That's odd...\n" ;
    exit(1);
  }
}

# write signature to file
if ($writesig)
{
  open SAVESIG, ">" . $writesig or die ("Couldn't open $writesig for writing signature: $!\n");
  my $savesig = JSON::XS->new->pretty(1)->canonical(1)->encode(\%testresults);
  print SAVESIG $savesig;
  close SAVESIG;
}

exit(0);

## data handling helpers
############################


# perldoc perlref - make a new hash from the parameters, and give me a reference to it
sub hashem { +{ @_ } }

sub add_testresult
{
  my $sigkeyname = $_[0];
  my $signature  = $_[1];

  unless (keys %$signature) { return; }

  # compress identical results into one result + references
  # max one extra level of indirection, shouldn't slow anything down

  TESTRES: foreach (sort keys %testresults) {
    my $tmphashref = $testresults{$_} ;
    unless ( $tmphashref->{ 'identical_to' } )
    {
       my $equal = eq_deeply ( $tmphashref, $signature );
       if ($equal)
       {
         $signature = { 'identical_to' => $_ } ;
         last TESTRES;
       }
    }
  }
  $testresults{$sigkeyname} = $signature ;
}

sub classify_test
{
  my $sigkeyname = $_[0];
  my $testclass = $_[1];

  $firstclasstests{$sigkeyname} = 1 if ($testclass == 1) ;
  $secondclasstests{$sigkeyname} = 1 if ($testclass == 2) ;
  $thirdclasstests{$sigkeyname} = 1 if ($testclass == 3) ;
}

sub del_testresult
{
  my $sigkeyname = $_[0];
  my $testresref = $_[1];

  # explode identical_to references to us
  TESTRES: foreach (sort keys %$testresref) {
    my $tmphashref = $testresref->{$_} ;
    if ( exists $tmphashref->{'identical_to'} and $tmphashref->{'identical_to'} eq $sigkeyname )
    {
      my $ref = $testresref->{$sigkeyname};
      $testresref->{$_} = hashem ( %$ref );
    }
  }
  delete $testresref->{$sigkeyname};
}

sub deref_sig
{
  my $sigkeyname = $_[0];
  my $signature  = $_[1];

  my $tmpsig = $signature->{$sigkeyname};

  if ($tmpsig->{'identical_to'})
  {
    $sigkeyname = $tmpsig->{'identical_to'};
    $tmpsig = $signature->{$sigkeyname};
  }
  return $tmpsig;
}

sub promote_test
{
  my $testname = $_[0];
  my $firstclassref = $_[1];
  my $secondclassref = $_[2];

  if (defined $firstclassref->{$testname} && $firstclassref->{$testname} == 1)
  {
    print_sane ("promote_test: $testname already first class\n") if ($debug);
    return;
  }
  if (defined $secondclassref->{$testname} && $secondclassref->{$testname} == 1)
  {
    $firstclassref->{$testname} = 1;
    delete $secondclassref->{$testname};
    print_sane ("promote_test: $testname promoted second=>first class\n") if ($debug);
    return;
  }
  print_sane ("promote_test: $testname not found\n") if ($debug);
}

sub promote_first_test
{
  my $firstclassref = $_[0];
  my $secondclassref = $_[1];

  if (keys %$secondclassref > 0)
  {
    my @secondclasscitizens = sort keys %$secondclassref;
    print_sane("promote_first_test: promoting $secondclasscitizens[0]\n") if $debug;
    promote_test ($secondclasscitizens[0], $firstclassref, $secondclassref);
  }
  else
  {
    print_sane("promote_first_test: secondclassref empty, running out of visibility\n") if $debug;
  }  
}

## signature matching helpers
######################################

sub match_sigtree
{
  my $testref = $_[0];
  my %testresults = %$testref;
  my $matchref = $_[1];
  my %matchresults = %$matchref;
  my $matchscoreref = $_[2];
  my $maxscoreref = $_[3];
  my $olinfo_ref = $_[4];
  my @olinfo; @olinfo = @$olinfo_ref if defined $olinfo_ref;
  my $firstclassref = $_[5];
  my %firstclass = %$firstclassref;
  my $secondclassref = $_[6];
  my %secondclass = %$secondclassref;

  # overlay stuff
  my $ol_addscore = $olinfo[0]; unless (defined $ol_addscore) { $ol_addscore = 0; }
  my $ol_addmax = $olinfo[1]; unless (defined $ol_addmax) { $ol_addmax = 0; }
  my $ol_prefix = $olinfo[2];
  if (defined $ol_prefix) { $ol_prefix = $ol_prefix . "(" . floor(100*$ol_addscore/$ol_addmax) . "%) => " ; }
  else { $ol_prefix = ""; }

  foreach my $sigtomatch (keys %matchresults)
  {
    if ( eq_deeply(\%testresults, $matchresults{$sigtomatch}) )
    {
      $matchscoreref->{$ol_prefix . $sigtomatch} = (100 * keys %firstclass) + $ol_addscore;
      $maxscoreref->{$ol_prefix . $sigtomatch} = (100 * keys %firstclass) + $ol_addmax;
      delete $matchresults{$sigtomatch};
    }
  }
  foreach my $sigtomatch (keys %matchresults)
  {
    print_sane "fuzzy matching: comparing to $sigtomatch\n" if ($debug);

SIGPART:
    foreach my $sigpart (keys %testresults)
    {
      # match only first-class test signatures to begin with
      next SIGPART unless ($firstclass{$sigpart}) ;

      my $sig1 = deref_sig ($sigpart, \%testresults);
      my $sig2 = deref_sig ($sigpart, $matchresults{$sigtomatch});

      if ( eq_deeply($sig1, $sig2))
      {
        print_sane "fuzzy matching: perfect match on $sigpart, 100 points\n" if ($debug);
        $matchscoreref->{$ol_prefix . $sigtomatch} += 100;
      }
      else
      {
        my $score = compare_sigs ($sig1, $sig2);
        print_sane "fuzzy matching: partial match on $sigpart, $score points\n" if ($score > 0 and $debug);
        print_sane "fuzzy matching: no match on $sigpart, $score points\n" if ($score == 0 and $debug);
        if ($score > 0) { $matchscoreref->{$ol_prefix . $sigtomatch} += $score; }
      }
    }
    # if we have no result for the test, don't score it
    foreach my $sigpart (keys %firstclass)
    {
      my $exists_in_results = deref_sig ($sigpart, \%testresults);
      unless (keys %$exists_in_results) { delete $firstclass{$sigpart}; }
    }
    $maxscoreref->{$ol_prefix . $sigtomatch} = 100 * keys %firstclass if ($matchscoreref->{$ol_prefix . $sigtomatch} and $matchscoreref->{$ol_prefix . $sigtomatch} > 0);
    print_sane "fuzzy matching: total score " . $matchscoreref->{$ol_prefix . $sigtomatch} . "\n" if ($matchscoreref->{$ol_prefix . $sigtomatch} and $matchscoreref->{$ol_prefix . $sigtomatch} > 0 and $debug);

    $maxscoreref->{$ol_prefix . $sigtomatch} += $ol_addmax if ($matchscoreref->{$ol_prefix . $sigtomatch} and $matchscoreref->{$ol_prefix . $sigtomatch} > 0);
    $matchscoreref->{$ol_prefix . $sigtomatch} += $ol_addscore if ($matchscoreref->{$ol_prefix . $sigtomatch} and $matchscoreref->{$ol_prefix . $sigtomatch} > 0);
  }

  # FIXME: if >1 perfect match, promote tests, rerun, see if differentiation improves
  #        - for examples, see nginx, where hairy_method promotion would be good
}

sub compare_sigs
{
  my $sig1 = $_[0];
  my $sig2 = $_[1];

  # FIXME: easily changeable keys should have lower weight - response_text and similar
  my $count = uniq (keys %$sig1, keys %$sig2);
  my $matchscore = 10;
  my $perfectmatch = 100;
  my $score = 0;

  print_sane ("sig1 dump: ") if ($count == 0 or ($debug and $debug >= 4));
  debug_dump ($sig1) if ($count == 0 or ($debug and $debug >= 4));
  print_sane ("sig2 dump: ") if ($count == 0 or ($debug and $debug >= 4));
  debug_dump ($sig2) if ($count == 0 or ($debug and $debug >= 4));

  foreach my $key (uniq (keys %$sig1, keys %$sig2))
  {
    if ( exists $sig1->{$key} and
         exists $sig2->{$key} and
         $sig1->{$key} eq $sig2->{$key})
    {
      $score+=$matchscore;
      print_sane "match! key = $key\n" if ($debug and $debug >= 3);
    }
    elsif ( exists $sig1->{$key} and
            exists $sig2->{$key} and
            $key =~ 'order' )
    {
      my @order1 = split(',', $sig1->{$key});
      my @order2 = split(',', $sig2->{$key});

      for (my $x=0; $x < ($#order1+1); $x++)
      {
	my $keymatched = 0;
        for (my $y=0; $y < ($#order2+1); $y++)
        {
	  if ($order1[$x] eq $order2[$y])
          {
            $keymatched = 1;
#            print_sane "match! $order1[$x] eq $order2[$y]\n";
          }
        }
        if ($keymatched == 0)
        {
#          print_sane "no match! removing $order1[$x] from " . join (',',@order1) . "\n";
          splice(@order1, $x, 1); # didn't find header x in sig2, delete from sig1
          $x--;
        }
#        print_sane '@order1 = ' . join (',',@order1) . "\n";
#        print_sane '@order2 = ' . join (',',@order2) . "\n";
      }
      # testresult is a ordered superset of the signature
      if (join (',',@order1) eq $sig2->{$key})
      {
        $score+=($matchscore * 3 / 4);
        print_sane ("$sig1->{$key} superset of $sig2->{$key}\n") if ($debug and $debug >= 2);
        next;
      }

      my @order3 = split(',', $sig1->{$key}); # refresh @order1 to check for subsets properly
      for (my $x=0; $x < ($#order2+1); $x++)
      {
	my $keymatched = 0;
        for (my $y=0; $y < ($#order3+1); $y++)
        {
	  if ($order2[$x] eq $order3[$y])
          {
            $keymatched = 1;
#            print_sane "match! $order2[$x] eq $order3[$y]\n";
          }
        }
        if ($keymatched == 0)
        {
#          print_sane "no match! removing $order2[$x] from " . join (',',@order2) . "\n";
          splice(@order2, $x, 1); # didn't find header x in sig1, delete from sig2
          $x--;
        }
#        print_sane '@order3 = ' . join(',',@order3) . "\n";
#        print_sane '@order2 = ' . join(',',@order2) . "\n";
      }
      # testresult is a ordered subset of the signature
      if (join (',',@order2) eq $sig1->{$key})
      {
        $score+=($matchscore * 3 / 4);
        print_sane ("$sig1->{$key} subset of $sig2->{$key}\n") if ($debug and $debug >= 2);;
        next;
      }

      # intersection of testresult and signature has more than one element, and same ordering - a bit weaker match
      if ( ($#order1+1) > 1 and join (',',@order2) eq join (',',@order1))
      {
        $score+=($matchscore / 2);
        print_sane ("intersection " . join (',',@order2) . " of $sig1->{$key} and $sig2->{$key} has same order\n") if ($debug and $debug >= 2);;
        next;
      }

#      print_sane "$sig1->{$key} and $sig2->{$key} does not match.\n";
    }
  }

  print_sane "match_sigs: score = $score\n" if ($debug and $debug >= 2);
  return floor ($score * $perfectmatch / ($matchscore * $count));
}

## ECCM helpers
################

sub helper_eccm
{
  my $signature = $_[0];

  return unless $eccm;

  print_sane ("helper_eccm signature dump: ") if ($debug and $debug >= 4);
  debug_dump ($signature) if ($debug and $debug >= 4);

  if (exists $signature->{'connection_closed'})
  {
    print_sane "Possible ECM detected, backing off\n" if ($verbose);
    sleep 20;
    return;
  }

  if (exists $signature->{'connection_refused'})
  {
    print_sane "ECM detected, no result on this test, backing off\n" if ($verbose);
    %$signature = ();
    sleep 40;
    return;
  }
}

## THE ACTUAL TESTS
##############################

sub test_get_09_knowngood()
{
  # tests a 1991-style "GET /" request
  # we get lots of fun, illuminating responses to this one these days...
  # my favourite so far is the wonderful oxymoron "HTTP/0.9 200 OK"

  my $request   = "GET $path\r\n";
  my $request2  = "GET $path\r\n\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  if ($#data == -1)
  {
    %signature = () ;
    @data = helper_talk($request2, $sigref);
    unless ($#data == -1)
    {
      $signature{'http09_needcrlfcrlf'} = 1;
    }
  }

  unless ($#data == -1)
  {
    if ($debug) {
      print_sane "$me returned data (if any):\n" ;
      foreach (join '',@data) {
        print_sane "$_\n";
      }
    }

    my $sigref2 = classify_signature(@data);

    # get test, ignore.
    if (exists $sigref2->{'response_body'}) { delete $sigref2->{'response_body'} ; }

    # irrelevant for http/0.9, false positive
    if (exists $sigref2->{'http_response_as_html'})
    {
      delete $sigref2->{'http_response_as_html'};
      delete $sigref2->{'response_code'};
      delete $sigref2->{'response_text'};
    }

    my @sig2keys = keys %$sigref2;

    if ($#sig2keys > -1)
    {
      $signature{'http09_returnsheaders'} = 1;
      %signature = ( %signature, %$sigref2 );
    }
    else { $signature{'http09_valid'} = 1; }
  }

  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }
  return $sigref;
}

sub is_namevhost()
{
  # XXX: implement me
  # tests HTTP/1.0 request with and without host header, compares returned data
}

sub test_get_10_knowngood()
{
  my $try = 1;

buildtest:
  # tests a plain HTTP/1.0 request with Host header and a bland User-Agent.
  # no Accept: headers, in order to see default values.
  my $request  = "GET $path HTTP/1.0\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  if (exists $signature{'connection_closed'})
  {
    # this basic a request really ought to work, but
    # I've seen fun threading bugs in unstable servers - so try again
    delete $signature{'connection_closed'};
    @data = helper_talk($request, $sigref);
  }

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }

  if (   (exists $signature{'response_code'} and $signature{'response_code'} != "200")
      or  exists $signature{'connection_refused'} )
  {
    # last-ditch typofixing, add / to end of url - $path is global (for all practical purposes), so this all works out.
    if($try++ == 1)
    {
      $path =~ s/$/\//;
      goto buildtest;
    }
    print STDERR "The web server isn't responding politely to a plain HTTP/1.0 GET request.\n";
    print STDERR "Signature matching and generation won't work reliably - bailing out.\n";
    exit(1);
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_10_nlonly()
{
  # tests a plain HTTP/1.0 request with \n only, mostly expected to work
  my $request  = "GET $path HTTP/1.0\n";
     $request .= "Host: $host\n";
     $request .= "User-Agent: $bland_useragent\n";
     $request .= "\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if (exists $signature{'connection_closed'})
  {
    return $sigref;
  }

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_10_cronly()
{
  # tests a plain HTTP/1.0 request with \r only, mostly expected to fail or provoke errors
  my $request  = "GET $path HTTP/1.0\r";
     $request .= "Host: $host\r";
     $request .= "User-Agent: $bland_useragent\r";
     $request .= "\r";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if (exists $signature{'connection_closed'})
  {
    return $sigref;
  }

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_getlc_10_knowngood()
{
  # lowercase get test, may sneak through some access restrictions
  my $request  = "get $path HTTP/1.0\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_knowngood()
{
  # tests a plain HTTP/1.1 request with Host header and a bland User-Agent.
  # no Accept: headers, in order to see default values.
  # Connection: close, for convenience.
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation_kind()
{
  # HTTP/1.1 fun with header continuation,
  # i.e. "\r\n\t" in the middle of header values
  # (being deprecated in current w3c httpbis)
  # also allowed, but discouraged, in HTTP/1.0 (RFC1945 section 2.2)

  # kind variant - folds User-Agent header
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "Connection: close\r\n";
     $request .= "User-Agent:\r\n\t$bland_useragent\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation_kind_unfold_sst()
{
  # HTTP/1.1 fun with header continuation, unfolded by proxy to "  \t"

  # kind variant - folds User-Agent header
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "Connection: close\r\n";
     $request .= "User-Agent:  \t$bland_useragent\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation_kind_unfold_sss()
{
  # HTTP/1.1 fun with header continuation, unfolded by proxy to "   "

  # kind variant - folds User-Agent header
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "Connection: close\r\n";
     $request .= "User-Agent:   $bland_useragent\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation()
{
  # HTTP/1.1 fun with header continuation,
  # i.e. "\r\n\t" in the middle of header values
  # (being deprecated in current w3c httpbis)
  # also allowed, but discouraged, in HTTP/1.0 (RFC1945 section 2.2)

  # less kind variant - folds Host header, very likely to break
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host:\r\n\t$host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation_unfold_sst()
{
  # HTTP/1.1 fun with header continuation, unfolded by proxy to "  \t"
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host:  \t$host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_headercontinuation_unfold_sss()
{
  # HTTP/1.1 fun with header continuation, unfolded by proxy to "   "
  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host:   $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_duplicate_contentlength()
{
  # fun with Content-Length: 0, doubled

  my $request  = "GET $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "Content-Length: 0\r\n";
     $request .= "Content-length: 0\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_options_11_star()
{
  # HTTP/1.1 OPTIONS request applicable for whole server
  my $request  = "OPTIONS * HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }
  return $sigref;
}

sub test_options_11_knowngood()
{
  # HTTP/1.1 OPTIONS request applicable for the known good url
  my $request  = "OPTIONS $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }
  return $sigref;
}

sub test_get_12_knowngood()
{
  # test_get_11_knowngood() with http version changed to 1.2, to test handling of unknown minor http versions
  #  some servers fall back to talking 1.1, some to 1.0
  my $request  = "GET $path HTTP/1.2\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }

  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_doesnotexist_11_knowngood()
{
  # HTTP/1.1 guaranteed non-existent method request for the known good url.
  # some simple implementations treat it as a GET and doesn't worry further.
  my $request  = "DOESNOTEXIST $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # irrelevant
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_hairy_method_11_knowngood()
{
  # HTTP/1.1 guaranteed non-existent method request for the known good url.
  # nginx had a behaviour change on methods with _ in them at some point.
  my $request  = "HAIRY_METHOD $path HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # irrelevant
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_20_knowngood()
{
  # test_get_11_knowngood() with http version changed to 2.0, to test handling of unknown major http versions
  my $request  = "GET $path HTTP/2.0\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http_unexpected_keepalive'} = 1;
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

sub test_get_11_knowngood_percentzero()
{
  # Evil test - truncated/invalid urlencoding.
  # Connection: close, for convenience.
  my $request  = "GET $path%0 HTTP/1.1\r\n";
     $request .= "Host: $host\r\n";
     $request .= "User-Agent: $bland_useragent\r\n";
     $request .= "Connection: close\r\n";
     $request .= "\r\n";

  my $me = (caller(0))[3];
  if ($debug) { print_sane "$me starting\n"; }

  my %signature ;
  my $sigref = \%signature ;

  my @data = helper_talk($request, $sigref);

  helper_eccm($sigref);

  if ($debug and @data) {
    print_sane "$me returned data (if any):\n" ;
    foreach (join '',@data) {
      print_sane "$_\n";
    }
  }
  my $sigref2 = classify_signature(@data);
  %signature = ( %signature, %$sigref2 );
  if (exists $signature{'does_keepalive'})
  {
    delete $signature{'does_keepalive'};
    $signature{'http11_ignores_connection_close'} = 1 unless $signature{'forced_keepalive'};
  }

  # get test, ignore
  if (exists $signature{'response_body'}) { delete $signature{'response_body'}; }

  return $sigref;
}

## test helpers
#############################

# helper, opening/closing connection and doing non-blocking i/o
sub helper_talk
{
  my @data;
  my $blarg;
  my $amountread;
  my $sigref = $_[1] ;
 
  # lookup and cache ip in a state variable
  # TODO: IPv6 - needs a bit more code, inet_ntop/pton want address family parameters :(
  state $ip;
  unless (defined $ip) {
    $ip = inet_ntoa(inet_aton($host));
    print_sane "resolved, using \$host = $host, \$ip = $ip\n" if $verbose;
  }

  my $sock = new IO::Socket::INET ( PeerAddr => $ip, PeerPort => $port, Proto => 'tcp' );

  unless ($sock)
  {
    print STDERR "Could not create socket: $!\n" if $verbose;
    $sigref->{'connection_refused'} = 1;
    return @data;
  }
  
  $sock->blocking(0);

  print $sock $_[0];

  my $rin = '';
  vec($rin,fileno($sock),1) = 1;

  my ($nfound,$timeleft) = select(my $rout=$rin, undef, undef, $mediumtimeout);

  # slow reply? timeout, perhaps?
  if ($nfound and ($timeleft < ($mediumtimeout / 2)))
  {
    $sigref->{'delayed_reply'} = 1;
  }

  if ($nfound)
  {
    # $amountread = undef means connection closed during the select, or similar.
    while ($nfound and $amountread = sysread $sock, $blarg, 5000000)
    {
      push @data, $blarg;
      vec($rin,fileno($sock),1) = 1;
      ($nfound,$timeleft) = select(my $rout=$rin, undef, undef, $mediumtimeout);
    }
    if (defined $amountread and $amountread == 0)
    {
      # controlled connection close straightaway after the last of the data -
      # reset timeleft to avoid triggering delayed_close
      $timeleft = $mediumtimeout;
    }
  };

  unless ($nfound)
  {
    $sigref->{'does_keepalive'} = 1; # which is mostly unexpected for our tests.
  }

  # connection slow to close?
  if ($nfound and (not defined ($amountread) or $amountread < 1) and ($timeleft < ($mediumtimeout / 2)))
  {
    $sigref->{'delayed_close'} = 1;
  }

  close($sock);

  if ($#data == -1) { $sigref->{'connection_closed'} = 1; }

  return @data;
}

## the meaty big classification routine
##############################################

sub classify_signature()
{
  my @data = (@_) ;
  my @lines;
  my %signature ;
  my $sigref = \%signature ;
  my $linenum = 0 ;
  my @headers;

  unless (defined $data[0]) { return $sigref; }

  @data = ( join '', @data );

  while (@data)
  {
    my @tmp = split /\n/, pop @data ;
    @lines = (@lines, @tmp) ;
  }

  # preprocessing: do header line folding, cut everything after the header
  my @linestmp;
  my $restisbody = 0;
  my @body;
  my $hundredcontinue = 0;
  LINEPRE: foreach my $line (@lines)
  {
    my $tmp = $/ ;
    $/ = "\r" ;
    chomp $line ;
    $/ = $tmp ;

    if (($restisbody == 0) and ($#linestmp > -1) and ($line =~ /^\s+(\S+.+)/))
    {
      print_sane "classify preprocessing: folding line '$1' into previous '$linestmp[$#linestmp]'\n" if $debug;
      $linestmp[$#linestmp] = $linestmp[$#linestmp] . $1 ;
      next;
    }

    if ($line =~ m|^[Hh][Tt][Tt][Pp]/\d\.\d 100|)
    {
      # a 100 continue... optionally skip one crlfcrlf
      $hundredcontinue++;
    }
    elsif ($line =~ m|^[Hh][Tt][Tt][Pp]/\d\.\d| and $hundredcontinue > 0)
    {
      $hundredcontinue--;
    }

    if ($#linestmp > 0 and $line =~ /^$/ and not $hundredcontinue--)
    {
      $restisbody = 1; # crlfcrlf - end of headers
      next LINEPRE;
    }
    if ($restisbody == 1)
    {
      push @body, $line;
    }
    else
    {
      push @linestmp, $line;
    }
  }
  @lines = @linestmp ;

  my $tmp_conn_keep_alive = undef;
  my $tmp_keep_alive = undef;
  my $tmp_content_length = undef;
  my $tmp_conn = undef;
  my $tmp_date = undef;

  # analysis
  LINE: foreach my $line (@lines)
  {
    $linenum++;

    # 100 Continue?
    if ($line =~ m|^([Hh][Tt][Tt][Pp])/(\d\.\d) 100| and not exists $signature{'http'} and not exists $signature{'temp_http'})
    {
      $signature{'temp_http'} = $1;
      $signature{'temp_http_version'} = $2;

      if ($line =~ m|^[Hh][Tt][Tt][Pp]/\d\.\d(\s+)(\d{3})(\s+)(.+)| )
      {
        $signature{'temp_response_code'} = $2;
        $signature{'temp_response_text'} = $4;
      }
      next LINE;
    }

    # Don't look too hard - shortcircuit if we see <HTML><?xml> or similar.
    if ( not exists $signature{'http'} and
         (   $line =~ m|(^<[Hh][Tt][Mm][Ll]>)|
          or $line =~ m|(^<?[Xx][Mm][Ll])|
          or $line =~ m|(^<!--)|
          or $line =~ m|(^<[Hh]\d)|
          or $line =~ m|(^<![Dd][Oo][Cc][Tt][Yy][Pp][Ee])|
         )
       )
    {
      $signature{'response_body'} = 1;
      print_sane "shortcircuit: found $1 before any HTTP response\n" if $debug;
      last LINE;
    }

    # look for the http response
    if ($line =~ m|^([Hh][Tt][Tt][Pp])/(\d\.\d)| and not exists $signature{'http'})
    {
      $signature{'http'} = $1;
      $signature{'http_version'} = $2;

      if ($line =~ m|^[Hh][Tt][Tt][Pp]/\d\.\d(\s+)(\d{3})(\s+)(.+)| )
      {
        $signature{'response_code'} = $2;
        $signature{'response_text'} = $4;
        unless ($1 eq " ") { $signature{'quirk_http_to_responsecode_whitespace'} = $1; }
        unless ($3 eq " ") { $signature{'quirk_responsecode_to_responsetext_whitespace'} = $3; }
      }
    }

    # look for CONFUSEDBUT 200 OK  -thttpd quirk
    if ($line =~ m|^(\S+)\s+(\d{3})\s+(.+)| and not exists $signature{'http'})
    {
      $signature{'http'} = $1;
      $signature{'response_code'} = $2;
      $signature{'response_text'} = $3;
    }

    if ($linenum == 1 and not exists $signature{'http'} and not exists $signature{'temp_http'} and $line =~ m|^\s*$|)
    {
      $signature{'quirk_whitespace_before_http_response'} = 1;
      next LINE;
    }

    # read headers, ignore 100 continue headers
    if ($line =~ /^([^\s<>]+)( ?):( ?)(.*)/ and exists $signature{'http'})
    {
      my $hname = ucfirst lc $1 ; # Standardize header name to Header-name: form
      my $hcontent = $4 ;
      push @headers, $hname ;
      unless ($2 eq "") {
        my $quirkname = 'quirk_whitespace_pre_' . lc ($1);
        $signature{$quirkname} = hex $2 ;
      }
      unless ($3 eq " ") {
        my $quirkname = 'quirk_whitespace_post_' . lc ($1);
        $signature{$quirkname} = hex $3 ;
      }
      if ($hname =~ m|^Allow$|i)
      {
        my @tmp = split ( / |,/, $hcontent ) ;
        $signature{'allow_order'} = uc join ",", grep { ! m|^$| } @tmp ;
      }
      # public (order) arrived in rfc2068, just long enough to shanghai rtsp/1.0, then
      # disappeared from the revised http/1.1 specification rfc2616 just two years later.
      if ($hname =~ m|^Public$|i)
      {
        my @tmp = split ( / |,/, $hcontent ) ;
        $signature{'public_order'} = uc join ",", grep { ! m|^$| } @tmp ;
      }
      # if we get something weird in Accept-Ranges, record it ... say 'mail' or 'records' or something.
      if ($hname =~ m|^Accept-Ranges$|i)
      {
        if ($hcontent ne 'bytes')
        {
          $signature{'accept_ranges'} = $hcontent ;
        }
      }
      if ($hname =~ m|^Connection$|i)
      {
        if ($hcontent =~ m|keep-alive|i)
        {
          $tmp_conn_keep_alive = 1;
        }

	# zope quirk - duplicate connection header
	if ($tmp_conn)
	{
	  pop @headers;
	  $signature{'duplicate_connection_header'} = 1;
	}
	else
	{
	  $tmp_conn = 1;
	}
      }
      if ($hname =~ m|^Date$|i)
      {
	# zope quirk - duplicate date header
	if ($tmp_date)
	{
	  pop @headers;
	  $signature{'duplicate_date_header'} = 1;
	}
	else
	{
	  $tmp_date = 1;
	}
      }
      if ($hname =~ m|^Keep-Alive$|i)
      {
        $tmp_keep_alive = 1;
      }
      if ($hname =~ m|^Content-Length$|i)
      {
        $tmp_content_length = $hcontent;
      }
    }
  }
  if ($tmp_keep_alive and ! defined ($tmp_conn_keep_alive))
  {
    $signature{'keepalive_header_without_conntoken'} = 1;
  }
  if ($tmp_keep_alive and $tmp_conn_keep_alive)
  {
    $signature{'forced_keepalive'} = 1;
  }

  # list header order
  #  ignore X-* headers, Set-Cookie, SLASH-LOG-DATA, Content-Disposition, Content-Language
  #  ignore Age, Via ... should be stripped off by outermost reverse proxy anyway
  #  ignore Expires, Cache-Control, Pragma - usually set by application, not server
  if (@headers) {
    $signature{'header_order'} = join(',', grep { (!/^X-.*/i and !/^Set-Cookie/i and !/^SLASH[-_]LOG[-_]DATA/i and !/^Content-Disposition/i and !/^Content-Language/i and !/^P3P/i and !/^Age/i and !/^Via/i and !/^Keep-Alive/i and !/^Expires/i and !/^Cache-Control/i and !/^Pragma/i)} @headers) ;
  }

  # superfluous 100 Continue quirk - none of our tests have any real business getting a 100 Continue back
  if (exists $signature{'temp_http_version'} and exists $signature{'http_version'})
  {
    $signature{'superfluous_100_continue'} = 1;
    delete $signature{'temp_http'};
    delete $signature{'temp_http_version'};
    delete $signature{'temp_response_code'};
    delete $signature{'temp_response_text'};
  }

  # Did we really get just a 100 Continue? All right.
  # Varnish may get confused if it receives an unsolicited 100 Continue, and serve
  # a content-free 100 Continue to clients.
  if (exists $signature{'temp_http_version'} and not exists $signature{'http_version'})
  {
    $signature{'orphan_100_continue'} = 1;
    $signature{'http_version'} = $signature{'temp_http_version'};
    delete $signature{'temp_http'};
    delete $signature{'temp_http_version'};
    delete $signature{'temp_response_code'};
    delete $signature{'temp_response_text'};
  }

  if (exists $signature{'http'} and $signature{'http'} eq 'HTTP')
  {
    # this is the default case, no need to record it. If it's lowercase or RTSP or something, however, keep it...
    delete $signature{'http'};
  }

  if ($#body > -1)
  {
    $signature{'response_body'} = 1;
  }

  if (defined $tmp_content_length)
  {
    # Content-Length > real body size? Breaks varnish (#519).
    my $real_content_length = 0;
    foreach my $line (@body)
    {
       $real_content_length += (length($line) + 2);
    }
    if ($real_content_length < $tmp_content_length)
    {
       $signature{'content_length'} = 'too_big';
    }
  }

  # no http response found at all?
  # expected, for HTTP/0.9
  # mildly unexpected otherwise
  unless (exists $signature{'http_version'})
  {
    delete $signature{'quirk_whitespace_before_http_response'}; # we didn't get any http response after all, so this is bogus
    # grope through the data looking for error codes.
    # the server may have sent a html error page with the http response embedded.
    # some boa releases do this
    FALLBACKLINE: foreach my $line (grep { m|<.+?>(?:Error: )*\d{3}.*</.+?>| } @lines)
    {
      if ($line =~ m|<.+?>(?:Error: )*(\d{3})\s*([^<]*?)</.+?>|)
      {
         $signature{'response_code'} = $1;
         $signature{'response_text'} = $2 if $2;
         $signature{'http_response_as_html'} = 1;
         delete $signature{'response_body'}; # implicit in http_response_as_html, really
         last FALLBACKLINE;
      }
    }
  }

  return $sigref;
}

