OSEC

Neohapsis is currently accepting applications for employment. For more information, please visit our website www.neohapsis.com or email hr@neohapsis.com
 
Subject: Re: static analysis
From: Antonomasia (antnotatla.demon.co.uk)
Date: Thu Jun 29 2000 - 17:45:05 CDT


(bcc readers: here is an argument number checker as
discussed on security audit for the kind of bug recently seen
in wuftpd where arguments expected exceeded arguments supplied.)

Where are the prizes for being quick and dirty ? This is clearly
based on last year's file race checker.

Counting is done by splitting a string using either /%/ or /,/
and deducting 1 if a successful split was made. This does not
yet take account of "%%" in format strings so I should loop over
the fragments and ensure that I adjust for empty ones and get the
pairing right.

The list of functions is massively incomplete; I suppose a grep
of man3 files for "..." in the synopsis would be a start.

Detection of multi-line statements is absent.

As I address some of the above and you make further
suggestions/criticisms the improved versions will be under
http://www.notatla.demon.co.uk/SOFTWARE/SCANNER/ .

#!/usr/bin/perl -w
require 5.004;

# Source code scanner for argument counting in C.
# See http://www.notatla.demon.co.uk/SOFTWARE for details.
# Gnu Public Licence V2 or later.

# remaining defects
# XXX unfinished undebugged

# This is the list of functions ... incomplete

# Table of printf-like functions whose calls are checked.
#   key   - function name as it appears in the C source
#   value - comma-separated list of counts; each count is the number of
#           arguments that precede the format string in a call
#           (0 means the format string is the first argument)
# The scanner matches names on a word boundary, so "printf" does not
# cover "fprintf" and "sprintf" does not cover "snprintf"; each variant
# needs its own entry.
%uglies=("syslog" => 1,
         "printf" => 0,
         "fprintf" => 1,
         "sprintf" => 1,
         "snprintf" => 2,
         "buf_appendf" => 1);

############################################
# SCAN_ONE_SRC_FILE([".",] $srcname)
#
# Scan one C source file for calls to the functions listed in %uglies and
# report, on the OUT filehandle, every call where
#   - the format string is a literal and the number of '%' conversions
#     differs from the number of arguments that follow it, or
#   - the format string is not a literal at all.
#
# Arguments:
#   "."       optional leading flag: the file is preprocessor output whose
#             lines carry an "N: " label giving the original line number
#   $srcname  path of the file to read
#
# Globals read:    %uglies, $debug, $verbose, OUT
# Globals written: $duffstuff (open failure message appended),
#                  $lastline, $line, $debug (cleared after the file)
#
# Returns a false value ("") in all cases; findings go straight to OUT.
#
# Known limits (unchanged): a call must fit on one line, and commas or
# parentheses inside the format string or arguments confuse the counting.
sub SCAN_ONE_SRC_FILE
{
my $srcname;      # what source file to read
my $src;          # line of source under scrutiny
my $fcall;        # name of a function
my $properties;   # our hardcoded description of a function
my $argnum;       # number of arguments preceding the format string
my $rc;           # return code
my $answer="";    # return value of this function
my @tmpprops;     # the counts listed in $properties
my @junk;         # comma-separated pieces from the format string onwards
my $fmt;          # format string with "%%" pairs removed
my $wanted;       # arguments the format string asks for
my $got;          # arguments actually supplied after the format string
my $ln=0;         # do we need to fake line numbers ?

$srcname=shift;
# Use dot as a special filename to flag a preprocessed file.
# Should not match a real filename because dot means a directory.
if ("." eq $srcname) {$ln=1; $srcname=shift;}

$rc=open SRC, "<$srcname";
if (!$rc) {
   $duffstuff = $duffstuff."\nCannot open ". $srcname;
   return "";
}

SOURCEFILE: while (<SRC>) {
   chomp;
   $src=$_;
   # this is where we track line numbers - the possibilities are:
   # - use the line number of the original input file applied
   #   to this preprocessed file as a (numeric) label
   # - use the previous value as above (for macro cont'n lines)
   # - use the line number of the input file (for the non -E mode)
   if ($ln) {
       # Only a leading "N:" label counts; other digits on the line
       # (cpp line markers, constants) must not be taken for a label.
       if ($src =~ /^\s*([0-9]+):/) {
               $lastline=$1;
       }
       $line=$lastline;
   } else {
       $line=$.;
   }

   # Loop over uglies to see if any appear in the line.
   while (($fcall,$properties) = each(%uglies) ) {
       if ($debug) {printf(OUT "FCALL IS %s\n",$fcall);}

       @tmpprops=split(/,/,$properties);

       while (@tmpprops) {
           $argnum=shift(@tmpprops);
           if ($debug) {
               printf(OUT " ARGNUM IS %s\n",$argnum);
           }

           $_=$src;
           # first test and remove function name and preceding args
           if (s/(.*)\b\Q$fcall\E\(([^,]+,){$argnum}//) {

               # count the arguments that follow the format string
               # XXX breaks if parens or quoted commas appear
               @junk=split(/,/);
               $got=(@junk > 1) ? @junk-1 : 0;

               # keep only the format string, without whitespace
               s/[,)].*// ;
               s/\s+//g ;

               # "%%" prints a percent sign and consumes no argument
               ($fmt=$_) =~ s/%%//g;
               $wanted=($fmt =~ tr/%//);

               if ($verbose){
                   printf(OUT "%8d: %s\n", $., $src);
                   if ($verbose>1){
                       printf(OUT " %s(%s)\n",$fcall, $_);
                   }
               }
               # %8s rather than %8d: $line may be the "???" fallback
               if ( $_ =~ /"[^"]*"/ ) {
                   # fixed format string
                   if ($wanted != $got){
                       printf(OUT "%8s: %s %s\n",
                                  $line,$fcall,$_);
                   }
               } else {
                   # variable format string
                   printf(OUT "%8s: %s %s\n",
                              $line, $fcall, $_);
               }
           }
       }
   }
}
close(SRC); # end of reading that source file
$debug=0; # debug mode ends after one input file

return $answer;
}

############################################

# MAIN starts here

# Main program: parse the command line, optionally run each source file
# through "gcc -E" with line-number labels added, scan it with
# SCAN_ONE_SRC_FILE, then print any collected error messages.

$debug=0;
$verbose=0;
$cpp=0;
$tmpdir="";
$duffstuff=""; # to hold error messages
$lastline="???"; # fallback used in tracing difficult line numbers
$cflags="";
$help="scancode [-v] [-d] [-h] -E -Y [-D] [-I] [-L] file1.c ...
          -v verbose - can be used more than once
          -d debug
          -h this usage message
          -E 'cpp -E' expansion of source files
          -D -I and -L parameters are passed to gcc if -E is in use
";

# direct output to $ENV{"OUT"}, else STDOUT
if ($ENV{"OUT"}) {
     my $outfilename=$ENV{"OUT"};
     open OUT, ">>$outfilename"
          or die("Cannot append to $outfilename: $!\n");
} else {
     open(OUT, ">&STDOUT") or die("Cannot duplicate STDOUT: $!\n");
}

@clargs=@ARGV;

SWITCHES: while (@clargs) {
   my $arg=$clargs[0];
   # switches start with a -
   last SWITCHES unless ($arg =~ /^-/) ;
   shift @clargs;

   # -I -L and -D are collected verbatim for gcc
   if ($arg =~ /^-[ILD].*/) {
       $cflags .= " ". $arg;
       next SWITCHES;
   }

   # -- means end of switches
   last SWITCHES if $arg eq "--";

   $arg=substr($arg, 1);
   # now read it char-by-char - there are no long options
   # (test the length, not the truth, so a "0" character is not skipped)
   while (length($arg)) {
       my $flag=substr($arg, 0, 1);
       if ("v" eq $flag) {
           $verbose++;
       } elsif ("d" eq $flag) {
           $debug=1;
       } elsif ("E" eq $flag) {
           $cpp=1;
       } else {
           $|=1;
           printf(OUT "Confused by: %s\n",$arg)
                   unless ("h" eq $flag);
           die($help);
       }
       $arg=substr($arg, 1);
   }
}

FILENAMES: while (@clargs) {
    my $text;
    # "." is reserved as the preprocessed-file flag of SCAN_ONE_SRC_FILE
    if ("." eq $clargs[0]) {shift @clargs; next FILENAMES;}
    if ($cpp) {
        my $count=1;
        my $hinclude=0;
        # run gcc -E on the file and scan the output
        if (! $tmpdir) {
            my $tries=0;
            do {
               # give up rather than spin forever on an unwritable directory
               die("Cannot create a temporary directory.\n")
                    if (++$tries > 100);
               $tmpdir=int(rand(1000000000));
               if ($ENV{TMPDIR}) {
                    $tmpdir=$ENV{TMPDIR}."/".$tmpdir;
               } else {
                    $tmpdir="/tmp/".$tmpdir;
               }
            } while (!mkdir($tmpdir, 0700));
        }

        if ($clargs[0] =~ /(.*)\/(.*)/) {
            $dirname=$1;
            $basename=$2;
            # a file directly under the root directory
            if ("" eq $dirname) {$dirname="/";}
        } else {
            $dirname=".";
            $basename="";
        }
        if (!$basename) {$basename=$clargs[0];}
        if ($debug) {
               printf(OUT "dirname is %s\n",$dirname);
               printf(OUT "basename is %s\n",$basename);
        }
        $tmpfile=$tmpdir."/".$basename;

        # Copy the source file to another file with added line
        # numbers, except on cpp directive lines.
        open TF, ">$tmpfile" or die("Cannot write file with line numbers.");
        # explicit "<" so that a file name cannot select a pipe or write mode
        open IF, "<$clargs[0]" or die("Cannot read source file.");
        while (<IF>){
            if ( ($_ =~ /^#.*/) || ($hinclude) ) {
                # avoid numbering preprocessor directives
                # including continuation lines
                print TF $_;
                $hinclude=($_ =~ /\\$/) ? 1 : 0;
            } else {
                printf TF "%d: %s" , $count, $_;
            }
            $count++;
        }
        close IF;
        close TF;

        # Now run gcc -E on that to remove comments, expand macros,
        # decide ifdefs and whatever else cpp does.
        $tmpfile2=$tmpfile."E";
        open TF, ">$tmpfile2" or die("Cannot write file of gcc output.");

        # Single-quote the paths we build ourselves so that shell
        # metacharacters in file names are not interpreted.
        # NOTE: user-supplied -I/-L/-D switches are still passed to the
        # shell as given.
        my $qtmpfile;
        my $qdirname;
        ($qtmpfile=$tmpfile) =~ s/'/'\\''/g;
        ($qdirname=$dirname) =~ s/'/'\\''/g;
        # without any -I/-L/-D switches, search the source's own directory
        my $gccflags=$cflags ? $cflags : "-I'$qdirname'";

        open IF, "gcc -E $gccflags '$qtmpfile' |" or
             die("Cannot run gcc -E.");
        while (<IF>){
            print TF;
        }
        # closing the pipe is what reports gcc's exit status
        if (!close(IF)) {
            $duffstuff = $duffstuff."\ngcc -E failed on ".$clargs[0];
        }
        close TF;

        $text=SCAN_ONE_SRC_FILE (".",$tmpfile2);
        unlink $tmpfile, $tmpfile2;
    } else {
        # simple case (no gcc) just scan the source file
        $text=SCAN_ONE_SRC_FILE ($clargs[0]);
    }
    # Results are printed here as a separate step from the
    # SCAN_ONE_SRC_FILE function in case they needed a bit of
    # massaging. Seems not so far.
    if ($text) {
         printf(OUT "%s\n",$clargs[0]);
         printf(OUT "%s",$text);
    }
    # move on to next source file named on the command line
    shift @clargs;
}

# show error messages and tidy up
printf OUT "%s\n",$duffstuff;
if ($tmpdir) {rmdir $tmpdir;}
close(OUT);

--
##############################################################
# Antonomasia   antnotatla.demon.co.uk                      #
# See http://www.notatla.demon.co.uk/                        #
##############################################################