OSEC

Neohapsis is currently accepting applications for employment. For more information, please visit our website www.neohapsis.com or email hr@neohapsis.com
errors in col(1) line number handling

From: Ingo Schwarze (schwarzeusta.de)
Date: Fri Oct 17 2014 - 20:47:34 CDT


Hi,

after fixing column number handling in col(1), i looked at line
number handling in the same file and found about a dozen issues -
two definite int overflows, four potential int overflows, four
potential int underflows, and a group of closely related logic
errors.

Because they are all more or less related to each other,
i'm sending one patch rather than a dozen, with a list stating
which chunk is fixing what. Sorry this got a bit long, but it's
not me who introduced all these issues... :-(

OK?
  Ingo

 * chunk 145 - int overflow
   happens when giving -l with arg > INT_MAX/2
   consequence: running with bogus max_bufd_lines
   fix that in chunk 129: report error and exit

 * chunk 168, subchunks 1 and 2 and chunk 199, subchunk 1 - int underflow
   happens when backing up INT_MAX lines beyond the top of the file
   consequence: silent output corruption, writing to wrong line
   fix that using addto_lineno(): report error and exit

 * chunk 168, subchunks 3 and 4 - int overflows
   happens when advancing beyond line INT_MAX
   consequence: silent output corruption, writing to wrong line
   fix that using addto_lineno(): report error and exit

 * chunk 199, subchunk 1, (this_line - adjust) - int underflow
   fix that using addto_lineno(): report error and exit

 * chunk 199, subchunk 2, nmove = cur_line - this_line - int overflow
   happens when cur_line is very positive and this_line very negative
     or vice versa
   consequence: silent output corruption, writing to wrong line
   fix that avoiding the subtraction and eliminating the variable

 * chunk 199, last subchunk, max_bufd_lines + BUFFER_MARGIN - int overflow
   happens when giving -l with arg > (INT_MAX - BUFFER_MARGIN) / 2
   fix that in chunk 129

 * chunk 199, last subchunk - excessive memory consumption
   happens when the input first backs up many lines beyond the
     top of the file such that extra_lines becomes large,
     then continues to high line numbers such that max_line
     becomes large: the memory needed for extra_lines is not
     released until the very end
   fix this by flushing extra_lines right before the first regular flush

 * chunk 283, subchunk 2, this_line-nflushd_lines + extra_lines - int overflow
   happens when extra_lines is close to INT_MAX
   consequence: the last part of the output is silently lost
   fix that by flushing extra_lines earlier: subchunk 1

 * chunk 283, subchunk 1, if (max_line == 0) - logic error
   happens for example if the file contains some printable
     characters, but no newline character
   consequence: input characters lost, no output
   fix that by removing the premature exit

 * the previous fix uncovers another issue:
   chunk 283, subchunk 4, else if (!nblank_lines) - logic error
   happens for example with input /dev/null
   consequence: a bogus blank line is printed
   fix that by removing the bogus blank line

 * the previous fix uncovers another issue:
   chunk 283, subchunk 3, nblank_lines = max_line - this_line; - logic error
   happens for example for input "\013x" (reverse line feed followed
     by some output without a trailing newline); in this case, the
     value on nblank_lines that was set in flush_lines() is lost
   consequence: output without trailing newline
   fix that by not overwriting nblank_lines with zero

 * the previous fix uncovers another issue:
   chunk 318 - logic error
   happens for example for input /dev/null: because at least one LINE
     is always allocated into *lines, nblank_lines is incremented
     even when there is no output whatsoever
   consequence: a bogus blank line is printed
   fix that by only requesting a new line when the current line
     contained some output or another line follows

Index: col.c
===================================================================
RCS file: /cvs/src/usr.bin/col/col.c,v
retrieving revision 1.14
diff -u -p -r1.14 col.c
--- col.c 17 Oct 2014 21:27:10 -0000 1.14
+++ col.c 18 Oct 2014 00:49:30 -0000
-78,6 +78,7 struct line_str {
         int l_needs_sort; /* set if chars went in out of order */
 };
 
+void addto_lineno(int *, int);
 LINE *alloc_line(void);
 void dowarn(int);
 void flush_line(LINE *);
-91,7 +92,7 CSET last_set; /* char_set of last char
 LINE *lines;
 int compress_spaces; /* if doing space -> tab conversion */
 int fine; /* if `fine' resolution (half lines) */
-int max_bufd_lines; /* max # lines to keep in memory */
+int max_bufd_lines; /* max # of half lines to keep in memory */
 int nblank_lines; /* # blanks after last flushed line */
 int no_backspaces; /* if not to output any backspaces */
 
-115,7 +116,7 main(int argc, char *argv[])
         int adjust, opt, warned;
         const char *errstr;
 
- max_bufd_lines = 128;
+ max_bufd_lines = 256;
         compress_spaces = 1; /* compress spaces into tabs */
         while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
                 switch (opt) {
-129,7 +130,8 main(int argc, char *argv[])
                         compress_spaces = 1;
                         break;
                 case 'l': /* buffered line count */
- max_bufd_lines = strtonum(optarg, 1, INT_MAX, &errstr);
+ max_bufd_lines = strtonum(optarg, 1,
+ (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
                         if (errstr != NULL)
                                 errx(1, "bad -l argument, %s: %s", errstr,
                                         optarg);
-145,9 +147,6 main(int argc, char *argv[])
         if (optind != argc)
                 usage();
 
- /* this value is in half lines */
- max_bufd_lines *= 2;
-
         adjust = extra_lines = warned = 0;
         cur_col = 0;
         cur_line = max_line = nflushd_lines = this_line = 0;
-168,19 +167,19 main(int argc, char *argv[])
                         case ESC: /* just ignore EOF */
                                 switch(getchar()) {
                                 case RLF:
- cur_line -= 2;
+ addto_lineno(&cur_line, -2);
                                         break;
                                 case RHLF:
- cur_line--;
+ addto_lineno(&cur_line, -1);
                                         break;
                                 case FHLF:
- cur_line++;
+ addto_lineno(&cur_line, 1);
                                         if (cur_line > max_line)
                                                 max_line = cur_line;
                                 }
                                 continue;
                         case NL:
- cur_line += 2;
+ addto_lineno(&cur_line, 2);
                                 if (cur_line > max_line)
                                         max_line = cur_line;
                                 cur_col = 0;
-199,65 +198,68 main(int argc, char *argv[])
                                 ++cur_col;
                                 continue;
                         case VT:
- cur_line -= 2;
+ addto_lineno(&cur_line, -2);
                                 continue;
                         }
                         continue;
                 }
 
                 /* Must stuff ch in a line - are we at the right one? */
- if (cur_line != this_line - adjust) {
+ if (cur_line + adjust != this_line) {
                         LINE *lnew;
- int nmove;
 
- adjust = 0;
- nmove = cur_line - this_line;
- if (!fine) {
- /* round up to next line */
- if (cur_line & 1) {
- adjust = 1;
- nmove++;
- }
- }
- if (nmove < 0) {
- for (; nmove < 0 && l->l_prev; nmove++)
+ /* round up to next line */
+ adjust = !fine && (cur_line & 1);
+
+ if (cur_line + adjust < this_line) {
+ while (cur_line + adjust < this_line &&
+ l->l_prev != NULL) {
                                         l = l->l_prev;
- if (nmove) {
+ this_line--;
+ }
+ if (cur_line + adjust < this_line) {
                                         if (nflushd_lines == 0) {
                                                 /*
                                                  * Allow backup past first
                                                  * line if nothing has been
                                                  * flushed yet.
                                                  */
- for (; nmove < 0; nmove++) {
+ while (cur_line + adjust
+ < this_line) {
                                                         lnew = alloc_line();
                                                         l->l_prev = lnew;
                                                         lnew->l_next = l;
                                                         l = lines = lnew;
                                                         extra_lines++;
+ this_line--;
                                                 }
                                         } else {
                                                 if (!warned++)
                                                         dowarn(cur_line);
- cur_line -= nmove;
+ cur_line = this_line - adjust;
                                         }
                                 }
                         } else {
                                 /* may need to allocate here */
- for (; nmove > 0 && l->l_next; nmove--)
+ while (cur_line + adjust > this_line) {
+ if (l->l_next == NULL) {
+ l->l_next = alloc_line();
+ l->l_next->l_prev = l;
+ }
                                         l = l->l_next;
- for (; nmove > 0; nmove--) {
- lnew = alloc_line();
- lnew->l_prev = l;
- l->l_next = lnew;
- l = lnew;
+ this_line++;
                                 }
                         }
- this_line = cur_line + adjust;
- nmove = this_line - nflushd_lines;
- if (nmove >= max_bufd_lines + BUFFER_MARGIN) {
- nflushd_lines += nmove - max_bufd_lines;
- flush_lines(nmove - max_bufd_lines);
+ if (this_line > nflushd_lines &&
+ this_line - nflushd_lines >=
+ max_bufd_lines + BUFFER_MARGIN) {
+ if (extra_lines) {
+ flush_lines(extra_lines);
+ extra_lines = 0;
+ }
+ flush_lines(this_line - nflushd_lines -
+ max_bufd_lines);
+ nflushd_lines = this_line - max_bufd_lines;
                         }
                 }
                 /* grow line's buffer? */
-283,25 +285,23 main(int argc, char *argv[])
                         l->l_max_col = cur_col;
                 cur_col++;
         }
- if (max_line == 0)
- exit(0); /* no lines, so just exit */
+ if (extra_lines)
+ flush_lines(extra_lines);
 
         /* goto the last line that had a character on it */
         for (; l->l_next; l = l->l_next)
                 this_line++;
- flush_lines(this_line - nflushd_lines + extra_lines + 1);
+ flush_lines(this_line - nflushd_lines + 1);
 
         /* make sure we leave things in a sane state */
         if (last_set != CS_NORMAL)
                 PUTC('\017');
 
         /* flush out the last few blank lines */
- nblank_lines = max_line - this_line;
+ if (max_line > this_line)
+ nblank_lines = max_line - this_line;
         if (max_line & 1)
                 nblank_lines++;
- else if (!nblank_lines)
- /* missing a \n on the last line? */
- nblank_lines = 2;
         flush_blanks();
         exit(0);
 }
-318,7 +318,8 flush_lines(int nflush)
                         flush_blanks();
                         flush_line(l);
                 }
- nblank_lines++;
+ if (l->l_line || l->l_next)
+ nblank_lines++;
                 if (l->l_line)
                         (void)free((void *)l->l_line);
                 free_line(l);
-455,6 +456,23 flush_line(LINE *l)
                         PUTC('\b');
                 }
         }
+}
+
+/*
+ * Increment or decrement a line number, checking for overflow.
+ * Stop one below INT_MAX such that the adjust variable is safe.
+ */
+void
+addto_lineno(int *lno, int offset)
+{
+ if (offset > 0) {
+ if (*lno >= INT_MAX - offset)
+ errx(1, "too many lines");
+ } else {
+ if (*lno < INT_MIN - offset)
+ errx(1, "too many reverse line feeds");
+ }
+ *lno += offset;
 }
 
 #define NALLOC 64