OSEC

Neohapsis is currently accepting applications for employment. For more information, please visit our website www.neohapsis.com or email hr@neohapsis.com
PATCH: poor SMTP/Milter performance over loopback (127.0.0.1 etc)

From: Wietse Venema (wietseporcupine.org)
Date: Sun Jul 29 2007 - 13:59:23 CDT


Mark Martinec discovered that Postfix can have poor network performance
when sending mail via 127.0.0.1 (or any interface with an MTU larger
than 4 kbyte or so). This can happen with Milters (e.g. for DKIM
signing) and with SMTP-based content filters.

Below is a proposed patch that fixes the problem for Postfix 2.4
and Postfix 2.5. When the TCP maximal segment size (MSS) is larger
than the VSTREAM buffer size, the patch increases the VSTREAM buffer
size to avoid Nagle delays.

The patch does not change Internet delivery performance, because
turning off Nagle delays there would actually hurt.

The fix is most easily tested with the smtp-source and smtp-sink
utilities.

Start the smtp sink program in one window:

$ ./smtp-sink -4 :9999 100

In a different window, run the following commands and see if the
performance of connections to 127.0.0.1 is worse than connections
to the machine's network address (192.168.0.2 or whatever it is).

$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 127.0.0.1:9999
$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 127.0.0.1:9999
$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 127.0.0.1:9999

$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 192.168.0.2:9999
$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 192.168.0.2:9999
$ /usr/bin/time ./smtp-source -d -l 1000000 -m 100 192.168.0.2:9999

With FreeBSD 6.2 uniprocessor, the loopback connections now finish
in 72% of the time that is needed for the connections to the machine's
own network address. Without the fix, the loopback finish time is
highly variable, and may increase up to 2-5 times.

With FreeBSD 6.2 multiprocessor, the fix makes no difference and
loopback connections finish in 85% of the time that is needed for
connections to the machine's own network address.

        Wietse

diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/milter/milter8.c ./src/milter/milter8.c
*** /var/tmp/postfix-2.5-20070724/src/milter/milter8.c Sun Jul 22 10:06:16 2007
--- ./src/milter/milter8.c Sun Jul 29 12:08:36 2007
***************
*** 1591,1596 ****
--- 1591,1599 ----
                      VSTREAM_CTL_DOUBLE,
                      VSTREAM_CTL_TIMEOUT, milter->cmd_timeout,
                      VSTREAM_CTL_END);
+ /* Avoid poor performance when TCP MSS > VSTREAM_BUFSIZE. */
+ if (connect_fn == inet_connect)
+ vstream_tweak_tcp(milter->fp);
  
      /*
       * Open the negotiations by sending what actions the Milter may request
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/smtp/smtp_connect.c ./src/smtp/smtp_connect.c
*** /var/tmp/postfix-2.5-20070724/src/smtp/smtp_connect.c Sun Dec 3 14:58:09 2006
--- ./src/smtp/smtp_connect.c Sun Jul 29 13:38:45 2007
***************
*** 304,309 ****
--- 304,315 ----
      stream = vstream_fdopen(sock, O_RDWR);
  
      /*
+ * Avoid poor performance when TCP MSS > VSTREAM_BUFSIZE.
+ */
+ if (sa->sa_family == AF_INET || sa->sa_family == AF_INET6)
+ vstream_tweak_tcp(stream);
+
+ /*
       * Bundle up what we have into a nice SMTP_SESSION object.
       */
      return (smtp_session_alloc(stream, destination, name, addr,
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/smtpstone/qmqp-source.c ./src/smtpstone/qmqp-source.c
*** /var/tmp/postfix-2.5-20070724/src/smtpstone/qmqp-source.c Sat Mar 17 13:59:38 2007
--- ./src/smtpstone/qmqp-source.c Sun Jul 29 12:09:01 2007
***************
*** 356,361 ****
--- 356,364 ----
          dequeue_connect(session);
          non_blocking(fd, BLOCKING);
          event_disable_readwrite(fd);
+ /* Avoid poor performance when TCP MSS > VSTREAM_BUFSIZE. */
+ if (sa->sa_family == AF_INET || sa->sa_family == AF_INET6)
+ vstream_tweak_tcp(session->stream);
          send_data(session);
      }
  }
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/smtpstone/smtp-source.c ./src/smtpstone/smtp-source.c
*** /var/tmp/postfix-2.5-20070724/src/smtpstone/smtp-source.c Thu Jul 19 18:05:16 2007
--- ./src/smtpstone/smtp-source.c Sun Jul 29 12:09:05 2007
***************
*** 483,488 ****
--- 483,491 ----
          event_disable_readwrite(fd);
          event_enable_read(fd, read_banner, (char *) session);
          dequeue_connect(session);
+ /* Avoid poor performance when TCP MSS > VSTREAM_BUFSIZE. */
+ if (sa->sa_family == AF_INET || sa->sa_family == AF_INET6)
+ vstream_tweak_tcp(session->stream);
      }
  }
  
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/util/Makefile.in ./src/util/Makefile.in
*** /var/tmp/postfix-2.5-20070724/src/util/Makefile.in Sat Mar 17 13:51:33 2007
--- ./src/util/Makefile.in Sun Jul 29 12:02:35 2007
***************
*** 30,36 ****
          username.c valid_hostname.c vbuf.c vbuf_print.c vstream.c \
          vstream_popen.c vstring.c vstring_vstream.c watchdog.c writable.c \
          write_buf.c write_wait.c sane_basename.c format_tv.c allspace.c \
! allascii.c load_file.c killme_after.c
  OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
          attr_print64.o attr_print_plain.o attr_scan0.o attr_scan64.o \
          attr_scan_plain.o auto_clnt.o base64_code.o basename.o binhash.o \
--- 30,36 ----
          username.c valid_hostname.c vbuf.c vbuf_print.c vstream.c \
          vstream_popen.c vstring.c vstring_vstream.c watchdog.c writable.c \
          write_buf.c write_wait.c sane_basename.c format_tv.c allspace.c \
! allascii.c load_file.c killme_after.c vstream_tweak.c
  OBJS = alldig.o allprint.o argv.o argv_split.o attr_clnt.o attr_print0.o \
          attr_print64.o attr_print_plain.o attr_scan0.o attr_scan64.o \
          attr_scan_plain.o auto_clnt.o base64_code.o basename.o binhash.o \
***************
*** 62,68 ****
          username.o valid_hostname.o vbuf.o vbuf_print.o vstream.o \
          vstream_popen.o vstring.o vstring_vstream.o watchdog.o writable.o \
          write_buf.o write_wait.o sane_basename.o format_tv.o allspace.o \
! allascii.o load_file.o killme_after.o
  HDRS = argv.h attr.h attr_clnt.h auto_clnt.h base64_code.h binhash.h \
          chroot_uid.h cidr_match.h clean_env.h connect.h ctable.h dict.h \
          dict_cdb.h dict_cidr.h dict_db.h dict_dbm.h dict_env.h dict_ht.h \
--- 62,68 ----
          username.o valid_hostname.o vbuf.o vbuf_print.o vstream.o \
          vstream_popen.o vstring.o vstring_vstream.o watchdog.o writable.o \
          write_buf.o write_wait.o sane_basename.o format_tv.o allspace.o \
! allascii.o load_file.o killme_after.o vstream_tweak.o
  HDRS = argv.h attr.h attr_clnt.h auto_clnt.h base64_code.h binhash.h \
          chroot_uid.h cidr_match.h clean_env.h connect.h ctable.h dict.h \
          dict_cdb.h dict_cidr.h dict_db.h dict_dbm.h dict_env.h dict_ht.h \
***************
*** 1600,1605 ****
--- 1600,1610 ----
  vstream_popen.o: vbuf.h
  vstream_popen.o: vstream.h
  vstream_popen.o: vstream_popen.c
+ vstream_tweak.o: msg.h
+ vstream_tweak.o: sys_defs.h
+ vstream_tweak.o: vbuf.h
+ vstream_tweak.o: vstream.h
+ vstream_tweak.o: vstream_tweak.c
  vstring.o: msg.h
  vstring.o: mymalloc.h
  vstring.o: sys_defs.h
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/util/vstream.c ./src/util/vstream.c
*** /var/tmp/postfix-2.5-20070724/src/util/vstream.c Fri Feb 16 16:03:16 2007
--- ./src/util/vstream.c Sun Jul 29 13:53:09 2007
***************
*** 277,282 ****
--- 277,292 ----
  /* Enable exception handling with vstream_setjmp() and vstream_longjmp().
  /* This involves allocation of additional memory that normally isn't
  /* used.
+ /* .IP "VSTREAM_CTL_BUFSIZE (ssize_t)"
+ /* Specify a non-default write buffer size, or zero to implement
+ /* a no-op. Requests to shrink an existing buffer size are
+ /* ignored. Requests to change a fixed-size buffer (stdin,
+ /* stdout, stderr) are not allowed.
+ /*
+ /* NOTE: the VSTREAM_CTL_BUFSIZE argument type is ssize_t, not
+ /* int. Use an explicit cast to avoid problems on LP64
+ /* environments and other environments where ssize_t is larger
+ /* than int.
  /* .PP
  /* vstream_fileno() gives access to the file handle associated with
  /* a buffered stream. With streams that have separate read/write
***************
*** 384,389 ****
--- 394,406 ----
    * Initialization of the three pre-defined streams. Pre-allocate a static
    * I/O buffer for the standard error stream, so that the error handler can
    * produce a diagnostic even when memory allocation fails.
+ *
+ * XXX We don't (yet) statically initialize the req_bufsize field: it is the
+ * last VSTREAM member so we don't break Postfix 2.4 binary compatibility,
+ * and Wietse doesn't know how to specify an initializer for the jmp_buf
+ * VSTREAM member (which can be a struct or an array) without collateral
+ * damage to the source code. We can fix the initialization later in the
+ * Postfix 2.5 development cycle.
    */
  static unsigned char vstream_fstd_buf[VSTREAM_BUFSIZE];
  
***************
*** 762,774 ****
  
      /*
       * Remember the direction. If this is the first PUT operation for this
! * stream, allocate a new buffer; obviously there is no data to be
! * flushed yet. Otherwise, flush the buffer if it is full.
! */
! if (bp->data == 0) {
! vstream_buf_alloc(bp, VSTREAM_BUFSIZE);
! if (bp->flags & VSTREAM_FLAG_DOUBLE)
! VSTREAM_SAVE_STATE(stream, write_buf, write_fd);
      } else if (bp->cnt <= 0) {
          if (VSTREAM_FFLUSH_SOME(stream))
              return (VSTREAM_EOF);
--- 779,792 ----
  
      /*
       * Remember the direction. If this is the first PUT operation for this
! * stream or if the buffer is smaller than the requested size, allocate a
! * new buffer; obviously there is no data to be flushed yet. Otherwise,
! * flush the buffer.
! */
! if (stream->req_bufsize == 0)
! stream->req_bufsize = VSTREAM_BUFSIZE; /* Postfix 2.4 binary compat. */
! if (bp->len < stream->req_bufsize) {
! vstream_buf_alloc(bp, stream->req_bufsize);
      } else if (bp->cnt <= 0) {
          if (VSTREAM_FFLUSH_SOME(stream))
              return (VSTREAM_EOF);
***************
*** 822,833 ****
  #define VSTREAM_ROUNDUP(count, base) VSTREAM_TRUNCATE(count + base - 1, base)
  
      if (want > bp->cnt) {
! if ((used = bp->len - bp->cnt) > VSTREAM_BUFSIZE)
! if (vstream_fflush_some(stream, VSTREAM_TRUNCATE(used, VSTREAM_BUFSIZE)))
                  return (VSTREAM_EOF);
          if ((shortage = (want - bp->cnt)) > 0) {
! incr = VSTREAM_ROUNDUP(shortage, VSTREAM_BUFSIZE);
! vstream_buf_alloc(bp, bp->len + incr);
          }
      }
      return (vstream_ferror(stream) ? VSTREAM_EOF : 0); /* mmap() may fail */
--- 840,857 ----
  #define VSTREAM_ROUNDUP(count, base) VSTREAM_TRUNCATE(count + base - 1, base)
  
      if (want > bp->cnt) {
! if (stream->req_bufsize == 0)
! stream->req_bufsize = VSTREAM_BUFSIZE; /* 2.4 binary compat. */
! if ((used = bp->len - bp->cnt) > stream->req_bufsize)
! if (vstream_fflush_some(stream, VSTREAM_TRUNCATE(used, stream->req_bufsize)))
                  return (VSTREAM_EOF);
          if ((shortage = (want - bp->cnt)) > 0) {
! if (shortage > __MAXINT__(ssize_t) -bp->len - stream->req_bufsize) {
! bp->flags |= VSTREAM_FLAG_ERR;
! } else {
! incr = VSTREAM_ROUNDUP(shortage, stream->req_bufsize);
! vstream_buf_alloc(bp, bp->len + incr);
! }
          }
      }
      return (vstream_ferror(stream) ? VSTREAM_EOF : 0); /* mmap() may fail */
***************
*** 1027,1032 ****
--- 1051,1057 ----
      stream->context = 0;
      stream->jbuf = 0;
      stream->iotime.tv_sec = stream->iotime.tv_usec = 0;
+ stream->req_bufsize = VSTREAM_BUFSIZE;
      return (stream);
  }
  
***************
*** 1169,1174 ****
--- 1194,1200 ----
      va_list ap;
      int floor;
      int old_fd;
+ ssize_t req_bufsize = 0;
  
      for (va_start(ap, name); name != VSTREAM_CTL_END; name = va_arg(ap, int)) {
          switch (name) {
***************
*** 1248,1253 ****
--- 1274,1291 ----
              }
              break;
  #endif
+
+ /*
+ * Postpone memory (re)allocation until the space is needed.
+ */
+ case VSTREAM_CTL_BUFSIZE:
+ req_bufsize = va_arg(ap, ssize_t);
+ if (req_bufsize < 0)
+ msg_panic("VSTREAM_CTL_BUFSIZE with negative size: %ld",
+ (long) req_bufsize);
+ if (req_bufsize > stream->req_bufsize)
+ stream->req_bufsize = req_bufsize;
+ break;
          default:
              msg_panic("%s: bad name %d", myname, name);
          }
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/util/vstream.h ./src/util/vstream.h
*** /var/tmp/postfix-2.5-20070724/src/util/vstream.h Wed Feb 14 18:46:29 2007
--- ./src/util/vstream.h Sun Jul 29 13:32:24 2007
***************
*** 50,55 ****
--- 50,57 ----
      int timeout; /* read/write timout */
      jmp_buf *jbuf; /* exception handling */
      struct timeval iotime; /* time of last fill/flush */
+ /* At bottom for Postfix 2.4 binary compatibility. */
+ ssize_t req_bufsize; /* write buffer size */
  } VSTREAM;
  
  extern VSTREAM vstream_fstd[]; /* pre-defined streams */
***************
*** 123,128 ****
--- 125,131 ----
  #ifdef F_DUPFD
  #define VSTREAM_CTL_DUPFD 11
  #endif
+ #define VSTREAM_CTL_BUFSIZE 12
  
  extern VSTREAM *PRINTFLIKE(1, 2) vstream_printf(const char *,...);
  extern VSTREAM *PRINTFLIKE(2, 3) vstream_fprintf(VSTREAM *, const char *,...);
***************
*** 152,157 ****
--- 155,165 ----
    */
  #define vstream_setjmp(stream) setjmp((stream)->jbuf[0])
  #define vstream_longjmp(stream, val) longjmp((stream)->jbuf[0], (val))
+
+ /*
+ * Tweaks and workarounds.
+ */
+ extern int vstream_tweak_tcp(VSTREAM *);
  
  /* LICENSE
  /* .ad
diff -cr --exclude=man --exclude=html --exclude=README_FILES --exclude=.indent.pro --new-file /var/tmp/postfix-2.5-20070724/src/util/vstream_tweak.c ./src/util/vstream_tweak.c
*** /var/tmp/postfix-2.5-20070724/src/util/vstream_tweak.c Wed Dec 31 19:00:00 1969
--- ./src/util/vstream_tweak.c Sun Jul 29 12:09:26 2007
***************
*** 0 ****
--- 1,70 ----
+ /*++
+ /* NAME
+ /* vstream_tweak 3
+ /* SUMMARY
+ /* performance tweaks
+ /* SYNOPSIS
+ /* #include <vstream.h>
+ /*
+ /* VSTREAM *vstream_tweak_tcp(stream)
+ /* VSTREAM *stream;
+ /* DESCRIPTION
+ /* vstream_tweak_tcp() does a best effort to boost your
+ /* Internet performance on the specified stream.
+ /*
+ /* Arguments:
+ /* .IP stream
+ /* The stream being boosted.
+ /* DIAGNOSTICS
+ /* Panics: interface violations.
+ /* LICENSE
+ /* .ad
+ /* .fi
+ /* The Secure Mailer license must be distributed with this software.
+ /* AUTHOR(S)
+ /* Wietse Venema
+ /* IBM T.J. Watson Research
+ /* P.O. Box 704
+ /* Yorktown Heights, NY 10598, USA
+ /*--*/
+
+ /* System library. */
+
+ #include <sys_defs.h>
+ #include <sys/socket.h>
+ #include <netinet/in.h>
+ #include <netinet/tcp.h>
+
+ /* Utility library. */
+
+ #include <msg.h>
+ #include <vstream.h>
+
+ /* Application-specific. */
+
+ /* vstream_tweak_tcp - boost your Internet performance */
+
+ int vstream_tweak_tcp(VSTREAM *fp)
+ {
+ const char *myname = "vstream_tweak_tcp";
+ int mss;
+ SOCKOPT_SIZE mss_len = sizeof(mss);
+ int err = 0;
+
+ /*
+ * Avoid Nagle delays when VSTREAM buffers are smaller than the MSS.
+ *
+ * Forcing TCP_NODELAY to be "always on" would hurt performance in the
+ * common case where VSTREAM buffers are larger than the MSS.
+ */
+ if ((err = getsockopt(vstream_fileno(fp), IPPROTO_TCP, TCP_MAXSEG,
+ &mss, &mss_len)) == 0) {
+ if (msg_verbose)
+ msg_info("%s: TCP_MAXSEG %d", myname, mss);
+ if (mss > 0)
+ vstream_control(fp,
+ VSTREAM_CTL_BUFSIZE, (ssize_t) mss,
+ VSTREAM_CTL_END);
+ }
+ return (err);
+ }