All my patches to parse.c

From: Jose Kahan <Jose.Kahan_at_inrialpes.fr_at_hypermail-project.org>
Date: Fri, 6 Aug 1999 22:00:02 +0200 (MET DST)
Message-ID: <Pine.SOL.3.94.990806215829.1807A-200000_at_tuvalu.inrialpes.fr>


Here they are (if you're interested)!

Note that I haven't yet tested them against regenerating an mbox. For the moment, my tests are done in a one-message-at-a-time context, using an online mailing list.

-Jose

-
#include "hypermail.h"
#include <ctype.h>
#include <errno.h>

--- 25,30 ----


-

  /*
!  * Prefix to prepend to all saved attachments' filenames
   */

! #define FILE_PREFIXER "att-"   

  /*

  /*
!  * Suffix to prepend to all saved attachments' filenames when the
!  * headers don't propose a filename
   */

! #define FILE_SUFFIXER "part"   
  /*
   * Prefix to prepend to all saved attachments' directory names (before
***************

*** 47,52 ****
--- 46,52 ----

   */
#define DIR_PREFIXER "att-"
  

+

  /* 
   * Used to replace invalid characters in supplied attachment filenames
   */
***************
*** 93,113 ****
     return (inlist(set_inline_types, type));
  }   

! int preferedcontent(char *type)
  {

!   /* We let plain text remain PREFERED at all times */
!   if(!strcasecmp("text/plain", type))
!     return 1;
  
!   /* 
!   ** Parsing for embedded html needs more work before we 
!   ** can actually do this... 
!   */
!   if(set_prefered_types) {
!     return (inlist(set_prefered_types, type));

    }   

! return 0;
  }   

  int textcontent(char *type)
--- 93,133 ----

     return (inlist(set_inline_types, type));   }   

! int preferedcontent(int *current_weight, char *type)   {

!   int weight;
!   int status;
! 
!   status = 0;
  
!   /* We let plain text remain PREFERED at all times */
!   if(!strcasecmp("text/plain", type)) {
!     if (*current_weight != 0) {
!       /* to avoid having two text/plain alternatives */
!       *current_weight = 0;
!       status = 1;
!     }
!   }
!   /* find the weight of the type arg. If the weight is
!      inferior to the current_weight, we make it the
!      prefered content */  
!   else if (set_prefered_types) {
!     weight = inlist_pos (set_prefered_types, type);
!     if (weight != -1) {
!       /* +1 so that weight 0 is reserved for text/plain */
!       weight++;
!       if (*current_weight == -1) {
! 	*current_weight = weight;
! 	status = 1;
!       }
!       else if (*current_weight > weight) {
! 	*current_weight = weight;
! 	status = 1;
!       }
!     }

    }   

! return status;
  }   

  int textcontent(char *type)


! void emptydir(char *firstdir, char *directory)   {
    struct stat fileinfo;   

! char *realdir = maprintf("%s%c%s", firstdir, PATH_SEPARATOR, directory);     

    if(!lstat(realdir, &fileinfo)) {
      if(S_ISDIR(fileinfo.st_mode)) {
--- 897,907 ----
  }      

! void emptydir(char *directory)
  {
    struct stat fileinfo;   

! char *realdir = directory;     

    if(!lstat(realdir, &fileinfo)) {
      if(S_ISDIR(fileinfo.st_mode)) {


  /*
--- 924,929 ----


    /* -- variables for the multipart/alternative parser -- */

    struct body *origbp=NULL;     /* store the original bp */
    struct body *origlp=NULL;     /* ... and the original lp */
    char alternativeparser=FALSE; /* set when inside alternative parser mode */
    /* -- end of alternative parser variables -- */   

    struct body *bp;
--- 951,980 ----

    char *emailp =NULL;
    char line[MAXLINE],
      fromdate[DATESTRLEN]="";
! char *cp;
! char *dp = NULL;

    int num, isinheader, hassubject, hasdate;     struct emailinfo *emp;

!   char *att_dir=NULL; /* directory name to store attachments in */
! 
!   typedef enum {
!     NO_FILE,
!     MAKE_FILE,
!     MADE_FILE
!   } FileStatus; /* for attachments */
  
    /* -- variables for the multipart/alternative parser -- */
    struct body *origbp=NULL;     /* store the original bp */
    struct body *origlp=NULL;     /* ... and the original lp */
    char alternativeparser=FALSE; /* set when inside alternative parser mode */
+   int alternative_weight=-1;     /* the current weight of the prefered alternative content */
+   struct body *alternative_lp;  /* the previous alternative lp */
+   struct body *alternative_bp;  /* the previous alternative bp */
+   FileStatus alternative_lastfile_created = NO_FILE; /* previous alternative attachments, for non-inline MIME types */
+   char alternative_file[129];  /* file name where we store the non-inline alternatives */
+   char alternative_lastfile[129];  /* last file name where we store the non-inline alternatives */
+ int att_counter = 0; /* used to generate a unique name for attachments */ +

    /* -- end of alternative parser variables -- */   

    struct body *bp;


    char *binname=NULL; /* file name to store binary attachments in */     int binfile=-1;
-

    char *charset=NULL; /* this is the LOCAL charset used in the mail */   

    char *boundary;
    char type[129]; /* for Content-Type type */     char charbuffer[129]; /* for Content-Type charset */

!   enum {
!     NO_FILE,
!     MAKE_FILE,
!     MADE_FILE
!   } file_created = NO_FILE; /* for attachments */
  

    char attachname[129]; /* for attachment file names */   

    EncodeType decode=ENCODE_NORMAL;
    ContentType content=CONTENT_TEXT;
--- 1002,1018 ----   

    char *binname=NULL; /* file name to store binary attachments in */     int binfile=-1;
    char *charset=NULL; /* this is the LOCAL charset used in the mail */   

    char *boundary;
    char type[129]; /* for Content-Type type */     char charbuffer[129]; /* for Content-Type charset */ ! FileStatus file_created = NO_FILE; /* for attachments */   

    char attachname[129]; /* for attachment file names */

+   char inline_force = FALSE; /* show an attachment in-line, regardless of
+ 				the content_disposition */
+   char *description = NULL; /* user-supplied description for an attachment */
  

    EncodeType decode=ENCODE_NORMAL;
    ContentType content=CONTENT_TEXT;


          /*
--- 1087,1097 ----

          /*


            else if (!strncasecmp( head->line, "Content-Type:", 13)) {
              char *ptr=head->line+13;
--- 1178,1205 ----
                  safe_filename(attachname);
                }
                else {
!                 attachname[0]='\0'; /* just clear it */
                }
                file_created = MAKE_FILE; /* please make one */
              }
! 	    else if (!strncasecmp(ptr, "inline;", 7)
! 		     && (content != CONTENT_IGNORE)
! 		     && (!strncasecmp (type, "image/", 5))) {
! 	      inline_force = TRUE;
!               /* make sure it is binary */
!               content = CONTENT_BINARY;
! 	      /* see if there's a file name to use: */
! 	      fname = strstr(ptr, "filename=");
! 	      if (fname) {
! 		sscanf(fname+10, "%128[^\"]", attachname);
! 		safe_filename(attachname);
! 	      }
! 	      else {
! 		attachname[0]='\0'; /* just clear it */
! 	      }
! 	      file_created = MAKE_FILE; /* please make one */
! 	    }
! 	  }
  
            else if (!strncasecmp( head->line, "Content-Type:", 13)) {
              char *ptr=head->line+13;

!             if(alternativeparser) {
!               /* We are parsing alternatives... */
  
!               if(preferedcontent(type) ) {
!                 /* ... this is a prefered type, we want to store
                     this [instead of the earlier one]. */

#if 0
! struct body *next; ! printf("%s is more fun than the previous one\n", ! type); ! #endif ! #if 0 ! /* ! ** Not sure why this free section is here. ! ** It is causing purify to barf with massive numbers of ! ** "FMR: Free memory reads". When I commented it out it ! ** cleared up the problem with no associated memory leaked ! ** or difference in output. It's history for now. ! */ ! while(bp) { ! next=bp->next; ! if (bp->line) free(bp->line); ! if (bp) free(bp); ! bp=next; ! }
#endif
! headp = NULL; } else { /* ...and this type is not a prefered one. Thus, we * shall ignore it completely! */ content = CONTENT_IGNORE; } ! } ! if(ignorecontent(type)) { ! /* don't save this */ ! content = CONTENT_IGNORE; ! } ! else if (textcontent(type) || ! (inlinehtml && !strcasecmp(type, "text/html"))) { ! /* text content or text/html follows. ! */ ! ! if (!strcasecmp(type, "text/html")) ! content = CONTENT_HTML; ! else ! content = CONTENT_TEXT; ! continue; ! } else if (!strncasecmp(type, "message/rfc822", 14)) { /* ** Here comes an attached mail! This can be ugly, --- 1232,1305 ---- charset = strsav(charbuffer); } ! if (alternativeparser) { ! struct body *next, *temp_bp; ! ! /* We are parsing alternatives... */ ! if(preferedcontent(&alternative_weight, type) ) { ! /* ... this is a prefered type, we want to store this [instead of the earlier one]. */
#if 0
! printf("%s is more fun than the previous one\n", ! type);
#endif
! /* erase the previous alternative info */ ! temp_bp = alternative_bp; /* remember the value of bp for GC */ ! alternative_bp = alternative_lp = NULL; ! alternative_lastfile_created = NO_FILE; ! content = CONTENT_UNKNOWN; ! if (alternative_lastfile[0] != '\0') { ! /* remove the previous attachment */ ! unlink (alternative_lastfile); ! alternative_lastfile[0] = '\0'; ! } } else { /* ...and this type is not a prefered one. Thus, we * shall ignore it completely! */ content = CONTENT_IGNORE; + /* erase the current alternative info */ + temp_bp = bp; /* remember the value of bp for GC */ + lp = alternative_lp; + bp = alternative_bp; + strcpy (alternative_file, alternative_lastfile); + file_created = alternative_lastfile_created; + alternative_bp = alternative_lp = NULL; + alternative_lastfile_created = NO_FILE; + alternative_lastfile[0] = '\0'; + /* we haven't yet created any attachment file, so there's no need + to erase it yet */ } ! /* free any previous alternative */ ! while (temp_bp) { ! next = temp_bp->next; ! if (temp_bp->line) free (temp_bp->line); ! free (temp_bp); ! temp_bp = next; ! } ! /* _at_@ not sure if I should add a diff flag to do this break */ ! if (content == CONTENT_IGNORE) ! /* end the header parsing... we already know what we want */ ! break; ! } ! ! if (content == CONTENT_IGNORE) ! continue; ! else if (ignorecontent(type)) ! /* don't save this */ ! content = CONTENT_IGNORE; ! else if (textcontent(type) ! || (inlinehtml && !strcasecmp(type, "text/html"))) { ! /* text content or text/html follows and we should display it ! */ ! ! if (!strcasecmp(type, "text/html")) ! content = CONTENT_HTML; ! else ! content = CONTENT_TEXT; ! continue; ! } else if (!strncasecmp(type, "message/rfc822", 14)) { /* ** Here comes an attached mail! This can be ugly,

                  file_created = MAKE_FILE; /* please make one */
--- 1340,1346 ----
                    safe_filename(attachname);
                  }
                  else {
!                   attachname[0]='\0'; /* just clear it */
                  }
  
                  file_created = MAKE_FILE; /* please make one */

            alternativeparser = TRUE;
-

            /* restart on a new list: */
            lp=bp=NULL;
          }
          headp = lp; /* start at this point next time */
        }
        else {
!         bp = addbody(bp, &lp, line, BODY_HEADER|bodyflags);
        }
      }
      else {
--- 1492,1509 ----
            origlp=lp;
  
            alternativeparser = TRUE;
            /* restart on a new list: */
            lp=bp=NULL;
+ 	  /* clean the alternative status variables */
+ 	  alternative_weight = -1;
+ 	  alternative_lp = alternative_bp = NULL;
+ 	  alternative_lastfile_created = NO_FILE;
+ 	  alternative_file[0] = alternative_lastfile[0] = '\0';
          }
          headp = lp; /* start at this point next time */
        }
        else {
! 	bp = addbody(bp, &lp, line, BODY_HEADER|bodyflags);
        }
      }
      else {

            num++;

- 
-           /* create a directory name to store all attachments in */
-           if(newdir)
-             free(newdir);
-           newdir = maprintf("%s%04d", DIR_PREFIXER, num);
- 
-           /* If this is a repeated run on the same archive we already have
-            * HTML'ized, we risk extracting the same attachments several times
-            * and therefore we need to remove all the attachments currently
-            * present before we go ahead!  (Daniel -- August 6, 1999) */
-           
-           emptydir(dir, newdir);
          }
  
          if(hasdate)

--- 1559,1564 ----
          /* by default we have none! */
          hassubject = 0;

                isinheader = 0; /* no header, the ending boundary
                                   can't have any describing
--- 1628,1642 ----
                !strncmp(line, "--", 2) &&
                !strncmp(line+2, boundp->line, strlen(boundp->line))) {
              /* right at this point, we have another part coming up */

#if 0
printf("hit %s\n", line);
#endif
if (!strncmp(line+2+strlen(boundp->line), "--", 2)) { + /* _at_@@ don't know why we had this line here. Doesn't hurt to take + it out, though */ + #if 0 bp = addbody(bp,&lp,"\n",BODY_HTMLIZED|bodyflags);
! #endif   
                isinheader = 0; /* no header, the ending boundary
                                   can't have any describing

                file_created = MADE_FILE; /* we have, or at least we tried */
  
!               /* If we found a name then lets check it out to see if we can use
!                * it.  */
!               
!               if (fname[0]) {
!                 /* Check if we can use this attached name * for storing: */
!                 
!                 if(strlen(fname) +
!                    strlen(dir) +
!                    strlen(newdir) +
!                    strlen(FILE_PREFIXER) <= 500) {
!                   /* yes, we have room in our array */
!                   struct stat fileinfo;
!                   char alter[2]="";
!                   int counter='a';
! 
!                   mkdir(newdir, set_dirmode);
! 
!                   fprintf(stderr, "\nCreated dir %s\n", newdir);
! 
!                   nameisuniq=TRUE;
!                   do {
!                     sprintf(checkpath, "%s%c%s%c" FILE_PREFIXER "%s%s",
!                             dir, PATH_SEPARATOR,
!                             newdir, PATH_SEPARATOR,
!                             alter, fname);
!                     /* loop while the file exist and try a few
!                        alternative file names before giving up */
!                     if(counter>='z') {
!                       nameisuniq=FALSE; /* we didn't find a unique file name */
!                       break;
!                     }
!                     sprintf(alter, "%c", counter++);
!                   } while(0 == lstat(checkpath, &fileinfo));
!                 }
!               }
  
                /* Saving of the attachments is being done inline as they are
                 * encountered. The directories must exist first...  */
!   
!               if(!nameisuniq) {
!                 /* first make the path to create this in */
!                 sprintf(checkpath, "%s%c%s",
!                         dir, PATH_SEPARATOR,
!                         newdir);
!                 /* get a random name */
!                 binname = tmpname(checkpath, FILE_PREFIXER "bin");
!               }             
!               else
!                 binname = checkpath;
! 
                if (binname) {
!                 binfile=open(binname, O_WRONLY|O_CREAT,
!                              set_filemode);
                  
                  if(-1 != binfile) {
                    chmod(binname, set_filemode);
                    if(set_showprogress) 
- 
                      print_progress(num,lang[MSG_CREATED_ATTACHMENT_FILE],binname);
!                 }
! 
!                 file=strrchr(binname, PATH_SEPARATOR);
!                 if (file)
!                   file++; /* pass the separator */
!                 else
!                   file=binname;
!               }
! 
!               if( inlinecontent(type) ) {
!                 /* if we know our browsers can show this type of context
!                    as-is, we make a <IMG> tag instead of <A HREF>! */
! 
!                 snprintf(buffer, sizeof(buffer),
!                          "%s<IMG SRC=\"%s/%s\" ALT=\"%s\">\n",
!                          (set_showhr ? "<HR>\n" : ""),
!                          newdir,
!                          file,
!                          fname ? fname : "picture");
!               }
!               else {
!                 snprintf(buffer, sizeof(buffer),
!                          "%s<UL>\n<LI>%s %s: <A HREF=\"%s/%s\">%s</A>\n</UL>\n",
!                         (set_showhr ? "<HR>\n" : ""),
!                          type, 
!                          lang[MSG_ATTACHMENT], 
!                          newdir,
!                          file ? file : "<void>", 
!                          fname[0] ? fname : "stored");
!   
!               }
!               /* Print attachment comment before attachment */
!               bp = addbody(bp, &lp, buffer, BODY_HTMLIZED|bodyflags);
!               snprintf(buffer, sizeof(buffer),
!                        "<!-- attachment=\"%.80s\" -->\n",
!                       file);
!               bp = addbody(bp, &lp, buffer, BODY_HTMLIZED|bodyflags);
  
!               if (binname && (binfile!=-1))
!                 content = CONTENT_BINARY;
!               else
!                 content = CONTENT_UNKNOWN;
!               
!               if (!nameisuniq && binname)
!                 free(binname);
!             }
!           }

#endif
if (-1 != binfile) { if (datalen < 0)

--- 1799,1930 ----
#endif
            }
            else if (content == CONTENT_BINARY) {

#ifndef REMOVED_990310
/* If there is no file created, we create and init one */ if(file_created == MAKE_FILE) { ! char *fname; ! char *binname; ! char *file = NULL; ! char buffer[512]; file_created = MADE_FILE; /* we have, or at least we tried */ ! /* create the attachment directory if it doesn't exist */ ! if (att_dir == NULL) { ! /* first check the DIR_PREFIXER */ ! att_dir = maprintf ("%s%c" DIR_PREFIXER "%.4d", dir, ! PATH_SEPARATOR, num); ! check1dir (att_dir); ! /* If this is a repeated run on the same archive we already ! * have HTML'ized, we risk extracting the same attachments ! * several times and therefore we need to remove all the ! * attachments currently present before we go ahead! ! *(Daniel -- August 6, 1999) */ ! /* jk: removed it for a while, as it's not so necessary ! once we can generate the same file names */ ! #if 0 ! emptydir(att_dir); ! #endif ! } ! ! /* If the attachment has a name, we keep it and add the ! current value of the counter, to guarantee that we ! have a unique name. Otherwise, we use a fixed name + ! the counter. We go thru all this trouble so that we ! can easily regenerate the same archive, without breaking ! any links */ ! ! if (att_counter > 99) ! binname = NULL; ! else { ! if (attachname[0]) ! fname = attachname; ! else ! fname = FILE_SUFFIXER; ! ! binname = maprintf ("%s%c%.2d-%s", ! att_dir, PATH_SEPARATOR, att_counter, ! fname); ! att_counter++; ! } /* Saving of the attachments is being done inline as they are * encountered. The directories must exist first... */ ! if (binname) { ! binfile = open(binname, O_WRONLY|O_CREAT, ! set_filemode); if(-1 != binfile) { chmod(binname, set_filemode); if(set_showprogress) print_progress(num,lang[MSG_CREATED_ATTACHMENT_FILE],binname); ! ! if (alternativeparser) ! /* save the last name, in case we need to supress it */ ! strncpy (alternative_file, binname, sizeof (alternative_file) -1); ! } else { ! if (alternativeparser) ! /* save the last name, in case we need to supress it */ ! 
alternative_file[0] = '\0'; ! } ! ! /* point to the filename and skip the separator */ ! file = &binname[strlen (dir) + 1]; ! ! /* protection against having a filename bigger than buffer */ ! if (strlen (file) <= 500) { ! char *desc; ! ! if (description && description[0] != '\0') ! desc = description; ! else if (inline_force || inlinecontent(type)) ! desc = attachname[0] ? attachname : "picture"; ! else ! desc = attachname[0] ? attachname : "stored"; ! ! if (description) ! description = NULL; ! ! if( inline_force || inlinecontent(type) ) { ! /* if we know our browsers can show this type of context ! as-is, we make a <IMG> tag instead of <A HREF>! */ ! ! snprintf(buffer, sizeof(buffer), ! "%s<IMG SRC=\"%s\" ALT=\"%s\">\n", ! (set_showhr ? "<HR>\n" : ""), file, ! desc); ! } ! else { ! snprintf(buffer, sizeof(buffer), ! "%s<UL>\n<LI>%s %s: <A HREF=\"%s\">%s</A>\n</UL>\n", ! (set_showhr ? "<HR>\n" : ""), type, ! lang[MSG_ATTACHMENT], ! file ? file : "<void>", ! desc); ! } ! ! /* Print attachment comment before attachment */ ! bp = addbody(bp, &lp, buffer, BODY_HTMLIZED|bodyflags); ! snprintf(buffer, sizeof(buffer), ! "<!-- attachment=\"%.80s\" -->\n", ! file); ! bp = addbody(bp, &lp, buffer, BODY_HTMLIZED|bodyflags); ! } ! } ! inline_force = FALSE; ! attachname[0] = '\0'; ! ! if (binname && (binfile!=-1)) ! content = CONTENT_BINARY; ! else ! content = CONTENT_UNKNOWN; ! ! if (binname) ! free (binname); ! } ! }
#endif
if (-1 != binfile) { if (datalen < 0)

    if (!isinheader || readone) {

      if (!hassubject)
        subject = NOSUBJECT;
--- 1939,1946 ----
        }
      }

    }     

+

    if (!isinheader || readone) {

      if (!hassubject)
        subject = NOSUBJECT;

+     /* @@@ if we didn't add the mail, we should consider erasing the attdir
+        if it's there */
+ 
      if(hasdate)
        free(date);
      if(hassubject)

+     /* reset the status counters */
+     /* @@ verify we're doing it everywhere */
+     bodyflags=0; /* reset state flags */
+      
+     /* go back to default mode: */
+     content = CONTENT_TEXT;
+     decode = ENCODE_NORMAL;
+     Mime_B = FALSE;
+     headp = NULL; 
+     multilinenoend = FALSE;
+     if (att_dir) {
+       free (att_dir);
+       att_dir = NULL;
+     }
+     att_counter = 0;
+     description = NULL;
+  
+     /* by default we have none! */
+     hassubject = 0;
+     hasdate = 0;
    }
      

    if (set_showprogress && !readone)

Received on Sat 14 Aug 1999 07:06:20 PM GMT

This archive was generated by hypermail 2.2.0 : Thu 22 Feb 2007 07:33:51 PM GMT