1 files changed, 832 insertions, 0 deletions
diff --git a/src/tools/munch.c b/src/tools/munch.c
new file mode 100644
index 0000000..2087efa
--- /dev/null
+++ b/src/tools/munch.c
@@ -0,0 +1,832 @@
+/* Munch a word list and generate a smaller root word list with affixes*/
+
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef __linux__
+#include <error.h>
+#include <errno.h>
+#include <sys/mman.h>
+#endif
+
+#include "munch.h"
+
+int main(int argc, char** argv)
+{
+
+  int i, j, k, n;
+  int rl, p , nwl;
+  int al;
+
+  FILE * wrdlst;
+  FILE * afflst;
+
+  char *nword, *wf, *af;
+  char as[(MAX_PREFIXES + MAX_SUFFIXES)];
+  char * ap;
+
+  struct hentry * ep;
+  struct hentry * ep1;
+  struct affent * pfxp;
+  struct affent * sfxp;
+
+  /* first parse the command line options */
+  /* arg1 - wordlist, arg2 - affix file */
+
+  if (argv[1]) {
+       wf = mystrdup(argv[1]);
+  } else {
+    fprintf(stderr,"correct syntax is:\n"); 
+    fprintf(stderr,"munch word_list_file affix_file\n");
+    exit(1);
+  }
+  if (argv[2]) {
+       af = mystrdup(argv[2]);
+  } else {
+    fprintf(stderr,"correct syntax is:\n"); 
+    fprintf(stderr,"munch word_list_file affix_file\n");
+    exit(1);
+  }
+
+  /* open the affix file */
+  afflst = fopen(af,"r");
+  if (!afflst) {
+    fprintf(stderr,"Error - could not open affix description file\n");
+    exit(1);
+  }
+
+  /* step one is to parse the affix file building up the internal
+     affix data structures */
+
+  numpfx = 0;
+  numsfx = 0;
+
+  if (parse_aff_file(afflst)) {
+    fprintf(stderr,"Error - in affix file loading\n");
+    exit(1);
+  }
+  fclose(afflst);
+
+  fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx);
+
+  /* affix file is now parsed so create hash table of wordlist on the fly */
+
+  /* open the wordlist */
+  wrdlst = fopen(wf,"r");
+  if (!wrdlst) {
+    fprintf(stderr,"Error - could not open word list file\n");
+    exit(1);
+  }
+
+  if (load_tables(wrdlst)) {
+    fprintf(stderr,"Error building hash tables\n");
+    exit(1);
+  }
+  fclose(wrdlst);
+
+  for (i=0; i< tablesize; i++) {
+    ep = &tableptr[i];
+    if (ep->word == NULL) continue;
+    for (  ;  ep != NULL;  ep = ep->next) {
+      numroots = 0;
+      aff_chk(ep->word,strlen(ep->word));
+      if (numroots) {
+            /* now there might be a number of combinations */
+            /* of prefixes and suffixes that might match this */
+            /* word.  So how to choose?  As a first shot look */
+            /* for the shortest remaining root word to */
+            /* to maximize the combinatorial power */
+
+	    /* but be careful, do not REQUIRE a specific combination */
+            /* of a prefix and a suffix to generate the word since */
+            /* that violates the rule that the root word with just */
+            /* the prefix or just the suffix must also exist in the */
+            /* wordlist as well */
+
+	    /* in fact because of the cross product issue, this not a  */
+	    /* simple choice since some combinations of previous */ 
+	    /* prefixes and new suffixes may not be valid. */
+	    /*  The only way to know is to simply try them all */
+  
+            rl = 1000;
+            p = -1;
+
+            for (j = 0; j < numroots; j++){
+
+	      /* first collect the root word info and build up */
+              /* the potential new affix string */
+               nword = (roots[j].hashent)->word;
+               nwl = strlen(nword);
+               *as = '\0';
+               al = 0;
+               ap = as;
+               if (roots[j].prefix) *ap++ = (roots[j].prefix)->achar;
+               if (roots[j].suffix) *ap++ = (roots[j].suffix)->achar;
+               if ((roots[j].hashent)->affstr) {
+		   strcpy(ap,(roots[j].hashent)->affstr);
+               } else {
+		 *ap = '\0';
+               }
+               al =strlen(as);
+
+               /* now expand the potential affix string to generate */
+               /* all legal words and make sure they all exist in the */
+               /* word list */
+               numwords = 0;
+               wlist[numwords].word = mystrdup(nword);
+               wlist[numwords].pallow = 0;
+               numwords++;
+               n = 0;
+               if (al)
+		 expand_rootword(nword,nwl,as,al);
+               for (k=0; k<numwords; k++) {
+		 if (lookup(wlist[k].word)) n++;
+                 free(wlist[k].word);
+                 wlist[k].word = NULL;
+                 wlist[k].pallow = 0;
+               }
+
+               /* if all exist in word list then okay */
+               if (n == numwords) {               
+                  if (nwl < rl) {
+                     rl = nwl;
+                     p = j;
+                  }
+               }
+            }
+            if (p != -1) {
+               ep1 = roots[p].hashent;
+               pfxp = roots[p].prefix;
+               sfxp = roots[p].suffix;
+               ep1->keep = 1;
+               if (pfxp != NULL) add_affix_char(ep1,pfxp->achar);
+               if (sfxp != NULL) add_affix_char(ep1,sfxp->achar);
+            } else {
+	      ep->keep = 1;
+            }
+      } else {
+            ep->keep = 1;
+      }
+    }
+  }
+
+  /* now output only the words to keep along with affixes info */
+  /* first count how many words that is */
+  k = 0;
+  for (i=0; i< tablesize; i++) {
+    ep = &tableptr[i];
+    if (ep->word == NULL) continue;
+    for (  ;  ep != NULL;  ep = ep->next) {
+       if (ep->keep > 0) k++;
+    }
+  }
+  fprintf(stdout,"%d\n",k);
+
+  for (i=0; i< tablesize; i++) {
+    ep = &tableptr[i];
+    if (ep->word == NULL) continue;
+    for (  ;  ep != NULL;  ep = ep->next) {
+      if (ep->keep > 0) {
+        if (ep->affstr != NULL) { 
+	  fprintf(stdout,"%s/%s\n",ep->word,ep->affstr);
+	} else {
+          fprintf(stdout,"%s\n",ep->word);
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+
+int parse_aff_file(FILE * afflst)
+{  
+    int i, j;
+    int numents = 0;
+    char achar = '\0';
+    short ff=0;
+    char ft;
+    struct affent * ptr= NULL;
+    struct affent * nptr= NULL;
+    char * line = malloc(MAX_LN_LEN);
+
+    while (fgets(line,MAX_LN_LEN,afflst)) {
+       mychomp(line);
+       ft = ' ';
+       fprintf(stderr,"parsing line: %s\n",line);
+       if (strncmp(line,"PFX",3) == 0) ft = 'P';
+       if (strncmp(line,"SFX",3) == 0) ft = 'S';
+       if (ft != ' ') {
+          char * tp = line;
+          char * piece;
+          i = 0;
+          ff = 0;
+          while ((piece=mystrsep(&tp,' '))) {
+             if (*piece != '\0') {
+                 switch(i) {
+                    case 0: break;
+                    case 1: { achar = *piece; break; }
+                    case 2: { if (*piece == 'Y') ff = XPRODUCT; break; }
+                    case 3: { numents = atoi(piece); 
+                              ptr = malloc(numents * sizeof(struct affent));
+                              ptr->achar = achar;
+                              ptr->xpflg = ff;
+	                      fprintf(stderr,"parsing %c entries %d\n",achar,numents);
+                              break;
+                            }
+		    default: break;
+                 }
+                 i++;
+             }
+             free(piece);
+          }
+          /* now parse all of the sub entries*/
+          nptr = ptr;
+          for (j=0; j < numents; j++) {
+             if (!fgets(line,MAX_LN_LEN,afflst)) return 1;
+             mychomp(line);
+             tp = line;
+             i = 0;
+             while ((piece=mystrsep(&tp,' '))) {
+                if (*piece != '\0') {
+                    switch(i) {
+                       case 0: { if (nptr != ptr) {
+                                   nptr->achar = ptr->achar;
+                                   nptr->xpflg = ptr->xpflg;
+                                 }
+                                 break;
+                               }
+                       case 1: break;
+                       case 2: { nptr->strip = mystrdup(piece);
+                                 nptr->stripl = strlen(nptr->strip);
+                                 if (strcmp(nptr->strip,"0") == 0) {
+                                   free(nptr->strip);
+                                   nptr->strip=mystrdup("");
+				   nptr->stripl = 0;
+                                 }   
+                                 break; 
+                               }
+                       case 3: { nptr->appnd = mystrdup(piece);
+                                 nptr->appndl = strlen(nptr->appnd);
+                                 if (strcmp(nptr->appnd,"0") == 0) {
+                                   free(nptr->appnd);
+                                   nptr->appnd=mystrdup("");
+				   nptr->appndl = 0;
+                                 }   
+                                 break; 
+                               }
+                       case 4: { encodeit(nptr,piece);}
+                               fprintf(stderr, "   affix: %s %d, strip: %s %d\n",nptr->appnd,
+                                                   nptr->appndl,nptr->strip,nptr->stripl);
+		       default: break;
+                    }
+                    i++;
+                }
+                free(piece);
+             }
+             nptr++;
+          }
+          if (ft == 'P') {
+             ptable[numpfx].aep = ptr;
+             ptable[numpfx].num = numents;
+             fprintf(stderr,"ptable %d num is %d\n",numpfx,ptable[numpfx].num);
+             numpfx++;
+          } else {
+             stable[numsfx].aep = ptr;
+             stable[numsfx].num = numents;
+             fprintf(stderr,"stable %d num is %d\n",numsfx,stable[numsfx].num);
+             numsfx++;
+          }
+          ptr = NULL;
+          nptr = NULL;
+          numents = 0;
+          achar='\0';
+       }
+    }
+    free(line);
+    return 0;
+}
+
+
+void encodeit(struct affent * ptr, char * cs)
+{
+  int nc;
+  int neg;
+  int grp;
+  unsigned char c;
+  int n;
+  int ec;   
+  int nm;
+  int i, j, k;
+  unsigned char mbr[MAX_WD_LEN];
+
+  /* now clear the conditions array */
+  for (i=0;i<SET_SIZE;i++) ptr->conds[i] = (unsigned char) 0;
+
+  /* now parse the string to create the conds array */
+  nc = strlen(cs);
+  neg = 0;  /* complement indicator */
+  grp = 0;  /* group indicator */
+  n = 0;    /* number of conditions */
+  ec = 0;   /* end condition indicator */
+  nm = 0;   /* number of member in group */
+  i = 0;
+  if (strcmp(cs,".")==0) {
+    ptr->numconds = 0;
+    return;
+  }
+  while (i < nc) {
+    c = *((unsigned char *)(cs + i));
+    if (c == '[') {
+       grp = 1;
+       c = 0;
+    }
+    if ((grp == 1) && (c == '^')) {
+       neg = 1;
+       c = 0;
+    }
+    if (c == ']') {
+       ec = 1;
+       c = 0;
+    }
+    if ((grp == 1) && (c != 0)) {
+      *(mbr + nm) = c;
+      nm++;
+      c = 0;
+    }
+    if (c != 0) {
+       ec = 1;
+    }
+    if (ec) {
+      if (grp == 1) {
+        if (neg == 0) {
+	  for (j=0;j<nm;j++) {
+	     k = (unsigned int) mbr[j];
+             ptr->conds[k] = ptr->conds[k] | (1 << n);
+          }
+	} else {
+	   for (j=0;j<SET_SIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+	   for (j=0;j<nm;j++) {
+	     k = (unsigned int) mbr[j];
+             ptr->conds[k] = ptr->conds[k] & ~(1 << n);
+	   }
+        }
+        neg = 0;
+        grp = 0;   
+        nm = 0;
+      } else {
+	/* not a group so just set the proper bit for this char */
+	/* but first handle special case of . inside condition */
+	if (c == '.') {
+	  /* wild card character so set them all */
+	  for (j=0;j<SET_SIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+	} else {
+	  ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
+	}
+      }
+      n++;
+      ec = 0;
+    }
+    i++;
+  }
+  ptr->numconds = n;
+  return;
+}
+
+
+
+/* search for a prefix */
+void pfx_chk (const char * word, int len, struct affent* ep, int num)
+{
+    struct affent *     aent;
+    int			cond;
+    int	tlen;
+    struct hentry *	hent;
+    unsigned char *	cp;		
+    int			i;
+    char	        tword[MAX_WD_LEN];
+
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+	tlen = len - aent->appndl;
+
+	if (tlen > 0 &&  (aent->appndl == 0 ||  
+            strncmp(aent->appnd, word, aent->appndl) == 0)
+	    &&  tlen + aent->stripl >= aent->numconds) {
+
+	    if (aent->stripl) strcpy (tword, aent->strip);
+	    strcpy((tword + aent->stripl), (word + aent->appndl));
+
+            /* now go through the conds and make sure they all match */
+	    cp = (unsigned char *) tword;
+	    for (cond = 0;  cond < aent->numconds;  cond++) {
+		if ((aent->conds[*cp++] & (1 << cond)) == 0)
+		    break;
+	    }
+
+	    if (cond >= aent->numconds) {
+		tlen += aent->stripl;
+		if ((hent = lookup(tword)) != NULL) {
+		   if (numroots < MAX_ROOTS) {
+		       roots[numroots].hashent = hent;
+		       roots[numroots].prefix = aent;
+		       roots[numroots].suffix = NULL;
+		       numroots++;
+		   }
+		}
+	    }
+	}
+    }
+}
+
+
+
+void suf_chk (const char * word, int len, struct affent * ep, 
+              int num, struct affent * pfxent, int cpflag)
+{
+    struct affent *     aent;	
+    int	                tlen;	
+    int			cond;	
+    struct hentry *	hent;	
+    unsigned char *	cp;
+    int			i;
+    char	        tword[MAX_WD_LEN];
+
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+        if ((cpflag & XPRODUCT) != 0 &&  (aent->xpflg & XPRODUCT) == 0)
+	    continue;
+
+	tlen = len - aent->appndl;
+	if (tlen > 0  &&  (aent->appndl == 0 ||  
+            strcmp(aent->appnd, (word + tlen)) == 0)
+	    &&  tlen + aent->stripl >= aent->numconds) {
+
+	    strcpy (tword, word);
+	    cp = (unsigned char *) (tword + tlen);
+	    if (aent->stripl) {
+	       strcpy ((char *)cp, aent->strip);
+	       tlen += aent->stripl;
+	       cp = (unsigned char *)(tword + tlen);
+	    } else *cp = '\0';
+
+	    for (cond = aent->numconds;  --cond >= 0;  ) {
+		if ((aent->conds[*--cp] & (1 << cond)) == 0) break;
+	    }
+	    if (cond < 0) {
+	       if ((hent = lookup(tword)) != NULL) {
+		  if (numroots < MAX_ROOTS) {
+		     roots[numroots].hashent = hent;
+		     roots[numroots].prefix = pfxent;
+		     roots[numroots].suffix = aent;
+		     numroots++;
+		  }
+	       }
+	    }
+	}
+    }
+}
+
+
+
+void aff_chk (const char * word, int len)
+{
+    int i;
+    int j;
+    int nh=0;
+    char * nword;
+    int nwl;
+
+    if (len < 4) return;
+
+    for (i=0; i < numpfx; i++) {
+       pfx_chk(word, len, ptable[i].aep, ptable[i].num);
+    }
+
+    nh = numroots;
+
+    if (nh > 0) {
+       for (j=0;j<nh;j++){
+         if (roots[j].prefix->xpflg & XPRODUCT) {
+            nword = mystrdup((roots[j].hashent)->word);
+            nwl = strlen(nword);
+            for (i=0; i < numsfx; i++) {
+               suf_chk(nword,nwl,stable[i].aep, stable[i].num, roots[j].prefix, XPRODUCT);
+            }
+            free(nword);
+         }
+       }
+    }
+    for (i=0; i < numsfx; i++) {
+       suf_chk(word, len, stable[i].aep, stable[i].num, NULL, 0);
+    }
+}
+
+
+
+/* lookup a root word in the hashtable */
+
+struct hentry * lookup(const char *word)
+{
+    struct hentry * dp;
+    dp = &tableptr[hash(word)];
+    if (dp->word == NULL) return NULL;
+    for (  ;  dp != NULL;  dp = dp->next) {
+      if (strcmp(word,dp->word) == 0) return dp;
+    }
+    return NULL;
+}
+
+
+
+/* add a word to the hash table */
+
+int add_word(char * word)
+{
+    int i;
+    struct hentry * dp;
+    struct hentry * hp = (struct hentry *) malloc (sizeof(struct hentry));
+
+    hp->word = word;
+    hp->affstr = NULL;
+    hp->keep = 0;
+    hp->next = NULL;
+
+    i = hash(word);
+    dp = &tableptr[i];
+    
+    if (dp->word == NULL) {
+      *dp = *hp;
+       free(hp);
+    } else {
+      while (dp->next != NULL) dp=dp->next; 
+      dp->next = hp;
+    }
+    return 0;
+}     
+
+
+
+/* load a word list and build a hash table on the fly */
+
+int load_tables(FILE * wdlst)
+{
+  char * ap;
+  char ts[MAX_LN_LEN];
+
+  /* first read the first line of file to get hash table size */
+  if (! fgets(ts, MAX_LN_LEN-1,wdlst)) return 2;
+  mychomp(ts);
+  tablesize = atoi(ts);
+  tablesize = tablesize + 5;
+  if ((tablesize %2) == 0) tablesize++;
+
+  /* allocate the hash table */
+  tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
+  if (! tableptr) return 3;
+
+  /* loop thorugh all words on much list and add to hash
+   * table and store away word and affix strings in tmpfile
+   */
+
+  while (fgets(ts,MAX_LN_LEN-1,wdlst)) {
+    mychomp(ts);
+    ap = mystrdup(ts);
+    add_word(ap);
+
+  }
+  return 0;
+}
+
+
+/* the hash function is a simple load and rotate
+ * algorithm borrowed
+ */
+
+int hash(const char * word)
+{
+    int i;
+    long  hv = 0;
+    for (i=0; i < 4  &&  *word != 0; i++)
+	hv = (hv << 8) | (*word++);
+    while (*word != 0) {
+      ROTATE(hv,ROTATE_LEN);
+      hv ^= (*word++);
+    }
+    return (unsigned long) hv % tablesize;
+}
+
+
+void add_affix_char(struct hentry * ep, char ac)
+{
+  int al;
+  int i;
+  char * tmp;
+  if (ep->affstr == NULL) {
+     ep->affstr = (char *) malloc(2);
+     *(ep->affstr) = ac;
+     *((ep->affstr)+1) = '\0';
+     return;
+  }
+  al = strlen(ep->affstr);
+  for (i=0; i< al; i++)
+    if (ac == (ep->affstr)[i]) return;
+  tmp = calloc(al+2,1);
+  memcpy(tmp,ep->affstr,(al+1));
+  *(tmp+al) = ac;
+  *(tmp+al+1)='\0';
+  free(ep->affstr);
+  ep->affstr = tmp;
+  return;
+}
+
+
+/* add a prefix to word */
+void pfx_add (const char * word, int len, struct affent* ep, int num)
+{
+    struct affent *     aent;
+    int			cond;
+    int	tlen;
+    unsigned char *	cp;		
+    int			i;
+    char *              pp;
+    char	        tword[MAX_WD_LEN];
+
+    
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+        /* now make sure all conditions match */
+        if ((len > aent->stripl) && (len >= aent->numconds)) {
+
+            cp = (unsigned char *) word;
+            for (cond = 0;  cond < aent->numconds;  cond++) {
+	       if ((aent->conds[*cp++] & (1 << cond)) == 0)
+	          break;
+            }
+            if (cond >= aent->numconds) {
+
+	      /* we have a match so add prefix */
+              tlen = 0;
+              if (aent->appndl) {
+	          strcpy(tword,aent->appnd);
+                  tlen += aent->appndl;
+               } 
+               pp = tword + tlen;
+               strcpy(pp, (word + aent->stripl));
+               tlen = tlen + len - aent->stripl;
+
+               if (numwords < MAX_WORDS) {
+                  wlist[numwords].word = mystrdup(tword);
+                  wlist[numwords].pallow = 0;
+                  numwords++;
+               }
+	    }
+	}
+    }
+}
+
+
+/* add a suffix to a word */
+void suf_add (const char * word, int len, struct affent * ep, int num)
+{
+    struct affent *     aent;	
+    int	                tlen;	
+    int			cond;	
+    unsigned char *	cp;
+    int			i;
+    char	        tword[MAX_WD_LEN];
+    char *              pp;
+
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+      /* if conditions hold on root word 
+       * then strip off strip string and add suffix
+       */
+
+      if ((len > aent->stripl) && (len >= aent->numconds)) {
+	cp = (unsigned char *) (word + len);
+	for (cond = aent->numconds;  --cond >= 0;  ) {
+	    if ((aent->conds[*--cp] & (1 << cond)) == 0) break;
+	}
+	if (cond < 0) {
+	  /* we have a matching condition */
+          strcpy(tword,word);
+          tlen = len;
+	  if (aent->stripl) {
+             tlen -= aent->stripl;
+          }
+          pp = (tword + tlen);
+          if (aent->appndl) {
+	       strcpy (pp, aent->appnd);
+	       tlen += aent->stripl;
+	  } else *pp = '\0';
+
+          if (numwords < MAX_WORDS) {
+              wlist[numwords].word = mystrdup(tword);
+              wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
+              numwords++;
+          }
+	}
+      }
+    }
+}
+
+
+
+int expand_rootword(const char * ts, int wl, const char * ap, int al)
+{
+    int i;
+    int j;
+    int nh=0;
+    int nwl;
+
+    for (i=0; i < numsfx; i++) {
+      if (strchr(ap,(stable[i].aep)->achar)) {
+         suf_add(ts, wl, stable[i].aep, stable[i].num);
+      }
+    }
+   
+    nh = numwords;
+
+    if (nh > 1) {
+       for (j=1;j<nh;j++){
+         if (wlist[j].pallow) {
+            for (i=0; i < numpfx; i++) {
+               if (strchr(ap,(ptable[i].aep)->achar)) {
+		 if ((ptable[i].aep)->xpflg & XPRODUCT) {
+                   nwl = strlen(wlist[j].word);
+                   pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
+		 }
+	       }
+	    }
+	 }
+       }
+    }
+
+    for (i=0; i < numpfx; i++) {
+       if (strchr(ap,(ptable[i].aep)->achar)) {
+          pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
+       }
+    }
+    return 0;
+}
+
+
+
+/* strip strings into token based on single char delimiter
+ * acts like strsep() but only uses a delim char and not
+ * a delim string
+ */
+char * mystrsep(char ** stringp, const char delim)
+{
+  char * rv = NULL;
+  char * mp = *stringp;
+  int n = strlen(mp);
+  if (n > 0) {
+    char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
+    if (dp) {
+      int nc;
+      *stringp = dp+1;
+      nc = (int)((unsigned long)dp - (unsigned long)mp);
+      rv = (char *) malloc(nc+1);
+      if (rv) {
+        memcpy(rv,mp,nc);
+        *(rv+nc) = '\0';
+      }
+    } else {
+      rv = (char *) malloc(n+1);
+      if (rv) {
+        memcpy(rv, mp, n);
+        *(rv+n) = '\0';
+        *stringp = mp + n;
+      }
+    }
+  }
+  return rv;
+}
+
+
+char * mystrdup(const char * s)
+{
+  char * d = NULL;
+  if (s) {
+    int sl = strlen(s)+1;
+    d = (char *) malloc(sl);
+    if (d) memcpy(d,s,sl);
+  }
+  return d;
+}
+
+
+void mychomp(char * s)
+{
+  int k = strlen(s);
+  if (k > 0) *(s+k-1) = '\0';
+  if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
+}
+