/* txt2phoNL: DOS version by Eric Auer (Perl was more fun...) */
/* C port based on my 11/2001 university homework             */
/* "word count and char statistics" ... pipeable filter       */

#include <stdio.h>
#include <string.h> /* strncpy etc.     */
#include <ctype.h>  /* isupper, isalpha */

/* #define DUMMY_XLATE 1 */

/* Tracing hooks, all taking a format string a and one value b.
   DBG and DBG2 are the silenced forms: each argument is evaluated
   exactly once and the result is thrown away, so nothing is printed.
   DBG3 is the live form and prints b to stderr using format a. */
#define DBG(a,b)  ((void)(a), (void)(b))
#define DBG2(a,b) ((void)(a), (void)(b))
#define DBG3(a,b) fprintf(stderr,(a),(b))

/* ********************************************************** */

/*
 * phoneout: write one output line for the phone code pho to out.
 *
 * - A code that starts an entry of expansions[] is written as the text
 *   following that first character (e.g. '1' is written as "Ei").
 * - Codes listed in long_codes get duration 200, all others 100.
 * - Codes listed in pause_codes are written as "_ 100 (50, freq)",
 *   where freq is 252 for '?', 178 for ',', 158 for '.', else 200.
 * - When such a pause directly follows a code from voiced_codes, a
 *   short "@ 25" line is written first. This is the author's kludge
 *   for end-of-word devoicing: the consonant has already been written
 *   and cannot be taken back.
 *
 * The previous code and symbol live in static storage, so every call
 * depends on the call before it.
 *
 * Still missing (noted by the original author): further sanitizer
 * rules such as collapsing doubled voiced/semi phones, dropping a
 * schwa next to a diphthong, and inserting a schwa before l/r/j or
 * before a semi-vowel.
 */
void phoneout(FILE* out, char pho) {
  /* first char: internal code, rest: symbol text to write */
  static const char * const expansions[] = {
    "1Ei", "39y", "4Au", "5ai", "6oi", "7ui", "8Ai", "9Oi",
    " _",  "?_",  "._",  ",_",  NULL
  };
  static const char pause_codes[]   = ".?,_ ";
  static const char long_codes[]    = "iuyaeo213456789rmnNJ";
  static const char voiced_codes[]  = "bdcvzZGhJjg";
  /* triples: voiced phone, devoiced counterpart, separator */
  static const char devoice_pairs[] = "bp dt cx vf zs ZS Gx h_ JI jI gk ";

  static char prev_symbol[5] = { ' ', ' ', ' ', ' ', ' ' };
  static char prev_code = ' ';

  const char * const * entry;
  const char * pair;
  char symbol[5];
  int duration;
  int freq;
  int add_schwa = 0;

  /* default: the code is its own symbol */
  symbol[0] = pho;
  symbol[1] = '\0';
  for (entry = expansions; *entry != NULL; entry++) {
    if ((*entry)[0] == pho) {
      strcpy(symbol, *entry + 1);
      break; /* keys are unique, so the first hit is the only hit */
    }
  }

  /* a pause right after a voiced phone: remember to emit the schwa.
     No devoiced counterpart is itself a key of the table, so one
     match is all there can be. */
  if ((symbol[0] == '_') && (strchr(voiced_codes, prev_code) != NULL)) {
    for (pair = devoice_pairs; *pair != '\0'; pair += 3) {
      if (pair[0] == prev_symbol[0]) {
        add_schwa = 1;
        break;
      }
    }
  }

  duration = (strchr(long_codes, pho) != NULL) ? 200 : 100;

  switch (pho) {
    case '?': freq = 252; break; /* prosody up */
    case ',': freq = 178; break; /* prosody down a bit */
    case '.': freq = 158; break; /* prosody down */
    default:  freq = 200; break;
  }

  if (strchr(pause_codes, pho) != NULL) {
    if (add_schwa) {
      fputs("@ 25\n", out); /* evil devoice: short schwa */
    }
    fprintf(out, "_ 100 (50, %d)\n", freq); /* pause */
  } else {
    fprintf(out, "%s %d\n", symbol, duration); /* audible */
  }

  /* keep this phone around for the next call */
  strcpy(prev_symbol, symbol);
  prev_code = pho;
}

/* ********************************************************** */

/* DBG is normally defined near the top of this file; this silent
   fallback only takes effect when the block is compiled without it. */
#ifndef DBG
#define DBG(a,b) ((void)0)
#endif

/*
 * specials: spell out digits and punctuation as Dutch words.
 *
 * ch points to the current input character and is updated in place.
 *
 * - If ch[0] starts an entry of lexicon[], ch[0] becomes ' ' and the
 *   rest of that entry is queued; the function returns 1.
 * - While text is queued, each call ignores the incoming ch[0],
 *   stores the next queued character there and returns 1.
 * - Once the queue is empty, the next call stores a closing ' ',
 *   clears the queue and returns 0.
 * - Any other character is left untouched and the function returns 0.
 *
 * A return value of 1 therefore means "call again before reading more
 * input". The queue is kept in static storage, so the function is
 * stateful across calls.
 */
int specials(volatile char * ch) {
  static const char * pendstr = NULL; /* unread part of the queued word */
  /* first char: the special character, rest: its spoken replacement */
  static const char * const lexicon[] =
            { "0null", "1een", "2twee", "3drie", "4vier",
              "5vijf", "6zes", "7zeven", "8acht", "9negen",
              "*ster", "+plus", "#hekje", ".punt.", ",koma,",
              ">groter", "<kleiner", "^dakje",
              /* NOTE(review): the key byte of the "grad" entry was
                 missing in the copy under review, which made the plain
                 letter 'g' expand to " rad ". It is restored here as
                 the degree sign in CP437/850 (octal 370) and in
                 ISO-8859-1 (octal 260) -- confirm against the upstream
                 source which encoding was meant. */
              "\370grad", "\260grad",
              "=is",
              "!uitroepteken.", "\"aanhaalingsteken", "$dollar",
              "%percent", "&en", "/door", "(haakje?",
              ")eind haakje,", "\\backslash", "?vraagteken?",
              "|pijpteken?", "_onderstreepje", "-streepje",
              ";semikolon?", ":dubbele punt?", "@aapestaartje",
              "{accolade","}eind accolade", "[hoekje", "~tilde",
              NULL
            };
  int i;

  DBG("[%c/",ch[0]);
  DBG("%s\n",pendstr);

  if (pendstr != NULL) {
    if (pendstr[0] == 0) {
      ch[0] = ' '; /* end with a space, nothing left pending */
      DBG("/EOF]%c",'\n');
      pendstr = NULL; /* return to normal! */
      return 0;
    }
    ch[0] = pendstr[0]; /* hand out one char of the queued word */
    pendstr++;          /* the next call gets the next char */
    DBG("%c",ch[0]);
    DBG("%s]",pendstr);
    return 1;
  }

  /* nothing queued: check if this char has a translation */
  for (i=0; lexicon[i] != NULL; i++) {
    if (lexicon[i][0] == ch[0]) { /* found one */
      pendstr = &lexicon[i][1];   /* replacement string */
      ch[0] = ' ';                /* first char is space */
      DBG("/%c",ch[0]);
      DBG("/%s]",pendstr);
      return 1;
    }
  }

  DBG("---]%c",'\n');
  return 0;
}

/* ********************************************************** */

/*
 * translate: apply the longest matching rewrite rule to the start of
 * the window win and send the resulting phone codes to phoneout().
 *
 * Every rule string is a key of fixed length (5, 4, 3, 2 or 1 chars)
 * followed directly by the phone codes it produces. The tables are
 * tried from the longest key to the shortest, and within one table
 * the first matching rule wins.
 *
 * Returns the number of window characters that were consumed, or 0 if
 * no rule matched (nothing is written in that case). When the key of
 * a multi-character rule ends on a space in the window, that space is
 * not counted as consumed, so it stays available for the next rule.
 *
 * With DUMMY_XLATE defined, the function just copies win[0] to out
 * and returns 1.
 */
int translate(char * win, FILE* out) { /* convert "atomic" */
#ifdef DUMMY_XLATE
  fprintf(out,"%c",win[0]);
  return 1;
#else
  static const char * const five[] = {
    " lijk l1k", "lijk l@k ",
    "elijkEl1k", " m n  m@n",
    " m'n  m@n", "bijv b1 vorbelt",
    "bijv.b1 vorbelt", NULL
  };
  static const char * const four[] = {
    "httpha te te pe ",
    "htmlha te em el ",
    " je  je",  "agenaxEn",
    "ooieoie",  "ooitoIt",
    "hou h4w",  " pc pe se ",
    "evenev@n", "ipv.in plats van", NULL
  };
  static const char * const three[] = {
    "aai5", "ooi6", "oei7", "ai 8",
    "oi 9", "tjec@", "ageaZe", "ch x",
    "ftpef te pe ", "wwwwe we we ",
    "htmha te em ", "je j@",
    "eeue2", "en @n", "he he", "ps pe es",
    "ppspe pe es", " ngen ge", "njlJ@l",
    NULL
  };
  static const char * const two[] = {
    "iei", "oeu", "uuy", "aaa", "eee",
    "ooo", "eu2", "ei1", "ui3", "ou4",
    "ij1", "sjS", "g x", "njJ", "l l",
    "ngN", "dtt", "chx", "iuju", "dld@l",
    "lfl@f", "bbb", "ddd", "e e", "d t",
    "hrr", "hll", "o o", "e e", "yl1l",
    "zlz l", "a a", "i i", "u y", "hjj",
    "hrr", NULL
  };
  static const char * const one[] = {
    "aA", "bb", "ck", "dd", "eE",
    "ff", "gx", "hh", "iI", "jj",
    "kk", "ll", "mm", "nn", "oO",
    "pp", "qk", "rr", "ss", "tt",
    "uY", "vv", "ww", "xks", "yj",
    "zz", "  ", "..", "??", ",,", NULL
  };
  /* rule tables in the order they are tried: longest key first */
  static const struct {
    const char * const * rules;
    int keylen;
  } tiers[] = {
    { five, 5 }, { four, 4 }, { three, 3 }, { two, 2 }, { one, 1 }
  };

  const char * const * rule;
  const char * match = NULL;
  int matchlen = 0;
  size_t tier;

  for (tier = 0;
       (match == NULL) && (tier < sizeof tiers / sizeof tiers[0]);
       tier++) {
    for (rule = tiers[tier].rules; *rule != NULL; rule++) {
      if (strncmp(*rule, win, (size_t) tiers[tier].keylen) == 0) {
        matchlen = tiers[tier].keylen;
        match = *rule + matchlen; /* phones start right after the key */
        break;
      }
    }
  }

  if (matchlen == 0) {
    return 0; /* no rule applies */
  }

  /* leave a trailing space of the key in the window */
  if ((matchlen > 1) && (win[matchlen-1] == ' ')) {
    matchlen--;
  }

  DBG2("[%s]",match);

  for (; *match != '\0'; match++) {
    phoneout(out, *match);
  }

  return matchlen;

#endif
}

/* ********************************************************** */

/*
 * Return the plain letter an accented input byte is read as, or 0 when
 * the byte is not one of the accented letters handled here.
 *
 * NOTE(review): the byte values of these character constants were lost
 * in the copy under review (the comparisons were against empty ''
 * constants, which does not compile). Only the target letters u, o, a
 * (two source characters each), s and n (one each) survived. They are
 * restored here as the umlauts, sharp s and n-tilde in both CP437/850
 * and ISO-8859-1 -- confirm against the upstream source.
 */
static int txt2pho_fold_accent(unsigned char byte) {
  switch (byte) {
    case 0x81: case 0x9A:   /* CP437/850:  u-umlaut, U-umlaut */
    case 0xFC: case 0xDC:   /* ISO-8859-1: u-umlaut, U-umlaut */
      return 'u';
    case 0x94: case 0x99:   /* CP437/850:  o-umlaut, O-umlaut */
    case 0xF6: case 0xD6:   /* ISO-8859-1: o-umlaut, O-umlaut */
      return 'o';
    case 0x84: case 0x8E:   /* CP437/850:  a-umlaut, A-umlaut */
    case 0xE4: case 0xC4:   /* ISO-8859-1: a-umlaut, A-umlaut */
      return 'a';
    case 0xE1:              /* CP437/850:  sharp s */
    case 0xDF:              /* ISO-8859-1: sharp s */
      return 's';
    case 0xA4:              /* CP437/850:  n-tilde */
    case 0xF1:              /* ISO-8859-1: n-tilde */
      return 'n';
    default:
      return 0;
  }
}

/* Shift the window left by count characters. The last byte stays in
   place, so its value (the terminating 0) is what moves in from the
   right. */
static void txt2pho_scroll(char * window, size_t size, int count) {
  while (count-- > 0) {
    memmove(window, window + 1, size - 1);
  }
}

/*
 * txt2pho: read text from in and write phone lines to out.
 *
 * Each input character is lower-cased, stripped of its accent,
 * reduced to a space if it is a control or whitespace character and
 * replaced by 'm' if it is still not printable. specials() then gets
 * the chance to spell it out as a word; while it reports pending
 * output, no new input is read. The characters are collected in a
 * small window, and whenever the window holds MAXCHECK+1 characters
 * translate() is run on its start and the matched part is scrolled
 * away. If nothing matches, a warning goes to stderr and the
 * offending character is dropped when the next one arrives.
 *
 * At end of input the window is flushed with at most MAXCHECK further
 * translations, then two newlines are written to out.
 *
 * Always returns 42.
 */
int txt2pho(FILE* in, FILE* out) { /* convert Dutch to MBROLA */
#define MAXCHECK 5 /* max len of translate rules */
  int c = 0;
  int pos, folded, flushed;
  unsigned char raw;
  char ch;
  char window[16];
  int pending = 0;
  int matchlen;

  memset(window, ' ', sizeof window - 1);
  window[sizeof window - 1] = 0;
  pos = 1; /* index of the most recently stored char */

  while ( (pending != 0) || ((c=getc(in)) != EOF )) {
    /* classify the byte as unsigned char: handing a negative char
       to the <ctype.h> functions is undefined behaviour */
    raw = (unsigned char) c;
    if (isupper(raw)) { raw = (unsigned char) tolower(raw); } /* ignore case */

    folded = txt2pho_fold_accent(raw);
    if (folded != 0) { raw = (unsigned char) folded; }

    if (iscntrl(raw)) { raw = ' '; }

    if (isspace(raw)) {
      raw = ' '; /* cr etc */
    } else if (!isprint(raw)) {
      raw = 'm'; /* mmmm... forgot my text */
    }
    ch = (char) raw;

    /* FEATURE MISSING: strip > at line start  */
    /* FEATURE MISSING: translate :// to " "   */
    /* FEATURE MISSING: translate :-) :-( ;-)  */
    DBG("%c->",ch);

    /* while a word is pending, specials() overwrites ch with the
       next char of that word, whatever was computed above */
    pending = specials(&ch); /* "." -> "punt"," " etc */
    DBG("%c[",ch);
    DBG("%d]\n",pending);

    if (pos < MAXCHECK) {
      pos++;
      window[pos] = ch; /* refill */
    } else {
      /* the previous translate() failed: drop the char it choked on */
      txt2pho_scroll(window, sizeof window, 1);
      window[pos] = ch; /* scroll */
      DBG("%c-",ch);
    }

    if (pos == MAXCHECK) { /* only translate while queue full */
      /* we find the longest match of a rule, matching at     */
      /* window[0..?]: IF found, scroll away the match (apart */
      /* from " " at the end), adjust pos doing that.         */
      /* ELSE just wait for a queue refill.                   */

      DBG("\n%s->",window);
      matchlen = translate(window,out);
      DBG("[%d]\n",matchlen);

      if (matchlen > 0) {
        txt2pho_scroll(window, sizeof window, matchlen);
        pos -= matchlen;
      } else {
        /* the problem will scroll away, but warn anyway: */
        fprintf(stderr,"Untranslateable: <%s...>\n",window);
      }
    }
    /* else: wait for more chars coming in */
  }

  /* Now flush the window: up to MAXCHECK more translations. An
     untranslatable char at the front is reported and dropped, as in
     the main loop, instead of ending the flush and losing what is
     behind it. Every pass scrolls a 0 in from the right, so the loop
     ends at the latest when that 0 reaches the front. */
  flushed = 0;
  while ((flushed < MAXCHECK) && (window[0] != 0)) {
    matchlen = translate(window,out);
    if (matchlen > 0) {
      txt2pho_scroll(window, sizeof window, matchlen);
      flushed++;
    } else {
      fprintf(stderr,"Untranslateable: <%s...>\n",window);
      txt2pho_scroll(window, sizeof window, 1);
    }
  }
  fprintf(out,"\n\n");

  return(42);
}

/* ********************************************************** */

/*
 * main: command line front end.
 *
 * Usage: prog [infile] [outfile]
 *   infile   file to read; "-" or no argument means stdin
 *   outfile  file to write; opened in append mode, stdout if omitted
 *
 * Returns 1 after printing the usage text (more than two arguments),
 * when a file cannot be opened or when closing the output file
 * reports an error; returns 0 otherwise.
 */
int main(int argc, char** argv) {
  FILE* theinfile;
  FILE* theoutfile;
  int args;
  int status = 0;
  const char* my_basename = "txt2phoNL"; /* used if argv[0] is missing */

  /* name for messages: argv[0] without its directory part */
  if ((argc > 0) && (argv[0] != NULL) && (argv[0][0] != 0)) {
    my_basename = strrchr(argv[0],'/');
    if (my_basename == NULL) { my_basename = argv[0]; }
      else                   { my_basename++; }
  }

  args = argc - 1; /* do not count argv[0] the own-name */

  if (args > 2) {
    printf("Convert Dutch text to mbrola phoneme files\n");
    printf("Usage: %s [infile] [outfile]\n",my_basename);
    return(1);
  }

  if (args < 2) {
    theoutfile = stdout;
  } else {
    theoutfile = fopen(argv[2],"a");
  }
  if (args < 1) {
    theinfile = stdin;
  } else {
    if (strcmp(argv[1],"-")) {
      theinfile = fopen(argv[1],"r");
    } else {
      theinfile = stdin;
    }
  }

  if (theoutfile == NULL) {
    fprintf(stderr, "%s: could not open output\n",my_basename);
  }
  if (theinfile == NULL) {
    fprintf(stderr, "%s: could not open input\n", my_basename);
  }
  if ((theinfile == NULL) || (theoutfile == NULL)) {
    /* do not leak the stream that did open */
    if ((theoutfile != NULL) && (theoutfile != stdout)) {
      (void) fclose(theoutfile);
    }
    if ((theinfile != NULL) && (theinfile != stdin)) {
      (void) fclose(theinfile);
    }
    return(1);
  }

  /* ************************* */
  txt2pho(theinfile, theoutfile);
  /* ************************* */

  if (theoutfile != stdout) {
    /* fclose flushes buffered output, so this is where a failed
       write would show up */
    if (fclose(theoutfile) != 0) {
      fprintf(stderr, "%s: error writing output\n", my_basename);
      status = 1;
    }
  }
  if (theinfile != stdin) {
    (void) fclose(theinfile);
  }

  return(status);
}
