/**
 *  Helper class to represent keystroke sequence to glyph sequence
 *  assignments. Supports various import/export formats.
 *  2003 by Eric Auer.
 *
 * This file is part of the Input Method Editor made at
 * http://www.mpi.nl/ and is free software, licensed under
 * the GNU General Public License (GPL) which can
 * be found at http://www.gnu.org/licenses/gpl.txt or in the
 * file EditIM-COPYING.txt included in this distribution.
 *
 * Note that some other EditIM files have LGPL license.
 * GPL means: You may copy, use and edit code (not license) at
 * your wish. Everything that contains GPLed code must be GPLed,
 * too. Sources must be available to all users of the binaries.
 * With GPL, you still have to provide access to THIS source
 * file, but the rest of your project can stay closed source.
 */

package guk.editIM;

// import guk.editIM.DebugEditIM;
// import guk.editIM.KeyStrokeParser; // HTML highlighting
// import.guk.editIM.MenuHelpers; // toHex()

import java.util.List;
import java.util.Vector;
import java.util.Collection;
import java.util.Iterator;
import java.util.StringTokenizer;

import java.awt.event.KeyEvent;
import javax.swing.KeyStroke;
import java.awt.Canvas;

// can use KeyStroke.getKeyStrokeForEvent(KeyEvent ke) ...
// GUK IM (GIM) only uses KeyEvent. KeyStroke is Swing (newer),
// but for key event processing in an IM, KeyEvent is probably better.

// for the other way round:
//            buf.add(new KeyEvent(new Canvas() /* source component */,
//            (kstrok.isOnKeyRelease() ? KeyEvent.KEY_RELEASED :
//              (kstrok.getKeyCode() == KeyEvent.VK_UNDEFINED ?
//                 KeyEvent.KEY_TYPED : KeyEvent.KEY_PRESSED)),
//            System.currentTimeMillis() /* time */,
//            kstrok.getModifiers(), kstrok.getKeyCode(),
//            kstrok.getKeyChar() ));
// (time and id cannot be modified later, Component must be non-null)

/**
 *  Using nice keystroke vs. keyevent conversion could help
 *  to make GUK IM more compatible / universal. Investigate that!
 *  See: KeyStroke.getKeyStrokeForEvent(...) and new KeyEvent(...).
 *  (so: change this to use key strokes rather than key events.)
 *  Would also have to modify GUK IM in a similar way, of course.
 */


/**
 *  Helper class to represent keystroke sequence to glyph sequence
 *  assignments. Supports various import/export formats and is the
 *  main cleverness part for highlighting and file read write
 *  functionality.
 *  2003 by Eric Auer.
 */
public class AssignObject {


  /*
   * Static values that encode import/export formats. They are the
   * legal values for the format argument of exportString,
   * exportGlyphs and exportKeys.
   */

  /**
   * Ad hoc format for human readers. Most useful when only
   * single fields are exported. Glyphs are exported unchanged.
   */
  static final public int UNICODE_HUMAN = 0; // some easy to read format
  /**
   * As UNICODE_HUMAN, but glyphs and keys go through escapeString,
   * so only printable ASCII (space up to character code 125) is left
   * as is; newline becomes \\n and everything else a \\u1234 escape.
   */
  static final public int ASCII_HUMAN = 1; // with \\u1234 for all non-ascii stuff
  /**
   * Basically as UNICODE_HUMAN, but uses HTML for highlighting.
   * Escape Unicode glyphs as &amp;#1234;.
   */
  static final public int UNICODE_HTML  = 2; // easy to read, highlighted
  /**
   * As UNICODE_HTML, but non-ASCII characters are first turned
   * into visible \\u1234 escapes.
   */
  static final public int ASCII_HTML = 3; // with \\u1234 for all non-ascii

  /**
   * THE important format for importing and exporting .gim files
   * (GUK IM keymaps). Most important modes are importing and
   * exporting whole lines, but parts are okay, too. Headers and
   * empty / comment only lines must be processed by the caller,
   * as we only process data lines. GUK IM are made by GATE.
   * @see <a href="http://gate.ac.uk/">GATE Unicode Toolkit</a>
   */
  static final public int GIM_FILE = 4; // for GUK IM
  /**
   * A possible future expansion of GIM where \\k+pressed HOME+
   * and similar things are allowed. The editor and the HTML
   * formats allow those, but GUK IM does not. Probably it is
   * not even useful for normal users at all.
   */
  static final public int XGIM_FILE = 5; // with \\k+stuff+ escapes
  /**
   * The ad-hoc file format that MPI IM keymaps use.
   * This is plain UTF-8 text. First column is a key sequence,
   * then a tab follows, then one or more tab-separated glyph
   * strings form the rest of the line. No comments can be
   * stored! Only ONE glyph string is allowed by design: The
   * caller has to split up lines with multiple glyph strings,
   * because one AssignObject is for editing exactly one
   * mapping of a key sequence to a glyph string.
   * MPI IM are used by ELAN, EUDICO.
   * NOTE(review): the export code of this class rejects spaces in
   * glyphs because "U8 has space reserved for choices", which
   * suggests space-separated rather than tab-separated choices --
   * confirm which separator the format really uses.
   * @see <a href="http://www.mpi.nl/">MPI Psycholinguistics
   * Nijmegen</a>
   */
  static final public int U8_FILE = 6; // for MPI IM
  /**
   * The keymap data format of Yudit. Yudit is a free Unicode
   * editor that can run on otherwise not Unicode enabled systems.
   * Notice that a few Yudit files (currently only Hangul and the
   * &quot;Type Unicode glyphs by typing u1234&quot; map) are not
   * one line per mapping but have sections that are
   * multiplied with each other. We cannot handle this, as one
   * AssignObject only holds one key sequence and one glyph string,
   * by design. You have to multiply out sections BEFORE processing
   * a file with this class.
   * @see <a href="http://www.yudit.org/">Yudit Unicode editor</a>
   */
  static final public int YUDIT_FILE = 7; // for Yudit


  /**
   * Used as an HTML highlighting helper for the HTML modes.
   * Both constructors replace the initial null with a real
   * KeyStrokeParser; exportKeys calls its highlight method.
   */
  KeyStrokeParser htmlizer = null; // new KeyStrokeParser();
  /**
   * Set to true to enable the use of HTML font name tags in
   * htmlHeader / htmlFooter and in the constructor that takes a Font.
   */
  static final public boolean FONTbyTAG = false;


  /**
   * Opening markup used for creating HTML locally. The font tag is
   * only present when FONTbyTAG is true; the constructor that takes
   * a Font may replace this header with one naming that font.
   */
  String htmlHeader = "<html>" + (FONTbyTAG ?
      "<font name=\"Arial Unicode MS\">" // can be overridden
      : "") + "<pre>";
  /**
   * Closing markup matching htmlHeader.
   */
  String htmlFooter = "</pre>"
    + (FONTbyTAG ? "</font>" : "") + "</html>";


  /**
   * Here we keep our keystroke sequence. The export methods cast
   * every element to KeyEvent.
   */
  List keys = new Vector();

  /**
   * Key events need some non-null component as origin, we
   * use this canvas for that (shared by all instances).
   */
  static Canvas canvas = new Canvas();


  /**
   * Here we keep our glyph sequence. setGlyphs stores the empty
   * string instead of null.
   */
  String glyphs = "";


  /**
   * Here we store comments. setComments stores the empty string
   * instead of null.
   */
  String comments = "";


  /**
   * Creates an empty assignment: the key list, the glyph string and
   * the comments keep their empty defaults. The HTML highlighter is
   * created without a font.
   */
  public AssignObject() {
    this.htmlizer = new KeyStrokeParser(); // font-less highlighter
  } // constructor


  /**
   * Creates an empty assignment whose HTML output may name a font.
   * The font only matters while the FONTbyTAG flag is true: then its
   * name is written into the HTML header and the font is handed to
   * the highlighter. In every other case this constructor does the
   * same as the one without arguments.
   * @param font A Font to be mentioned in a font name HTML tag
   * if FONTbyTAG is configured as true. May be null.
   */
  public AssignObject(java.awt.Font font) {
    boolean useFontTag = FONTbyTAG && (font != null);
    if (!useFontTag) {
      // no HTML font set for htmlizer in this case
      htmlizer = new KeyStrokeParser();
      return;
    }
    htmlHeader = "<html><font name=\"" + font.getName() + "\"><pre>";
    htmlizer = new KeyStrokeParser(font); // font used for HTML
  } // constructor with Font


  /**
   * Returns the glyph string of this assignment.
   * @return The stored glyph sequence.
   */
  public String getGlyphs() {
    return this.glyphs;
  } // getGlyphs


  /**
   * Returns the key sequence of this assignment.
   * @return The list in which the keys are stored. This is the
   * internal list itself, not a copy.
   */
  public Collection getKeys() {
    return this.keys;
  } // getKeys


  /**
   * Returns the comments of this assignment.
   * @return The stored comment text.
   */
  public String getComments() {
    return this.comments;
  } // getComments


  /**
   * Stores new comments for this assignment.
   * @param comS The comment text; null is stored as the
   * empty string.
   */
  public void setComments(String comS) {
    comments = (comS == null) ? "" : comS;
  } // setComments


  /**
   * Stores a new glyph string for this assignment.
   * @param glyS The glyph sequence; null is stored as the
   * empty string.
   */
  public void setGlyphs(String glyS) {
    glyphs = (glyS == null) ? "" : glyS;
  } // setGlyphs


  /**
   * Replaces the key sequence of this assignment. The elements are
   * copied into a fresh list, so later changes to the argument do
   * not affect this object.
   * @param keyColl The new keys; null leaves the key list empty.
   */
  public void setKeys(Collection keyColl) {
    keys = new Vector(); // always start from an empty list
    if (keyColl != null) {
      keys.addAll(keyColl);
    }
  } // setKeys


  /* *** */


  /**
   * Helper method that builds the visible \\u1234 style escape for
   * one char: a backslash, the letter u and the hex text that
   * MenuHelpers.toHex returns (presumably four digits, giving six
   * characters in total -- confirm against MenuHelpers).
   * @param ch The character to be escaped.
   * @return The escape text for the character.
   */
  static public String escapeGlyph(char ch) {
    // could check for 32 bit char here
    final String hex = MenuHelpers.toHex((int)ch);
    return "\\u" + hex;
  } // static escapeGlyph


  /**
   * Helper to convert a char to HTML only when needed.
   * Every char above code 125 becomes a numeric entity, and
   * lt, gt and amp become their named entities. A space is
   * hardened to an underlined nonbreakable space. Quotes and all
   * other chars are returned unchanged.
   * <b>(Unicode needs HTML 4.0 to render,
   * JLabels only render ISO-8859-1 HTML 3.2)</b>
   * @param ch An arbitrary Unicode character.
   * @return The text that stands for the character in HTML.
   */
  static public String escapeToHTML(char ch) {
    if (ch > (char)125) {
      // in JLabels, a box is shown for all non- ISO-8859-1
      // chars, no matter if escaped or not. However, all
      // ISO-8859-1 (single byte) chars work both escaped
      // and unescaped.
      return "&#" + (int)ch + ";";
    }
    switch (ch) {
      case '<': return "&lt;";
      case '>': return "&gt;";
      case '&': return "&amp;";
      case ' ': return "<u>&nbsp;</u>";
      default:  return String.valueOf(ch); // safe char
    }
  } // escapeToHTML


  /**
   * Helper method to convert an int to a 0x1234 style string:
   * the prefix 0x followed by the hex text that MenuHelpers.toHex
   * returns (presumably of fixed length -- confirm against
   * MenuHelpers).
   * @param i The value to be converted.
   * @return The prefixed hex text.
   */
  static public String toHex(int i) {
    // could check for 32 bit char here
    final String digits = MenuHelpers.toHex(i);
    return "0x" + digits;
  } // static toHex


  /**
   * Escapes a string char by char, either for HTML or for plain
   * human readable text.
   * In HTML mode every char is passed through escapeToHTML.
   * In plain mode a newline becomes the two chars \\n, chars from
   * space up to code 125 are copied, and everything else is
   * replaced by the \\u1234 form that escapeGlyph produces.
   * @param str The string to be escaped. Must not be null.
   * @param html If true, create HTML escapes, otherwise create
   * \\u1234 escapes visible to the user.
   * @return The escaped string.
   */
  public static String escapeString(String str, boolean html) {
    final int len = str.length();
    StringBuffer out = new StringBuffer();
    int pos = 0;
    while (pos < len) {
      final char c = str.charAt(pos++);
      if (html) {
        // escapes the reserved HTML chars and uses numeric
        // entities for Unicode
        out.append(escapeToHTML(c));
        continue;
      }
      if (c == '\n') {
        out.append("\\n"); // newline
      } else if ((c < ' ') || (c > (char)125)) {
        out.append(escapeGlyph(c)); // non-ASCII / non-harmless
      } else {
        out.append(c); // harmless char (only printable ASCII)
      }
    } // while
    return out.toString();
  } // static escapeString


  /* *** */


  /**
   * Helper function to convert a key event into a string.
   * Two notations exist. The short one is used for KEY_TYPED events
   * that carry a defined char and whose key text is at most one
   * char long: it is the typed char, prefixed by C- when control
   * is down and by A- when alt is down. Backslash, carriage return,
   * newline, minus and space are written with a leading backslash.
   * Every other event is written in the long XGIM notation: \\k,
   * a separator, the modifier words, one of typed / pressed /
   * released, the char or the upper case key name, and the
   * separator again. The separator is + unless the char itself is
   * +, in which case * is used. The long notation is likely to be
   * useless for most file formats.
   * No preparations for HTML are done.
   * @param key A key event that should be stringified.
   * @param escapeUnicode Set this if you want chars above code 125
   * to be written as \\u1234 escapes in the short notation.
   * @return The short or the long notation of the event.
   */
  public static String keyToString(KeyEvent key, boolean escapeUnicode) {
    String keyName = KeyEvent.getKeyText(key.getKeyCode());
    final int mod = key.getModifiers();
    final char ch = key.getKeyChar();
    if (keyName.indexOf("nknown") >= 0) {
      keyName = ""; // key text only reports an unknown key code
    }
    DebugEditIM.println(3,"Char [" + ch + "] mod=" + mod
      + " text=<" + keyName + ">");
    DebugEditIM.println(4,"KeyEvent=" + key);

    // text for the char in the short and in the long notation
    String shortForm;
    String longForm;
    switch (ch) {
      case '\\': shortForm = "\\\\"; longForm = "BACK_SLASH"; break;
      case '\r': shortForm = "\\r";  longForm = shortForm;    break;
      case '\n': shortForm = "\\n";  longForm = "ENTER";      break;
      case '-':  shortForm = "\\-";  longForm = shortForm;    break;
      case ' ':  shortForm = "\\ ";  longForm = "SPACE";      break;
      default:   shortForm = "" + ch; longForm = shortForm;   break;
    }
    final char sep = (ch == '+') ? '*' : '+';
    final int id = key.getID();
    final boolean typed = (id == KeyEvent.KEY_TYPED);

    if (typed && (keyName.length() <= 1)
        && (ch != KeyEvent.CHAR_UNDEFINED)) {
      // at most ctrl and alt, simple key stuff.
      if (escapeUnicode && ((int)ch > 125)) {
        shortForm = escapeGlyph(ch); // \\u1234 escape visible to the user
      }
      StringBuffer simple = new StringBuffer();
      if (key.isControlDown()) simple.append("C-");
      if (key.isAltDown()) simple.append("A-");
      // luckily enough, ch is adjusted by shift (keyName is not)
      simple.append(shortForm);
      return simple.toString();
    }

    // several modifiers, not sure if altgr is allowed!
    StringBuffer full = new StringBuffer("\\k");
    full.append(sep);
    if (key.isShiftDown()) full.append("shift ");
    if (key.isControlDown()) full.append("control ");
    if (key.isMetaDown()) full.append("meta ");
    if (key.isAltDown()) full.append("alt ");
    if (key.isAltGraphDown()) full.append("altgr ");
    if ((mod & KeyEvent.BUTTON1_MASK) > 0) full.append("button1 ");
    if ((mod & KeyEvent.BUTTON2_MASK) > 0) full.append("button2 ");
    if ((mod & KeyEvent.BUTTON3_MASK) > 0) full.append("button3 ");
    if (typed) {
      full.append("typed ");
    } else if (id == KeyEvent.KEY_PRESSED) {
      full.append("pressed ");
    } else if (id == KeyEvent.KEY_RELEASED) {
      full.append("released ");
    } else {
      full.append("Xed? ");
    }
    // mods plus "typed CHAR" or "pressed-or-released NAME"
    // (the key name must be upper case!)
    full.append(typed ? longForm : keyName.toUpperCase());
    full.append(sep);
    return full.toString();
  } // keyToString


  /**
   * We override toString(), of course. The output lists the keys
   * and the glyphs in the ASCII_HUMAN export format, followed by
   * the comments as stored.
   * @return A text of the form
   * AssignObject(keys=..., glyphs=..., comments=...).
   */
  public String toString() {
    // the glyph slot must use exportGlyphs: exportString would put a
    // complete "keys=..., glyphs=..., comments=..." dump in here
    return "AssignObject("
      + "keys=" + exportKeys(ASCII_HUMAN)
      + ", "
      + "glyphs=" + exportGlyphs(ASCII_HUMAN)
      + ", "
      + "comments=" + getComments()
      + ")";
  } // toString


  /* *** */


  /**
   * Export the whole object in a certain format.
   * The human formats give keys=..., glyphs=..., comments=... on
   * one line. The HTML formats give the same three fields with red
   * labels between htmlHeader and htmlFooter; all three values are
   * HTML escaped so that chars like lt or amp inside them cannot
   * break the markup. GIM and XGIM give a bind / send line with an
   * optional # comment. U8 gives keys, a tab and the glyphs. Yudit
   * gives a quoted keys=glyphs entry with an optional // comment.
   * @param format One of the constant values of this class.
   * Some formats may not allow all features to be exported.
   * @return Returns a string version of this object, typically
   * one line that can be displayed or inserted into a file of
   * the requested format directly.
   * @throws UnsupportedOperationException Whenever some
   * feature cannot be exported (for example comments in U8) or the
   * format is unknown, an exception is thrown (and nothing is
   * returned). The exception contains the error message.
   */
  public String exportString(int format)
    throws UnsupportedOperationException {
    switch (format) {
      case UNICODE_HUMAN:
      case ASCII_HUMAN: { // some easy to read format
        return "keys=" + exportKeys(format) + ", "
          + "glyphs=" + exportGlyphs(format) + ", "
          + "comments=" + escapeString(comments,
            false /* no HTML */);
      }
      case UNICODE_HTML:
      case ASCII_HTML:  { // also easy to read, highlighted
        // The plain variants are used for the parts because the HTML
        // variants of exportKeys / exportGlyphs add their own header
        // and footer. The plain text is then HTML escaped here.
        int plain = (format == ASCII_HTML) ? ASCII_HUMAN : UNICODE_HUMAN;
        // comments: same one-line escaping as in the human formats,
        // then made safe for HTML
        String commentText = escapeString(
          escapeString(comments, false /* no HTML */), true /* HTML */);
        return htmlHeader
          + "<font color=\"red\">keys=</font>"
          + escapeString(exportKeys(plain), true /* HTML */)
          + "<font color=\"red\">, glyphs=</font>"
          + escapeString(exportGlyphs(plain), true /* HTML */)
          + "<font color=\"red\">, comments=</font>"
          + commentText
          + htmlFooter;
      }
      case XGIM_FILE:
      case GIM_FILE:      { // for GUK IM and extended GUK IM
        return "bind \"" + exportKeys(format) + "\"  "
          + "send " + exportGlyphs(format)
          + ( (comments.length() == 0) ?
            "" : (" # " + escapeString(comments,
              false /* no HTML */)) );
        // maybe add "   keycap " + exportGlyphs(format) before comments?
      }
      case U8_FILE:       { // for MPI IM (with space-separated alternatives)
        if (comments.length() != 0)
          throw new UnsupportedOperationException(
            "Cannot export comments to U8, please remove them first");
        return exportKeys(format) + "\t" + exportGlyphs(format);
      }
      case YUDIT_FILE:    { // for Yudit
        return "\"" + exportKeys(format) + "="
          + exportGlyphs(format) + "\","
          + ( (comments.length() == 0) ?
            "" : ("// " + comments) ); // UTF-8 comments, by the way
      }
      default:            { // other
        throw new UnsupportedOperationException("Unknown format " + format);
      }
    } // switch
  } // exportString


  /**
   * Export the glyph part in a certain format.
   * UNICODE_HUMAN returns the glyphs unchanged and ASCII_HUMAN the
   * escapeString version. The HTML formats wrap the HTML escaped
   * glyphs in htmlHeader and htmlFooter. GIM and XGIM write every
   * glyph as 0x plus hex digits without separators. U8 returns the
   * glyphs unchanged but refuses space and newline. Yudit writes
   * every glyph as 0x plus hex digits, separated by single spaces;
   * the 0x prefix is required because parseYuditEscapes reads a bare
   * number as decimal and other text as literal chars.
   * @param format One of the constant values of this class.
   * Some formats may not allow all features to be exported.
   * @return Returns a string version of the glyph string,
   * for example enriched with highlighting or escaped to
   * avoid certain characters.
   * @throws UnsupportedOperationException Whenever some
   * feature cannot be exported or the format is unknown, an
   * exception is thrown (and nothing is returned). The exception
   * contains the error message.
   */
  public String exportGlyphs(int format)
    throws UnsupportedOperationException {
    switch (format) {
      case UNICODE_HUMAN: { // some easy to read format
        return glyphs;
      }
      case ASCII_HUMAN:   { // with \\u1234 for all non-ascii stuff
        return escapeString(glyphs, false /* no HTML */);
      }
      case UNICODE_HTML:  { // also easy to read, highlighted
        return htmlHeader
          + escapeString(glyphs, true /* HTML */) + htmlFooter;
      }
      case ASCII_HTML:    { // with \\u1234 for all non-ascii stuff
        return htmlHeader
          + escapeString(
            escapeString(glyphs, false /* no HTML */),
              true /* HTML */) + htmlFooter;
      }
      case XGIM_FILE:       // with \\k+stuff+ escapes
      case GIM_FILE:      { // for GUK IM
        StringBuffer gb = new StringBuffer();
        for (int i = 0; i < glyphs.length(); i++) {
          gb.append("0x" + MenuHelpers.toHex(glyphs.charAt(i)));
            // no spaces, but 0x... prefixes. Weird format...
        } // for
        // question: why do we sometimes have both SEND and KEYCAP?
        return gb.toString();
      }
      case U8_FILE:       { // for MPI IM (no escapes, pure UTF-8)
        if (glyphs.indexOf(" ") != -1)
          throw new UnsupportedOperationException(
            "U8 has space reserved for choices");
        if (glyphs.indexOf("\n") != -1)
          throw new UnsupportedOperationException(
            "U8 has return reserved");
        // tab, which separates keys from results, is probably okay.
        // kind of tricky to write this, as it wants all choices for
        // one key sequence to be bundled on one line!?
        return glyphs;
      }
      case YUDIT_FILE:    { // for Yudit
        StringBuffer gb = new StringBuffer();
        for (int i = 0; i < glyphs.length(); i++) {
          char ch = glyphs.charAt(i);
          // hex codes need the 0x marker, as exportKeys writes them
          gb.append("0x" + MenuHelpers.toHex((int)ch));
          if (i != (glyphs.length() - 1)) gb.append(" ");
        } // for
        return gb.toString();
      }
      default:            { // other
        throw new UnsupportedOperationException("Unknown format " + format);
      }
    } // switch
  } // exportGlyphs


  /**
   * Export the key part in a certain format.
   * The human, HTML and XGIM formats concatenate the keyToString
   * text of every key; the ASCII variants ask keyToString to escape
   * Unicode, and the HTML variants pass the text through the
   * highlighter and wrap it in htmlHeader and htmlFooter.
   * GIM does the same as XGIM but refuses keys that need the long
   * \\k notation. U8 writes the plain typed chars. Yudit writes
   * typed chars, with some of them as hex codes or backslash escapes.
   * @param format One of the constant values of this class.
   * Some formats may not allow all features to be exported.
   * @return Returns a string version of the key sequence
   * in the particular file format requested.
   * @throws UnsupportedOperationException Whenever some
   * feature cannot be exported or the format is unknown, an
   * exception is thrown (and nothing is returned). The exception
   * contains the error message.
   * Only GIM can handle Ctrl and Alt, and only XGIM can handle
   * even more complicated key events.
   */
  public String exportKeys(int format)
    throws UnsupportedOperationException {
    switch (format) {
      case UNICODE_HUMAN: { // some easy to read format
        /** TODO?: Sanity checks (control chars)? */
        return joinKeyStrings(false /* leave Unicode as is */);
      }
      case ASCII_HUMAN:   { // with \\u1234 for all non-ascii stuff
        return escapeString(joinKeyStrings(true /* escape Unicode */),
          false /* no HTML */);
      }
      case UNICODE_HTML:  { // also easy to read, highlighted
        String plain = joinKeyStrings(false /* leave Unicode as is */);
        return htmlHeader
          + htmlizer.highlight(plain) // also escapes HTML!
          + htmlFooter;
      }
      case ASCII_HTML:    { // with \\u1234 for all non-ascii stuff
        String plain = joinKeyStrings(true /* escape Unicode */);
        return htmlHeader
          + htmlizer.highlight(plain) // also escapes HTML!
          + htmlFooter;
      }
      case XGIM_FILE:     { // with \\k+stuff+ escapes
        /** TODO?: Sanity checks / escapes? */
        return joinKeyStrings(true /* escape Unicode */);
      }
      case GIM_FILE:      { // for GUK IM
        StringBuffer gb = new StringBuffer();
        Iterator iter = keys.iterator();
        while (iter.hasNext()) {
          KeyEvent keyE = (KeyEvent)iter.next();
          String keyEt = keyToString(keyE, true /* escape Unicode */);
          if (keyEt.startsWith("\\k"))
            throw new UnsupportedOperationException(
              "GIM can only handle typed keys, Ctrl and Alt");
          gb.append(keyEt);
        } // while
        return gb.toString();
      }
      case U8_FILE:       { // for MPI IM (no escapes, pure UTF-8)
        StringBuffer gb = new StringBuffer();
        Iterator iter = keys.iterator();
        while (iter.hasNext()) {
          KeyEvent keyE = (KeyEvent)iter.next();
          char ch = keyE.getKeyChar();
          if ((keyE.getID() != KeyEvent.KEY_TYPED) ||
              (ch == '\t') || (ch == '\n') || (ch == ' '))
            throw new UnsupportedOperationException(
              "U8 can only handle typed keys,"
              + " tab, space and enter reserved");
            // tab separates key column from result column
            // space separates result choices
            // enter separates entries
          gb.append(ch); // any UTF-8 possible
        } // while
        return gb.toString();
      }
      case YUDIT_FILE:    { // for Yudit
        StringBuffer gb = new StringBuffer();
        Iterator iter = keys.iterator();
        while (iter.hasNext()) {
          KeyEvent keyE = (KeyEvent)iter.next();
          if (keyE.getID() != KeyEvent.KEY_TYPED)
            throw new UnsupportedOperationException(
              "Yudit can only handle typed keys");
          char ch = keyE.getKeyChar();
          if ( ((ch >= '0') && (ch <= '9')) || (ch < ' ') ||
            (ch == '=') || (ch == '|') || (ch == ' ') ||
            (ch == '/')) {
            // space and = must be escaped, and there must be
            // whitespace between / and / solving that by escaping.
            // digits must be escaped as any numbers count as char
            // codes (hex byte 0x1f, dec or \\octal).
            if ((int)ch < 16) { gb.append(" 0x0"); }
            else gb.append(" 0x");
            gb.append(Integer.toHexString((int)ch) + " ");
          } else if ((ch == '\"') || (ch == '\\') || (ch == '+') ) {
            // (do not produce \= for =, but allow it on input)
            gb.append("\\" + ch);
          } else {
            gb.append(ch);
            // "throwing exception if non-ASCII" is not needed
          } // if
        } // while
        return gb.toString();
      }
      default:            { // other
        throw new UnsupportedOperationException("Unknown format " + format);
      }
    } // switch
  } // exportKeys

  /**
   * Concatenates the keyToString text of all stored keys, in order.
   * @param escapeUnicode Passed on to keyToString for every key.
   * @return The joined key descriptions, empty if there are no keys.
   */
  private String joinKeyStrings(boolean escapeUnicode) {
    StringBuffer gb = new StringBuffer();
    Iterator iter = keys.iterator();
    while (iter.hasNext()) {
      KeyEvent keyE = (KeyEvent)iter.next();
      gb.append(keyToString(keyE, escapeUnicode));
    } // while
    return gb.toString();
  } // joinKeyStrings


  /* *** */


  /**
   * Helper method to replace all \\u1234 by their glyphs
   * and all \\\\ and \\r and \\n and \\- as well. Leaves
   * \\octal and \\k style escapes as is. A backslash in front of
   * any other char is dropped and the char is kept. A backslash
   * at the very end of the string is kept.
   * A \\u escape needs exactly four hex digits. If fewer than four
   * chars follow, the backslash and the u are passed through. If the
   * four chars are not all hex digits (a sign is not a digit), an
   * error marker of two backslashes and u-ERROR- is written and the
   * four chars are then processed as normal text.
   * @param str A string possibly containing escapes.
   * @return A string with only octal and XGIM escapes left,
   * which is possibly shorter than the input string.
   */
  public static String unEscape(String str) {
    StringBuffer buf = new StringBuffer();
    for (int i = 0; i < str.length(); i++) {
      if ( (str.charAt(i) != '\\') || ((i+1) == str.length()) ) {
        buf.append(str.charAt(i)); // normal char or close to end
      } else {
        i++;
        char ch = str.charAt(i); // which escape?
        if        (ch ==  'n') { buf.append('\n'); // newline
        } else if (ch ==  'r') { buf.append('\r'); // carriage return
        } else if (ch == '\\') { buf.append('\\'); // backslash
        } else if (ch ==  'k') { buf.append("\\k"); // PRESERVE \\k
          // (\\k is needed for \\k+key description+ in XGIM)
        } else if ((ch >= '0') && (ch <= '7')) { buf.append("\\" + ch);
          // PRESERVE \\123 octal style escapes
        } else if (ch == 'u') { // unicode
          if ((i+4) >= str.length()) { // too close to end
            buf.append("\\u"); // pass through then
          } else {
            // Check digit by digit: Integer.parseInt would also
            // accept a leading sign and yield a wrong glyph.
            int code = 0;
            boolean allHex = true;
            for (int k = 1; k <= 4; k++) {
              int digit = Character.digit(str.charAt(i + k), 16);
              if (digit < 0) {
                allHex = false;
                break;
              }
              code = (code << 4) | digit;
            }
            if (allHex) {
              buf.append((char) code); // hex value as glyph
              i += 4; // skip hex value string
            } else {
              buf.append("\\\\u-ERROR-");
            }
          }
        } else { buf.append(ch); // \\ something is something, for now
        } // end of backslash if chain
      } // backslash
    } // for
    return buf.toString();
  } // unEscape


  /**
   * Helper method to replace all numbers by their glyphs
   * (hex, octal and dec). Needs other stuff to be parsed before
   * by unEscape.
   * This is used for the particular way in which Yudit
   * allows Unicode glyphs to be escaped. Note that Yudit does
   * NOT allow Java style \\u12ab escapes.
   * The input is split at spaces and every piece is trimmed. A piece
   * starting with a backslash is read as octal, one starting with 0x
   * as hex and one starting with a digit as decimal. Pieces that
   * do not parse as a number are copied as text. Pieces that are
   * empty after trimming (for example a lone tab) are skipped.
   * @param str A string with text and numbers, separated by
   * spaces.
   * @return The translated string, where all numbers are
   * replaced by their corresponding glyphs and the separating
   * spaces have been removed.
   */
  public static String parseYuditEscapes(String str) {
    StringBuffer buf = new StringBuffer();
    StringTokenizer strtok = new StringTokenizer(str," ");
    while (strtok.hasMoreTokens()) {
      String token = strtok.nextToken().trim(); // remove whitespace
      if (token.length() == 0) {
        continue; // only whitespace: charAt(0) below would fail
      }
      try {
        if (token.startsWith("\\")) {
          buf.append((char) Integer.parseInt(token.substring(1), 8));
        } else if (token.startsWith("0x")) {
          buf.append((char) Integer.parseInt(token.substring(2), 16));
        } else if ((token.charAt(0) >= '0') && (token.charAt(0) <= '9')) {
          buf.append((char) Integer.parseInt(token, 10));
        } else {
          buf.append(token);
        }
      } catch (NumberFormatException nfe) {
        buf.append(token); // no numerical escape, we guessed wrong.
        // should probably check better and throw an exception when the
        // data looks good but is not. We just treat it as non-number now.
      } // catch
    } // while
    return buf.toString();
  } // parseYuditEscapes


  /**
   * Helper method to translate a String into something that
   * will type that String: one KEY_TYPED event per char, without
   * modifiers, with key code VK_UNDEFINED, the shared dummy canvas
   * as source and the current time as time stamp.
   * @param str An Unicode string for which key events will be
   * generated.
   * @return A List of KeyEvent objects that, when sent to
   * some target in sequence, will cause the input string
   * to be &quot;typed in&quot;.
   */
  public static List stringToKeys(String str) {
    final char[] chars = str.toCharArray();
    List events = new Vector();
    for (int n = 0; n < chars.length; n++) {
      KeyEvent typed = new KeyEvent(
        canvas,                     // source component
        KeyEvent.KEY_TYPED,
        System.currentTimeMillis(), // time
        0,                          // modifiers
        KeyEvent.VK_UNDEFINED,      // typed char, not certain key
        chars[n]);                  // generated char
      events.add(typed);
    }
    return events; // Vector with simple "type that string" keystrokes
  } // stringToKeys


  /**
   * Helper method to create simple typed KeyEvents.
   * Quite useful wrapper that generates a key event for a given
   * glyph. Note that neither the wrapper nor Java tries to figure
   * out a corresponding key code! The event is also written to the
   * debug log at level 4.
   * @param ch The character which the event should type.
   * @param modifiers The modifiers, as defined for KeyEvent objects,
   * representing things like shift states.
   * @return A key event corresponding to the requested char.
   * It will be timestamped to the current time and originate from
   * a dummy canvas. The key code will be VK_UNDEFINED.
   * @throws UnsupportedOperationException Kept in the signature for
   * callers that catch it; this implementation does not throw it,
   * because object creation never yields null. (The KeyEvent
   * constructor itself is documented to reject KEY_TYPED events
   * whose char is CHAR_UNDEFINED with an IllegalArgumentException.)
   */
  public static KeyEvent typedKeyEvent(char ch, int modifiers)
    throws UnsupportedOperationException {
    KeyEvent keyE = new KeyEvent(canvas /* source component */,
      KeyEvent.KEY_TYPED, System.currentTimeMillis() /* time */,
      modifiers,
      KeyEvent.VK_UNDEFINED /* typed char, not certain key */,
      ch /* generated char */ );
    DebugEditIM.println(4, "(SIMPLE) event=" + keyE);
    return keyE;
  } // typedKeyEvent


  /**
   * Helper method to generate a List of KeyEvents from a
   * String describing them. Only accepts xgim escapes when xgim
   * is true. Central point of string to event list translation:
   * This does the chunking, while translation of each chunk into
   * a key event is done elsewhere.
   * <i>Strange: KeyEvents only allow Shift/Ctrl/Alt/Meta,
   * but KeyStrokes also allow buttons and AltGr.</i>
   * All GIM parsing should match GIM LocaleHandler style in general.
   * Uses unEscape for simple and \\u1234 escapes (!?).
   * @param str A string of glyphs, possibly enriched with special
   * escapes like the C- prefix for Ctrl. See the other parsing
   * methods for details.
   * @param xgim Only when this is true, XGIM style escapes are
   * allowed.
   * @throws UnsupportedOperationException When the string cannot
   * be parsed or contains XGIM escapes while xgim is false, an
   * exception with a detailed error message is thrown.
   */
  public static List escapedStringToKeys(String str, boolean xgim) {
    List buf = new Vector();
    int modifiers = 0; // modifier mask collected from prefixes so far
    str = unEscape(str);
    try {
      // Each round looks at the head of str, handles exactly one
      // prefix / escape / plain char, and then cuts off the i chars
      // which that round has consumed.
      while (str.length() > 0) {
        int i = 0; // number of chars consumed in this round
        if (str.startsWith("C-")) { // CTRL
          modifiers |= KeyEvent.CTRL_MASK;
          i = 2;
        } else if (str.startsWith("M-")) { // ALT aka. META
          // (GIM calls ALT META, but a META would exist, too...)
          modifiers |= KeyEvent.ALT_MASK;
          i = 2;
        } else if (str.startsWith("A-")) { // ALT aka. META
          // (sercrol.gim uses A-, but GIM seems to parse only M-)
          modifiers |= KeyEvent.ALT_MASK;
          i = 2;
        } else if (str.startsWith("Alt-")) { // ALT
          // (crlurdu.gim uses this, but GIM does not parse it properly)
          modifiers |= KeyEvent.ALT_MASK;
          i = 4;
        } else if (str.startsWith("G-")) { // ALTGR: see above!?
          modifiers |= KeyEvent.ALT_GRAPH_MASK;
          i = 2;
        } else if (str.startsWith("\\k") && xgim) { // XGIM special escape
          // Layout is \\k, one freely chosen marker char, a KeyStroke
          // description, and the same marker char again.
          // NOTE(review): when xgim is false, a \\k is NOT rejected but
          // handled by the simple escape branch below (types a 'k').
          // NOTE(review): modifier prefixes collected before a \\k are
          // neither applied to this event nor reset by it - confirm
          // whether that is intended.
          char marker = str.charAt(2);
          // Search from 3 (the first description char) so that an empty
          // description is reported as such, not as a missing marker.
          int closing = str.indexOf(marker, 3);
          if (closing < 0) {
            throw new UnsupportedOperationException(
              "Unterminated XGIM escape <" + str + ">");
          }
          String descr = str.substring(3 /* skip \\k* */, closing);
          KeyStroke kstrok = KeyStroke.getKeyStroke(descr);
            // allows very complex KeyEvents to be selected, see the
            // KeyEvent documentation.
          if (kstrok == null) {
            throw new UnsupportedOperationException(
              "Unparseable KeyEvent description <" + descr +">");
          }
          DebugEditIM.println(2, "event from: " + kstrok);
          // released -> KEY_RELEASED, no key code -> KEY_TYPED,
          // everything else -> KEY_PRESSED
          KeyEvent keyE = new KeyEvent(canvas /* source component */,
            (kstrok.isOnKeyRelease() ? KeyEvent.KEY_RELEASED :
              (kstrok.getKeyCode() == KeyEvent.VK_UNDEFINED ?
                 KeyEvent.KEY_TYPED : KeyEvent.KEY_PRESSED)),
            System.currentTimeMillis() /* time */,
            kstrok.getModifiers(), kstrok.getKeyCode(),
            kstrok.getKeyChar());
          DebugEditIM.println(2, "(XGIM) event=" + keyE);
          buf.add(keyE);
          i = closing + 1;
        } // \\k+...+ escape
        else if ( str.startsWith("\\") && (str.length() > 1) ) {
          char escCh = str.charAt(1); // default is pass through
          if (escCh == 'n') escCh = '\n'; // newline
          if (escCh == 'r') escCh = '\r'; // carriage return
          if (escCh == 't') escCh = '\t'; // tabulator
          buf.add(typedKeyEvent(escCh, modifiers));
          i = 2; // consume
          // The modifiers have been used up by this key, exactly as in
          // the plain char branch. Without the reset, a prefix like C-
          // would leak into the next key of the sequence.
          modifiers = 0;
          DebugEditIM.println(2, "SHORT escape: \\" + str.charAt(1));
        } // simple escape
        else {
          // buf.add(KeyStroke.getKeyStroke(new Character(
          //   str.charAt(i)), modifiers));
          buf.add(typedKeyEvent(str.charAt(0), modifiers));
          i = 1; // CONSUME that char
          modifiers = 0;
          // GIM only allows C- OR M- and calls trim() after processing
          // either of it. We do NOT simulate that here. Better modify
          // GIM to adapt to our flexibility (maybe A-, Alt- and G-, too).
          // (in LocaleHandler, Key, and possibly elsewhere)
        } // else
        str = str.substring(i);
      } // while
    } catch (UnsupportedOperationException uoe) {
      // already carries a detailed message: log and pass on unchanged
      DebugEditIM.println(1, "Key escape parse error with <" + str + ">");
      throw uoe;
    } catch (RuntimeException re) {
      // e.g. index problems caused by truncated escapes, or a KeyEvent
      // constructor refusing its arguments. Errors are not caught here.
      DebugEditIM.println(1, "Key escape parse error with <" + str + ">");
      UnsupportedOperationException uoe =
        new UnsupportedOperationException(
          "Key escape parse error with <" + str + ">");
      uoe.initCause(re); // keep the root cause for debugging
      throw uoe;
    } // catch
    return buf; // Vector of KeyEvents in the order of their description
  } // escapedStringToKeys


  /* *** */


  /**
   * Main method to read one line of data from a file.
   * Import whole entry in given format,
   * including COMMENTS. Cannot import HTML formats.
   * Cannot parse alternatives (GIM digit toggle, MPI U8 choicelists),
   * so you have to split them up yourself before calling importString.
   * Cannot parse headers or comment-only lines either, so you must
   * throw away (or store elsewhere) comments before calling this.
   * @param str A string, typically a line of text from the body of
   * a keymap file in the selected format.
   * @param format One of the format constants of this class,
   * selecting which file format the parser has to process.
   * @throws UnsupportedOperationException Whenever something turns
   * out to be unparseable or unstorable here or in one of the helper
   * methods, an exception with a detailed error message is thrown.
   */
  public void importString(String str, int format)
    throws UnsupportedOperationException {
    switch (format) {
      case UNICODE_HUMAN:   // some easy to read format
      case ASCII_HUMAN:   { // same, but \\u1234 escapes allowed
        if (format == ASCII_HUMAN) str = unEscape(str);
        // keys=... glyphs=... comments=...
        int ki = str.indexOf("keys=");
        int gi = str.indexOf("glyphs=");
        int ci = str.indexOf("comments=");
        if ( (ki < 0) || (gi < ki) || ((ci >= 0) && (ci < gi)) )
          throw new UnsupportedOperationException("Invalid input string");
        // Every field ends one char (the separator) before the keyword
        // of the next field. If two keywords directly follow each other,
        // there is no room for that separator: Report this as parse
        // error instead of running into an index exception.
        int keysStart = ki + "keys=".length();
        int keysEnd = gi - 1;
        int glyphsStart = gi + "glyphs=".length();
        int glyphsEnd = (ci < 0) ? str.length() : (ci - 1);
        if ( (keysEnd < keysStart) || (glyphsEnd < glyphsStart) )
          throw new UnsupportedOperationException("Invalid input string");
        importKeys(str.substring(keysStart, keysEnd), format);
        // we do not do additional trim(), in case the user MEANT space.
        importGlyphs(str.substring(glyphsStart, glyphsEnd), format);
        if (ci >= 0) { // if comments found...
          comments = str.substring(ci + "comments=".length()); // all rest
        }
        break;
      }
      case UNICODE_HTML:    // cannot import HTML
      case ASCII_HTML:    { // cannot import HTML
        throw new UnsupportedOperationException("Cannot parse HTML!");
      }
      case XGIM_FILE:       // with \\k+stuff+ escapes
      case GIM_FILE:      { // for GUK IM
        if (str.startsWith("inputmethod ") ||     // ... \"Lang\" \"Style\"
            str.startsWith("option backspace") || // for syllable languages
            str.startsWith("option digits ") ||   // \"keys\" [national]
              // (defines a key to toggle national / ASCII digits)
            str.startsWith("option ") // e.g. option hanfont japanese
           )
          throw new UnsupportedOperationException("forgotten GIM header?");
        if (!str.startsWith("bind "))
          throw new UnsupportedOperationException("forgotten GIM comment?");
          // comments usually are # something. Also, empty lines occur.
        str = str.substring("bind ".length()).trim();
        // "resetorsend " has to be checked BEFORE "send ", because the
        // text "send " is contained in "resetorsend " and would match
        // first, which would make this check unreachable.
        if (str.indexOf("resetorsend ") >= 0)
          throw new UnsupportedOperationException(
            "GIM resetorsend not processed yet!");
          // would force state machine back to initial if it is not
          // already there. If it is, it sends the string.
        int nextPart = str.indexOf("send ");
        if (nextPart < 0) { nextPart = str.indexOf("digit "); }
        if (nextPart < 0)
            throw new UnsupportedOperationException(
              "Cannot parse this GIM line");
        String leftPart = str.substring(0, nextPart).trim();
        if (leftPart.length() < 2) // no room for the enclosing quotes
            throw new UnsupportedOperationException(
              "Cannot parse this GIM line");
        importKeys(leftPart.substring(1, leftPart.length() - 1), format);
          // do not import enclosing quotes, just import the key definition.
        importGlyphs(str.substring(nextPart), format);
          // import glyphs along with their keywords
        break;
      }
      case U8_FILE:       { // for MPI IM (no escapes, pure UTF-8)
        int tabIndex = str.indexOf('\t');
        if (tabIndex < 0)
          throw new UnsupportedOperationException("Must have one tab in U8");
        if (str.indexOf(' ',tabIndex) >= 0)
          throw new UnsupportedOperationException(
            "No U8 choicelists accepted yet");
          // we refuse to parse entries with choice lists for now!
          // (space would separate the entries, see MPI initializeHash)
        importKeys(str.substring(0, tabIndex), format);
        importGlyphs(str.substring(tabIndex + 1), format);
        // U8 does never contain comments
        // format is: keys\tstring string string
        break;
      }
      case YUDIT_FILE:    { // for Yudit
        // format is: "escaped keys=escaped glyphs", // comment
        // ignores whitespace. 0xnumber allowed only at beginning for keys,
        // glyphs are usually escaped as 0xnumber, separated by " ".
        // quotes are escaped as \\". All whitespace is skipped! All
        // numbers of 123, \\123 and 0x123 style are treated as char
        // numbers! So we tokenize by whitespace, check for numbers and \\.
        //
        // keymaps that start with "foo+bar+baz" will contain some
        // "begin foo" / "end foo" / "begin bar" ... areas for RELATIVE
        // output numbering (up to 5 sections). Not supported.
        // lines that start with # are syntactically wrong comments,
        // but people sometimes use them instead of // ...
        int plusPos = str.indexOf('+');
        int slashPos = str.indexOf("//");
        if ( (plusPos > 0) &&
             (str.indexOf('=') < 0) &&
             ( (slashPos < 0) || (plusPos < slashPos) )
           ) // not commented-out "+" and no "=" sounds like "foo+bar"
             // (a line without any "//" cannot have a commented-out "+")
          throw new UnsupportedOperationException(
          "Additive/Sectioned Yudit kmap files are NOT supported!");
          // not checking for "begin ..." and "end ..." because the
          // "foo+bar"... header will trigger an error before anyway.
        int hashPos = str.indexOf('#');
        int startq = str.indexOf('\"');
        if ( (hashPos >= 0) && // a # in the very first column counts, too
             ( (startq < 0) || (startq > hashPos) ) )
          throw new UnsupportedOperationException(
          "Illegal style to comment out a Yudit line");
        int endq = str.indexOf('\"', startq + 1);
        if ((startq < 0) || (endq <= startq))
          throw new UnsupportedOperationException(
            "Empty or unparseable Yudit line");
        if ((startq > 0) && (str.charAt(startq - 1) == '\\'))
          throw new UnsupportedOperationException("Fake Yudit opening quote!");
        while ((endq > 0) && (str.charAt(endq - 1) == '\\')) {
          endq = str.indexOf('\"', endq + 1); // search for real quote
        }
        if (endq < 0)
          throw new UnsupportedOperationException("Fake Yudit closing quote!");
        // ignore whether closing quote is followed by comma
        int startc = str.indexOf("//", endq);
        if (startc > 0) // comments found?
          comments = str.substring(startc + "//".length());
        str = str.substring(startq + 1, endq); // only part between quotes
        // str now starts right behind the opening quote, so the search
        // for "=" has to start at 0. The old offset startq belongs to
        // the uncut line and would skip real content whenever the
        // opening quote was not in the very first column.
        int sep = str.indexOf('=');
        while ((sep > 0) && (str.charAt(sep - 1) == '\\')) { // escaped?
          sep = str.indexOf('=', sep+1); // search on
        }
        if (sep > 0) { // found, and at least one char of keys before it
          importKeys(str.substring(0, sep), format);
          importGlyphs(str.substring(sep+1), format);
        } else {
          throw new UnsupportedOperationException(
            "No unescaped '=' found in line!");
        }
        break;
      }
      default:            { // other
        throw new UnsupportedOperationException("Unknown format " + format);
      }
    } // switch
  } // importString


  /**
   * Import glyphs (no multiple choice! no GIM digit toggle!)
   * in given format.
   * @param str A string containing only one glyph string and no
   * comments, in the selected format. When there are choices
   * like U8 choice lists or GIM digit (choice between 2), you
   * have to split them into several single values yourself
   * before calling this.
   * @param format One of the format constants of this class,
   * selecting which file format the parser has to process.
   * @throws UnsupportedOperationException Whenever something turns
   * out to be unparseable or unstorable here or in one of the helper
   * methods, an exception with a detailed error message is thrown.
   */
  public void importGlyphs(String str, int format)
    throws UnsupportedOperationException {
    switch (format) {
      case UNICODE_HUMAN:   // some easy to read format
      case ASCII_HUMAN:   { // same, but \\u1234 escapes allowed
        if (format == ASCII_HUMAN) str = unEscape(str);
          // this might cause double unEscape but should not
          // harm. If it does, re-escape parts or do not un-escape
          // them in importString.
        glyphs = str; // not checking anything else. No trim()!
        break;
      }
      case UNICODE_HTML:    // cannot import HTML
      case ASCII_HTML:    { // cannot import HTML
        throw new UnsupportedOperationException("Cannot parse HTML!");
      }
      case XGIM_FILE:       // only keys differ to GIM...
      case GIM_FILE:      { // for GUK IM
        // styles:
        //          send 0x1234 keycap 0x1234
        // where 0x1234 is always 4 digits, no whitespace between.
        // possible to send several glyphs per keystroke.
        // keycap is usually same as send, and optional.
        //         digit 0x1234 0x1234 (ascii, national)
        // DIGIT is controlled by: option digits \"keys\" national
        // (where keys can toggle between ascii and default national)
        // (mentioning a default is optional)
        // possible header: inputmethod \"Language\" \"style\"
        // possible header: option backspace
        // possible special: bind \"\\ \" resetorsend 0x0020
        // (space forces to initial state if not already there)
        int sendi = str.indexOf("send ");
        if (sendi >= 0)
          str = str.substring(sendi + "send ".length()).trim();
          // strip leading send, as it is our default.
        int keycapi = str.indexOf("keycap ");
        if (keycapi >= 0) {
          String leftPart = str.substring(0, keycapi).trim();
          if (leftPart.equals(
            str.substring(keycapi + "keycap ".length()).trim())) {
            str = leftPart;
          } else {
            throw new UnsupportedOperationException(
              "cannot understand GIM send that differs from keycap!");
          }
        } // keycap found
        // Test for and strip the very same token "digit " (with the
        // space). Testing for "digit" alone while stripping by the
        // position of "digit " would cut off 5 arbitrary chars whenever
        // only the shorter text is present (index -1 plus length 6).
        int digiti = str.indexOf("digit ");
        if (digiti >= 0) {
          str = str.substring(digiti + "digit ".length()).trim();
          if (str.indexOf(' ') > 0) {
            throw new UnsupportedOperationException(
              "cannot process GIM national / ascii digit alternatives!");
          } // digit 0x1234 0x5678
          // normal digit is just a glyph for us now
        } // digit
        str = str.trim();
          // only does something if leading or trailing whitespace exists
        StringBuffer buf = new StringBuffer();
        // Consume one 0x1234 group per round.
        // NOTE(review): anything left over which is not a complete
        // 0x1234 group is silently ignored - confirm that this is wanted.
        while ((str.startsWith("0x")) && (str.length() >= 6)) {
          String hex = str.substring(2, 6);
          // parseInt accepts a leading sign, which would turn things
          // like 0x-123 into a bogus glyph after the cast to char.
          if (Character.digit(hex.charAt(0), 16) < 0)
            throw new UnsupportedOperationException("Unparseable number");
          try {
            buf.append((char) Integer.parseInt(hex, 16));
          } catch (NumberFormatException nfe) {
            throw new UnsupportedOperationException("Unparseable number");
          }
          str = str.substring(6).trim(); // no whitespace between,
          // so trim is probably not needed
        } // while
        glyphs = buf.toString();
        break;
      }
      case U8_FILE:       { // for MPI IM (no escapes, pure UTF-8)
        if (str.indexOf(' ') >= 0)
          throw new UnsupportedOperationException("Space is reserved in U8");
        glyphs = str; // actually, multiple glyph sequences are allowed
          // in the FILE, when they are separated by spaces
          // we COULD add an UTF-8 decoder here... or better, open the
          // file in UTF-8 mode when loading a file.
        break;
      }
      case YUDIT_FILE:    { // for Yudit (has several escapes, see above)
        glyphs = parseYuditEscapes(unEscape(str));
        break;
      }
      default:            { // other
        throw new UnsupportedOperationException("Unknown format " + format);
      }
    } // switch
  } // importGlyphs


  /**
   * Import keys from string in given format,
   * including COMMENTS if at end of line.
   * @param str A string containing a key sequence description
   * in the selected format. No keywords or quotes are allowed, and
   * using this method for FILES is not recommended. However,
   * this IS recommended for importing user input that should
   * become a new key sequence. Notice that no error will be
   * thrown if you accidentally include additional text like
   * keywords or quotes: It will become part of the key sequence.
   * @param format One of the format constants of this class,
   * selecting which file format the parser has to process.
   * @throws UnsupportedOperationException Whenever something turns
   * out to be unparseable or unstorable here or in one of the helper
   * methods, an exception with a detailed error message is thrown.
   */
  public void importKeys(String str, int format)
    throws UnsupportedOperationException {
    // Formats without modifier escapes only leave a flat string here,
    // which is turned into key events at the very end of this method.
    String flatKeys = null;
    if ((format == UNICODE_HUMAN) || (format == ASCII_HUMAN)) {
      // human readable: the ASCII flavour is un-escaped here first
      String readable = (format == ASCII_HUMAN) ? unEscape(str) : str;
      keys = escapedStringToKeys(readable, true /* XGIM */);
    } else if ((format == UNICODE_HTML) || (format == ASCII_HTML)) {
      // cannot import HTML
      throw new UnsupportedOperationException("Cannot parse HTML!");
    } else if ((format == XGIM_FILE) || (format == GIM_FILE)) {
      // GUK IM: \\k+stuff+ escapes are accepted for XGIM only
      keys = escapedStringToKeys(str, format == XGIM_FILE);
    } else if (format == U8_FILE) {
      // for MPI IM (no escapes, pure UTF-8)
      flatKeys = str;
    } else if (format == YUDIT_FILE) {
      // Yudit has several escapes of its own
      flatKeys = parseYuditEscapes(unEscape(str.trim()));
    } else {
      // none of the known formats
      throw new UnsupportedOperationException("Unknown format " + format);
    }
    if (flatKeys != null) {
      keys = stringToKeys(flatKeys); // flat obvious key definition
    }
  } // importKeys


}// public class AssignObject


