A simple expression parser

First the basic requirements. You have an “arbitrary” delimited string which you need to parse. Whitespace counts. So for example, you might have something like:

X:456 T=900; 123456

and you want to be able to express this in by an expression like:

X:${x-value} T:${t-value}; ${other}

So, giving your parser this pattern where keys are represented like ${this} and then feeding it the first string above you want to generate a key, value map (java.util.Map) where, for this example, x-value = 456, t-value = 900 and other = 123456. Note that this is somewhat backwards from something you might see in a template language like Velocity. It doesn’t have to be very forgiving about bad patterns or pieces of text yet. Got it?

Here’s my sample code:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
  * A simple expression parser
  * Copyright 2004 J Aaron Farr
  * 
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
  *  You may obtain a copy of the License at
  * 
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
 */
public class ExpressionParser {

  protected static final String TOKEN_START = "${";
  protected static final String TOKEN_END = "}";

  // m_pattern holds the tokenized pattern string
  public ArrayList m_pattern = new ArrayList();

  // m_keyLocations holds Integers which identify indices in 
  // m_pattern which are ${keys}
  public ArrayList m_keyLocations = new ArrayList();

  public ExpressionParser() {
  }

  /**
   * set and parse the pattern string
   */
  public void setPattern(String pattern) {
    int cursor = 0;
    while (cursor < pattern.length()) {
      int[] indices = getNextTokenLocation(pattern, cursor);
      if (indices[0] != -1) {
        // if first element is a token
        if (indices[0] == 0) {
          m_pattern.add(extractToken(pattern, indices));
          m_keyLocations.add(new Integer(m_pattern.size() - 1));
          cursor = indices[1] + 1;
        }
        // just a regular token 
        else {
          m_pattern.add(pattern.substring(cursor, indices[0]));
          m_pattern.add(extractToken(pattern, indices));
          m_keyLocations.add(new Integer(m_pattern.size() - 1));
          cursor = indices[1] + 1;
        }
      }
      // add any left over characters in the pattern past the last key
      else if (cursor != pattern.length()) {
        m_pattern.add(pattern.substring(cursor + 1));
        cursor = pattern.length();
      }
    }
  }

  /**
   * parse the string of text against the pattern
   * @return a map of key value pairs
   */
  public Map parse(String text) {
    Map map = new HashMap();
    int cursor = 0;
    for (int i = 0; i < m_keyLocations.size(); i++) {
      int keyIndex = ((Integer) m_keyLocations.get(i)).intValue();
      // key at start of pattern
      if (keyIndex == 0) {
        String key = (String) m_pattern.get(0);
        // make sure there's something else in the pattern
        if (m_pattern.size() > 1) {
          String next = (String) m_pattern.get(1);
          int nextIndex = text.indexOf(next);
          String value = text.substring(0, nextIndex);
          map.put(key, value);
          cursor = nextIndex;
        }
        // else pattern is a single key
        else {
          map.put(key, text);
          break;
        }
      }
      // else a normal key surrounded by tokens
      else {
        String key = (String) m_pattern.get(keyIndex);
        String before = (String) m_pattern.get(keyIndex - 1);
        // check if there are tokens after this key
        if (keyIndex + 1 < m_pattern.size()) {
          String after = (String) m_pattern.get(keyIndex + 1);
          int start = text.indexOf(before, cursor) + before.length();
          int end = text.indexOf(after, start);
          String value = text.substring(start, end);
          map.put(key, value);
          cursor = end;
        }
        // the key is at the end of the pattern
        else {
          int start = text.indexOf(before, cursor) + before.length();
          String value = text.substring(start);
          map.put(key, value);
          break;
        }
      }
    }
    return map;
  }

  /**
   * Take the values in the map and use them to replace the
   * keys in the pattern
   */
  public String parse(Map map) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < m_pattern.size(); i++) {
      String token = (String) m_pattern.get(i);
      if (m_keyLocations.contains(new Integer(i)))
        sb.append(map.get(token));
      else
        sb.append(token);
    }
    return sb.toString();
  }

  // returns indices of the next key token
  protected int[] getNextTokenLocation(String pattern, int index) {
    int[] indices = new int[] { -1, -1 };
    indices[0] = pattern.indexOf(TOKEN_START, index);
    indices[1] = pattern.indexOf(TOKEN_END, indices[0]);
    return indices;
  }

  // extracts the key name
  protected String extractToken(String pattern, int[] indices) {
    return pattern.substring(indices[0] + TOKEN_START.length(), indices[1]);
  }

  /**
   * simple main test.
   * arg[0] = the pattern
   * arg[1] = the text to run it against
   */
  public static void main(String[] args) {
    ExpressionParser p = new ExpressionParser();
    p.setPattern(args[0]);
    Map map = p.parse(args[1]);
    System.out.println(map);
    System.out.println(p.parse(map));
  }

}

Well, what do you think? Could regex have solved my problem easier? Could I have solved the problem easier (or better) in Java? I have that O’Reilly book on Mastering Regular Expressions that I’ve been meaning to read…