Monday, June 15, 2009

XDXF Parser library

I created a small library to parse dictionaries stored in the XDXF format. The API is event-based: you can listen to events (dictionary, article) while the parser is parsing.
The whole dictionary is not loaded into memory by default, so you can parse a large dictionary with a small memory footprint. The project is hosted on kenai.com. Binary packages will be available soon, but the source code is already available in the Kenai browser.

API usage example:

import eu.hlavki.xdxf.parser.DefaultXDXFParser;
import eu.hlavki.xdxf.parser.ParseException;
import eu.hlavki.xdxf.parser.XDXFParser;
import java.io.IOException;
import java.io.InputStream;

/**
 * Sample program that parses a dictionary from the classpath and prints the
 * dictionary reported by the listener, the number of article events received
 * and the elapsed parse time.
 */
public class Sample {

    /** Classpath location of the dictionary parsed by this sample. */
    private static final String DICTIONARY_RESOURCE = "/test-dict.xdxf";

    /**
     * Parses {@link #DICTIONARY_RESOURCE} with a {@code DefaultXDXFParser} and
     * prints a short summary to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        InputStream in = Sample.class.getResourceAsStream(DICTIONARY_RESOURCE);
        if (in == null) {
            // getResourceAsStream returns null when the resource cannot be found;
            // report it here instead of handing a null stream to the parser.
            System.err.println("Dictionary resource not found: " + DICTIONARY_RESOURCE);
            return;
        }
        try {
            XDXFParser parser = new DefaultXDXFParser();
            DictionaryListener listener = new DictionaryListener();
            parser.addXDXFEventListener(listener);
            long startTime = System.currentTimeMillis();
            parser.parse(in);
            long endTime = System.currentTimeMillis();
            System.out.println("Dictionary: " + listener.getDictionary());
            System.out.println("Word Count: " + listener.getArticleCount());
            System.out.println("Parse time: " + (endTime - startTime) + " ms");
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                // Closing is not expected to fail, but do not hide it if it does.
                System.err.println("Failed to close " + DICTIONARY_RESOURCE + ": " + e.getMessage());
            }
        }
    }
}

Listener sample code:

import eu.hlavki.xdxf.parser.event.XDXFArticleEvent;
import eu.hlavki.xdxf.parser.event.XDXFDictionaryEvent;
import eu.hlavki.xdxf.parser.event.XDXFEventListener;
import eu.hlavki.xdxf.parser.model.XDXFDictionary;

/**
 * Event listener that remembers the source of the most recent dictionary
 * event and counts the article events it receives.
 */
public class DictionaryListener implements XDXFEventListener {

    /** Source of the latest dictionary event; {@code null} until one arrives. */
    private XDXFDictionary current;

    /** Number of article events received so far. */
    private int seen;

    /**
     * Records the dictionary carried by the event.
     *
     * @param evt dictionary event whose source is stored
     */
    public void onDictionary(XDXFDictionaryEvent evt) {
        remember(evt);
    }

    /**
     * Replaces the stored dictionary with the one carried by the event.
     *
     * @param evt dictionary event whose source is stored
     */
    public void onDictionaryChange(XDXFDictionaryEvent evt) {
        remember(evt);
    }

    /**
     * Counts one more article; the event content itself is not inspected.
     *
     * @param evt article event (unused)
     */
    public void onArticle(XDXFArticleEvent evt) {
        seen += 1;
    }

    /**
     * @return number of article events received so far
     */
    public int getArticleCount() {
        return seen;
    }

    /**
     * @return source of the latest dictionary event, or {@code null} if none
     *         has been received
     */
    public XDXFDictionary getDictionary() {
        return current;
    }

    /** Stores the event's source as the current dictionary. */
    private void remember(XDXFDictionaryEvent evt) {
        current = evt.getSource();
    }
}

No comments: