使用OpenNLP进行分词及词性标注

1、下载OpenNLP
http://opennlp.apache.org/maven-dependency.html

2、下载模型文件
http://opennlp.sourceforge.net/models-1.5/

3、编写代码，进行分词和词性标注

package com.neohope.opennlp.test;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/**
 * Small OpenNLP demo: splits a sample English text on whitespace and prints
 * each token together with the part-of-speech tag assigned by a maxent model.
 */
public class TestIt {

	/**
	 * Tags a hard-coded sample text and prints the result to {@code System.out}.
	 *
	 * <p>The model is loaded from {@code en-pos-maxent.bin}, a relative path, so
	 * the file is looked up relative to the process working directory. Each line
	 * of the input is tokenized with {@link WhitespaceTokenizer}, which means
	 * punctuation stays attached to the neighbouring word (e.g. {@code "me."}).
	 * Throughput statistics are written to {@code System.err}.
	 *
	 * @throws IOException if reading from or closing the line stream fails
	 */
	// NOTE(review): presumably needed for the Reader-based PlainTextByLineStream
	// constructor, which newer OpenNLP releases deprecate - confirm against the
	// OpenNLP version in use.
	@SuppressWarnings("deprecation")
	public static void POSTag() throws IOException {
		POSModel model = new POSModelLoader()
				.load(new File("en-pos-maxent.bin"));
		PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
		POSTaggerME tagger = new POSTaggerME(model);

		// The sample text contains no line breaks; the curly apostrophes in the
		// third sentence are part of the sample data and are kept verbatim.
		String input = "Don't ever let somebody tell you you can't do something, not even me. "
				+ "You got a dream, you gotta protect it. "
				+ "People can’t do something themselves, they wanna tell you you can’t do it. "
				+ "If you want something, go get it. " + "Period.";
		ObjectStream<String> lineStream = new PlainTextByLineStream(
				new StringReader(input));

		// try/finally (rather than try-with-resources) so the stream is released
		// on every path without requiring ObjectStream to be AutoCloseable.
		try {
			perfMon.start();
			String line;
			while ((line = lineStream.read()) != null) {
				String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
				String[] tags = tagger.tag(tokens);

				// POSSample.toString() renders the pairs as "token_TAG token_TAG ...".
				POSSample sample = new POSSample(tokens, tags);
				System.out.println(sample.toString());

				// One counter tick per processed input line.
				perfMon.incrementCounter();
			}
			perfMon.stopAndPrintFinalResult();
		} finally {
			lineStream.close();
		}
	}

	/**
	 * Entry point; runs the tagging demo.
	 *
	 * @param args ignored
	 * @throws IOException propagated from {@link #POSTag()}
	 */
	public static void main(String[] args) throws IOException {
		POSTag();
	}
}

4、输出结果

Don't_NNP ever_RB let_VB somebody_NN tell_VB you_PRP you_PRP can't_MD do_VB something,_RB not_RB even_RB me._RBR You_PRP got_VBD a_DT dream,_NN you_PRP gotta_VBP protect_VB it._PRP People_NNS can’t_MD do_VB something_NN themselves,_, they_PRP wanna_MD tell_VB you_PRP you_PRP can’t_MD do_VB it._PRP If_IN you_PRP want_VBP something,_NN go_VB get_VB it._PRP Period._.

Leave a Reply

Your email address will not be published. Required fields are marked *

*