这是一个使用Stanford NLP中的SemgrexPattern
的非常简单的示例。
我不明白为什么它找不到与{lemma:/eat/}
匹配的内容,却能找到与{word:/eats/}
匹配的内容。我使用LemmaAnnotation
类来获取动词“eats”的词元(lemma),即“eat”。
谢谢您的帮助 :)
package Project;
import java.io.File;
import java.util.Scanner;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
/**
 * Minimal demonstration of running a {@code SemgrexPattern} over a dependency graph that is
 * built directly from a bracketed parse-tree string.
 */
public class SemgrexDemo {

  /**
   * Builds the uncollapsed dependency graph for a fixed parse tree, prints the graph to
   * stderr, and then prints every match of a lemma-based Semgrex pattern.
   *
   * <p>Nothing in this method assigns lemmas to the graph nodes, so the
   * {@code {lemma:/eat/}} constraint only matches if the graph construction itself
   * supplies lemmas. NOTE(review): it apparently does not; confirm against the CoreNLP
   * version in use.
   *
   * @param args command-line arguments (ignored)
   */
  public static void main(String[] args) {
    String treeString = "(ROOT (S (NP (NNP John)) (VP (VBZ eats) (NP (NN pizza))) (. .)))";
    Tree tree = Tree.valueOf(treeString);
    SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    System.err.println(graph);

    // A = any node linked to B through a dobj relation; B = a node whose lemma is "eat".
    SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<dobj=reln {lemma:/eat/}=B");
    SemgrexMatcher matcher = semgrex.matcher(graph);
    while (matcher.find()) {
      System.err.println(matcher.getNode("A") + " <<dobj " + matcher.getNode("B"));
    }
  }
}
最佳答案
将树字符串解析为Tree对象时,词元(lemma)不会自动添加到各个词(token)上,因此SemanticGraph
中所有节点的lemma属性均为null,
于是{lemma:/eat/}
与任何节点都不匹配。
您可以使用Morphology
类的lemma(String word, String pos)
方法添加词元:
/**
 * Builds the uncollapsed dependency graph for a fixed parse tree, annotates every node
 * with its lemma, prints the graph to stderr, and then prints every match of a
 * lemma-based Semgrex pattern.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
  String treeString = "(ROOT (S (NP (NNP John)) (VP (VBZ eats) (NP (NN pizza))) (. .)))";
  Tree tree = Tree.valueOf(treeString);
  SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);

  // Nothing above sets a lemma on the nodes, so derive one from each node's word form
  // and POS tag before matching on the lemma attribute.
  Morphology morphology = new Morphology();
  for (IndexedWord node : graph.vertexSet()) {
    String lemma = morphology.lemma(node.word(), node.tag());
    node.setLemma(lemma);
  }
  System.err.println(graph);

  // A = any node linked to B through a dobj relation; B = a node whose lemma is "eat".
  SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<dobj=reln {lemma:/eat/}=B");
  SemgrexMatcher matcher = semgrex.matcher(graph);
  while (matcher.find()) {
    System.err.println(matcher.getNode("A") + " <<dobj " + matcher.getNode("B"));
  }
}