热门标签 | HotTags
当前位置:  开发笔记 > 编程语言 > 正文

org.apache.lucene.analysis.de.GermanAnalyzer类的使用及代码示例

本文整理了Java中org.apache.lucene.analysis.de.GermanAnalyzer类的一些代码示例,展示了GermanAnalyzer类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台。

本文整理了Java中org.apache.lucene.analysis.de.GermanAnalyzer类的一些代码示例,展示了GermanAnalyzer类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。GermanAnalyzer类的具体详情如下:
包路径:org.apache.lucene.analysis.de.GermanAnalyzer
类名称:GermanAnalyzer

GermanAnalyzer介绍

[英]Analyzer for German language.

Supports an external list of stopwords (words that will not be indexed at all) and an external list of exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an alternative list is specified, but the exclusion list is empty by default.

NOTE: This class uses the same org.apache.lucene.util.Version-dependent settings as StandardAnalyzer.
[中]德语分析器。
支持外部停用词列表(完全不会被编入索引的词)和外部排除项列表(不做词干提取但仍会被编入索引的词)。除非指定了替代列表,否则将使用默认的停用词集,但默认情况下排除列表为空。
注意:此类使用与StandardAnalyzer相同的、依赖于org.apache.lucene.util.Version的设置。

代码示例

代码示例来源:origin: neo4j/neo4j

/**
 * Supplies a German-language analyzer built with {@link GermanAnalyzer}'s
 * no-argument constructor.
 *
 * @return a newly constructed {@link GermanAnalyzer}
 */
@Override
public Analyzer createAnalyzer()
{
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    return germanAnalyzer;
}

代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch

public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}

代码示例来源:origin: Stratio/cassandra-lucene-index

/**
 * Returns the default stop-word set exposed by {@link GermanAnalyzer}.
 *
 * @return the result of {@link GermanAnalyzer#getDefaultStopSet()}
 */
@Override
protected CharArraySet build() {
    final CharArraySet germanStopWords = GermanAnalyzer.getDefaultStopSet();
    return germanStopWords;
}
},

代码示例来源:origin: org.infinispan/infinispan-embedded-query

/**
 * Assembles the German analysis chain.
 * <p>
 * The tokenizer is a {@link StandardTokenizer} when the configured version is
 * {@link Version#LUCENE_4_7_0} or later, and a {@link StandardTokenizer40} otherwise.
 * Its output is passed, in order, through {@link StandardFilter}, {@link LowerCaseFilter},
 * {@link StopFilter} (with {@code stopwords}), {@link SetKeywordMarkerFilter}
 * (with {@code exclusionSet}), {@link GermanNormalizationFilter} and
 * {@link GermanLightStemFilter}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this implementation
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         pairing the tokenizer with the fully filtered stream
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final boolean useCurrentTokenizer = getVersion().onOrAfter(Version.LUCENE_4_7_0);
    final Tokenizer tokenizer = useCurrentTokenizer ? new StandardTokenizer() : new StandardTokenizer40();

    TokenStream chain = new StandardFilter(tokenizer);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, stopwords);
    chain = new SetKeywordMarkerFilter(chain, exclusionSet);
    chain = new GermanNormalizationFilter(chain);
    chain = new GermanLightStemFilter(chain);

    return new TokenStreamComponents(tokenizer, chain);
}
}

代码示例来源:origin: org.codelibs.elasticsearch.module/analysis-common

GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}

代码示例来源:origin: crosswire/jsword

/**
 * Creates the analyzer and sets {@code stopSet} to the default stop-word set
 * returned by {@code GermanAnalyzer.getDefaultStopSet()}.
 */
public GermanLuceneAnalyzer() {
stopSet = GermanAnalyzer.getDefaultStopSet();
}

代码示例来源:origin: apache/servicemix-bundles

public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}

代码示例来源:origin: rnewson/couchdb-lucene

/**
 * Creates a German analyzer using {@link GermanAnalyzer}'s no-argument constructor.
 *
 * @param args not consulted by this implementation
 * @return a newly constructed {@link GermanAnalyzer}
 */
@Override
public Analyzer newAnalyzer(final String args) {
    final Analyzer german = new GermanAnalyzer();
    return german;
}
@Override

代码示例来源:origin: com.scireum/sirius-nlp-main

/**
 * Constructs a new instance of the analyzer, using the default German stop-word set.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanPrimaryWordExtractionAnalyzer(Dictionary hunspellDict,
                                           SynonymMap stemExceptions,
                                           HyphenationTree hyphen,
                                           CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // Field name restored: the published snippet had it garbled as "stemExceptiOns".
    this.stemExceptions = stemExceptions;
    this.hyphen = hyphen;
    this.wordlist = wordlist;
    this.hunspellDict = hunspellDict;
}

代码示例来源:origin: rnewson/couchdb-lucene

/**
 * Creates a German analyzer using {@link GermanAnalyzer}'s no-argument constructor.
 *
 * @param args not consulted by this implementation
 * @return a newly constructed {@link GermanAnalyzer}
 */
@Override
public Analyzer newAnalyzer(final JSONObject args) {
    final Analyzer german = new GermanAnalyzer();
    return german;
}
},

代码示例来源:origin: com.scireum/sirius-nlp-main

/**
 * Constructs a new instance of the analyzer, using the default German stop-word set.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanAutocompleteIndexingAnalyzer(Dictionary hunspellDict,
                                          SynonymMap stemExceptions,
                                          HyphenationTree hyphen,
                                          CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // Field name restored: the published snippet had it garbled as "stemExceptiOns".
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}

代码示例来源:origin: Stratio/cassandra-lucene-index

/**
 * Builds a German analyzer using {@link GermanAnalyzer}'s no-argument constructor.
 *
 * @return a newly constructed {@link GermanAnalyzer}
 */
@Override
protected Analyzer build() {
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    return germanAnalyzer;
}
},

代码示例来源:origin: com.scireum/sirius-nlp-main

/**
 * Constructs a new instance of the analyzer, using the default German stop-word set.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanIndexingAnalyzer(Dictionary hunspellDict,
                              SynonymMap stemExceptions,
                              HyphenationTree hyphen,
                              CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // Field name restored: the published snippet had it garbled as "stemExceptiOns".
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}

代码示例来源:origin: blazegraph/database

/**
 * Creates a German analyzer, with or without stop-word filtering.
 *
 * @param filterStopwords {@code true} to build the analyzer with its no-argument
 *                        constructor; {@code false} to build it with {@code emptyStopwords}
 * @return a newly constructed {@link GermanAnalyzer}
 */
public Analyzer newInstance(final boolean filterStopwords) {
    if (filterStopwords) {
        return new GermanAnalyzer();
    }
    return new GermanAnalyzer(emptyStopwords);
}
};

代码示例来源:origin: com.scireum/sirius-nlp-main

/**
 * Constructs a new instance of the analyzer, using the default German stop-word set.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanSuggestAnalyzer(Dictionary hunspellDict,
                             SynonymMap stemExceptions,
                             HyphenationTree hyphen,
                             CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // Field name restored: the published snippet had it garbled as "stemExceptiOns".
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}

代码示例来源:origin: com.blazegraph/bigdata-core

/**
 * Produces a German analyzer whose stop-word handling depends on the flag.
 *
 * @param filterStopwords when {@code true}, the analyzer is created with its
 *                        no-argument constructor; when {@code false}, it is
 *                        created with {@code emptyStopwords}
 * @return a newly constructed {@link GermanAnalyzer}
 */
public Analyzer newInstance(final boolean filterStopwords) {
    final Analyzer german;
    if (filterStopwords) {
        german = new GermanAnalyzer();
    } else {
        german = new GermanAnalyzer(emptyStopwords);
    }
    return german;
}
};

代码示例来源:origin: com.scireum/sirius-nlp-main

int minLengthEdit1,
int minLengthEdit2) {
super(GermanAnalyzer.getDefaultStopSet());
this.stemExceptiOns= stemExceptions;
this.updateSynOnymsService= updateSynonymsService;

代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch

/**
 * Creates a German analyzer and sets its version from the given version.
 *
 * @param version version whose {@code luceneVersion} is applied to the analyzer
 * @return the configured {@link GermanAnalyzer}
 */
@Override
protected Analyzer create(Version version) {
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    germanAnalyzer.setVersion(version.luceneVersion);
    return germanAnalyzer;
}
},

代码示例来源:origin: omegat-org/omegat

/**
 * Produces a token stream for the given text.
 * <p>
 * Without stemming, the text is handed to {@code getStandardTokenStream}. With
 * stemming, a {@code Lucene30GermanAnalyzer} tokenizes the text, built with either
 * the default German stop-word set or an empty set.
 *
 * @param strOrig          text to tokenize
 * @param stemsAllowed     whether the stemming analyzer should be used
 * @param stopWordsAllowed when stemming, whether the default German stop-word set
 *                         ({@code true}) or an empty set ({@code false}) is used
 * @return the token stream over {@code strOrig}
 * @throws IOException declared by the method signature
 */
@SuppressWarnings("resource")
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) throws IOException {
    if (!stemsAllowed) {
        return getStandardTokenStream(strOrig);
    }

    final CharArraySet stopWords;
    if (stopWordsAllowed) {
        stopWords = GermanAnalyzer.getDefaultStopSet();
    } else {
        stopWords = CharArraySet.EMPTY_SET;
    }

    final Lucene30GermanAnalyzer stemmingAnalyzer = new Lucene30GermanAnalyzer(stopWords);
    return stemmingAnalyzer.tokenStream("", new StringReader(strOrig));
}

代码示例来源:origin: stackoverflow.com

// Requests a token stream from TokenSources for the RunSearch.CONTENT field, passing the
// term vectors read for docId, the text txt, and a new GermanAnalyzer.
// NOTE(review): the trailing -1 is presumably "no maximum start offset" — confirm against the TokenSources API.
tokenStream = TokenSources.getTokenStream(RunSearch.CONTENT, reader.getTermVectors(docId), txt, new GermanAnalyzer(), -1);

推荐阅读
author-avatar
king_her灬o1
这个家伙很懒,什么也没留下!
PHP1.CN | 中国最专业的PHP中文社区 | DevBox开发工具箱 | json解析格式化 |PHP资讯 | PHP教程 | 数据库技术 | 服务器技术 | 前端开发技术 | PHP框架 | 开发工具 | 在线工具
Copyright © 1998 - 2020 PHP1.CN. All Rights Reserved | 京公网安备 11010802041100号 | 京ICP备19059560号-4 | PHP1.CN 第一PHP社区 版权所有