本文整理了Java中org.apache.lucene.analysis.de.GermanAnalyzer
类的一些代码示例,展示了GermanAnalyzer
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。GermanAnalyzer
类的具体详情如下:
包路径:org.apache.lucene.analysis.de.GermanAnalyzer
类名称:GermanAnalyzer
[英]Analyzer for German language.
Supports an external list of stopwords (words that will not be indexed at all) and an external list of exclusions (word that will not be stemmed, but indexed). A default set of stopwords is used unless an alternative list is specified, but the exclusion list is empty by default.
NOTE: This class uses the same org.apache.lucene.util.Version-dependent settings as StandardAnalyzer.
[中]德语分析器。
支持外部停止字列表(完全不编入索引的字)和外部排除项列表(不编入词干但编入索引的字)。除非指定了替代列表,否则将使用默认的停止字集,但默认情况下排除列表为空。
注意:此类使用与StandardAnalyzer相同的org.apache.lucene.util.Version相关设置。
代码示例来源:origin: neo4j/neo4j
/**
 * Builds a German-language {@link Analyzer} with Lucene's default stop-word set.
 *
 * @return a freshly constructed {@link GermanAnalyzer}
 */
@Override
public Analyzer createAnalyzer()
{
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    return germanAnalyzer;
}
代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch
public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
代码示例来源:origin: Stratio/cassandra-lucene-index
/**
 * @return Lucene's default German stop-word set
 */
@Override
protected CharArraySet build() {
    final CharArraySet defaultStopWords = GermanAnalyzer.getDefaultStopSet();
    return defaultStopWords;
}
},
代码示例来源:origin: org.infinispan/infinispan-embedded-query
/**
 * Assembles the token-stream pipeline used to tokenize all the text in the
 * provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} if a stem exclusion set is provided,
 *         {@link GermanNormalizationFilter} and {@link GermanLightStemFilter}
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Lucene 4.7 changed StandardTokenizer behaviour; older index versions
    // must keep using the legacy tokenizer for compatibility.
    final Tokenizer tokenizer = getVersion().onOrAfter(Version.LUCENE_4_7_0)
            ? new StandardTokenizer()
            : new StandardTokenizer40();
    TokenStream stream = new StandardFilter(tokenizer);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, stopwords);
    // Words in the exclusion set are marked as keywords so they skip stemming.
    stream = new SetKeywordMarkerFilter(stream, exclusionSet);
    stream = new GermanNormalizationFilter(stream);
    stream = new GermanLightStemFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}
}
代码示例来源:origin: org.codelibs.elasticsearch.module/analysis-common
GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
代码示例来源:origin: crosswire/jsword
/**
 * Initialises the analyzer with Lucene's default German stop-word set.
 */
public GermanLuceneAnalyzer() {
    this.stopSet = GermanAnalyzer.getDefaultStopSet();
}
代码示例来源:origin: apache/servicemix-bundles
public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, GermanAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
代码示例来源:origin: rnewson/couchdb-lucene
/**
 * @param args analyzer configuration arguments (unused for German)
 * @return a new {@link GermanAnalyzer} with default settings
 */
@Override
public Analyzer newAnalyzer(final String args) {
    final Analyzer german = new GermanAnalyzer();
    return german;
}
@Override
代码示例来源:origin: com.scireum/sirius-nlp-main
/**
 * Constructs a new instance of the analyzer.
 *
 * @param hunspellDict   a hunspell wordlist
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanPrimaryWordExtractionAnalyzer(Dictionary hunspellDict,
                                           SynonymMap stemExceptions,
                                           HyphenationTree hyphen,
                                           CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // NOTE(review): original text read "this.stemExceptiOns=" — an apparent
    // transcription artifact; assumes the field is named stemExceptions.
    this.stemExceptions = stemExceptions;
    this.hyphen = hyphen;
    this.wordlist = wordlist;
    this.hunspellDict = hunspellDict;
}
代码示例来源:origin: rnewson/couchdb-lucene
/**
 * @param args JSON configuration arguments (unused for German)
 * @return a new {@link GermanAnalyzer} with default settings
 */
@Override
public Analyzer newAnalyzer(final JSONObject args) {
    final Analyzer german = new GermanAnalyzer();
    return german;
}
},
代码示例来源:origin: com.scireum/sirius-nlp-main
/**
 * Constructs a new instance of the analyzer.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanAutocompleteIndexingAnalyzer(Dictionary hunspellDict,
                                          SynonymMap stemExceptions,
                                          HyphenationTree hyphen,
                                          CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // NOTE(review): original text read "this.stemExceptiOns=" — an apparent
    // transcription artifact; assumes the field is named stemExceptions.
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}
代码示例来源:origin: Stratio/cassandra-lucene-index
/**
 * @return a {@link GermanAnalyzer} configured with its defaults
 */
@Override
protected Analyzer build() {
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    return germanAnalyzer;
}
},
代码示例来源:origin: com.scireum/sirius-nlp-main
/**
 * Constructs a new instance of the analyzer.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanIndexingAnalyzer(Dictionary hunspellDict,
                              SynonymMap stemExceptions,
                              HyphenationTree hyphen,
                              CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // NOTE(review): original text read "this.stemExceptiOns=" — an apparent
    // transcription artifact; assumes the field is named stemExceptions.
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}
代码示例来源:origin: blazegraph/database
/**
 * @param filterStopwords whether the returned analyzer should filter stop words
 * @return a German analyzer with default stop words, or with an empty stop-word
 *         set when filtering is disabled
 */
public Analyzer newInstance(final boolean filterStopwords) {
    if (filterStopwords) {
        return new GermanAnalyzer();
    }
    return new GermanAnalyzer(emptyStopwords);
}
};
代码示例来源:origin: com.scireum/sirius-nlp-main
/**
 * Constructs a new instance of the analyzer.
 *
 * @param hunspellDict   a hunspell dictionary
 * @param stemExceptions a map of words which should be stemmed in a special way
 * @param hyphen         a hyphenation definition for compound word splitting
 * @param wordlist       a word list for compound splitting
 */
public GermanSuggestAnalyzer(Dictionary hunspellDict,
                             SynonymMap stemExceptions,
                             HyphenationTree hyphen,
                             CharArraySet wordlist) {
    super(GermanAnalyzer.getDefaultStopSet());
    // NOTE(review): original text read "this.stemExceptiOns=" — an apparent
    // transcription artifact; assumes the field is named stemExceptions.
    this.stemExceptions = stemExceptions;
    this.wordlist = wordlist;
    this.hyphen = hyphen;
    this.hunspellDict = hunspellDict;
}
代码示例来源:origin: com.blazegraph/bigdata-core
/**
 * @param filterStopwords whether the returned analyzer should filter stop words
 * @return a German analyzer with default stop words, or with an empty stop-word
 *         set when filtering is disabled
 */
public Analyzer newInstance(final boolean filterStopwords) {
    final Analyzer result;
    if (filterStopwords) {
        result = new GermanAnalyzer();
    } else {
        result = new GermanAnalyzer(emptyStopwords);
    }
    return result;
}
};
代码示例来源:origin: com.scireum/sirius-nlp-main
int minLengthEdit1,
int minLengthEdit2) {
super(GermanAnalyzer.getDefaultStopSet());
this.stemExceptiOns= stemExceptions;
this.updateSynOnymsService= updateSynonymsService;
代码示例来源:origin: com.strapdata.elasticsearch/elasticsearch
/**
 * @param version the Elasticsearch version whose matching Lucene version is
 *                applied to the analyzer
 * @return a new {@link GermanAnalyzer} pinned to the given Lucene version
 */
@Override
protected Analyzer create(Version version) {
    final Analyzer germanAnalyzer = new GermanAnalyzer();
    germanAnalyzer.setVersion(version.luceneVersion);
    return germanAnalyzer;
}
},
代码示例来源:origin: omegat-org/omegat
/**
 * Produces a token stream for the given text, optionally stemming and
 * optionally filtering stop words.
 *
 * @param strOrig          the text to tokenize
 * @param stemsAllowed     when {@code true}, use the German stemming analyzer;
 *                         otherwise fall back to the standard token stream
 * @param stopWordsAllowed when {@code true}, apply the default German stop words
 * @return the configured token stream
 * @throws IOException if tokenization fails
 */
@SuppressWarnings("resource")
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) throws IOException {
    // Guard clause: without stemming, the plain standard tokenizer suffices.
    if (!stemsAllowed) {
        return getStandardTokenStream(strOrig);
    }
    final CharArraySet stopWords;
    if (stopWordsAllowed) {
        stopWords = GermanAnalyzer.getDefaultStopSet();
    } else {
        stopWords = CharArraySet.EMPTY_SET;
    }
    return new Lucene30GermanAnalyzer(stopWords).tokenStream("", new StringReader(strOrig));
}
代码示例来源:origin: stackoverflow.com
tokenStream = TokenSources.getTokenStream(RunSearch.CONTENT, reader.getTermVectors(docId), txt, new GermanAnalyzer(), -1);