diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..503dac2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +build +opensearch-hebrew-analyser/.gradle \ No newline at end of file diff --git a/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties b/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties deleted file mode 100644 index 6ca3b56..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties +++ /dev/null @@ -1 +0,0 @@ -plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin \ No newline at end of file diff --git a/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties b/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties deleted file mode 100644 index 7d85f7c..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties +++ /dev/null @@ -1,7 +0,0 @@ -jvm=true -name=elasticsearch-analysis-hebrew -description=elasticsearch-analysis-hebrew -classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin -elasticsearch.version=8.5.3 -java.version=17 -version=1.0 diff --git a/Hebrew-ElasticSearch-ngrams-3-words/pom.xml b/Hebrew-ElasticSearch-ngrams-3-words/pom.xml deleted file mode 100644 index 66b0922..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/pom.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - 4.0.0 - - com.hotstar - elasticsearch-hebrew-ngram-3 - 8.5.3 - jar - elasticsearch-analysis-hebrew - - - UTF-8 - 7.4.0 - 8.5.3 - 1.0.0 - 1.8 - 1.8 - - - - - org.apache.lucene - lucene-test-framework - ${lucene.version} - test - - - org.apache.lucene - lucene-core - ${lucene.version} - provided - - - org.apache.lucene - lucene-analyzers-common - ${lucene.version} - provided - - - org.elasticsearch - elasticsearch - ${elasticsearch.version} - provided - - - com.google.guava - guava - 17.0 - provided - - - junit - junit - 4.11 - test - - - \ No newline at end of file diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java 
b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java deleted file mode 100644 index cd5cb56..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.hotstar.hebrew.plugin; -import org.elasticsearch.index.analysis.AnalyzerProvider; -import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.plugins.AnalysisPlugin; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; -import org.apache.lucene.analysis.Analyzer; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.singletonMap; - -public class HebrewAnalyzerEsPlugin extends Plugin implements AnalysisPlugin { - - @Override - public Map> getTokenFilters() { - Map> tokenFilters = new HashMap<>(); - tokenFilters.put("hebrew_stop", HebrewNoOpTokenFilterFactory::new); - tokenFilters.put("hebrew_word", HebrewNoOpTokenFilterFactory::new); - return tokenFilters; - } - - @Override - public Map> getTokenizers() { - Map> extra = new HashMap<>(); - extra.put("hebrew_tokenizer", HebrewTokenizerTokenizerFactory::new); - extra.put("hebrew_sentence", HebrewTokenizerTokenizerFactory::new); - return extra; - } - - @Override - public Map>> getAnalyzers() { - return singletonMap("hebrew-ngram-3-analyzer", HebrewAnalyzerProvider::new); - } -} diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java deleted file mode 100644 index 1957c7f..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.hotstar.hebrew.plugin; - - -import 
org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; -import com.hotstar.hebrew.analysis.HebrewAnalyzer; - - -public class HebrewAnalyzerProvider extends AbstractIndexAnalyzerProvider { - - /* Constructor. Nothing special here. */ - public HebrewAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(name, settings); - analyzer = new HebrewAnalyzer(); - } - - /* This function needs to be overridden to return an instance of PlusSignAnalyzer. */ - public HebrewAnalyzer get() { - return this.analyzer; - } - - /* Instance of PlusSignAnalyzer class that is returned by this class. */ - protected HebrewAnalyzer analyzer; - - /* Name to associate with this class. We will use this in PlusSignBinderProcessor. */ - public static final String NAME = "hebrew-ngram-3-analyzer"; -} diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java deleted file mode 100644 index 4a73a41..0000000 --- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.hotstar.hebrew.plugin; - - -import org.apache.lucene.analysis.TokenStream; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; - -public class HebrewNoOpTokenFilterFactory extends AbstractTokenFilterFactory { - public HebrewNoOpTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(name, settings); - } - - @Override - public TokenStream 
create(TokenStream tokenStream) { - return tokenStream; - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/es-plugin.properties b/Hebrew-ElasticSearch-semi-exact/es-plugin.properties deleted file mode 100644 index 6ca3b56..0000000 --- a/Hebrew-ElasticSearch-semi-exact/es-plugin.properties +++ /dev/null @@ -1 +0,0 @@ -plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin \ No newline at end of file diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar b/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar deleted file mode 100644 index cbf5760..0000000 Binary files a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar and /dev/null differ diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties b/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties deleted file mode 100644 index 0dfbbea..0000000 --- a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties +++ /dev/null @@ -1,8 +0,0 @@ -version=8.5.3 -name=elasticsearch-hebrew -description=elasticsearch-analysis-hebrew -classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin -java.version=17 -elasticsearch.version=8.5.3 -extended.plugins= -has.native.controller=false diff --git a/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties b/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties deleted file mode 100644 index 7d85f7c..0000000 --- a/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties +++ /dev/null @@ -1,7 +0,0 @@ -jvm=true -name=elasticsearch-analysis-hebrew -description=elasticsearch-analysis-hebrew -classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin -elasticsearch.version=8.5.3 -java.version=17 -version=1.0 diff --git 
a/Hebrew-ElasticSearch-semi-exact/pom.xml b/Hebrew-ElasticSearch-semi-exact/pom.xml deleted file mode 100644 index ff58433..0000000 --- a/Hebrew-ElasticSearch-semi-exact/pom.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - 4.0.0 - - com.hotstar - elasticsearch-hebrew-semi-exact-analyzer - 8.5.3 - jar - elasticsearch-hebrew-semi-exact-analyzer - - - UTF-8 - 7.4.0 - 8.5.3 - 1.0.0 - 1.8 - 1.8 - - - - - org.apache.lucene - lucene-test-framework - ${lucene.version} - test - - - org.apache.lucene - lucene-core - ${lucene.version} - provided - - - org.apache.lucene - lucene-analyzers-common - ${lucene.version} - provided - - - org.elasticsearch - elasticsearch - ${elasticsearch.version} - provided - - - com.google.guava - guava - 17.0 - provided - - - junit - junit - 4.11 - test - - - \ No newline at end of file diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java deleted file mode 100644 index baecbd9..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import java.io.IOException; - -public class EmptyStringTokenFilter extends TokenFilter { - - /* The constructor for our custom token filter just calls the TokenFilter constructor; that - * constructor saves the token stream in a variable named this.input. - */ - public EmptyStringTokenFilter(TokenStream tokenStream) { - super(tokenStream); - } - - /* Like the PlusSignTokenizer class, we are going to save the text of the current token - * in a CharTermAttribute object. 
In addition, we are going to use a - * PositionIncrementAttribute object to store the position increment of the token. Lucene - * uses this latter attribute to determine the position of a token. Given a token stream with - * “This”, “is”, “”, “some”, and “text”, we are going to ensure that “This” is saved at - * position 1, “is” at position 2, “some” at position 3, and “text” at position 4. Note that - * we have completely ignored the empty string at what was position 3 in the original stream. - */ - protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); - protected PositionIncrementAttribute positionIncrementAttribute = - addAttribute(PositionIncrementAttribute.class); - - /* Like we did in the PlusSignTokenizer class, we need to override the incrementToken() - * function to save the attributes of the current token. We are going to pass over any - * tokens that are empty strings and save all others without modifying them. This function - * should return true if a new token was generated and false if the last token was passed. - */ - @Override - public boolean incrementToken() throws IOException { - - // Loop over tokens in the token stream to find the next one that is not empty - String nextToken = null; - while (nextToken == null) { - - // Reached the end of the token stream being processed - if ( ! this.input.incrementToken()) { - return false; - } - - // Get text of the current token and remove any leading/trailing whitespace. 
- String currentTokenInStream = - this.input.getAttribute(CharTermAttribute.class).toString().trim(); - - // Save the token if it is not an empty string - if (currentTokenInStream.length() > 0) { - nextToken = currentTokenInStream; - } - } - - // Save the current token - this.charTermAttribute.setEmpty().append(nextToken); - this.positionIncrementAttribute.setPositionIncrement(1); - return true; - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java deleted file mode 100644 index b091b8f..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.hotstar.hebrew.analysis; -import org.apache.lucene.util.Attribute; - -/** - * This attribute is used to pass info on tokens as parsed and identified - * by the HebMorph tokenizer - */ -public interface HebrewTokenTypeAttribute extends Attribute{ - enum HebrewType { - Unknown - } - - void setType(HebrewType type); - HebrewType getType(); - boolean isExact(); - void setExact(boolean isExact); -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java deleted file mode 100644 index 693fbc0..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.AttributeReflector; - -/** - * Created by Egozy on 19/04/2015. 
- */ -public class HebrewTokenTypeAttributeImpl extends AttributeImpl implements HebrewTokenTypeAttribute { - private HebrewType type = HebrewType.Unknown; - private boolean isExact = false; - public void setType(HebrewType type) { - this.type = type; - } - - public HebrewType getType() { - return type; - } - - public boolean isExact() { - return isExact; - } - - public void setExact(boolean isExact) { - this.isExact = isExact; - } - - public void clear() { - type = HebrewType.Unknown; - isExact = false; - } - - @Override - public void reflectWith(AttributeReflector reflector) { - reflector.reflect(KeywordAttribute.class, "isExact", isExact); - reflector.reflect(KeywordAttribute.class, "type", type); - } - - public void copyTo(AttributeImpl target) { - ((HebrewTokenTypeAttribute) target).setType(type); - } -} - diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java deleted file mode 100644 index 5682a3d..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import java.util.ArrayList; -import java.util.List; - -/** - * Created by nss on 2/20/17. 
- */ -public class NGramizer { - - private int n; - - public NGramizer(int n) { - this.n = n; - } - - public List ngramize(String s) { - List ngrams = new ArrayList(); - - for (int i = 0; i <= s.length() - n; i++) { - ngrams.add(s.substring(i,i+n)); - } - - return ngrams; - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java deleted file mode 100644 index 45c7ee8..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java +++ /dev/null @@ -1,70 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -public class PluralFilter extends TokenFilter { - - private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); - private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class); - private PositionIncrementAttribute positionIncrementAttribute = - addAttribute(PositionIncrementAttribute.class); - private List previousTokens; - private Pattern pluralPat; - - public PluralFilter(TokenStream tokenStream) { - super(tokenStream); - this.previousTokens = new ArrayList(); - this.pluralPat = Pattern.compile("(.{3,})(ים|ות)$"); - } - - - - @Override - public boolean incrementToken() throws IOException { - - if (!previousTokens.isEmpty()) { - this.charTermAttribute.setEmpty().append(previousTokens.remove(0)); - this.positionIncrementAttribute.setPositionIncrement(0); - this.hebTokAttribute.setExact(false); - return true; - } - - // Loop over tokens in the token stream to find the 
next one that is not empty - String nextToken = null; - while (nextToken == null) { - - // Reached the end of the token stream being processed - if ( ! this.input.incrementToken()) { - return false; - } - - // Get text of the current token and remove any leading/trailing whitespace. - String currentTokenInStream = - this.input.getAttribute(CharTermAttribute.class).toString().trim(); - - // Save the token if it is not an empty string - if (currentTokenInStream.length() > 0) { - nextToken = currentTokenInStream; - } - } - - previousTokens.add(filterPlural(nextToken)); - - // Save the current token - this.charTermAttribute.setEmpty().append(nextToken).append('$'); - this.positionIncrementAttribute.setPositionIncrement(1); - this.hebTokAttribute.setExact(true); - return true; - } - - private String filterPlural(String in) { - return in.replaceFirst("(.{3,})(ים|ות)$","$1"); - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java deleted file mode 100644 index 3c094ae..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public class SefariaNGramTokenFilter extends TokenFilter { - - public static final char FINAL_CHAR = '$'; - - private NGramizer ngramizer; - - - public SefariaNGramTokenFilter(TokenStream tokenStream, int n) { - super(tokenStream); - ngramizer = new NGramizer(n); - } - private CharTermAttribute charTermAttribute = 
addAttribute(CharTermAttribute.class); - private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class); - private PositionIncrementAttribute positionIncrementAttribute = - addAttribute(PositionIncrementAttribute.class); - - private List previousTokens = new ArrayList(); - - - @Override - public boolean incrementToken() throws IOException { - - if (savePrevToken()) - return true; - - // Reached the end of the token stream being processed - if ( ! this.input.incrementToken()) { - return false; - } - - // Get text of the current token and remove any leading/trailing whitespace. - String currToken = - this.input.getAttribute(CharTermAttribute.class).toString().trim(); - - if (! hebTokAttribute.isExact()) { - List ngrams = ngramizer.ngramize(currToken); - for (String ngram : ngrams) { - previousTokens.add(ngram); - } - - savePrevToken(); - } - - return true; - } - - private boolean savePrevToken() { - if (!previousTokens.isEmpty()) { - this.charTermAttribute.setEmpty(); - this.charTermAttribute.append(previousTokens.remove(0)); - this.positionIncrementAttribute.setPositionIncrement(0); - this.hebTokAttribute.setExact(false); - return true; - } else { - return false; - } - } -} - diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java deleted file mode 100644 index 20fcfa3..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java +++ /dev/null @@ -1,98 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import java.io.IOException; -import java.io.Reader; - -public class SefariaTokenizer extends Tokenizer { - - /* Lucene uses attributes to store information about a single token. 
For this tokenizer, the - * only attribute that we are going to use is the CharTermAttribute, which can store the text - * for the token that is generated. Other types of attributes exist (see interfaces and - * classes derived from org.apache.lucene.util.Attribute); we will use some of these other - * attributes when we build our custom token filter. It is important that you register - * attributes, whatever their type, using the addAttribute() function. - */ - protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); - - /* This is the important function to override from the Tokenizer class. At each call, it - * should set the value of this.charTermAttribute to the text of the next token. It returns - * true if a new token is generated and false if there are no more tokens remaining. - */ - @Override - public boolean incrementToken() throws IOException { - - // Clear anything that is already saved in this.charTermAttribute - this.charTermAttribute.setEmpty(); - - // Get the position of the next + symbol - int nextIndex = this.stringToTokenize.indexOf('+', this.position); - - // Execute this block if a plus symbol was found. Save the token and the - // position to start at when incrementToken() is next called. - if (nextIndex != -1) { - String nextToken = this.stringToTokenize.substring(this.position, nextIndex); - this.charTermAttribute.append(nextToken); - this.position = nextIndex + 1; - return true; - } - - // Execute this block if no more + signs are found, but there is still some text - // remaining in the string. For example, this saves “text” in “This+is++some+text”. - else if (this.position < this.stringToTokenize.length()) { - String nextToken = this.stringToTokenize.substring(this.position); - this.charTermAttribute.append(nextToken); - this.position = this.stringToTokenize.length(); - return true; - } - - // Execute this block if no more tokens exist in the string. 
- else { - return false; - } - } - - /* This is the constructor for our custom tokenizer class. It takes all information from a - * java.io.Reader object and stores it in a string. If you are expecting very large blocks of - * text, you might want to think about using a buffer and saving chunks from the reader - * whenever incrementToken() is called. This function throws a RuntimeException when an - * IOException is found - you can choose how you want to deal with the IOException, but - * for our purposes, we do not need to try to recover from it. - */ - public SefariaTokenizer() { - super(); - } - - /* Reset the stored position for this object when reset() is called. - */ - @Override - public void reset() throws IOException { - super.reset(); - this.position = 0; - - int numChars; - char[] buffer = new char[1024]; - StringBuilder stringBuilder = new StringBuilder(); - - try { - while ((numChars = this.input.read(buffer, 0, buffer.length)) != -1) { - stringBuilder.append(buffer, 0, numChars); - } - } - catch (IOException e) { - throw new RuntimeException(e); - } - this.stringToTokenize = stringBuilder.toString(); - } - - /* This object stores the string that we are turning into tokens. We will process its content - * as we call the incrementToken() function. - */ - protected String stringToTokenize; - - /* This stores the current position in this.stringToTokenize. We will increment its value as - * we call the incrementToken() function. 
- */ - protected int position = 0; -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java deleted file mode 100644 index 8f5754b..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.hotstar.hebrew.analysis; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -import java.io.IOException; - -public class StopLetterFilter extends TokenFilter { - - private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); - private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class); - private PositionIncrementAttribute positionIncrementAttribute = - addAttribute(PositionIncrementAttribute.class); - private String stopLettersPat; - - public StopLetterFilter(TokenStream tokenStream, char[] stopLetters) { - super(tokenStream); - this.stopLettersPat = "["; - for (char ch : stopLetters) { - stopLettersPat += ch; - } - this.stopLettersPat += "]"; - } - - @Override - public boolean incrementToken() throws IOException { - // Reached the end of the token stream being processed - if ( ! this.input.incrementToken()) { - return false; - } - - String currToken = - this.input.getAttribute(CharTermAttribute.class).toString().trim(); - if ( ! 
this.hebTokAttribute.isExact()) { - this.charTermAttribute.setEmpty().append(filterStopLetters(currToken)); - this.hebTokAttribute.setExact(false); - } - - return true; - } - - private String filterStopLetters(String in) { - return in.replaceAll(this.stopLettersPat, ""); - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java deleted file mode 100644 index a09a369..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.hotstar.hebrew.plugin; -import org.elasticsearch.index.analysis.AnalyzerProvider; -import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.plugins.AnalysisPlugin; -import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; -import org.apache.lucene.analysis.Analyzer; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.singletonMap; - -public class HebrewAnalyzerEsPlugin extends Plugin implements AnalysisPlugin { - - @Override - public Map> getTokenFilters() { - Map> tokenFilters = new HashMap<>(); - return tokenFilters; - } - - @Override - public Map> getTokenizers() { - Map> extra = new HashMap<>(); - return extra; - } - - @Override - public Map>> getAnalyzers() { - return singletonMap("hebrew_semi_exact_analyzer", HebrewAnalyzerProvider::new); - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java deleted file mode 100644 index b98f730..0000000 --- 
a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.hotstar.hebrew.plugin; - - -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; -import com.hotstar.hebrew.analysis.HebrewAnalyzer; - - -public class HebrewAnalyzerProvider extends AbstractIndexAnalyzerProvider { - - /* Constructor. Nothing special here. */ - public HebrewAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(name, settings); - analyzer = new HebrewAnalyzer(); - } - - /* This function needs to be overridden to return an instance of PlusSignAnalyzer. */ - public HebrewAnalyzer get() { - return this.analyzer; - } - - /* Instance of PlusSignAnalyzer class that is returned by this class. */ - protected HebrewAnalyzer analyzer; - - /* Name to associate with this class. We will use this in PlusSignBinderProcessor. 
*/ - public static final String NAME = "hebrew_semi_exact_analyzer"; -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java deleted file mode 100644 index 7010fb3..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.hotstar.hebrew.plugin; - -import com.hotstar.hebrew.analysis.SefariaTokenizer; -import org.apache.lucene.analysis.Tokenizer; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenizerFactory; - - -public class HebrewTokenizerTokenizerFactory extends AbstractTokenizerFactory { - public HebrewTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, settings, name); - } - - @Override - public Tokenizer create() { - return new SefariaTokenizer(); - } -} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java deleted file mode 100644 index 69a6f02..0000000 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.hotstar.hebrew.plugin; - -import org.elasticsearch.indices.analysis.AnalysisModule;; - -//public class SefariaBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { -// -// /* This is the only function that you need. It simply adds our PlusSignAnalyzerProvider class -// * to a list of bindings. 
-// */ -// @Override -// public void processAnalyzers(AnalyzersBindings analyzersBindings) { -// analyzersBindings.processAnalyzer(SefariaAnalyzerProvider.NAME, -// SefariaAnalyzerProvider.class); -// } -//} diff --git a/README.md b/README.md index 480e4ad..44efbb4 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,15 @@ # Hebrew-Analyzers -This is a project that provides language analyzer plugins for Hebrew on different search engines such as ElasticSearch/OpenSearch. +This is a project that provides language analyzer plugins for Hebrew on search engine OpenSearch. ## How to Install a Plugin -This repository contains several plugins. Each folder in the root directory represents a separate plugin. To install a plugin: -For example, on ElasticSearch: - -First, locate the bin folder in your ES installation (referred to as $ES_BIN hereafter). This folder can be found in one of two places: - -If you installed ES as a service: `/usr/share/elasticsearch/bin` -If you downloaded the source: `$SRC_ROOT/bin` https://www.elastic.co/downloads/elasticsearch -Navigate to the plugins directory: - -`Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew` -`Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew` ## What is an OpenSearch/ElasticSearch Plugin? OpenSearch/ElasticSearch plugins are a way to enhance the basic functionality of Elasticsearch in a customized manner. They can include custom mapping types, custom analyzers (in a more built-in fashion), custom script engines, custom discovery, and more. -## How to Write a Customized Plugin? -To write a customized plugin, you can follow these steps: - -Download an official analysis plugin that matches your ES version. For example, if you want to upload a plugin to your ElasticSearch 8.5.3 version cluster, it is recommended to download a local package of version 8.5.3 and test it locally. 
You can download the package from this link: ElasticSearch Downloads - -After downloading the ElasticSearch package, install it. Then, run the following command to check if it is installed successfully: `${YOUR_ES_DOWNLOAD_PATH}/bin/elasticsearch` - -Examine an official plugin to understand its implementation. For example, the smartcn plugin https://www.elastic.co/guide/en/elasticsearch/plugins/8.6/analysis-smartcn.html is a language analysis plugin that has similar functionality to the Hebrew analyzer. Install this plugin using the command: `sudo ${YOUR_ES_DOWNLOAD_PATH}/bin/elasticsearch-plugin install analysis-smartcn`. You will find the plugin installed at `${YOUR_ES_DOWNLOAD_PATH}/plugins/analysis-smartcn`. - -Under `${YOUR_ES_DOWNLOAD_PATH}/plugins/analysis-smartcn`, you will find a file named plugin-descriptor.properties. This file is a configuration file that defines the execution file. For detailed usage, you can refer to this example: `plugin-descriptor.properties`. - -Write your own plugins and build them into a JAR file. This step will be customized based on your requirements. - -Test the plugin locally and upload it to your ES cluster. Make sure you have the necessary permissions to upload plugins to the ES cluster. - -Please note that the above instructions assume a basic understanding of ElasticSearch and plugin development. 
## Examples: | Text | Analyzer | Tokens | |-----------------------------|---------------|-----------------------------------------------------------------| | הַכֹּחַ הַאֱמוּנָה יְכוֹל לְהַזְזִים הָרֵים | ngram-3-words | הכח$,הכח,האמונה$,האמ,אמנ,מנה,יכול$,כל,להזזים$,להז,הזז,הרים$,הרם | | הַכֹּחַ הַאֱמוּנָה יְכוֹל לְהַזְזִים הָרֵים | semi-exact | הכח$,הכח,האמונה$,האמונה,יכול$,יכול,להזזים$,להזזים,הרים$,הרים | - -## Reference: -https://github.com/Sefaria/Sefaria-ElasticSearch -https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-authors.html \ No newline at end of file diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/LICENSE.txt b/opensearch-hebrew-analyser/LICENSE.txt similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/LICENSE.txt rename to opensearch-hebrew-analyser/LICENSE.txt diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/Archive.zip b/opensearch-hebrew-analyser/NOTICE.txt similarity index 100% rename from Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/Archive.zip rename to opensearch-hebrew-analyser/NOTICE.txt diff --git a/opensearch-hebrew-analyser/build.gradle b/opensearch-hebrew-analyser/build.gradle new file mode 100644 index 0000000..8637d1c --- /dev/null +++ b/opensearch-hebrew-analyser/build.gradle @@ -0,0 +1,104 @@ +buildscript { + repositories { + mavenLocal() + mavenCentral() + maven{ + url "https://plugins.gradle.org/m2/" + } + } + dependencies { + classpath "org.opensearch.gradle:build-tools:${opensearchVersion}" + classpath "org.opensearch:opensearch-core:${opensearchVersion}" + classpath "org.opensearch:opensearch-common:${opensearchVersion}" + } + +} + +group = 'com.hotstar' +version = "${opensearchVersion}" + +apply plugin: 'java' +apply plugin: 'idea' +apply plugin: 'opensearch.opensearchplugin' + +opensearchplugin { + name 'opensearch-analysis-hebrew' + description 'The Hebrew Analysis plugin 
module for opensearch.' + classname 'com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin' + licenseFile = rootProject.file('LICENSE.txt') + noticeFile = rootProject.file('NOTICE.txt') +} + + +jar { + archiveBaseName.set(rootProject.name) +} + +javadocJar { + archiveBaseName.set(rootProject.name) +} + +sourcesJar { + archiveBaseName.set(rootProject.name) +} + + +dependencies { + + testImplementation "org.apache.lucene:lucene-test-framework:${luceneVersion}" + testImplementation "junit:junit:4.13.2" + implementation "org.apache.lucene:lucene-core:${luceneVersion}" + implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" + compileOnly "org.opensearch:opensearch:${opensearchVersion}" + //implementation "com.google.guava:guava:17.0" +} + +forbiddenPatterns { + forbiddenPatterns.enabled = false; +} + +licenseHeaders { + licenseHeaders.enabled = false +} + +dependencyLicenses { + dependencyLicenses.enabled = false +} + +validateNebulaPom { + validateNebulaPom.enabled = false +} + +thirdPartyAudit { + thirdPartyAudit.enabled = false +} + +loggerUsageCheck { + loggerUsageCheck.enabled = false +} + +test { + systemProperty 'tests.security.manager', 'false' +} + +testingConventions { + testingConventions.enabled = false +} + +javadoc { + enabled = false +} + +task release(type: Copy, group: 'build') { + dependsOn assemble + project.logger.debug("Copying plugin zip to plugin directory") + from(bundlePlugin.outputs.files.getSingleFile()) + into "../build/plugins/${pluginName}/" + includeEmptyDirs = false +} + +tasks.register('integTest', Test) { +} + + + diff --git a/opensearch-hebrew-analyser/es-plugin.properties b/opensearch-hebrew-analyser/es-plugin.properties new file mode 100644 index 0000000..cde669a --- /dev/null +++ b/opensearch-hebrew-analyser/es-plugin.properties @@ -0,0 +1 @@ +plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin \ No newline at end of file diff --git a/opensearch-hebrew-analyser/gradle.properties
b/opensearch-hebrew-analyser/gradle.properties new file mode 100644 index 0000000..ed326c0 --- /dev/null +++ b/opensearch-hebrew-analyser/gradle.properties @@ -0,0 +1,21 @@ +opensearchVersion=2.11.0 +luceneVersion=9.7.0 +pluginName = opensearch-analysis-hebrew +org.gradle.warning.mode=none +org.gradle.parallel=true +org.gradle.jvmargs=-Xmx3g -XX:+HeapDumpOnOutOfMemoryError -Xss2m +options.forkOptions.memoryMaximumSize=2g + +# Disable duplicate project id detection +# See https://docs.gradle.org/current/userguide/upgrading_version_6.html#duplicate_project_names_may_cause_publication_to_fail +systemProp.org.gradle.dependency.duplicate.project.detection=false + +# Enforce the build to fail on deprecated gradle api usage +systemProp.org.gradle.warning.mode=fail + +# forcing to use TLS1.2 to avoid failure in vault +# see https://github.com/hashicorp/vault/issues/8750#issuecomment-631236121 +systemProp.jdk.tls.client.protocols=TLSv1.2 + +# jvm args for faster test execution by default +systemProp.tests.jvm.argline=-XX:TieredStopAtLevel=1 -XX:ReservedCodeCacheSize=64m \ No newline at end of file diff --git a/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..d64cd49 Binary files /dev/null and b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar differ diff --git a/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..707e21e --- /dev/null +++ b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/opensearch-hebrew-analyser/gradlew 
b/opensearch-hebrew-analyser/gradlew new file mode 100755 index 0000000..1aa94a4 --- /dev/null +++ b/opensearch-hebrew-analyser/gradlew @@ -0,0 +1,249 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. 
+# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/opensearch-hebrew-analyser/gradlew.bat b/opensearch-hebrew-analyser/gradlew.bat new file mode 100644 index 0000000..6689b85 --- /dev/null +++ b/opensearch-hebrew-analyser/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/Archive.zip b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/Archive.zip similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/Archive.zip rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/Archive.zip diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/LICENSE.txt b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/LICENSE.txt similarity index 100% rename from Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/LICENSE.txt rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/LICENSE.txt diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar diff --git 
a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties similarity index 75% rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties index 161bf75..d7f5d97 100644 --- a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties +++ b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties @@ -1,7 +1,7 @@ version=8.5.3 name=elasticsearch-hebrew-ngram-3 description=elasticsearch-analysis-hebrew -classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin +classname=com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin java.version=17 elasticsearch.version=8.5.3 extended.plugins= diff --git a/opensearch-hebrew-analyser/settings.gradle b/opensearch-hebrew-analyser/settings.gradle new file mode 100644 index 0000000..b886efc --- /dev/null +++ b/opensearch-hebrew-analyser/settings.gradle @@ -0,0 +1,18 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This project uses @Incubating APIs which are subject to change. 
+ */ + +pluginManagement { + repositories{ + mavenLocal() + mavenCentral() + google() + maven{ + url "https://plugins.gradle.org/m2/" + } + gradlePluginPortal() + } +} +rootProject.name = "opensearch-analysis-hebrew" diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java similarity index 96% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java index 798ba6e..503569f 100644 --- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java @@ -10,7 +10,7 @@ import java.util.regex.Pattern; -public class HebrewAnalyzer extends Analyzer { +public class HebrewNgramAnalyzer extends Analyzer { /* This is the only function that we need to override for our analyzer. 
* It takes in a java.io.Reader object and saves the tokenizer and list diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java similarity index 95% rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java index f97db77..703b6a6 100644 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java @@ -10,7 +10,7 @@ import java.util.regex.Pattern; -public class HebrewAnalyzer extends Analyzer { +public class HebrewSemiExactAnalyzer extends Analyzer { /* This is the only function that we need to override for our analyzer. * It takes in a java.io.Reader object and saves the tokenizer and list diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java rename to 
opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java similarity index 89% rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java index 7096342..78e7bf9 100644 --- 
a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java @@ -1,10 +1,11 @@ package com.hotstar.hebrew.analysis; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import java.io.IOException; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +import java.io.IOException; import java.util.ArrayList; import java.util.List; diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java similarity index 100% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java new file mode 100644 index 0000000..6a5e706 --- 
/dev/null +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java @@ -0,0 +1,38 @@ +package com.hotstar.hebrew.plugin; +import org.apache.lucene.analysis.Analyzer; +import org.opensearch.index.analysis.AnalyzerProvider; +import org.opensearch.index.analysis.TokenFilterFactory; +import org.opensearch.index.analysis.TokenizerFactory; +import org.opensearch.indices.analysis.AnalysisModule; +import org.opensearch.plugins.AnalysisPlugin; +import org.opensearch.plugins.Plugin; + +import java.util.HashMap; +import java.util.Map; +/* OpenSearch analysis plugin that registers the Hebrew token filters, tokenizers and analyzers listed below. */ +public class HebrewAnalyzerOsPlugin extends Plugin implements AnalysisPlugin { + /* Token filter names "hebrew_stop" and "hebrew_word"; both are backed by HebrewNoOpTokenFilterFactory. */ + @Override + public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() { + Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters = new HashMap<>(); + tokenFilters.put("hebrew_stop", HebrewNoOpTokenFilterFactory::new); + tokenFilters.put("hebrew_word", HebrewNoOpTokenFilterFactory::new); + return tokenFilters; + } + /* Tokenizer names "hebrew_tokenizer" and "hebrew_sentence"; both are backed by HebrewTokenizerTokenizerFactory. */ + @Override + public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() { + Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> extra = new HashMap<>(); + extra.put("hebrew_tokenizer", HebrewTokenizerTokenizerFactory::new); + extra.put("hebrew_sentence", HebrewTokenizerTokenizerFactory::new); + return extra; + } + /* Analyzer names: "hebrew-ngram-3-analyzer" (n-gram provider) and "hebrew_semi_exact_analyzer" (semi-exact provider). */ + @Override + public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() { + Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyser = new HashMap<>(); + analyser.put("hebrew-ngram-3-analyzer", HebrewNgramAnalyzerProvider::new); + analyser.put("hebrew_semi_exact_analyzer", HebrewSemiExactAnalyzerProvider::new); + return analyser; + } +} diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java new file mode 100644 index 0000000..b8123b1 --- /dev/null +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java @@ -0,0 +1,29 @@ +package com.hotstar.hebrew.plugin; + + +import com.hotstar.hebrew.analysis.HebrewNgramAnalyzer; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment;
+import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider; + +/* Analyzer provider that creates and hands out a HebrewNgramAnalyzer. */ +public class HebrewNgramAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + /* Forwards indexSettings, name and settings to the base provider and creates the analyzer; the environment argument is not used. */ + public HebrewNgramAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + super(indexSettings, name, settings); + analyzer = new HebrewNgramAnalyzer(); + } + + /* Returns the HebrewNgramAnalyzer instance created in the constructor. */ + public HebrewNgramAnalyzer get() { + return this.analyzer; + } + + /* The HebrewNgramAnalyzer instance returned by get(). */ + protected HebrewNgramAnalyzer analyzer; + + /* Analyzer name; the same string is the key this provider is registered under in HebrewAnalyzerOsPlugin.getAnalyzers(). */ + public static final String NAME = "hebrew-ngram-3-analyzer"; +} diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java similarity index 62% rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java index 4a73a41..4f6642d 100644 --- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java @@ -2,14 +2,14 @@ import org.apache.lucene.analysis.TokenStream; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import
org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AbstractTokenFilterFactory; public class HebrewNoOpTokenFilterFactory extends AbstractTokenFilterFactory { public HebrewNoOpTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(name, settings); + super(indexSettings, name, settings); } @Override diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java new file mode 100644 index 0000000..20fba1b --- /dev/null +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java @@ -0,0 +1,29 @@ +package com.hotstar.hebrew.plugin; + + +import com.hotstar.hebrew.analysis.HebrewSemiExactAnalyzer; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider; + +/* Analyzer provider that creates and hands out a HebrewSemiExactAnalyzer. */ +public class HebrewSemiExactAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + /* Forwards indexSettings, name and settings to the base provider and creates the analyzer; the environment argument is not used. */ + public HebrewSemiExactAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + super(indexSettings, name, settings); + analyzer = new HebrewSemiExactAnalyzer(); + } + + /* Returns the HebrewSemiExactAnalyzer instance created in the constructor. */ + public HebrewSemiExactAnalyzer get() { + return this.analyzer; + } + + /* The HebrewSemiExactAnalyzer instance returned by get(). */ + protected HebrewSemiExactAnalyzer analyzer; + + /* Analyzer name; the same string is the key this provider is registered under in HebrewAnalyzerOsPlugin.getAnalyzers().
*/ + public static final String NAME = "hebrew_semi_exact_analyzer"; +} diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java similarity index 70% rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java index 0ce30ce..517723f 100644 --- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java +++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java @@ -1,11 +1,11 @@ package com.hotstar.hebrew.plugin; import org.apache.lucene.analysis.Tokenizer; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.AbstractTokenizerFactory; import com.hotstar.hebrew.analysis.SefariaTokenizer; +import org.opensearch.common.settings.Settings; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AbstractTokenizerFactory; public class HebrewTokenizerTokenizerFactory extends AbstractTokenizerFactory {