diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..503dac2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+build
+opensearch-hebrew-analyser/.gradle
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties b/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties
deleted file mode 100644
index 6ca3b56..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/es-plugin.properties
+++ /dev/null
@@ -1 +0,0 @@
-plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties b/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties
deleted file mode 100644
index 7d85f7c..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/plugin-descriptor.properties
+++ /dev/null
@@ -1,7 +0,0 @@
-jvm=true
-name=elasticsearch-analysis-hebrew
-description=elasticsearch-analysis-hebrew
-classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
-elasticsearch.version=8.5.3
-java.version=17
-version=1.0
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/pom.xml b/Hebrew-ElasticSearch-ngrams-3-words/pom.xml
deleted file mode 100644
index 66b0922..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/pom.xml
+++ /dev/null
@@ -1,60 +0,0 @@
-
-
- 4.0.0
-
- com.hotstar
- elasticsearch-hebrew-ngram-3
- 8.5.3
- jar
- elasticsearch-analysis-hebrew
-
-
- UTF-8
- 7.4.0
- 8.5.3
- 1.0.0
- 1.8
- 1.8
-
-
-
-
- org.apache.lucene
- lucene-test-framework
- ${lucene.version}
- test
-
-
- org.apache.lucene
- lucene-core
- ${lucene.version}
- provided
-
-
- org.apache.lucene
- lucene-analyzers-common
- ${lucene.version}
- provided
-
-
- org.elasticsearch
- elasticsearch
- ${elasticsearch.version}
- provided
-
-
- com.google.guava
- guava
- 17.0
- provided
-
-
- junit
- junit
- 4.11
- test
-
-
-
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java
deleted file mode 100644
index cd5cb56..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package com.hotstar.hebrew.plugin;
-import org.elasticsearch.index.analysis.AnalyzerProvider;
-import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.plugins.AnalysisPlugin;
-import org.elasticsearch.plugins.Plugin;
-import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
-import org.apache.lucene.analysis.Analyzer;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import static java.util.Collections.singletonMap;
-
-public class HebrewAnalyzerEsPlugin extends Plugin implements AnalysisPlugin {
-
- @Override
- public Map> getTokenFilters() {
- Map> tokenFilters = new HashMap<>();
- tokenFilters.put("hebrew_stop", HebrewNoOpTokenFilterFactory::new);
- tokenFilters.put("hebrew_word", HebrewNoOpTokenFilterFactory::new);
- return tokenFilters;
- }
-
- @Override
- public Map> getTokenizers() {
- Map> extra = new HashMap<>();
- extra.put("hebrew_tokenizer", HebrewTokenizerTokenizerFactory::new);
- extra.put("hebrew_sentence", HebrewTokenizerTokenizerFactory::new);
- return extra;
- }
-
- @Override
- public Map>> getAnalyzers() {
- return singletonMap("hebrew-ngram-3-analyzer", HebrewAnalyzerProvider::new);
- }
-}
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java
deleted file mode 100644
index 1957c7f..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.hotstar.hebrew.plugin;
-
-
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
-import com.hotstar.hebrew.analysis.HebrewAnalyzer;
-
-
-public class HebrewAnalyzerProvider extends AbstractIndexAnalyzerProvider {
-
- /* Constructor. Nothing special here. */
- public HebrewAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(name, settings);
- analyzer = new HebrewAnalyzer();
- }
-
- /* This function needs to be overridden to return an instance of PlusSignAnalyzer. */
- public HebrewAnalyzer get() {
- return this.analyzer;
- }
-
- /* Instance of PlusSignAnalyzer class that is returned by this class. */
- protected HebrewAnalyzer analyzer;
-
- /* Name to associate with this class. We will use this in PlusSignBinderProcessor. */
- public static final String NAME = "hebrew-ngram-3-analyzer";
-}
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java b/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
deleted file mode 100644
index 4a73a41..0000000
--- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.hotstar.hebrew.plugin;
-
-
-import org.apache.lucene.analysis.TokenStream;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
-
-public class HebrewNoOpTokenFilterFactory extends AbstractTokenFilterFactory {
- public HebrewNoOpTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(name, settings);
- }
-
- @Override
- public TokenStream create(TokenStream tokenStream) {
- return tokenStream;
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/es-plugin.properties b/Hebrew-ElasticSearch-semi-exact/es-plugin.properties
deleted file mode 100644
index 6ca3b56..0000000
--- a/Hebrew-ElasticSearch-semi-exact/es-plugin.properties
+++ /dev/null
@@ -1 +0,0 @@
-plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar b/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar
deleted file mode 100644
index cbf5760..0000000
Binary files a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-semi-exact-analyzer-8.5.3.jar and /dev/null differ
diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties b/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
deleted file mode 100644
index 0dfbbea..0000000
--- a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
+++ /dev/null
@@ -1,8 +0,0 @@
-version=8.5.3
-name=elasticsearch-hebrew
-description=elasticsearch-analysis-hebrew
-classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
-java.version=17
-elasticsearch.version=8.5.3
-extended.plugins=
-has.native.controller=false
diff --git a/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties b/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties
deleted file mode 100644
index 7d85f7c..0000000
--- a/Hebrew-ElasticSearch-semi-exact/plugin-descriptor.properties
+++ /dev/null
@@ -1,7 +0,0 @@
-jvm=true
-name=elasticsearch-analysis-hebrew
-description=elasticsearch-analysis-hebrew
-classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
-elasticsearch.version=8.5.3
-java.version=17
-version=1.0
diff --git a/Hebrew-ElasticSearch-semi-exact/pom.xml b/Hebrew-ElasticSearch-semi-exact/pom.xml
deleted file mode 100644
index ff58433..0000000
--- a/Hebrew-ElasticSearch-semi-exact/pom.xml
+++ /dev/null
@@ -1,60 +0,0 @@
-
-
- 4.0.0
-
- com.hotstar
- elasticsearch-hebrew-semi-exact-analyzer
- 8.5.3
- jar
- elasticsearch-hebrew-semi-exact-analyzer
-
-
- UTF-8
- 7.4.0
- 8.5.3
- 1.0.0
- 1.8
- 1.8
-
-
-
-
- org.apache.lucene
- lucene-test-framework
- ${lucene.version}
- test
-
-
- org.apache.lucene
- lucene-core
- ${lucene.version}
- provided
-
-
- org.apache.lucene
- lucene-analyzers-common
- ${lucene.version}
- provided
-
-
- org.elasticsearch
- elasticsearch
- ${elasticsearch.version}
- provided
-
-
- com.google.guava
- guava
- 17.0
- provided
-
-
- junit
- junit
- 4.11
- test
-
-
-
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java
deleted file mode 100644
index baecbd9..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/EmptyStringTokenFilter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import java.io.IOException;
-
-public class EmptyStringTokenFilter extends TokenFilter {
-
- /* The constructor for our custom token filter just calls the TokenFilter constructor; that
- * constructor saves the token stream in a variable named this.input.
- */
- public EmptyStringTokenFilter(TokenStream tokenStream) {
- super(tokenStream);
- }
-
- /* Like the PlusSignTokenizer class, we are going to save the text of the current token
- * in a CharTermAttribute object. In addition, we are going to use a
- * PositionIncrementAttribute object to store the position increment of the token. Lucene
- * uses this latter attribute to determine the position of a token. Given a token stream with
- * “This”, “is”, “”, “some”, and “text”, we are going to ensure that “This” is saved at
- * position 1, “is” at position 2, “some” at position 3, and “text” at position 4. Note that
- * we have completely ignored the empty string at what was position 3 in the original stream.
- */
- protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
- protected PositionIncrementAttribute positionIncrementAttribute =
- addAttribute(PositionIncrementAttribute.class);
-
- /* Like we did in the PlusSignTokenizer class, we need to override the incrementToken()
- * function to save the attributes of the current token. We are going to pass over any
- * tokens that are empty strings and save all others without modifying them. This function
- * should return true if a new token was generated and false if the last token was passed.
- */
- @Override
- public boolean incrementToken() throws IOException {
-
- // Loop over tokens in the token stream to find the next one that is not empty
- String nextToken = null;
- while (nextToken == null) {
-
- // Reached the end of the token stream being processed
- if ( ! this.input.incrementToken()) {
- return false;
- }
-
- // Get text of the current token and remove any leading/trailing whitespace.
- String currentTokenInStream =
- this.input.getAttribute(CharTermAttribute.class).toString().trim();
-
- // Save the token if it is not an empty string
- if (currentTokenInStream.length() > 0) {
- nextToken = currentTokenInStream;
- }
- }
-
- // Save the current token
- this.charTermAttribute.setEmpty().append(nextToken);
- this.positionIncrementAttribute.setPositionIncrement(1);
- return true;
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java
deleted file mode 100644
index b091b8f..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package com.hotstar.hebrew.analysis;
-import org.apache.lucene.util.Attribute;
-
-/**
- * This attribute is used to pass info on tokens as parsed and identified
- * by the HebMorph tokenizer
- */
-public interface HebrewTokenTypeAttribute extends Attribute{
- enum HebrewType {
- Unknown
- }
-
- void setType(HebrewType type);
- HebrewType getType();
- boolean isExact();
- void setExact(boolean isExact);
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java
deleted file mode 100644
index 693fbc0..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeReflector;
-
-/**
- * Created by Egozy on 19/04/2015.
- */
-public class HebrewTokenTypeAttributeImpl extends AttributeImpl implements HebrewTokenTypeAttribute {
- private HebrewType type = HebrewType.Unknown;
- private boolean isExact = false;
- public void setType(HebrewType type) {
- this.type = type;
- }
-
- public HebrewType getType() {
- return type;
- }
-
- public boolean isExact() {
- return isExact;
- }
-
- public void setExact(boolean isExact) {
- this.isExact = isExact;
- }
-
- public void clear() {
- type = HebrewType.Unknown;
- isExact = false;
- }
-
- @Override
- public void reflectWith(AttributeReflector reflector) {
- reflector.reflect(KeywordAttribute.class, "isExact", isExact);
- reflector.reflect(KeywordAttribute.class, "type", type);
- }
-
- public void copyTo(AttributeImpl target) {
- ((HebrewTokenTypeAttribute) target).setType(type);
- }
-}
-
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java
deleted file mode 100644
index 5682a3d..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Created by nss on 2/20/17.
- */
-public class NGramizer {
-
- private int n;
-
- public NGramizer(int n) {
- this.n = n;
- }
-
- public List ngramize(String s) {
- List ngrams = new ArrayList();
-
- for (int i = 0; i <= s.length() - n; i++) {
- ngrams.add(s.substring(i,i+n));
- }
-
- return ngrams;
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java
deleted file mode 100644
index 45c7ee8..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-
-public class PluralFilter extends TokenFilter {
-
- private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
- private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class);
- private PositionIncrementAttribute positionIncrementAttribute =
- addAttribute(PositionIncrementAttribute.class);
- private List previousTokens;
- private Pattern pluralPat;
-
- public PluralFilter(TokenStream tokenStream) {
- super(tokenStream);
- this.previousTokens = new ArrayList();
- this.pluralPat = Pattern.compile("(.{3,})(ים|ות)$");
- }
-
-
-
- @Override
- public boolean incrementToken() throws IOException {
-
- if (!previousTokens.isEmpty()) {
- this.charTermAttribute.setEmpty().append(previousTokens.remove(0));
- this.positionIncrementAttribute.setPositionIncrement(0);
- this.hebTokAttribute.setExact(false);
- return true;
- }
-
- // Loop over tokens in the token stream to find the next one that is not empty
- String nextToken = null;
- while (nextToken == null) {
-
- // Reached the end of the token stream being processed
- if ( ! this.input.incrementToken()) {
- return false;
- }
-
- // Get text of the current token and remove any leading/trailing whitespace.
- String currentTokenInStream =
- this.input.getAttribute(CharTermAttribute.class).toString().trim();
-
- // Save the token if it is not an empty string
- if (currentTokenInStream.length() > 0) {
- nextToken = currentTokenInStream;
- }
- }
-
- previousTokens.add(filterPlural(nextToken));
-
- // Save the current token
- this.charTermAttribute.setEmpty().append(nextToken).append('$');
- this.positionIncrementAttribute.setPositionIncrement(1);
- this.hebTokAttribute.setExact(true);
- return true;
- }
-
- private String filterPlural(String in) {
- return in.replaceFirst("(.{3,})(ים|ות)$","$1");
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java
deleted file mode 100644
index 3c094ae..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-public class SefariaNGramTokenFilter extends TokenFilter {
-
- public static final char FINAL_CHAR = '$';
-
- private NGramizer ngramizer;
-
-
- public SefariaNGramTokenFilter(TokenStream tokenStream, int n) {
- super(tokenStream);
- ngramizer = new NGramizer(n);
- }
- private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
- private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class);
- private PositionIncrementAttribute positionIncrementAttribute =
- addAttribute(PositionIncrementAttribute.class);
-
- private List previousTokens = new ArrayList();
-
-
- @Override
- public boolean incrementToken() throws IOException {
-
- if (savePrevToken())
- return true;
-
- // Reached the end of the token stream being processed
- if ( ! this.input.incrementToken()) {
- return false;
- }
-
- // Get text of the current token and remove any leading/trailing whitespace.
- String currToken =
- this.input.getAttribute(CharTermAttribute.class).toString().trim();
-
- if (! hebTokAttribute.isExact()) {
- List ngrams = ngramizer.ngramize(currToken);
- for (String ngram : ngrams) {
- previousTokens.add(ngram);
- }
-
- savePrevToken();
- }
-
- return true;
- }
-
- private boolean savePrevToken() {
- if (!previousTokens.isEmpty()) {
- this.charTermAttribute.setEmpty();
- this.charTermAttribute.append(previousTokens.remove(0));
- this.positionIncrementAttribute.setPositionIncrement(0);
- this.hebTokAttribute.setExact(false);
- return true;
- } else {
- return false;
- }
- }
-}
-
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java
deleted file mode 100644
index 20fcfa3..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java
+++ /dev/null
@@ -1,98 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import java.io.IOException;
-import java.io.Reader;
-
-public class SefariaTokenizer extends Tokenizer {
-
- /* Lucene uses attributes to store information about a single token. For this tokenizer, the
- * only attribute that we are going to use is the CharTermAttribute, which can store the text
- * for the token that is generated. Other types of attributes exist (see interfaces and
- * classes derived from org.apache.lucene.util.Attribute); we will use some of these other
- * attributes when we build our custom token filter. It is important that you register
- * attributes, whatever their type, using the addAttribute() function.
- */
- protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
-
- /* This is the important function to override from the Tokenizer class. At each call, it
- * should set the value of this.charTermAttribute to the text of the next token. It returns
- * true if a new token is generated and false if there are no more tokens remaining.
- */
- @Override
- public boolean incrementToken() throws IOException {
-
- // Clear anything that is already saved in this.charTermAttribute
- this.charTermAttribute.setEmpty();
-
- // Get the position of the next + symbol
- int nextIndex = this.stringToTokenize.indexOf('+', this.position);
-
- // Execute this block if a plus symbol was found. Save the token and the
- // position to start at when incrementToken() is next called.
- if (nextIndex != -1) {
- String nextToken = this.stringToTokenize.substring(this.position, nextIndex);
- this.charTermAttribute.append(nextToken);
- this.position = nextIndex + 1;
- return true;
- }
-
- // Execute this block if no more + signs are found, but there is still some text
- // remaining in the string. For example, this saves “text” in “This+is++some+text”.
- else if (this.position < this.stringToTokenize.length()) {
- String nextToken = this.stringToTokenize.substring(this.position);
- this.charTermAttribute.append(nextToken);
- this.position = this.stringToTokenize.length();
- return true;
- }
-
- // Execute this block if no more tokens exist in the string.
- else {
- return false;
- }
- }
-
- /* This is the constructor for our custom tokenizer class. It takes all information from a
- * java.io.Reader object and stores it in a string. If you are expecting very large blocks of
- * text, you might want to think about using a buffer and saving chunks from the reader
- * whenever incrementToken() is called. This function throws a RuntimeException when an
- * IOException is found - you can choose how you want to deal with the IOException, but
- * for our purposes, we do not need to try to recover from it.
- */
- public SefariaTokenizer() {
- super();
- }
-
- /* Reset the stored position for this object when reset() is called.
- */
- @Override
- public void reset() throws IOException {
- super.reset();
- this.position = 0;
-
- int numChars;
- char[] buffer = new char[1024];
- StringBuilder stringBuilder = new StringBuilder();
-
- try {
- while ((numChars = this.input.read(buffer, 0, buffer.length)) != -1) {
- stringBuilder.append(buffer, 0, numChars);
- }
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- this.stringToTokenize = stringBuilder.toString();
- }
-
- /* This object stores the string that we are turning into tokens. We will process its content
- * as we call the incrementToken() function.
- */
- protected String stringToTokenize;
-
- /* This stores the current position in this.stringToTokenize. We will increment its value as
- * we call the incrementToken() function.
- */
- protected int position = 0;
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java
deleted file mode 100644
index 8f5754b..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package com.hotstar.hebrew.analysis;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-import java.io.IOException;
-
-public class StopLetterFilter extends TokenFilter {
-
- private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
- private HebrewTokenTypeAttribute hebTokAttribute = addAttribute(HebrewTokenTypeAttribute.class);
- private PositionIncrementAttribute positionIncrementAttribute =
- addAttribute(PositionIncrementAttribute.class);
- private String stopLettersPat;
-
- public StopLetterFilter(TokenStream tokenStream, char[] stopLetters) {
- super(tokenStream);
- this.stopLettersPat = "[";
- for (char ch : stopLetters) {
- stopLettersPat += ch;
- }
- this.stopLettersPat += "]";
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- // Reached the end of the token stream being processed
- if ( ! this.input.incrementToken()) {
- return false;
- }
-
- String currToken =
- this.input.getAttribute(CharTermAttribute.class).toString().trim();
- if ( ! this.hebTokAttribute.isExact()) {
- this.charTermAttribute.setEmpty().append(filterStopLetters(currToken));
- this.hebTokAttribute.setExact(false);
- }
-
- return true;
- }
-
- private String filterStopLetters(String in) {
- return in.replaceAll(this.stopLettersPat, "");
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java
deleted file mode 100644
index a09a369..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerEsPlugin.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.hotstar.hebrew.plugin;
-import org.elasticsearch.index.analysis.AnalyzerProvider;
-import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
-import org.elasticsearch.plugins.AnalysisPlugin;
-import org.elasticsearch.plugins.Plugin;
-import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
-import org.apache.lucene.analysis.Analyzer;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import static java.util.Collections.singletonMap;
-
-public class HebrewAnalyzerEsPlugin extends Plugin implements AnalysisPlugin {
-
- @Override
- public Map> getTokenFilters() {
- Map> tokenFilters = new HashMap<>();
- return tokenFilters;
- }
-
- @Override
- public Map> getTokenizers() {
- Map> extra = new HashMap<>();
- return extra;
- }
-
- @Override
- public Map>> getAnalyzers() {
- return singletonMap("hebrew_semi_exact_analyzer", HebrewAnalyzerProvider::new);
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java
deleted file mode 100644
index b98f730..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerProvider.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.hotstar.hebrew.plugin;
-
-
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
-import com.hotstar.hebrew.analysis.HebrewAnalyzer;
-
-
-public class HebrewAnalyzerProvider extends AbstractIndexAnalyzerProvider {
-
- /* Constructor. Nothing special here. */
- public HebrewAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(name, settings);
- analyzer = new HebrewAnalyzer();
- }
-
- /* This function needs to be overridden to return an instance of PlusSignAnalyzer. */
- public HebrewAnalyzer get() {
- return this.analyzer;
- }
-
- /* Instance of PlusSignAnalyzer class that is returned by this class. */
- protected HebrewAnalyzer analyzer;
-
- /* Name to associate with this class. We will use this in PlusSignBinderProcessor. */
- public static final String NAME = "hebrew_semi_exact_analyzer";
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
deleted file mode 100644
index 7010fb3..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.hotstar.hebrew.plugin;
-
-import com.hotstar.hebrew.analysis.SefariaTokenizer;
-import org.apache.lucene.analysis.Tokenizer;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
-
-
-public class HebrewTokenizerTokenizerFactory extends AbstractTokenizerFactory {
- public HebrewTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(indexSettings, settings, name);
- }
-
- @Override
- public Tokenizer create() {
- return new SefariaTokenizer();
- }
-}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java b/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java
deleted file mode 100644
index 69a6f02..0000000
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/SefariaBinderProcessor.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.hotstar.hebrew.plugin;
-
-import org.elasticsearch.indices.analysis.AnalysisModule;;
-
-//public class SefariaBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
-//
-// /* This is the only function that you need. It simply adds our PlusSignAnalyzerProvider class
-// * to a list of bindings.
-// */
-// @Override
-// public void processAnalyzers(AnalyzersBindings analyzersBindings) {
-// analyzersBindings.processAnalyzer(SefariaAnalyzerProvider.NAME,
-// SefariaAnalyzerProvider.class);
-// }
-//}
diff --git a/README.md b/README.md
index 480e4ad..44efbb4 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,15 @@
# Hebrew-Analyzers
-This is a project that provides language analyzer plugins for Hebrew on different search engines such as ElasticSearch/OpenSearch.
+This project provides Hebrew language analyzer plugins for the OpenSearch search engine.
## How to Install a Plugin
-This repository contains several plugins. Each folder in the root directory represents a separate plugin. To install a plugin:
-For example, on ElasticSearch:
-
-First, locate the bin folder in your ES installation (referred to as $ES_BIN hereafter). This folder can be found in one of two places:
-
-If you installed ES as a service: `/usr/share/elasticsearch/bin`
-If you downloaded the source: `$SRC_ROOT/bin` https://www.elastic.co/downloads/elasticsearch
-Navigate to the plugins directory:
-
-`Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew`
-`Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew`
## What is an OpenSearch/ElasticSearch Plugin?
OpenSearch/ElasticSearch plugins are a way to enhance the basic functionality of Elasticsearch in a customized manner. They can include custom mapping types, custom analyzers (in a more built-in fashion), custom script engines, custom discovery, and more.
-## How to Write a Customized Plugin?
-To write a customized plugin, you can follow these steps:
-
-Download an official analysis plugin that matches your ES version. For example, if you want to upload a plugin to your ElasticSearch 8.5.3 version cluster, it is recommended to download a local package of version 8.5.3 and test it locally. You can download the package from this link: ElasticSearch Downloads
-
-After downloading the ElasticSearch package, install it. Then, run the following command to check if it is installed successfully: `${YOUR_ES_DOWNLOAD_PATH}/bin/elasticsearch`
-
-Examine an official plugin to understand its implementation. For example, the smartcn plugin https://www.elastic.co/guide/en/elasticsearch/plugins/8.6/analysis-smartcn.html is a language analysis plugin that has similar functionality to the Hebrew analyzer. Install this plugin using the command: `sudo ${YOUR_ES_DOWNLOAD_PATH}/bin/elasticsearch-plugin install analysis-smartcn`. You will find the plugin installed at `${YOUR_ES_DOWNLOAD_PATH}/plugins/analysis-smartcn`.
-
-Under `${YOUR_ES_DOWNLOAD_PATH}/plugins/analysis-smartcn`, you will find a file named plugin-descriptor.properties. This file is a configuration file that defines the execution file. For detailed usage, you can refer to this example: `plugin-descriptor.properties`.
-
-Write your own plugins and build them into a JAR file. This step will be customized based on your requirements.
-
-Test the plugin locally and upload it to your ES cluster. Make sure you have the necessary permissions to upload plugins to the ES cluster.
-
-Please note that the above instructions assume a basic understanding of ElasticSearch and plugin development.
## Examples:
| Text | Analyzer | Tokens |
|-----------------------------|---------------|-----------------------------------------------------------------|
| הַכֹּחַ הַאֱמוּנָה יְכוֹל לְהַזְזִים הָרֵים | ngram-3-words | הכח$,הכח,האמונה$,האמ,אמנ,מנה,יכול$,כל,להזזים$,להז,הזז,הרים$,הרם |
| הַכֹּחַ הַאֱמוּנָה יְכוֹל לְהַזְזִים הָרֵים | semi-exact | הכח$,הכח,האמונה$,האמונה,יכול$,יכול,להזזים$,להזזים,הרים$,הרים |
-
-## Reference:
-https://github.com/Sefaria/Sefaria-ElasticSearch
-https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-authors.html
\ No newline at end of file
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/LICENSE.txt b/opensearch-hebrew-analyser/LICENSE.txt
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/LICENSE.txt
rename to opensearch-hebrew-analyser/LICENSE.txt
diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/Archive.zip b/opensearch-hebrew-analyser/NOTICE.txt
similarity index 100%
rename from Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/Archive.zip
rename to opensearch-hebrew-analyser/NOTICE.txt
diff --git a/opensearch-hebrew-analyser/build.gradle b/opensearch-hebrew-analyser/build.gradle
new file mode 100644
index 0000000..8637d1c
--- /dev/null
+++ b/opensearch-hebrew-analyser/build.gradle
@@ -0,0 +1,104 @@
+buildscript {
+ repositories {
+ mavenLocal()
+ mavenCentral()
+ maven{
+ url "https://plugins.gradle.org/m2/"
+ }
+ }
+ dependencies {
+ classpath "org.opensearch.gradle:build-tools:${opensearchVersion}"
+ classpath "org.opensearch:opensearch-core:${opensearchVersion}"
+ classpath "org.opensearch:opensearch-common:${opensearchVersion}"
+ }
+
+}
+
+group = 'com.hotstar'
+version = "${opensearchVersion}"
+
+apply plugin: 'java'
+apply plugin: 'idea'
+apply plugin: 'opensearch.opensearchplugin'
+
+// Plugin descriptor settings; classname must name the Plugin subclass in src/main/java (HebrewAnalyzerOsPlugin).
+opensearchplugin {
+    name 'opensearch-analysis-hebrew'
+    description 'The Hebrew Analysis plugin module for opensearch.'
+    classname 'com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin'
+    licenseFile = rootProject.file('LICENSE.txt')
+    noticeFile = rootProject.file('NOTICE.txt')
+}
+
+jar {
+ archiveBaseName.set(rootProject.name)
+}
+
+javadocJar {
+ archiveBaseName.set(rootProject.name)
+}
+
+sourcesJar {
+ archiveBaseName.set(rootProject.name)
+}
+
+
+dependencies {
+
+ testImplementation "org.apache.lucene:lucene-test-framework:${luceneVersion}"
+ testImplementation "junit:junit:4.13.2"
+ implementation "org.apache.lucene:lucene-core:${luceneVersion}"
+ implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}"
+ compileOnly "org.opensearch:opensearch:${opensearchVersion}"
+ //implementation "com.google.guava:guava:17.0"
+}
+
+// Each build hook configured below is switched off by setting its `enabled` flag to false.
+forbiddenPatterns {
+    enabled = false
+}
+
+licenseHeaders {
+    enabled = false
+}
+
+dependencyLicenses {
+    enabled = false
+}
+
+validateNebulaPom {
+    enabled = false
+}
+
+thirdPartyAudit {
+    enabled = false
+}
+
+loggerUsageCheck {
+    enabled = false
+}
+
+testingConventions {
+    enabled = false
+}
+
+javadoc {
+    enabled = false
+}
+
+// Unit tests run with the 'tests.security.manager' system property set to 'false'.
+test { systemProperty 'tests.security.manager', 'false' }
+
+// Copies the bundled plugin zip into ../build/plugins/<pluginName>/ once `assemble` has run.
+task release(type: Copy, group: 'build') {
+    dependsOn assemble
+    // Log when the task executes; a bare statement here would run on every build configuration.
+    doFirst { logger.debug("Copying plugin zip to plugin directory") }
+    from(bundlePlugin.outputs.files.getSingleFile())
+    into "../build/plugins/${pluginName}/"
+    includeEmptyDirs = false
+}
+
+// Registers an empty Test task named integTest. NOTE(review): presumably a placeholder - confirm it is needed.
+tasks.register('integTest', Test) {
+}
diff --git a/opensearch-hebrew-analyser/es-plugin.properties b/opensearch-hebrew-analyser/es-plugin.properties
new file mode 100644
index 0000000..cde669a
--- /dev/null
+++ b/opensearch-hebrew-analyser/es-plugin.properties
@@ -0,0 +1 @@
+plugin=com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin
\ No newline at end of file
diff --git a/opensearch-hebrew-analyser/gradle.properties b/opensearch-hebrew-analyser/gradle.properties
new file mode 100644
index 0000000..ed326c0
--- /dev/null
+++ b/opensearch-hebrew-analyser/gradle.properties
@@ -0,0 +1,21 @@
+opensearchVersion=2.11.0
+luceneVersion=9.7.0
+pluginName = opensearch-analysis-hebrew
+org.gradle.warning.mode=none
+org.gradle.parallel=true
+org.gradle.jvmargs=-Xmx3g -XX:+HeapDumpOnOutOfMemoryError -Xss2m
+options.forkOptions.memoryMaximumSize=2g
+
+# Disable duplicate project id detection
+# See https://docs.gradle.org/current/userguide/upgrading_version_6.html#duplicate_project_names_may_cause_publication_to_fail
+systemProp.org.gradle.dependency.duplicate.project.detection=false
+
+# Enforce the build to fail on deprecated gradle api usage
+systemProp.org.gradle.warning.mode=fail
+
+# forcing to use TLS1.2 to avoid failure in vault
+# see https://github.com/hashicorp/vault/issues/8750#issuecomment-631236121
+systemProp.jdk.tls.client.protocols=TLSv1.2
+
+# jvm args for faster test execution by default
+systemProp.tests.jvm.argline=-XX:TieredStopAtLevel=1 -XX:ReservedCodeCacheSize=64m
\ No newline at end of file
diff --git a/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000..d64cd49
Binary files /dev/null and b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..707e21e
--- /dev/null
+++ b/opensearch-hebrew-analyser/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,7 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip
+networkTimeout=10000
+validateDistributionUrl=true
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/opensearch-hebrew-analyser/gradlew b/opensearch-hebrew-analyser/gradlew
new file mode 100755
index 0000000..1aa94a4
--- /dev/null
+++ b/opensearch-hebrew-analyser/gradlew
@@ -0,0 +1,249 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+#
+# Gradle start up script for POSIX generated by Gradle.
+#
+# Important for running:
+#
+# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+# noncompliant, but you have some other compliant shell such as ksh or
+# bash, then to run this script, type that shell name before the whole
+# command line, like:
+#
+# ksh Gradle
+#
+# Busybox and similar reduced shells will NOT work, because this script
+# requires all of these POSIX shell features:
+# * functions;
+# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+# * compound commands having a testable exit status, especially «case»;
+# * various built-in commands including «command», «set», and «ulimit».
+#
+# Important for patching:
+#
+# (2) This script targets any POSIX shell, so it avoids extensions provided
+# by Bash, Ksh, etc; in particular arrays are avoided.
+#
+# The "traditional" practice of packing multiple parameters into a
+# space-separated string is a well documented source of bugs and security
+# problems, so this is (mostly) avoided, by progressively accumulating
+# options in "$@", and eventually passing that to Java.
+#
+# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+# see the in-line comments for details.
+#
+# There are tweaks for specific operating systems such as AIX, CygWin,
+# Darwin, MinGW, and NonStop.
+#
+# (3) This script is generated from the Groovy template
+# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+# within the Gradle project.
+#
+# You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+ APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
+ [ -h "$app_path" ]
+do
+ ls=$( ls -ld "$app_path" )
+ link=${ls#*' -> '}
+ case $link in #(
+ /*) app_path=$link ;; #(
+ *) app_path=$APP_HOME$link ;;
+ esac
+done
+
+# This is normally unused
+# shellcheck disable=SC2034
+APP_BASE_NAME=${0##*/}
+# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
+APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+ echo "$*"
+} >&2
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in #(
+ CYGWIN* ) cygwin=true ;; #(
+ Darwin* ) darwin=true ;; #(
+ MSYS* | MINGW* ) msys=true ;; #(
+ NONSTOP* ) nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD=$JAVA_HOME/jre/sh/java
+ else
+ JAVACMD=$JAVA_HOME/bin/java
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD=java
+ if ! command -v java >/dev/null 2>&1
+ then
+ die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+ case $MAX_FD in #(
+ max*)
+ # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
+ # shellcheck disable=SC2039,SC3045
+ MAX_FD=$( ulimit -H -n ) ||
+ warn "Could not query maximum file descriptor limit"
+ esac
+ case $MAX_FD in #(
+ '' | soft) :;; #(
+ *)
+ # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
+ # shellcheck disable=SC2039,SC3045
+ ulimit -n "$MAX_FD" ||
+ warn "Could not set maximum file descriptor limit to $MAX_FD"
+ esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+# * args from the command line
+# * the main class name
+# * -classpath
+# * -D...appname settings
+# * --module-path (only if needed)
+# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+ APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+ CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+ JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ for arg do
+ if
+ case $arg in #(
+ -*) false ;; # don't mess with options #(
+ /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
+ [ -e "$t" ] ;; #(
+ *) false ;;
+ esac
+ then
+ arg=$( cygpath --path --ignore --mixed "$arg" )
+ fi
+ # Roll the args list around exactly as many times as the number of
+ # args, so each arg winds up back in the position where it started, but
+ # possibly modified.
+ #
+ # NB: a `for` loop captures its iteration list before it begins, so
+ # changing the positional parameters here affects neither the number of
+ # iterations, nor the values presented in `arg`.
+ shift # remove old arg
+ set -- "$@" "$arg" # push replacement arg
+ done
+fi
+
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Collect all arguments for the java command:
+# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
+# and any embedded shellness will be escaped.
+# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
+# treated as '${Hostname}' itself on the command line.
+
+set -- \
+ "-Dorg.gradle.appname=$APP_BASE_NAME" \
+ -classpath "$CLASSPATH" \
+ org.gradle.wrapper.GradleWrapperMain \
+ "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+ die "xargs is not available"
+fi
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+# set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+ printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+ xargs -n1 |
+ sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+ tr '\n' ' '
+ )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/opensearch-hebrew-analyser/gradlew.bat b/opensearch-hebrew-analyser/gradlew.bat
new file mode 100644
index 0000000..6689b85
--- /dev/null
+++ b/opensearch-hebrew-analyser/gradlew.bat
@@ -0,0 +1,92 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%"=="" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%"=="" set DIRNAME=.
+@rem This is normally unused
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if %ERRORLEVEL% equ 0 goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if %ERRORLEVEL% equ 0 goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/Archive.zip b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/Archive.zip
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/Archive.zip
rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/Archive.zip
diff --git a/Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/LICENSE.txt b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/LICENSE.txt
similarity index 100%
rename from Hebrew-ElasticSearch-semi-exact/out/artifacts/elasticsearch-hebrew/LICENSE.txt
rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/LICENSE.txt
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar
rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/elasticsearch-hebrew-ngram-3-8.5.3.jar
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
similarity index 75%
rename from Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
rename to opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
index 161bf75..d7f5d97 100644
--- a/Hebrew-ElasticSearch-ngrams-3-words/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
+++ b/opensearch-hebrew-analyser/out/artifacts/elasticsearch-hebrew/plugin-descriptor.properties
@@ -1,7 +1,7 @@
version=8.5.3
name=elasticsearch-hebrew-ngram-3
description=elasticsearch-analysis-hebrew
-classname=com.hotstar.hebrew.plugin.HebrewAnalyzerEsPlugin
+classname=com.hotstar.hebrew.plugin.HebrewAnalyzerOsPlugin
java.version=17
elasticsearch.version=8.5.3
extended.plugins=
diff --git a/opensearch-hebrew-analyser/settings.gradle b/opensearch-hebrew-analyser/settings.gradle
new file mode 100644
index 0000000..b886efc
--- /dev/null
+++ b/opensearch-hebrew-analyser/settings.gradle
@@ -0,0 +1,18 @@
+/*
+ * This file was generated by the Gradle 'init' task.
+ *
+ * This project uses @Incubating APIs which are subject to change.
+ */
+
+pluginManagement {
+ repositories{
+ mavenLocal()
+ mavenCentral()
+ google()
+ maven{
+ url "https://plugins.gradle.org/m2/"
+ }
+ gradlePluginPortal()
+ }
+}
+rootProject.name = "opensearch-analysis-hebrew"
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java
similarity index 96%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java
index 798ba6e..503569f 100644
--- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewNgramAnalyzer.java
@@ -10,7 +10,7 @@
import java.util.regex.Pattern;
-public class HebrewAnalyzer extends Analyzer {
+public class HebrewNgramAnalyzer extends Analyzer {
/* This is the only function that we need to override for our analyzer.
* It takes in a java.io.Reader object and saves the tokenizer and list
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java
similarity index 95%
rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java
index f97db77..703b6a6 100644
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/HebrewAnalyzer.java
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewSemiExactAnalyzer.java
@@ -10,7 +10,7 @@
import java.util.regex.Pattern;
-public class HebrewAnalyzer extends Analyzer {
+public class HebrewSemiExactAnalyzer extends Analyzer {
/* This is the only function that we need to override for our analyzer.
* It takes in a java.io.Reader object and saves the tokenizer and list
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttribute.java
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/HebrewTokenTypeAttributeImpl.java
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/NGramizer.java
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/PluralFilter.java
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaNGramTokenFilter.java
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java
similarity index 89%
rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java
index 7096342..78e7bf9 100644
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaSemiExactFilter.java
@@ -1,10 +1,11 @@
package com.hotstar.hebrew.analysis;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import java.io.IOException;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/SefariaTokenizer.java
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java
similarity index 100%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/analysis/StopLetterFilter.java
diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java
new file mode 100644
index 0000000..6a5e706
--- /dev/null
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewAnalyzerOsPlugin.java
@@ -0,0 +1,38 @@
+package com.hotstar.hebrew.plugin;
+import org.apache.lucene.analysis.Analyzer;
+import org.opensearch.index.analysis.AnalyzerProvider;
+import org.opensearch.index.analysis.TokenFilterFactory;
+import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
+import org.opensearch.plugins.AnalysisPlugin;
+import org.opensearch.plugins.Plugin;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class HebrewAnalyzerOsPlugin extends Plugin implements AnalysisPlugin {
+    /** Registers the "hebrew_stop" and "hebrew_word" token filters, both built by HebrewNoOpTokenFilterFactory. */
+    @Override
+    public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
+        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
+        filters.put("hebrew_stop", HebrewNoOpTokenFilterFactory::new);
+        filters.put("hebrew_word", HebrewNoOpTokenFilterFactory::new);
+        return filters;
+    }
+    /** Registers the "hebrew_tokenizer" and "hebrew_sentence" tokenizers, both built by HebrewTokenizerTokenizerFactory. */
+    @Override
+    public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
+        Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
+        tokenizers.put("hebrew_tokenizer", HebrewTokenizerTokenizerFactory::new);
+        tokenizers.put("hebrew_sentence", HebrewTokenizerTokenizerFactory::new);
+        return tokenizers;
+    }
+    /** Registers the n-gram and semi-exact Hebrew analyzers under the names used in index settings. */
+    @Override
+    public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
+        Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new HashMap<>();
+        analyzers.put("hebrew-ngram-3-analyzer", HebrewNgramAnalyzerProvider::new);
+        analyzers.put("hebrew_semi_exact_analyzer", HebrewSemiExactAnalyzerProvider::new);
+        return analyzers;
+    }
+}
diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java
new file mode 100644
index 0000000..b8123b1
--- /dev/null
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNgramAnalyzerProvider.java
@@ -0,0 +1,29 @@
+package com.hotstar.hebrew.plugin;
+
+
+import com.hotstar.hebrew.analysis.HebrewNgramAnalyzer;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.env.Environment;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
+
+
+public class HebrewNgramAnalyzerProvider extends AbstractIndexAnalyzerProvider<HebrewNgramAnalyzer> {
+    /* Analyzer name; same string HebrewAnalyzerOsPlugin uses as the registration key for this provider. */
+    public static final String NAME = "hebrew-ngram-3-analyzer";
+
+    /* Analyzer instance created in the constructor and handed out by get(). */
+    protected HebrewNgramAnalyzer analyzer;
+
+    /* Forwards index settings, name and settings to the base provider and builds the analyzer; environment is unused. */
+    public HebrewNgramAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+        super(indexSettings, name, settings);
+        analyzer = new HebrewNgramAnalyzer();
+    }
+
+    /* Returns the HebrewNgramAnalyzer built in the constructor (same instance on every call). */
+    @Override
+    public HebrewNgramAnalyzer get() {
+        return this.analyzer;
+    }
+}
diff --git a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
similarity index 62%
rename from Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
index 4a73a41..4f6642d 100644
--- a/Hebrew-ElasticSearch-semi-exact/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewNoOpTokenFilterFactory.java
@@ -2,14 +2,14 @@
import org.apache.lucene.analysis.TokenStream;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.env.Environment;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AbstractTokenFilterFactory;
public class HebrewNoOpTokenFilterFactory extends AbstractTokenFilterFactory {
public HebrewNoOpTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
- super(name, settings);
+ super(indexSettings, name, settings);
}
@Override
diff --git a/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java
new file mode 100644
index 0000000..20fba1b
--- /dev/null
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewSemiExactAnalyzerProvider.java
@@ -0,0 +1,29 @@
+package com.hotstar.hebrew.plugin;
+
+
+import com.hotstar.hebrew.analysis.HebrewSemiExactAnalyzer;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.env.Environment;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AbstractIndexAnalyzerProvider;
+
+
+public class HebrewSemiExactAnalyzerProvider extends AbstractIndexAnalyzerProvider<HebrewSemiExactAnalyzer> {
+    /* Analyzer name; same string HebrewAnalyzerOsPlugin uses as the registration key for this provider. */
+    public static final String NAME = "hebrew_semi_exact_analyzer";
+
+    /* Analyzer instance created in the constructor and handed out by get(). */
+    protected HebrewSemiExactAnalyzer analyzer;
+
+    /* Forwards index settings, name and settings to the base provider and builds the analyzer; environment is unused. */
+    public HebrewSemiExactAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+        super(indexSettings, name, settings);
+        analyzer = new HebrewSemiExactAnalyzer();
+    }
+
+    /* Returns the HebrewSemiExactAnalyzer built in the constructor (same instance on every call). */
+    @Override
+    public HebrewSemiExactAnalyzer get() {
+        return this.analyzer;
+    }
+}
diff --git a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
similarity index 70%
rename from Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
rename to opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
index 0ce30ce..517723f 100644
--- a/Hebrew-ElasticSearch-ngrams-3-words/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
+++ b/opensearch-hebrew-analyser/src/main/java/com/hotstar/hebrew/plugin/HebrewTokenizerTokenizerFactory.java
@@ -1,11 +1,11 @@
package com.hotstar.hebrew.plugin;
import org.apache.lucene.analysis.Tokenizer;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
import com.hotstar.hebrew.analysis.SefariaTokenizer;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.env.Environment;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AbstractTokenizerFactory;
public class HebrewTokenizerTokenizerFactory extends AbstractTokenizerFactory {