From b054b665d67c8c6386d6959a378b5a5851f31b05 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 17 May 2024 09:17:59 +0900 Subject: [PATCH 1/4] update support matrix --- .github/workflows/build.yml | 2 +- gradle.properties | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 34e0246..e5a46a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,7 +38,7 @@ jobs: - 'os:2.7.0' - 'os:2.6.0' env: - mainJob: ${{ matrix.es-version == 'es:8.8.1' }} + mainJob: ${{ matrix.es-version == 'es:8.13.4' }} githubRef: ${{ github.ref }} continue-on-error: true steps: diff --git a/gradle.properties b/gradle.properties index d8aeaf4..060f891 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,8 @@ -# elasticsearch versions: 8.8.1, 8.6.2, 8.5.3, 8.4.3, 8.2.3, 7.17.9, 7.14.2, 7.10.2, 7.8.1, 7.4.2 -# opensearch version: 2.6.0, 2.8.0, 2.9.0, 2.10.0, 2.11.* -engineVersion=os:2.11.0 +# elasticsearch versions: 8.13.4, 8.12.2, 8.11.4, 8.10.4, 8.9.2, 8.8.1, 8.6.2, +# 8.5.3, 8.4.3, 8.2.3, 7.17.21, 7.14.2, 7.10.2 +# opensearch version: 2.14.0, 2.13.0, 2.12.0, 2.11.1, 2.10.0, 2.9.0, 2.8.0, +# 2.7.0, 2.6.0 +engineVersion=es:8.13.4 org.gradle.jvmargs=-XX:MaxMetaspaceSize=350m \ --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ @@ -9,4 +11,4 @@ org.gradle.jvmargs=-XX:MaxMetaspaceSize=350m \ --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED org.gradle.caching=true org.gradle.parallel=true -pluginVersion=3.1.1-SNAPSHOT \ No newline at end of file +pluginVersion=3.1.1-SNAPSHOT From 7de47aa9c5a76ea060385e6efde11fd4b88fc608 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 17 May 2024 09:21:30 +0900 Subject: [PATCH 2/4] Split changelog from readme --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 48 
+----------------------------------------------- 2 files changed, 48 insertions(+), 47 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..da41998 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,47 @@ +- version 3.1.0 + - support OpenSearch 2.6.0+ in addition to ElasticSearch + - analysis-sudachi plugin is now can be extended by other plugins. Loading sudachi plugins from extending plugins is supported as well +- version 3.0.0 + - Plugin is now implemented in Kotlin +- version 2.1.0 + - Added a new property `additional_settings` to write Sudachi settings directly in config + - Added support for specifying Elasticsearch version at build time +- version 2.0.3 + - Fix duplicated tokens for OOVs with `sudachi_split` filter's `extended mode` +- version 2.0.2 + - Upgrade Sudachi to 0.4.3 + - Fix overrun with surrogate pairs +- version 2.0.1 + - Upgrade Sudachi to 0.4.2 + - Fix buffer overrun with character normalization +- version 2.0.0 + - New mode `split_mode` was added + - New filter `sudachi_split` was added instead of `mode` + - `mode` was deperecated + - Upgrade Sudachi morphological analyzer to 0.4.1 + - Words containing periods are no longer split + - Fix a bug causing wrong offsets with `icu_normalizer` + +- version 1.3.2 + - Upgrade Sudachi morphological analyzer to 0.3.1 + +- version 1.3.1 + - Upgrade Sudachi morphological analyzer to 0.3.0 + - Minor bug fix + +- version 1.3.0 + - Upgrade Sudachi morphological analyzer to 0.2.0 + - Import Sudachi from maven central repository + - Minor bug fix + +- version 1.2.0 + - Upgrading Sudachi morphological analyzer to 0.2.0-SNAPSHOT + - New filter `sudachi_normalizedform` was added; see [sudachi_normalizedform](#sudachi_normalizedform) + - Default normalization behavior was changed; neather baseform filter and normalziedform filter not applied + - `sudachi_readingform` filter was changed with new romaji mappings based on MS-IME + +- version 1.1.0 + - 
`part-of-speech forward matching` is available on `stoptags`; see [sudachi_part_of_speech](#sudachi_part_of_speech) + +- version 1.0.0 + - first release diff --git a/README.md b/README.md index 1fc594f..c1cca1e 100644 --- a/README.md +++ b/README.md @@ -7,53 +7,7 @@ analysis-sudachi is an Elasticsearch plugin for tokenization of Japanese text us # What's new? -- version 3.1.0 - - support OpenSearch 2.6.0+ in addition to ElasticSearch - - analysis-sudachi plugin is now can be extended by other plugins. Loading sudachi plugins from extending plugins is supported as well -- version 3.0.0 - - Plugin is now implemented in Kotlin -- version 2.1.0 - - Added a new property `additional_settings` to write Sudachi settings directly in config - - Added support for specifying Elasticsearch version at build time -- version 2.0.3 - - Fix duplicated tokens for OOVs with `sudachi_split` filter's `extended mode` -- version 2.0.2 - - Upgrade Sudachi to 0.4.3 - - Fix overrun with surrogate pairs -- version 2.0.1 - - Upgrade Sudachi to 0.4.2 - - Fix buffer overrun with character normalization -- version 2.0.0 - - New mode `split_mode` was added - - New filter `sudachi_split` was added instead of `mode` - - `mode` was deperecated - - Upgrade Sudachi morphological analyzer to 0.4.1 - - Words containing periods are no longer split - - Fix a bug causing wrong offsets with `icu_normalizer` - -- version 1.3.2 - - Upgrade Sudachi morphological analyzer to 0.3.1 - -- version 1.3.1 - - Upgrade Sudachi morphological analyzer to 0.3.0 - - Minor bug fix - -- version 1.3.0 - - Upgrade Sudachi morphological analyzer to 0.2.0 - - Import Sudachi from maven central repository - - Minor bug fix - -- version 1.2.0 - - Upgrading Sudachi morphological analyzer to 0.2.0-SNAPSHOT - - New filter `sudachi_normalizedform` was added; see [sudachi_normalizedform](#sudachi_normalizedform) - - Default normalization behavior was changed; neather baseform filter and normalziedform filter not applied - - 
`sudachi_readingform` filter was changed with new romaji mappings based on MS-IME - -- version 1.1.0 - - `part-of-speech forward matching` is available on `stoptags`; see [sudachi_part_of_speech](#sudachi_part_of_speech) - -- version 1.0.0 - - first release +Check [changelog](./CHANGELOG.md) for more. # Build (if necessary) From e2b482810b3605947c4b91c42f49ca1b40142f58 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 17 May 2024 09:34:18 +0900 Subject: [PATCH 3/4] format changelog, add 3.1.1, update readme --- CHANGELOG.md | 126 ++++++++++++++++++++++++++++++++------------------- README.md | 15 ++++-- 2 files changed, 90 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da41998..9a481c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,47 +1,79 @@ -- version 3.1.0 - - support OpenSearch 2.6.0+ in addition to ElasticSearch - - analysis-sudachi plugin is now can be extended by other plugins. Loading sudachi plugins from extending plugins is supported as well -- version 3.0.0 - - Plugin is now implemented in Kotlin -- version 2.1.0 - - Added a new property `additional_settings` to write Sudachi settings directly in config - - Added support for specifying Elasticsearch version at build time -- version 2.0.3 - - Fix duplicated tokens for OOVs with `sudachi_split` filter's `extended mode` -- version 2.0.2 - - Upgrade Sudachi to 0.4.3 - - Fix overrun with surrogate pairs -- version 2.0.1 - - Upgrade Sudachi to 0.4.2 - - Fix buffer overrun with character normalization -- version 2.0.0 - - New mode `split_mode` was added - - New filter `sudachi_split` was added instead of `mode` - - `mode` was deperecated - - Upgrade Sudachi morphological analyzer to 0.4.1 - - Words containing periods are no longer split - - Fix a bug causing wrong offsets with `icu_normalizer` - -- version 1.3.2 - - Upgrade Sudachi morphological analyzer to 0.3.1 - -- version 1.3.1 - - Upgrade Sudachi morphological analyzer to 0.3.0 - - Minor bug fix - -- version 1.3.0 
- - Upgrade Sudachi morphological analyzer to 0.2.0 - - Import Sudachi from maven central repository - - Minor bug fix - -- version 1.2.0 - - Upgrading Sudachi morphological analyzer to 0.2.0-SNAPSHOT - - New filter `sudachi_normalizedform` was added; see [sudachi_normalizedform](#sudachi_normalizedform) - - Default normalization behavior was changed; neather baseform filter and normalziedform filter not applied - - `sudachi_readingform` filter was changed with new romaji mappings based on MS-IME - -- version 1.1.0 - - `part-of-speech forward matching` is available on `stoptags`; see [sudachi_part_of_speech](#sudachi_part_of_speech) - -- version 1.0.0 - - first release +# Change log + +## [3.1.1] - 2024-05-17 + +### Added + +- Support ElasticSearch -8.13.4 and OpenSearch -2.14.0. (#114, #118) + - Integration tests (`:integration`) for es:8.9.0+ are moved to GitHub Actions. + +### Fixed + +- Fix dictionary caching problem (#112) + +## [3.1.0] + +- support OpenSearch 2.6.0+ in addition to ElasticSearch +- analysis-sudachi plugin can now be extended by other plugins. 
Loading sudachi plugins from extending plugins is supported as well + +## [3.0.0] + +- Plugin is now implemented in Kotlin + +## [2.1.0] + +- Added a new property `additional_settings` to write Sudachi settings directly in config +- Added support for specifying Elasticsearch version at build time + +## [2.0.3] + +- Fix duplicated tokens for OOVs with `sudachi_split` filter's `extended mode` + +## [2.0.2] + +- Upgrade Sudachi to 0.4.3 + - Fix overrun with surrogate pairs + +## [2.0.1] + +- Upgrade Sudachi to 0.4.2 + - Fix buffer overrun with character normalization + +## [2.0.0] + +- New mode `split_mode` was added +- New filter `sudachi_split` was added instead of `mode` +- `mode` was deprecated +- Upgrade Sudachi morphological analyzer to 0.4.1 +- Words containing periods are no longer split +- Fix a bug causing wrong offsets with `icu_normalizer` + +## [1.3.2] + +- Upgrade Sudachi morphological analyzer to 0.3.1 + +## [1.3.1] + +- Upgrade Sudachi morphological analyzer to 0.3.0 +- Minor bug fix + +## [1.3.0] + +- Upgrade Sudachi morphological analyzer to 0.2.0 +- Import Sudachi from maven central repository +- Minor bug fix + +## [1.2.0] + +- Upgrading Sudachi morphological analyzer to 0.2.0-SNAPSHOT +- New filter `sudachi_normalizedform` was added; see [sudachi_normalizedform](./README.md#sudachi_normalizedform) +- Default normalization behavior was changed; neither the baseform filter nor the normalizedform filter is applied +- `sudachi_readingform` filter was changed with new romaji mappings based on MS-IME + +## [1.1.0] + +- `part-of-speech forward matching` is available on `stoptags`; see [sudachi_part_of_speech](./README.md#sudachi_part_of_speech) + +## [1.0.0] + +- first release diff --git a/README.md b/README.md index c1cca1e..7ada288 100644 --- a/README.md +++ b/README.md @@ -7,26 +7,33 @@ analysis-sudachi is an Elasticsearch plugin for tokenization of Japanese text us # What's new? +- [3.1.1] + - Support ElasticSearch -8.13.4 and OpenSearch -2.14.0. 
(#114, #118) + Check [changelog](./CHANGELOG.md) for more. # Build (if necessary) 1. Build analysis-sudachi. ``` - $ ./gradlew -PengineVersion=es:8.6.2 build + $ ./gradlew -PengineVersion=es:8.13.4 build ``` -Use `-PengineVersion=os:2.6.0` for OpenSearch. +Use `-PengineVersion=os:2.14.0` for OpenSearch. ## Supported ElasticSearch versions -1. 8.0.* until 8.6.* supported, integration tests in CI +1. 8.0.* until 8.13.* supported, integration tests in CI 2. 7.17.* (latest patch version) - supported, integration tests in CI 3. 7.11.* until 7.16.* - best effort support, not tested in CI 4. 7.10.* integration tests for the latest patch version 5. 7.9.* and below - not tested in CI at all, may be broken 6. 7.3.* and below - broken, not supported +## Supported OpenSearch versions + +1. 2.6.* until 2.14.* supported, integration tests in CI + # Installation 1. Move current dir to $ES_HOME @@ -545,6 +552,6 @@ Returns `susi`. # License -Copyright (c) 2017-2020 Works Applications Co., Ltd. +Copyright (c) 2017-2024 Works Applications Co., Ltd. Originally under elasticsearch, https://www.elastic.co/jp/products/elasticsearch Originally under lucene, https://lucene.apache.org/ From 2557e452c90ec7bfc8b9af99bf9526914230d9b3 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 17 May 2024 09:52:12 +0900 Subject: [PATCH 4/4] bump version -> 3.1.1 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index 060f891..0283197 100644 --- a/gradle.properties +++ b/gradle.properties @@ -11,4 +11,4 @@ org.gradle.jvmargs=-XX:MaxMetaspaceSize=350m \ --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED org.gradle.caching=true org.gradle.parallel=true -pluginVersion=3.1.1-SNAPSHOT +pluginVersion=3.1.1