diff --git a/package.json b/package.json index 10ede3a37..758616eb0 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "@azure/storage-blob": "12.29.1", "@exlinc/keycloak-passport": "1.0.2", "@joplin/turndown-plugin-gfm": "1.0.45", + "@opensearch-project/opensearch": "3.5.1", "@root/csr": "0.8.1", "@root/keypairs": "0.10.3", "@root/pem": "1.0.4", diff --git a/server/modules/search/opensearch/definition.yml b/server/modules/search/opensearch/definition.yml new file mode 100644 index 000000000..d5ceb7699 --- /dev/null +++ b/server/modules/search/opensearch/definition.yml @@ -0,0 +1,57 @@ +key: opensearch +title: OpenSearch +description: OpenSearch is a community-driven, Apache 2.0-licensed open source search and analytics suite that makes it easy to ingest, search, visualize, and analyze data. +author: Metaways Infosystems +logo: https://opensearch.org/wp-content/uploads/2024/11/favicon.webp +website: https://opensearch.org/ +isAvailable: true +props: + apiVersion: + type: String + title: OpenSearch Version + hint: Should match the version of the OpenSearch nodes you are connecting to + order: 1 + enum: + - '3.x' + - '2.x' + - '1.x' + default: '2.x' + hosts: + type: String + title: Host(s) + hint: Comma-separated list of OpenSearch hosts to connect to, including the port, username and password if necessary. (e.g. http://localhost:9200, https://user:pass@es1.example.com:9200) + order: 2 + verifyTLSCertificate: + title: Verify TLS Certificate + type: Boolean + default: true + order: 3 + tlsCertPath: + title: TLS Certificate Path + type: String + hint: Absolute path to the TLS certificate on the server. 
+ order: 4 + indexName: + type: String + title: Index Name + hint: The index name to use during creation + default: wiki + order: 5 + analyzer: + type: String + title: Analyzer + hint: 'The token analyzer in OpenSearch' + default: simple + order: 6 + sniffOnStart: + type: Boolean + title: Sniff on start + hint: 'Should Wiki.js attempt to detect the rest of the cluster on first connect? (Default: off)' + default: false + order: 7 + sniffInterval: + type: Number + title: Sniff Interval + hint: '0 = disabled, Interval in seconds to check for updated list of nodes in cluster. (Default: 0)' + default: 0 + order: 8 diff --git a/server/modules/search/opensearch/engine.js b/server/modules/search/opensearch/engine.js new file mode 100644 index 000000000..6573de4d2 --- /dev/null +++ b/server/modules/search/opensearch/engine.js @@ -0,0 +1,345 @@ +const _ = require('lodash') +const fs = require('fs') +const { pipeline } = require('node:stream/promises') +const { Transform } = require('node:stream') +const ElasticSearch = require('../elasticsearch/engine') + +/* global WIKI */ + +module.exports = { + async activate() { + // not used + }, + async deactivate() { + // not used + }, + /** + * INIT + */ + async init() { + WIKI.logger.info(`(SEARCH/OPENSEARCH) Initializing...`) + + + switch (this.config.apiVersion) { + case '3.x': + case '2.x': + case '1.x': + var { Client } = require('@opensearch-project/opensearch'); + this.client = new Client({ + nodes: this.config.hosts.split(',').map(_.trim), + sniffOnStart: this.config.sniffOnStart, + sniffInterval: (this.config.sniffInterval > 0) ? this.config.sniffInterval * 1000 : false, // setting is entered in seconds, the client option is in milliseconds + ssl: getTlsOptions(this.config), + name: 'wiki-js' + }) + + break + default: + throw new Error('Unsupported version of OpenSearch! 
Update your settings in the Administration Area.') + } + + // -> Create Search Index + await this.createIndex() + + WIKI.logger.info(`(SEARCH/OPENSEARCH) Initialization completed.`) + }, + /** + * Create Index + */ + async createIndex() { + try { + const indexExists = await this.client.indices.exists({ index: this.config.indexName }) + + if (!indexExists.body) { + WIKI.logger.info(`(SEARCH/OPENSEARCH) Creating index...`) + try { + await this.client.indices.create({ + index: this.config.indexName, + body: { + mappings: { + properties: { + suggest: { type: 'completion' }, + title: { type: 'text', boost: 10.0 }, + description: { type: 'text', boost: 3.0 }, + content: { type: 'text', boost: 1.0 }, + locale: { type: 'keyword' }, + path: { type: 'text' }, + tags: { type: 'text', boost: 8.0 } + } + }, + settings: { + analysis: { + analyzer: { + default: { + type: this.config.analyzer + } + } + } + } + } + }) + } catch (err) { + WIKI.logger.error(`(SEARCH/OPENSEARCH) Create Index Error: `, _.get(err, 'meta.body.error', err)) + } + } + } catch (err) { + WIKI.logger.error(`(SEARCH/OPENSEARCH) Index Check Error: `, _.get(err, 'meta.body.error', err)) + } + }, + /** + * QUERY + * + * @param {String} q Query + * @param {Object} opts Additional options + */ + async query(q, opts) { + try { + const results = await this.client.search({ + index: this.config.indexName, + body: { + query: { + simple_query_string: { + query: `*${q}*`, + fields: ['title^20', 'description^3', 'tags^8', 'content^1'], + default_operator: 'and', + analyze_wildcard: true + } + }, + from: 0, + size: 50, + _source: ['title', 'description', 'path', 'locale'], + suggest: { + suggestions: { + text: q, + completion: { + field: 'suggest', + size: 5, + skip_duplicates: true, + fuzzy: true + } + } + } + } + }) + return { + results: _.get(results, 'body.hits.hits', []).map(r => ({ + id: r._id, + locale: r._source.locale, + path: r._source.path, + title: r._source.title, + description: r._source.description + })), + 
suggestions: _.reject(_.get(results, 'body.suggest.suggestions', []).map(s => _.get(s, 'options[0].text', false)), s => !s), // suggest results sit under the response body, like hits + totalHits: _.get(results, 'body.hits.total.value', _.get(results, 'body.hits.total', 0)) + } + } catch (err) { + WIKI.logger.warn('Search Engine Error: ', _.get(err, 'meta.body.error', err)) + } + }, + + /** + * Build tags field + * @param id + * @returns {Promise<*|*[]>} + */ + async buildTags(id) { + return ElasticSearch.buildTags(id) + }, + /** + * Build suggest field + */ + buildSuggest(page) { + return ElasticSearch.buildSuggest(page) + }, + /** + * CREATE + * + * @param {Object} page Page to create + */ + async created(page) { + await this.client.index({ + index: this.config.indexName, + id: page.hash, + body: { + suggest: this.buildSuggest(page), + locale: page.localeCode, + path: page.path, + title: page.title, + description: page.description, + content: page.safeContent, + tags: await this.buildTags(page.id) + }, + refresh: true + }) + }, + /** + * UPDATE + * + * @param {Object} page Page to update + */ + async updated(page) { + await this.client.index({ + index: this.config.indexName, + id: page.hash, + body: { + suggest: this.buildSuggest(page), + locale: page.localeCode, + path: page.path, + title: page.title, + description: page.description, + content: page.safeContent, + tags: await this.buildTags(page.id) + }, + refresh: true + }) + }, + /** + * DELETE + * + * @param {Object} page Page to delete + */ + async deleted(page) { + await this.client.delete({ + index: this.config.indexName, + id: page.hash, + refresh: true + }) + }, + /** + * RENAME + * + * @param {Object} page Page to rename + */ + async renamed(page) { + await this.client.delete({ + index: this.config.indexName, + id: page.hash, + refresh: true + }) + await this.client.index({ + index: this.config.indexName, + id: page.destinationHash, + body: { + suggest: this.buildSuggest(page), + locale: page.destinationLocaleCode, + path: page.destinationPath, + title: page.title, 
+ description: page.description, + content: page.safeContent, + tags: await this.buildTags(page.id) + }, + refresh: true + }) + }, + /** + * REBUILD INDEX + */ + async rebuild() { + WIKI.logger.info(`(SEARCH/OPENSEARCH) Rebuilding Index...`) + await this.client.indices.delete({ index: this.config.indexName, ignore_unavailable: true }) // tolerate a missing index so a rebuild can recreate it + await this.createIndex() + + const MAX_INDEXING_BYTES = 10 * Math.pow(2, 20) - Buffer.from('[').byteLength - Buffer.from(']').byteLength // 10 MB + const MAX_INDEXING_COUNT = 1000 + const COMMA_BYTES = Buffer.from(',').byteLength + + let chunks = [] + let bytes = 0 + + const processDocument = async (cb, doc) => { + try { + if (doc) { + const docBytes = Buffer.from(JSON.stringify(doc)).byteLength + + doc['tags'] = await this.buildTags(doc.realId) + // -> Current batch exceeds size limit, flush + if (docBytes + COMMA_BYTES + bytes >= MAX_INDEXING_BYTES) { + await flushBuffer() + } + + if (chunks.length > 0) { + bytes += COMMA_BYTES + } + bytes += docBytes + chunks.push(doc) + + // -> Current batch exceeds count limit, flush + if (chunks.length >= MAX_INDEXING_COUNT) { + await flushBuffer() + } + } else { + // -> End of stream, flush + await flushBuffer() + } + cb() + } catch (err) { + cb(err) + } + } + + const flushBuffer = async () => { + WIKI.logger.info(`(SEARCH/OPENSEARCH) Sending batch of ${chunks.length}...`) + try { + await this.client.bulk({ + index: this.config.indexName, + body: _.reduce(chunks, (result, doc) => { + result.push({ + index: { + _index: this.config.indexName, + _id: doc.id, + } + }) + doc.safeContent = WIKI.models.pages.cleanHTML(doc.render) + result.push({ + suggest: this.buildSuggest(doc), + tags: doc.tags, + locale: doc.locale, + path: doc.path, + title: doc.title, + description: doc.description, + content: doc.safeContent + }) + return result + }, []), + refresh: true + }) + } catch (err) { + WIKI.logger.warn('(SEARCH/OPENSEARCH) Failed to send batch to OpenSearch: ', err) + } + chunks.length = 0 + bytes = 0 + } + + // Added real id 
in order to fetch page tags from the query + await pipeline( + WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render', { realId: 'id' }).select().from('pages').where({ + isPublished: true, + isPrivate: false + }).stream(), + new Transform({ + objectMode: true, + transform: async (chunk, enc, cb) => processDocument(cb, chunk), + flush: async (cb) => processDocument(cb) + }) + ) + WIKI.logger.info(`(SEARCH/OPENSEARCH) Index rebuilt successfully.`) + } +} + +function getTlsOptions(conf) { + if (!conf.tlsCertPath) { + return { + rejectUnauthorized: conf.verifyTLSCertificate + } + } + + const caList = [] + if (conf.verifyTLSCertificate) { + caList.push(fs.readFileSync(conf.tlsCertPath)) + } + + return { + rejectUnauthorized: conf.verifyTLSCertificate, + ca: caList + } +} diff --git a/yarn.lock b/yarn.lock index 8414feb3d..743289e77 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3417,6 +3417,18 @@ dependencies: mkdirp "^1.0.4" +"@opensearch-project/opensearch@3.5.1": + version "3.5.1" + resolved "https://registry.yarnpkg.com/@opensearch-project/opensearch/-/opensearch-3.5.1.tgz#bec6e20c45d2ae57a3c6a167d3f508748f86ff69" + integrity sha512-6bf+HcuERzAtHZxrm6phjref54ABse39BpkDie/YO3AUFMCBrb3SK5okKSdT5n3+nDRuEEQLhQCl0RQV3s1qpA== + dependencies: + aws4 "^1.11.0" + debug "^4.3.1" + hpagent "^1.2.0" + json11 "^2.0.0" + ms "^2.1.3" + secure-json-parse "^2.4.0" + "@opentelemetry/api@1.x": version "1.9.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api/-/api-1.9.0.tgz#d03eba68273dc0f7509e2a3d5cba21eae10379fe" @@ -5467,6 +5479,11 @@ aws-ssl-profiles@^1.1.1: resolved "https://registry.yarnpkg.com/aws-ssl-profiles/-/aws-ssl-profiles-1.1.2.tgz#157dd77e9f19b1d123678e93f120e6f193022641" integrity sha512-NZKeq9AfyQvEeNlN0zSYAaWrmBffJh3IELMZfRpJVWgrpEbtEpnjvzqBPf+mxoI287JohRDoa+/nsfqqiZmF6g== +aws4@^1.11.0: + version "1.13.2" + resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.13.2.tgz#0aa167216965ac9474ccfa83892cfb6b3e1e52ef" + 
integrity sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw== + aws4@^1.8.0: version "1.8.0" resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.8.0.tgz#f0e003d9ca9e7f59c7a508945d7b2ef9a04a542f" @@ -8387,6 +8404,13 @@ debug@^4.1.1: dependencies: ms "2.1.2" +debug@^4.3.1, debug@^4.4.0: + version "4.4.3" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.4.3.tgz#c6ae432d9bd9662582fce08709b038c58e9e3d6a" + integrity sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA== + dependencies: + ms "^2.1.3" + debug@^4.3.2, debug@^4.3.3, debug@^4.3.4: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" @@ -8394,13 +8418,6 @@ debug@^4.3.2, debug@^4.3.3, debug@^4.3.4: dependencies: ms "2.1.2" -debug@^4.4.0: - version "4.4.3" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.4.3.tgz#c6ae432d9bd9662582fce08709b038c58e9e3d6a" - integrity sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA== - dependencies: - ms "^2.1.3" - decamelize@^1.1.1, decamelize@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290" @@ -11080,7 +11097,7 @@ hosted-git-info@^2.1.4: resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.4.tgz#44119abaf4bc64692a16ace34700fed9c03e2546" integrity sha512-pzXIvANXEFrc5oFFXRMkbLPQ2rXRoDERwDLyrcUxGhaZhgP54BBSl9Oheh7Vv0T090cszWBxPjkQQ5Sq1PbBRQ== -hpagent@^1.0.0: +hpagent@^1.0.0, hpagent@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/hpagent/-/hpagent-1.2.0.tgz#0ae417895430eb3770c03443456b8d90ca464903" integrity sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA== @@ -12687,6 +12704,11 @@ json-stringify-safe@~5.0.1: resolved 
"https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= +json11@^2.0.0: + version "2.0.2" + resolved "https://registry.yarnpkg.com/json11/-/json11-2.0.2.tgz#77a6070437e36e269a0d3432593ce008553cdc72" + integrity sha512-HIrd50UPYmP6sqLuLbFVm75g16o0oZrVfxrsY0EEys22klz8mRoWlX9KAEDOSOR9Q34rcxsyC8oDveGrCz5uLQ== + json5@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/json5/-/json5-2.0.0.tgz#b61abf97aa178c4b5853a66cc8eecafd03045d78"