From 124bcf007877e96a6cd94850202a4507dd5d471d Mon Sep 17 00:00:00 2001 From: Damien Guillaume Date: Wed, 17 Dec 2025 14:46:41 -0500 Subject: [PATCH 1/5] Support queries where part of the query is using ExactSettings --- .../Backend/Solr/QueryBuilder.php | 60 ++++++++++++++++ .../Backend/Solr/QueryBuilderTest.php | 68 +++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 38eb0732141..2b81bb2d015 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -133,6 +133,8 @@ public function __construct( */ public function build(AbstractQuery $query, ?ParamBag $params = null) { + $query = $this->possiblyConvertMixedExactQueryIntoAdvanced($query); + $newParams = new ParamBag(); // Add spelling query if applicable -- note that we must set this up before @@ -210,6 +212,64 @@ public function build(AbstractQuery $query, ?ParamBag $params = null) return $newParams; } + /** + * Converts a simple query (Query) into an advanced one (QueryGroup) if part of it should be an exact query. + * This only supports a single exact query (surrounded with quotes) combined with a non-exact query. + * Logical operators can be used, but not parenthesis or field names. + * The original query is returned for any non-supported case. 
+ * + * @param QueryGroup|Query $query User query + * + * @return QueryGroup|Query + */ + protected function possiblyConvertMixedExactQueryIntoAdvanced($query) + { + if ($query instanceof QueryGroup) { + return $query; + } + $handler = $query->getHandler(); + if ($handler && !isset($this->exactSpecs[strtolower($handler)])) { + return $query; + } + $queryString = trim($query->getString()); + if (!preg_match('/^([^":()+]*)"([^"]+)"([^":()]*)$/u', $queryString, $parts)) { + return $query; + } + $groupOperator = 'AND'; + $negateQuotedPart = false; + $before = trim($parts[1]); + if (preg_match('/^(.*)\s*(NOT|-)$/u', $before, $notParts)) { + $before = $notParts[1]; + $negateQuotedPart = true; + } + if (preg_match('/^(.*)\s*(AND|OR)$/u', $before, $beforeParts)) { + $before = $beforeParts[1]; + $groupOperator = $beforeParts[2]; + } + $quoted = '"' . $parts[2] . '"'; + $after = trim($parts[3]); + if (preg_match('/^(AND|OR)(?:\s+(.*))?$/u', $after, $afterParts)) { // operator must be a whole word + $groupOperator = $afterParts[1]; + $after = $afterParts[2] ?? ''; // group 2 is unset when nothing follows the operator + } + if (($before == '' && $after == '') || ($before != '' && $after != '')) { + return $query; + } + $subQueries = []; + if ($before != '') { + $subQueries[] = new Query($before, $handler); + } + if ($negateQuotedPart) { + $subQueries[] = new QueryGroup('NOT', [ new Query($quoted, $handler) ]); + } else { + $subQueries[] = new Query($quoted, $handler); + } + if ($after != '') { + $subQueries[] = new Query($after, $handler); + } + return new QueryGroup($groupOperator, $subQueries); + } + /** * Check if the conditions match for an extra parameter * diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php index 2397c1c4473..bc53110b42f 100644 --- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php +++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php @@ -295,6
+295,74 @@ public function testExactQueryHandler() $this->assertEquals('c d', $qf[0]); } + /** + * Test queries with mixed exact and non-exact parts. + * + * @return void + */ + public function testMixedExactQueryHandler() + { + // Check QueryBuilder without ExactSettings + $qb = new QueryBuilder( + [ + 'TestHandler' => [ + 'DismaxFields' => ['a'], + 'DismaxHandler' => 'edismax', + ], + ] + ); + $q = new Query('"t1" AND t2', 'TestHandler'); + $response = $qb->build($q); + $queryString = $response->get('q')[0]; + $this->assertEquals('"t1" AND t2', $queryString); + + // Expected inputs and outputs with ExactSettings: + $tests = [ + ['"t1"', '"t1"'], // simple exact queries are not affected + ['("t1" OR t2) AND t3', '("t1" OR t2) AND t3'], // queries with parenthesis are not supported + ['"t1" AND title:t2', '"t1" AND title:t2'], // queries with field are not supported + ['"t1" AND "t2"', '"t1" AND "t2"'], // queries with multiple exact parts are not supported + ['t1 AND "t2" AND t3', 't1 AND "t2" AND t3'], // queries with an exact part in the middle are not supported + ['"t1" t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], + ['"t1" AND t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], + ['"t1" OR t2', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") OR ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], + ['t1 AND "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1") AND ' . + '(_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))'], + ['NOT "t1" AND t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], + ['t1 AND NOT "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1 AND") AND ' . 
+ '(*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))))'], + ['-"t1" t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], + ['"t1" AND t2 AND t3', '((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\"") AND ' . + '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2 AND t3"))'], // would be different with dismax + ]; + + $qb = new QueryBuilder( + [ + 'TestHandler' => [ + 'DismaxFields' => ['a'], + 'DismaxHandler' => 'edismax', + 'ExactSettings' => [ + 'DismaxFields' => ['b'], + 'DismaxHandler' => 'edismax', + ], + ], + ] + ); + + foreach ($tests as $test) { + [$input, $output] = $test; + $q = new Query($input, 'TestHandler'); + $response = $qb->build($q); + $queryString = $response->get('q')[0]; + $this->assertEquals($output, $queryString); + } + } + /** * Test generation with a query handler with a filter set and DisMax settings * From cd87e579dbf31ab4be9e6f486b5c03c2a5dce71e Mon Sep 17 00:00:00 2001 From: damien-git Date: Thu, 18 Dec 2025 10:17:45 -0500 Subject: [PATCH 2/5] spelling Co-authored-by: Demian Katz --- .../VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 2b81bb2d015..15908f99e50 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -215,7 +215,7 @@ public function build(AbstractQuery $query, ?ParamBag $params = null) /** * Converts a simple query (Query) into an advanced one (QueryGroup) if part of it should be an exact query. * This only supports a single exact query (surrounded with quotes) combined with a non-exact query. - * Logical operators can be used, but not parenthesis or field names. 
+ * Logical operators can be used, but not parentheses or field names. * The original query is returned for any non-supported case. * * @param QueryGroup|Query $query User query From c7cf05409d9880981962046b2e46f057c8ff9fbb Mon Sep 17 00:00:00 2001 From: Damien Date: Thu, 18 Dec 2025 10:34:09 -0500 Subject: [PATCH 3/5] Better handling of query types --- .../src/VuFindSearch/Backend/Solr/QueryBuilder.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 15908f99e50..f570ee7d094 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -218,13 +218,13 @@ public function build(AbstractQuery $query, ?ParamBag $params = null) * Logical operators can be used, but not parentheses or field names. * The original query is returned for any non-supported case. * - * @param QueryGroup|Query $query User query + * @param QueryInterface $query User query * - * @return QueryGroup|Query + * @return QueryInterface */ protected function possiblyConvertMixedExactQueryIntoAdvanced($query) { - if ($query instanceof QueryGroup) { + if (! 
$query instanceof Query) { return $query; } $handler = $query->getHandler(); From 8b65cc3eb83553c7dfa93245de9f37e78642e7ac Mon Sep 17 00:00:00 2001 From: Damien Date: Thu, 18 Dec 2025 10:38:06 -0500 Subject: [PATCH 4/5] Better handling of query types (2) --- .../src/VuFindSearch/Backend/Solr/QueryBuilder.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index f570ee7d094..287bc820342 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -218,13 +218,13 @@ public function build(AbstractQuery $query, ?ParamBag $params = null) * Logical operators can be used, but not parentheses or field names. * The original query is returned for any non-supported case. * - * @param QueryInterface $query User query + * @param AbstractQuery $query User query * - * @return QueryInterface + * @return AbstractQuery */ - protected function possiblyConvertMixedExactQueryIntoAdvanced($query) + protected function possiblyConvertMixedExactQueryIntoAdvanced(AbstractQuery $query): AbstractQuery { - if (! 
$query instanceof Query) { + if (!($query instanceof Query)) { return $query; } $handler = $query->getHandler(); From 1b5513f5a0b5003a6b90e8af86a976d073c7ec2f Mon Sep 17 00:00:00 2001 From: Damien Date: Thu, 18 Dec 2025 11:15:17 -0500 Subject: [PATCH 5/5] Improved regexps --- .../src/VuFindSearch/Backend/Solr/QueryBuilder.php | 6 +++--- .../src/VuFindTest/Backend/Solr/QueryBuilderTest.php | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 287bc820342..5af17303677 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -238,11 +238,11 @@ protected function possiblyConvertMixedExactQueryIntoAdvanced(AbstractQuery $que $groupOperator = 'AND'; $negateQuotedPart = false; $before = trim($parts[1]); - if (preg_match('/^(.*)\s*(NOT|-)$/u', $before, $notParts)) { - $before = $notParts[1]; + if (preg_match('/^(.+\s+)?(NOT|-)$/u', $before, $notParts)) { + $before = trim($notParts[1]); $negateQuotedPart = true; } - if (preg_match('/^(.*)\s*(AND|OR)$/u', $before, $beforeParts)) { + if (preg_match('/^(.*)\s+(AND|OR)$/u', $before, $beforeParts)) { $before = $beforeParts[1]; $groupOperator = $beforeParts[2]; } diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php index bc53110b42f..6b65187e96d 100644 --- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php +++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php @@ -333,7 +333,7 @@ public function testMixedExactQueryHandler() '(_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))'], ['NOT "t1" AND t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' . 
'(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'], - ['t1 AND NOT "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1 AND") AND ' . + ['t1 AND NOT "t2"', '((_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t1") AND ' . '(*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t2\""))))'], ['-"t1" t2', '((*:* NOT ((_query_:"{!edismax qf=\"b\" mm=\\\'0%\\\'}\"t1\""))) AND ' . '(_query_:"{!edismax qf=\"a\" mm=\\\'0%\\\'}t2"))'],