/**
 * Distance metrics that can be configured for an attribute.
 *
 * The declaration order is significant for anything relying on ordinals,
 * so new metrics are appended at the end.
 */
public enum DistanceMetric {
    EUCLIDEAN,
    ANGULAR,
    GEODEGREES,
    INNERPRODUCT,
    HAMMING,
    PRENORMALIZED_ANGULAR,
    DOTPRODUCT,
    TURBOQUANT
}
assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.INNERPRODUCT, "innerproduct"); assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.PRENORMALIZED_ANGULAR, "prenormalized-angular"); + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.TURBOQUANT, "turboquant"); } private void assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.Enum expDistanceMetric, diff --git a/configdefinitions/src/vespa/attributes.def b/configdefinitions/src/vespa/attributes.def index db5e185099e..e82e6a5e1f5 100644 --- a/configdefinitions/src/vespa/attributes.def +++ b/configdefinitions/src/vespa/attributes.def @@ -37,7 +37,7 @@ attribute[].maxuncommittedmemory long default=130000 # The distance metric to use for nearest neighbor search. # Is only used when the attribute is a 1-dimensional indexed tensor. -attribute[].distancemetric enum { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR, DOTPRODUCT } default=EUCLIDEAN +attribute[].distancemetric enum { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR, DOTPRODUCT, TURBOQUANT } default=EUCLIDEAN # Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor for approximate nearest neighbor search. 
attribute[].index.hnsw.enabled bool default=false diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/schema/completion/provider/FixedKeywordBodies.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/schema/completion/provider/FixedKeywordBodies.java index eaad81ad4f0..b9863cc58bd 100644 --- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/schema/completion/provider/FixedKeywordBodies.java +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/schema/completion/provider/FixedKeywordBodies.java @@ -188,6 +188,8 @@ public CompletionItem getBodySnippet() { CompletionUtils.constructBasic("euclidean"), CompletionUtils.constructBasic("angular"), CompletionUtils.constructBasic("dotproduct"), + CompletionUtils.constructBasic("innerproduct"), + CompletionUtils.constructBasic("turboquant"), CompletionUtils.constructBasic("prenormalized-angular"), CompletionUtils.constructBasic("geodegrees"), CompletionUtils.constructBasic("hamming") diff --git a/integration/tmgrammar/grammar/vespa-schema.tmLanguage.json b/integration/tmgrammar/grammar/vespa-schema.tmLanguage.json index 1e25c726588..0ee7f377ae7 100644 --- a/integration/tmgrammar/grammar/vespa-schema.tmLanguage.json +++ b/integration/tmgrammar/grammar/vespa-schema.tmLanguage.json @@ -770,7 +770,7 @@ ] }, "enum-value-inline": { - "match": "(?(f.hnsw_index().distance_function_factory()).get_max_squared_norm_store(); + EXPECT_EQ(130.0, norm_store.get_max()); +} + template class NearestNeighborBlueprintFixtureBase : public ParentT { private: diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp index d0e26a45079..1f290ed83fe 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp +++ 
b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,9 @@ void benchmark(size_t iterations, size_t elems, const std::string & dist_functio if (dist_functions.find("mips") != npos) { benchmark(iterations, elems, MipsDistanceFunctionFactory()); } + if (dist_functions.find("turboquant") != npos) { + benchmark(iterations, elems, TurboQuantDistanceFunctionFactory()); + } } void @@ -108,7 +112,7 @@ int main(int argc, char *argv[]) { size_t num_iterations = 10000000; size_t num_elems = 1024; - std::string dist_functions = "angular euclid prenorm mips"; + std::string dist_functions = "angular euclid prenorm mips turboquant"; std::string data_types = "double float32 bfloat16 float8"; if (argc > 1) { num_iterations = atol(argv[1]); } if (argc > 2) { num_elems = atol(argv[2]); } diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 5e3bffac30a..179a1e6d906 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -5,8 +5,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -764,6 +766,106 @@ TEST(DistanceFunctionsTest, dotproduct_can_reference_insertion_vector) expect_reference_insertion_vector(0.0, DistanceMetric::Dotproduct, CellType::BFLOAT16); } +TEST(DistanceFunctionsTest, turboquant_uses_dedicated_factory_type) +{ + auto dotproduct_factory = make_distance_function_factory(DistanceMetric::Dotproduct, CellType::FLOAT); + auto turboquant_factory = make_distance_function_factory(DistanceMetric::TurboQuant, CellType::FLOAT); + EXPECT_NE(nullptr, dynamic_cast*>(dotproduct_factory.get())); + EXPECT_NE(nullptr, dynamic_cast*>(turboquant_factory.get())); + EXPECT_EQ(nullptr, 
dynamic_cast*>(dotproduct_factory.get())); +} + +TEST(DistanceFunctionsTest, turboquant_can_reference_insertion_vector) +{ + auto check_ref = [](CellType cell_type) { + std::vector lhs{0.0, 1.0}; + std::vector rhs{0.0, 1.0}; + auto factory = make_distance_function_factory(DistanceMetric::TurboQuant, cell_type); + auto func = factory->for_insertion_vector(t(lhs)); + const double before = func->calc(t(rhs)); + lhs[0] = 1.0; + lhs[1] = 0.0; + const double after = func->calc(t(rhs)); + EXPECT_NE(before, after); + }; + check_ref(CellType::FLOAT); + check_ref(CellType::DOUBLE); +} + +TEST(DistanceFunctionsTest, turboquant_prefers_identical_vector_over_dissimilar_vector) +{ + std::vector query{1.0f, 2.0f, 3.0f, 4.0f}; + std::vector same{1.0f, 2.0f, 3.0f, 4.0f}; + std::vector dissimilar{-4.0f, 3.0f, -2.0f, 1.0f}; + + auto factory = make_distance_function_factory(DistanceMetric::TurboQuant, CellType::FLOAT); + auto df = factory->for_query_vector(t(query)); + const double same_distance = df->calc(t(same)); + const double dissimilar_distance = df->calc(t(dissimilar)); + + EXPECT_LT(same_distance, dissimilar_distance); + EXPECT_GT(df->to_rawscore(same_distance), df->to_rawscore(dissimilar_distance)); +} + +TEST(DistanceFunctionsTest, turboquant_topk_overlap_with_exact_dotproduct_is_reasonable) +{ + constexpr size_t dims = 128; + constexpr size_t docs = 200; + constexpr size_t k = 10; + + std::mt19937 rng(7); + std::uniform_real_distribution dist(-3.0f, 3.0f); + + std::vector query(dims); + for (auto& value : query) { + value = dist(rng); + } + std::vector> vectors; + vectors.reserve(docs); + for (size_t i = 0; i < docs; ++i) { + std::vector v(dims); + for (auto& value : v) { + value = dist(rng); + } + vectors.push_back(std::move(v)); + } + + auto turbo_factory = make_distance_function_factory(DistanceMetric::TurboQuant, CellType::FLOAT); + auto turbo_df = turbo_factory->for_query_vector(t(query)); + + auto exact_dot = [&query](const std::vector& v) { + double sum = 0.0; + for 
(size_t i = 0; i < query.size(); ++i) { + sum += static_cast(query[i]) * static_cast(v[i]); + } + return sum; + }; + + std::vector> exact; + std::vector> approx; + exact.reserve(docs); + approx.reserve(docs); + for (uint32_t i = 0; i < docs; ++i) { + const auto& v = vectors[i]; + exact.emplace_back(exact_dot(v), i); + approx.emplace_back(turbo_df->to_rawscore(turbo_df->calc(t(v))), i); + } + auto greater = [](const auto& lhs, const auto& rhs) { return lhs.first > rhs.first; }; + std::partial_sort(exact.begin(), exact.begin() + k, exact.end(), greater); + std::partial_sort(approx.begin(), approx.begin() + k, approx.end(), greater); + + size_t overlap = 0; + for (size_t i = 0; i < k; ++i) { + for (size_t j = 0; j < k; ++j) { + if (exact[i].second == approx[j].second) { + ++overlap; + break; + } + } + } + EXPECT_GE(overlap, 5u); +} + TEST(DistanceFunctionsTest, hamming_can_reference_insertion_vector) { expect_reference_insertion_vector(2.0, DistanceMetric::Hamming, CellType::FLOAT); diff --git a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h index e1433f2d948..ba96ebf8f62 100644 --- a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h +++ b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h @@ -6,6 +6,6 @@ namespace search::attribute { -enum class DistanceMetric : uint8_t { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct }; +enum class DistanceMetric : uint8_t { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct, TurboQuant }; } diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index ac0d94d80b5..0ccb9edf0ad 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -139,6 +139,9 @@ ConfigConverter::convert(const 
AttributesConfig::Attribute & cfg) case CfgDm::DOTPRODUCT: dm = DistanceMetric::Dotproduct; break; + case CfgDm::TURBOQUANT: + dm = DistanceMetric::TurboQuant; + break; } retval.set_distance_metric(dm); if (cfg.index.hnsw.enabled) { diff --git a/searchlib/src/vespa/searchlib/attribute/distance_metric_utils.cpp b/searchlib/src/vespa/searchlib/attribute/distance_metric_utils.cpp index b59e4314210..9cf26a7a9dc 100644 --- a/searchlib/src/vespa/searchlib/attribute/distance_metric_utils.cpp +++ b/searchlib/src/vespa/searchlib/attribute/distance_metric_utils.cpp @@ -14,6 +14,7 @@ const std::string innerproduct = "innerproduct"; const std::string prenormalized_angular = "prenormalized_angular"; const std::string dotproduct = "dotproduct"; const std::string hamming = "hamming"; +const std::string turboquant = "turboquant"; } @@ -28,6 +29,7 @@ DistanceMetricUtils::to_string(DistanceMetric metric) case DistanceMetric::Hamming: return hamming; case DistanceMetric::PrenormalizedAngular: return prenormalized_angular; case DistanceMetric::Dotproduct: return dotproduct; + case DistanceMetric::TurboQuant: return turboquant; } throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast(metric))); } @@ -49,6 +51,8 @@ DistanceMetricUtils::to_distance_metric(const std::string& metric) return DistanceMetric::Dotproduct; } else if (metric == hamming) { return DistanceMetric::Hamming; + } else if (metric == turboquant) { + return DistanceMetric::TurboQuant; } else { throw vespalib::IllegalStateException("Unknown distance metric '" + metric + "'"); } diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 219aa3ec068..5bdd48611e9 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(searchlib_tensor OBJECT SOURCES angular_distance.cpp mips_distance_transform.cpp + 
turbo_quant_distance.cpp bitvector_visited_tracker.cpp bound_distance_function.cpp default_nearest_neighbor_index_factory.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 9e4bd59e507..8c061cc508f 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -3,6 +3,7 @@ #include "distance_function_factory.h" #include "distance_functions.h" #include "mips_distance_transform.h" +#include "turbo_quant_distance.h" using search::attribute::DistanceMetric; using vespalib::eval::CellType; @@ -47,6 +48,14 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type) case CellType::FLOAT: return std::make_unique>(true); default: return std::make_unique>(); } + case DistanceMetric::TurboQuant: + switch (cell_type) { + case CellType::DOUBLE: return std::make_unique>(true); + case CellType::INT8: return std::make_unique>(true); + case CellType::BFLOAT16: return std::make_unique>(true); + case CellType::FLOAT: return std::make_unique>(true); + default: return std::make_unique>(); + } case DistanceMetric::GeoDegrees: return std::make_unique(); case DistanceMetric::Hamming: diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 3b0a0ac91fd..6448c4202dd 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -6,6 +6,8 @@ #include "bound_distance_function.h" #include +namespace vespalib { class GenericHeader; } + namespace search::tensor { /** @@ -19,6 +21,8 @@ struct DistanceFunctionFactory { virtual ~DistanceFunctionFactory() = default; virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const = 0; virtual BoundDistanceFunction::UP 
for_insertion_vector(TypedCells lhs) const = 0; + virtual void save_state(vespalib::GenericHeader&) const {} + virtual void load_state(const vespalib::GenericHeader&) {} using UP = std::unique_ptr; }; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 03080f0d9b0..e6c2fde0e08 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -6,7 +6,6 @@ #include "hnsw_index_explorer.h" #include "hnsw_index_loader.hpp" #include "hnsw_index_saver.h" -#include "mips_distance_transform.h" #include "random_level_generator.h" #include "vector_bundle.h" #include @@ -42,29 +41,6 @@ constexpr size_t max_level_array_size = 16; constexpr size_t max_link_array_size = 193; constexpr vespalib::duration MAX_COUNT_DURATION(1000ms); -const std::string hnsw_max_squared_norm = "hnsw.max_squared_norm"; - -void save_mips_max_distance(GenericHeader& header, DistanceFunctionFactory& dff) { - auto* mips_dff = dynamic_cast(&dff); - if (mips_dff != nullptr) { - auto& norm_store = mips_dff->get_max_squared_norm_store(); - header.putTag(GenericHeader::Tag(hnsw_max_squared_norm, norm_store.get_max())); - } -} - -void load_mips_max_distance(const GenericHeader& header, DistanceFunctionFactory& dff) { - auto* mips_dff = dynamic_cast(&dff); - if (mips_dff != nullptr) { - auto& norm_store = mips_dff->get_max_squared_norm_store(); - if (header.hasTag(hnsw_max_squared_norm)) { - auto& tag = header.getTag(hnsw_max_squared_norm); - if (tag.getType() == GenericHeader::Tag::Type::TYPE_FLOAT) { - (void) norm_store.get_max(tag.asFloat()); - } - } - } -} - bool has_link_to(std::span links, uint32_t id) { for (uint32_t link : links) { if (link == id) return true; @@ -1090,7 +1066,7 @@ template std::unique_ptr HnswIndex::make_saver(GenericHeader& header) const { - save_mips_max_distance(header, distance_function_factory()); + distance_function_factory().save_state(header); 
return std::make_unique>(_graph); } @@ -1099,7 +1075,7 @@ std::unique_ptr HnswIndex::make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) { assert(get_entry_nodeid() == 0); // cannot load after index has data - load_mips_max_distance(header, distance_function_factory()); + distance_function_factory().load_state(header); _graph.set_last_flush_duration(FileHeaderContext::get_flush_duration(header)); using ReaderType = FileReader; using LoaderType = HnswIndexLoader; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index ac2f813e8f8..760975caaec 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -11,6 +11,29 @@ namespace hwaccelerated = vespalib::hwaccelerated; namespace search::tensor { +namespace { + +const std::string hnsw_max_squared_norm = "hnsw.max_squared_norm"; + +} + +void +MipsDistanceFunctionFactoryBase::save_state(vespalib::GenericHeader& header) const +{ + header.putTag(vespalib::GenericHeader::Tag(hnsw_max_squared_norm, _sq_norm_store->get_max())); +} + +void +MipsDistanceFunctionFactoryBase::load_state(const vespalib::GenericHeader& header) +{ + if (header.hasTag(hnsw_max_squared_norm)) { + const auto& tag = header.getTag(hnsw_max_squared_norm); + if (tag.getType() == vespalib::GenericHeader::Tag::Type::TYPE_FLOAT) { + (void) _sq_norm_store->get_max(tag.asFloat()); + } + } +} + template class BoundMipsDistanceFunction final : public BoundDistanceFunction { using FloatType = VectorStoreType::FloatType; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 7b82661179f..f9d4ad8f26e 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -5,6 +5,7 @@ 
/**
 * Maps a scale-normalized vector component to its reconstruction value
 * (centroid) for the base quantization stage.
 *
 * @param value  component divided by the per-vector scale
 * @param levels number of quantization levels; values <= 4 select the
 *               4-level (2-bit) codebook, anything larger the 8-level
 *               (3-bit) codebook
 * @return the centroid of the bin that value falls into
 *
 * Both codebooks are odd-symmetric around zero, and a value of exactly 0.0
 * is assigned to the smallest positive centroid.
 */
inline double
dequantize_normalized(double value, uint32_t levels) noexcept
{
    if (levels <= 4) {
        // 2-bit base quantization centroids (TurboQuant-like low-bit stage).
        // NOTE(review): these appear to be the 4-level Lloyd-Max values for a
        // unit-variance Gaussian - confirm against the intended reference.
        if (value < -0.9815) return -1.510;
        if (value < 0.0) return -0.453;
        if (value < 0.9815) return 0.453;
        return 1.510;
    }
    // 3-bit base quantization fallback with centroids
    // {+-0.5, +-1.0, +-1.5, +-2.5}. Each decision threshold is the midpoint
    // between two neighbouring centroids, so every value maps to its nearest
    // centroid and the mapping mirrors around zero. The earlier thresholds
    // (-0.25, 0.25, 0.75, 1.25) were shifted by one bin on the upper half,
    // which sent small negative values to +0.5 and e.g. 1.3 to 2.5.
    if (value < -2.0) return -2.5;
    if (value < -1.25) return -1.5;
    if (value < -0.75) return -1.0;
    if (value < 0.0) return -0.5;
    if (value < 0.75) return 0.5;
    if (value < 1.25) return 1.0;
    if (value < 2.0) return 1.5;
    return 2.5;
}
const double lhs_val = _lhs[i]; + const double rhs_val = rhs_vec[i]; + + const double lhs_q = dequantize_normalized(lhs_val * lhs_inv_scale, _mse_levels) * lhs_scale; + const double rhs_q = dequantize_normalized(rhs_val * rhs_inv_scale, _mse_levels) * rhs_scale; + + quantized_dot += (lhs_q * rhs_q); + + const double lhs_residual = lhs_val - lhs_q; + const double rhs_residual = rhs_val - rhs_q; + lhs_residual_norm_sq += lhs_residual * lhs_residual; + rhs_residual_norm_sq += rhs_residual * rhs_residual; + residual_sign_dot += residual_sign(lhs_residual) * residual_sign(rhs_residual); + } + + double correction = 0.0; + if ((lhs_residual_norm_sq > k_eps) && (rhs_residual_norm_sq > k_eps)) { + const double residual_product_norm = std::sqrt(lhs_residual_norm_sq * rhs_residual_norm_sq); + correction = (k_qjl_scale / static_cast(sz)) * residual_product_norm * static_cast(residual_sign_dot); + } + const double estimated_inner_product = quantized_dot + correction; + return -estimated_inner_product; + } + + double convert_threshold(double threshold) const noexcept override { + return threshold; + } + + double to_rawscore(double distance) const noexcept override { + return -distance; + } + + double to_distance(double rawscore) const noexcept override { + return -rawscore; + } + + double min_rawscore() const noexcept override { + return std::numeric_limits::lowest(); + } + + double calc_with_limit(TypedCells rhs, double) const noexcept override { + return calc(rhs); + } +}; + +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; +template class BoundTurboQuantDistance>; + +template +BoundDistanceFunction::UP +TurboQuantDistanceFunctionFactory::for_query_vector(TypedCells lhs) const { + using DFT = BoundTurboQuantDistance>; + return 
std::make_unique(lhs, *_sq_norm_store, false, _mse_levels); +} + +template +BoundDistanceFunction::UP +TurboQuantDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) const { + if (_reference_insertion_vector) { + using DFT = BoundTurboQuantDistance>; + return std::make_unique(lhs, *_sq_norm_store, true, _mse_levels); + } + using DFT = BoundTurboQuantDistance>; + return std::make_unique(lhs, *_sq_norm_store, true, _mse_levels); +} + +template +void +TurboQuantDistanceFunctionFactory::save_state(vespalib::GenericHeader& header) const +{ + MipsDistanceFunctionFactoryBase::save_state(header); + header.putTag(vespalib::GenericHeader::Tag(hnsw_turbo_quant_version, static_cast(turbo_quant_state_version))); + header.putTag(vespalib::GenericHeader::Tag(hnsw_turbo_quant_levels, _mse_levels)); +} + +template +void +TurboQuantDistanceFunctionFactory::load_state(const vespalib::GenericHeader& header) +{ + MipsDistanceFunctionFactoryBase::load_state(header); + if (header.hasTag(hnsw_turbo_quant_levels)) { + const auto& tag = header.getTag(hnsw_turbo_quant_levels); + if (tag.getType() == vespalib::GenericHeader::Tag::Type::TYPE_INTEGER) { + auto levels = static_cast(tag.asInteger()); + if ((levels == 4u) || (levels == 8u)) { + _mse_levels = levels; + } + } + } +} + +template class TurboQuantDistanceFunctionFactory; +template class TurboQuantDistanceFunctionFactory; +template class TurboQuantDistanceFunctionFactory; +template class TurboQuantDistanceFunctionFactory; + +} \ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/tensor/turbo_quant_distance.h b/searchlib/src/vespa/searchlib/tensor/turbo_quant_distance.h new file mode 100644 index 00000000000..8a97fb345e4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/turbo_quant_distance.h @@ -0,0 +1,40 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "mips_distance_transform.h" + +namespace search::tensor { + +/** + * Factory for TurboQuant distance functions. + * + * This implements a native quantized distance approximation that combines + * low-bit quantized inner product with a residual-sign correction term. + */ +template +class TurboQuantDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase { +public: + using TypedCells = DistanceFunctionFactory::TypedCells; + + TurboQuantDistanceFunctionFactory() noexcept : TurboQuantDistanceFunctionFactory(false) {} + TurboQuantDistanceFunctionFactory(bool reference_insertion_vector) noexcept + : _reference_insertion_vector(reference_insertion_vector), + _mse_levels(4) + { + } + ~TurboQuantDistanceFunctionFactory() override = default; + + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override; + void save_state(vespalib::GenericHeader& header) const override; + void load_state(const vespalib::GenericHeader& header) override; + + uint32_t mse_levels() const noexcept { return _mse_levels; } + +private: + bool _reference_insertion_vector; + uint32_t _mse_levels; +}; + +} \ No newline at end of file diff --git a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp index 68f27e5862b..bd3692401ee 100644 --- a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp +++ b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp @@ -179,6 +179,7 @@ TEST_F(NearestNeighborSearcherTest, distance_metric_from_string) EXPECT_EQ(DistanceMetric::GeoDegrees, NNFS::distance_metric_from_string("GEODEGREES")); EXPECT_EQ(DistanceMetric::InnerProduct, NNFS::distance_metric_from_string("INNERPRODUCT")); 
EXPECT_EQ(DistanceMetric::Hamming, NNFS::distance_metric_from_string("HAMMING")); + EXPECT_EQ(DistanceMetric::TurboQuant, NNFS::distance_metric_from_string("TURBOQUANT")); EXPECT_EQ(DistanceMetric::Euclidean, NNFS::distance_metric_from_string("not_available")); }