diff --git a/src/comp_analyzer.cpp b/src/comp_analyzer.cpp index 26790946..33323085 100644 --- a/src/comp_analyzer.cpp +++ b/src/comp_analyzer.cpp @@ -29,6 +29,10 @@ THE SOFTWARE. #include #include #include +#include +#include +#include +#include "chibihash64.h" using namespace GanakInt; @@ -45,6 +49,7 @@ void CompAnalyzer::initialize( const LiteralIndexedVector & watches, // binary clauses ClauseAllocator const* alloc, const vector& _long_irred_cls) // longer-than-2-long clauses { + watches_ = &watches; max_var = watches.end_lit().var() - 1; comp_vars.reserve(max_var + 1); var_freq_scores.resize(max_var + 1, 0); @@ -408,6 +413,256 @@ end_sat:; << comp_vars.size() << " long"); } +// Computes Weisfeiler-Lehman canonical component information for cache lookup. +// +// Approach: +// 1. Build a clause->variable mapping restricted to this component's vars/clauses. +// 2. Compute an initial "color" for each variable: (long_degree, binary_degree, is_indep). +// 3. Run one round of WL refinement: each variable's new color incorporates the sorted +// multiset of its long-clause neighbours' initial colors. +// 4. Sort variables by WL color (original var_id as tiebreaker) to get canonical order. +// 5. Express every clause (long + binary) in terms of canonical variable indices and sort. +// 6. Hash the resulting canonical clause list to produce a structure-invariant cache key. +// +// The result is invariant to variable/clause ID renaming: two structurally isomorphic +// components will produce identical sorted_canon_clauses and the same hash, enabling a +// cache hit even if they involve completely different variable numberings. +CanonInfo CompAnalyzer::compute_canon_info(const Comp& comp, + uint64_t hash_seed, + uint32_t threshold) const { + CanonInfo info; + const uint32_t n = comp.nVars(); + // WL canonicalization is only sound for unweighted counting (mode 0). + // For weighted modes, each variable has an individual weight; two components + // with the same clause structure but different per-variable weights have + // different weighted counts, so structural isomorphism does not imply + // cache equivalence. weighted() is false only for FGenMpz (mode 0). + if (threshold == 0 || n > threshold || n == 0 || counter->weighted()) return info; + + // --- Build membership lookups --- + + // comp clause set for O(1) membership test + std::unordered_set comp_clause_set; + comp_clause_set.reserve(comp.num_long_cls() * 2); + for (auto it = comp.cls_begin(); *it != sentinel; ++it) comp_clause_set.insert(*it); + + // var_id -> position in comp (0..n-1) + std::unordered_map var_to_pos; + var_to_pos.reserve(n * 2); + for (uint32_t i = 0; i < n; ++i) var_to_pos[comp.vars_begin()[i]] = i; + + // --- Compute degree-based initial color per variable (position) --- + + // long_deg[i] = number of comp clauses containing position-i variable + // bin_deg[i] = number of comp variables that position-i variable shares a binary clause with + vector long_deg(n, 0); + vector bin_deg(n, 0); + + // clause_id -> list of comp-variable positions that appear in it (for WL neighbor graph) + std::unordered_map> clause_to_pos; + clause_to_pos.reserve(comp.num_long_cls() * 2); + + // clause_id -> all Lits with signs (for polarity-aware canonical form) + // For ternary clauses: accumulated across all 3 variable visits. + // For long clauses: read from long_clauses_data on first encounter. + std::unordered_map> clause_all_lits; + clause_all_lits.reserve(comp.num_long_cls() * 2); + + for (uint32_t i = 0; i < n; ++i) { + const uint32_t v = comp.vars_begin()[i]; + + // Long clauses: iterate over all long clauses v appears in. + const ClData* longs = holder.begin_long(v); + const ClData* longs_end = longs + holder.orig_size_long(v); + for (const ClData* d = longs; d != longs_end; ++d) { + if (!comp_clause_set.count(d->id)) continue; + clause_to_pos[d->id].push_back(i); + ++long_deg[i]; + + auto& lits = clause_all_lits[d->id]; + if (d->id < max_tri_clid) { + // Ternary clause: contribute the 2 "other" literals from this visit. + // After all 3 vars are visited, lits will contain all 3 signed literals. + const Lit l1 = d->get_lit1(); + const Lit l2 = d->get_lit2(); + if (std::find(lits.begin(), lits.end(), l1) == lits.end()) lits.push_back(l1); + if (std::find(lits.begin(), lits.end(), l2) == lits.end()) lits.push_back(l2); + } else { + // Long clause: read all signed literals from the literal pool on first encounter. + if (lits.empty()) { + const Lit* start = long_clauses_data.data() + d->off; + for (const Lit* it_l = start; *it_l != SENTINEL_LIT; ++it_l) lits.push_back(*it_l); + } + } + } + + // Binary degree (polarity-blind, used for WL initial color only). + const uint32_t* bins = holder.begin_bin(v); + for (uint32_t j = 0; j < holder.orig_size_bin(v); ++j) { + if (var_to_pos.count(bins[j])) ++bin_deg[i]; + } + } + + // --- Initial WL color per position --- + using Color3 = std::tuple; + vector init_color(n); + for (uint32_t i = 0; i < n; ++i) { + const bool is_indep = (comp.vars_begin()[i] < indep_support_end); + init_color[i] = {long_deg[i], bin_deg[i], static_cast(is_indep)}; + } + VERBOSE_DEBUG_DO( + cout << "WL canon: nVars=" << n + << " nLongCls=" << clause_all_lits.size() + << " initial colors (var longdeg bindeg isindep):"; + for (uint32_t i = 0; i < n; ++i) + cout << " [" << comp.vars_begin()[i] << " " + << long_deg[i] << " " << bin_deg[i] << " " + << (comp.vars_begin()[i] < indep_support_end ? 1 : 0) << "]"; + cout << endl; + ); + + // --- Build long-clause neighbour adjacency (for WL round) --- + // cl_neighbors[i] = positions of variables that share a long clause with position i + vector> cl_neighbors(n); + for (auto& [cl_id, positions] : clause_to_pos) { + for (uint32_t u : positions) + for (uint32_t w : positions) + if (u != w) cl_neighbors[u].push_back(w); + } + + // --- One round of WL refinement --- + // wl1[i] = hash of (init_color[i], sorted multiset of init_colors of clause-neighbours) + vector wl1(n); + for (uint32_t i = 0; i < n; ++i) { + vector ncolors; + ncolors.reserve(cl_neighbors[i].size()); + for (uint32_t j : cl_neighbors[i]) ncolors.push_back(init_color[j]); + sort(ncolors.begin(), ncolors.end()); + + // Mix into a 64-bit hash using simple polynomial mixing + uint64_t h = (static_cast(get<0>(init_color[i])) * 2654435761ULL) + ^ (static_cast(get<1>(init_color[i])) * 40503ULL) + ^ (static_cast(get<2>(init_color[i])) * 2246822519ULL); + for (const auto& [ld, bd, indp] : ncolors) { + h ^= (h >> 16) * 0x45d9f3bULL; + h += (static_cast(ld) * 2654435761ULL) + ^ (static_cast(bd) * 40503ULL) + ^ (static_cast(indp)); + } + wl1[i] = h; + } + + VERBOSE_DEBUG_DO( + cout << "WL canon: wl1 colors (var wl1hash):"; + for (uint32_t i = 0; i < n; ++i) + cout << " [" << comp.vars_begin()[i] << " 0x" << std::hex << wl1[i] << std::dec << "]"; + cout << endl; + ); + + // --- Sort variables by (wl1, init_color, var_id) to get canonical order --- + vector perm(n); + iota(perm.begin(), perm.end(), 0); + sort(perm.begin(), perm.end(), [&](uint32_t a, uint32_t b) { + if (wl1[a] != wl1[b]) return wl1[a] < wl1[b]; + if (init_color[a] != init_color[b]) return init_color[a] < init_color[b]; + return comp.vars_begin()[a] < comp.vars_begin()[b]; // stable tiebreak + }); + + // canon_vars[i] = original var_id at canonical position i + info.canon_vars.resize(n); + for (uint32_t i = 0; i < n; ++i) info.canon_vars[i] = comp.vars_begin()[perm[i]]; + + // canon_is_indep[i] = 1 if canonical position i is in the independent support, else 0. + // Must be included in the hash/equality data so that two structurally isomorphic + // components that differ only in their indep-support assignments are not confused. + info.canon_is_indep.resize(n); + for (uint32_t i = 0; i < n; ++i) + info.canon_is_indep[i] = static_cast(comp.vars_begin()[perm[i]] < indep_support_end); + + // orig_pos -> canonical index + vector orig_to_canon(n); + for (uint32_t i = 0; i < n; ++i) orig_to_canon[perm[i]] = i; + + // --- Build polarity-aware canonical clause representations --- + // Canonical literal index: 2 * canon_pos + (uint32_t)lit.sign() + // (sign()=false → negative literal, sign()=true → positive literal) + // + // Long/ternary: from clause_all_lits, filter to in-component (unknown) vars. + info.sorted_canon_clauses.reserve(clause_all_lits.size() + n); + for (auto& [cl_id, lits] : clause_all_lits) { + vector cv; + cv.reserve(lits.size()); + for (const Lit l : lits) { + auto it = var_to_pos.find(l.var()); + if (it == var_to_pos.end()) continue; // satisfied/false lit, skip + cv.push_back(2 * orig_to_canon[it->second] + static_cast(l.sign())); + } + if (cv.size() < 2) continue; // should not happen for in-comp clauses + sort(cv.begin(), cv.end()); + info.sorted_canon_clauses.push_back(std::move(cv)); + } + + // Binary clauses: use watches_ directly to get full literal polarities. + // watches_[Lit(v,s)] contains binary clauses of the form (Lit(v,s) ∨ bincl.lit()). + // Deduplicate via a 64-bit key (packed canonical lit-index pair, lo<<32|hi). + std::unordered_set seen_bin; + seen_bin.reserve(n * 4); + for (uint32_t pos_i = 0; pos_i < n; ++pos_i) { + const uint32_t v = comp.vars_begin()[pos_i]; + for (const bool s : {false, true}) { + for (const auto& bincl : (*watches_)[Lit(v, s)].binaries) { + if (!bincl.irred()) continue; + const Lit other = bincl.lit(); + auto it = var_to_pos.find(other.var()); + if (it == var_to_pos.end()) continue; // other end not in comp + const uint32_t cli = 2 * orig_to_canon[pos_i] + static_cast(s); + const uint32_t clj = 2 * orig_to_canon[it->second] + static_cast(other.sign()); + const uint32_t lo = std::min(cli, clj); + const uint32_t hi = std::max(cli, clj); + const uint64_t key = (static_cast(lo) << 32) | hi; + if (seen_bin.insert(key).second) + info.sorted_canon_clauses.push_back({lo, hi}); + } + } + } + + // --- Sort all canonical clauses lexicographically --- + sort(info.sorted_canon_clauses.begin(), info.sorted_canon_clauses.end()); + + // --- Compute structural hash of (nVars, canonical clauses, is_indep profile) --- + // Encoding: [n, n_total_clauses, for each clause: (size, v0, v1, ...), is_indep[0..n-1]] + // The is_indep profile distinguishes components whose clause structures are isomorphic + // but differ in which canonical positions belong to the independent (projection) support. + vector hdata; + hdata.reserve(2 + info.sorted_canon_clauses.size() * 4 + n); + hdata.push_back(n); + hdata.push_back(static_cast(info.sorted_canon_clauses.size())); + for (const auto& cv : info.sorted_canon_clauses) { + hdata.push_back(static_cast(cv.size())); + for (uint32_t idx : cv) hdata.push_back(idx); + } + for (uint32_t i = 0; i < n; ++i) hdata.push_back(info.canon_is_indep[i]); + info.hash = chibihash64(hdata.data(), hdata.size() * sizeof(uint32_t), hash_seed); + + VERBOSE_DEBUG_DO( + cout << "WL canon: final hash=0x" << std::hex << info.hash << std::dec + << " nclauses=" << info.sorted_canon_clauses.size() + << " canonical clauses:"; + for (const auto& cv : info.sorted_canon_clauses) { + cout << " ("; + for (uint32_t idx = 0; idx < cv.size(); ++idx) { + if (idx) cout << ","; + cout << cv[idx]; + } + cout << ")"; + } + cout << endl; + ); + + info.valid = true; + return info; +} + // There is exactly ONE of these CompAnalyzer::CompAnalyzer( const LiteralIndexedVector & lit_values, diff --git a/src/comp_analyzer.hpp b/src/comp_analyzer.hpp index 568ea012..fedd1aad 100644 --- a/src/comp_analyzer.hpp +++ b/src/comp_analyzer.hpp @@ -28,6 +28,7 @@ THE SOFTWARE. #include "statistics.hpp" #include "comp_types/comp.hpp" #include "comp_types/comp_archetype.hpp" +#include "comp_types/canon_info.hpp" #include #include @@ -95,6 +96,10 @@ struct MyHolder { auto start = data[v*hstride+offset+0]; return data.get() + start; } + const uint32_t* begin_bin(uint32_t v) const { + auto start = data[v*hstride+offset+0]; + return data.get() + start; + } uint32_t size_bin(uint32_t v) const { return data[v*hstride+offset+1];} uint32_t& size_bin(uint32_t v) { return data[v*hstride+offset+1];} uint32_t orig_size_bin(uint32_t v) const { return data[v*hstride+offset+2];} @@ -107,6 +112,10 @@ struct MyHolder { auto start = data[v*hstride+offset+3]; return reinterpret_cast(data.get() + start); } + const ClData* begin_long(uint32_t v) const { + auto start = data[v*hstride+offset+3]; + return reinterpret_cast(data.get() + start); + } uint32_t size_long(uint32_t v) const { return data[v*hstride+offset+4];} uint32_t& size_long(uint32_t v) { return data[v*hstride+offset+4];} uint32_t orig_size_long(uint32_t v) const { return data[v*hstride+offset+5];} @@ -179,6 +188,12 @@ class CompAnalyzer { uint32_t get_max_var() const { return max_var; } CompArchetype& get_archetype() { return archetype; } + // Compute WL-based canonical information for comp (only if nVars <= threshold). + // Must be called immediately after make_comp_from_archetype() and before the + // next explore_comp(), because it relies on the current holder state. + // Returns a CanonInfo with valid=false if nVars > threshold or threshold == 0. + CanonInfo compute_canon_info(const Comp& comp, uint64_t hash_seed, uint32_t threshold) const; + private: // the id of the last clause // note that clause ID is the clause number, @@ -191,6 +206,7 @@ class CompAnalyzer { MyHolder holder; vector long_clauses_data; const LiteralIndexedVector & values; + const LiteralIndexedVector* watches_ = nullptr; const CounterConfiguration& conf; const uint32_t indep_support_end; diff --git a/src/comp_cache.hpp b/src/comp_cache.hpp index 1d952b43..a32eeb17 100644 --- a/src/comp_cache.hpp +++ b/src/comp_cache.hpp @@ -56,8 +56,9 @@ class CompCache final: public CompCacheIF { T* comp = reinterpret_cast(c); return comp->extra_bytes(); } - void* create_new_comp(const Comp &comp, uint64_t hash_seed, const BPCSizes& bpc) override { - return new T(comp, hash_seed, bpc); + void* create_new_comp(const Comp &comp, uint64_t hash_seed, const BPCSizes& bpc, + const CanonInfo* canon = nullptr) override { + return new T(comp, hash_seed, bpc, canon); } [[nodiscard]] uint64_t get_max_num_entries() const override { return entry_base.size(); } diff --git a/src/comp_cache_if.hpp b/src/comp_cache_if.hpp index da03cd82..22c6ecc7 100644 --- a/src/comp_cache_if.hpp +++ b/src/comp_cache_if.hpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include "common.hpp" #include "comp_types/cacheable_comp.hpp" +#include "comp_types/canon_info.hpp" #include "statistics.hpp" #include #include "stack.hpp" @@ -40,7 +41,8 @@ class CompCacheIF { virtual CacheEntryID add_new_comp(void* comp, CacheEntryID super_comp_id) = 0; virtual uint64_t get_extra_bytes(void* comp) const = 0; virtual bool find_comp_and_incorporate_cnt(StackLevel &top, const uint32_t nvars, const void* comp) = 0; - virtual void* create_new_comp(const Comp &comp, uint64_t hash_seed, const BPCSizes& bpc) = 0; + virtual void* create_new_comp(const Comp &comp, uint64_t hash_seed, const BPCSizes& bpc, + const CanonInfo* canon = nullptr) = 0; virtual void free_comp(void* comp) = 0; virtual void make_entry_deletable(CacheEntryID id) = 0; diff --git a/src/comp_management.cpp b/src/comp_management.cpp index 3bdb0120..fd5ef4a6 100644 --- a/src/comp_management.cpp +++ b/src/comp_management.cpp @@ -23,6 +23,7 @@ THE SOFTWARE. #include "comp_management.hpp" #include "common.hpp" #include "comp_types/cacheable_comp.hpp" +#include "comp_types/canon_info.hpp" #include "counter.hpp" using namespace GanakInt; @@ -67,7 +68,23 @@ void CompManager::record_remaining_comps_for(StackLevel &top) { if (ana.var_unvisited_in_sup_comp(*vt) && ana.explore_comp(*vt, super_comp.num_long_cls(), super_comp.num_bin_cls())) { Comp *p_new_comp = ana.make_comp_from_archetype(); - void* ccomp = cache->create_new_comp(*p_new_comp, hash_seed, bpc); + const CanonInfo canon = ana.compute_canon_info(*p_new_comp, hash_seed, conf.wl_canonize_threshold); + if (canon.valid) stats.wl_canon_computed++; + else stats.wl_canon_skipped++; +#ifdef VERBOSE_DEBUG + if (canon.valid) { + cout << COLYEL2 "WL canon computed for comp nVars=" << p_new_comp->nVars() + << " hash=0x" << std::hex << canon.hash << std::dec + << " nclauses=" << canon.sorted_canon_clauses.size() + << " canon_vars:"; + for (uint32_t cv : canon.canon_vars) cout << " " << cv; + cout << endl; + } else { + cout << COLYEL2 "WL canon skipped for comp nVars=" << p_new_comp->nVars() + << " (threshold=" << conf.wl_canonize_threshold << ")" << endl; + } +#endif + void* ccomp = cache->create_new_comp(*p_new_comp, hash_seed, bpc, canon.valid ? &canon : nullptr); // TODO Yash: count it 1-by-1 in case the number of variables & clauses is small // essentially, brute-forcing the count @@ -86,9 +103,12 @@ void CompManager::record_remaining_comps_for(StackLevel &top) { #endif } else { // Cache hit + if (canon.valid) stats.wl_canon_hits++; #ifdef VERBOSE_DEBUG cout << COLYEL2 "Comp already in cache." - << " num vars: " << p_new_comp->nVars() << " vars: "; + << " num vars: " << p_new_comp->nVars() + << " wl_canonical=" << (canon.valid ? "yes" : "no") + << " vars: "; all_vars_in_comp(*p_new_comp, v) cout << *v << " "; cout << endl; #endif diff --git a/src/comp_types/cacheable_comp.hpp b/src/comp_types/cacheable_comp.hpp index 58aaa251..7bd662cf 100644 --- a/src/comp_types/cacheable_comp.hpp +++ b/src/comp_types/cacheable_comp.hpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include "structures.hpp" #include "base_comp.hpp" +#include "canon_info.hpp" namespace GanakInt { @@ -38,8 +39,9 @@ class CacheableComp: public BaseComp, private T { CacheableComp(CacheableComp&&) noexcept = default; CacheableComp(const CacheableComp&) = default; CacheableComp& operator=(const CacheableComp&) = default; - CacheableComp(const Comp &comp, const uint64_t hash_seed, const BPCSizes& bpc) { - hashkey = T::set_comp(comp, hash_seed, bpc); + CacheableComp(const Comp &comp, const uint64_t hash_seed, const BPCSizes& bpc, + const CanonInfo* canon = nullptr) { + hashkey = T::set_comp(comp, hash_seed, bpc, canon); } // Cache Pollution Management diff --git a/src/comp_types/canon_info.hpp b/src/comp_types/canon_info.hpp new file mode 100644 index 00000000..96114aee --- /dev/null +++ b/src/comp_types/canon_info.hpp @@ -0,0 +1,59 @@ +/****************************************** +Copyright (C) 2023 Authors of GANAK, see AUTHORS file + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +***********************************************/ + +#pragma once + +#include +#include + +namespace GanakInt { + +// Weisfeiler-Lehman canonical component information. +// Computed once in CompAnalyzer for small components (nVars <= wl_canonize_threshold). +// Invariant to variable renaming: two structurally isomorphic components produce identical +// sorted_canon_clauses (and therefore the same hash), enabling cache hits across +// different variable numberings. +struct CanonInfo { + bool valid = false; + + // canon_vars[i] = original var_id of the i-th canonical variable. + // Variables are sorted by WL color (degree-based initial coloring + 1 WL round). + // Tiebreaker: original var_id for stability. + std::vector canon_vars; + + // sorted_canon_clauses[j] = sorted list of canonical var indices (0..nVars-1) in clause j. + // The outer vector is sorted lexicographically so the representation is canonical. + // Binary clauses appear as 2-element inner vectors. + std::vector> sorted_canon_clauses; + + // is_indep[i] = 1 if the variable at canonical position i is in the independent support, + // 0 otherwise. Included in both the hash and the equality comparison so that two + // structurally isomorphic components that differ only in their projection-variable + // assignments are NOT considered equivalent (they may have different projected counts). + std::vector canon_is_indep; + + // Pre-computed structural hash derived from sorted_canon_clauses, nVars, and canon_is_indep. + // Used directly as the cache key hash for both HashedComp and DiffPackedComp modes. + uint64_t hash = 0; +}; + +} // namespace GanakInt diff --git a/src/comp_types/difference_packed_comp.hpp b/src/comp_types/difference_packed_comp.hpp index cc57537f..4c4df519 100644 --- a/src/comp_types/difference_packed_comp.hpp +++ b/src/comp_types/difference_packed_comp.hpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include "common.hpp" #include "comp.hpp" #include "chibihash64.h" +#include "canon_info.hpp" namespace GanakInt { @@ -68,7 +69,7 @@ class DiffPackedComp { public: DiffPackedComp() = default; ~DiffPackedComp() { delete[] data; } - DiffPackedComp(const DiffPackedComp& other) : data_size(other.data_size) { + DiffPackedComp(const DiffPackedComp& other) : data_size(other.data_size), is_canonical_(other.is_canonical_) { if (data_size > 0) { data = new uint32_t[data_size]; std::memcpy(data, other.data, data_size * sizeof(uint32_t)); @@ -78,22 +79,26 @@ class DiffPackedComp { if (this == &other) return *this; delete[] data; data_size = other.data_size; + is_canonical_ = other.is_canonical_; data = (data_size > 0) ? new uint32_t[data_size] : nullptr; if (data_size > 0) std::memcpy(data, other.data, data_size * sizeof(uint32_t)); return *this; } DiffPackedComp(DiffPackedComp&& other) noexcept - : data(other.data), data_size(other.data_size) { + : data(other.data), data_size(other.data_size), is_canonical_(other.is_canonical_) { other.data = nullptr; other.data_size = 0; + other.is_canonical_ = false; } DiffPackedComp& operator=(DiffPackedComp&& other) noexcept { if (this != &other) { delete[] data; data = other.data; data_size = other.data_size; + is_canonical_ = other.is_canonical_; other.data = nullptr; other.data_size = 0; + other.is_canonical_ = false; } return *this; } @@ -112,11 +117,35 @@ class DiffPackedComp { SLOW_DEBUG_DO(assert(data != nullptr)); SLOW_DEBUG_DO(assert(other.data != nullptr)); + if (is_canonical_ != other.is_canonical_) return false; if (data_size != other.data_size) return false; return std::memcmp(data, other.data, data_size * sizeof(uint32_t)) == 0; } - uint64_t set_comp(const Comp &comp, const uint64_t hash_seed, const BPCSizes& sz) { + uint64_t set_comp(const Comp &comp, const uint64_t hash_seed, const BPCSizes& sz, + const CanonInfo* canon = nullptr) { + if (canon && canon->valid) { + is_canonical_ = true; + // Store the same hdata that was used to compute canon->hash, for equality checking. + // Format: [nVars, n_clauses, (size, v0, v1, ...) per clause ..., is_indep[0..n-1]] + // The is_indep profile is appended so components that differ only in + // independent-support membership are correctly distinguished. + std::vector hdata; + const uint32_t n = comp.nVars(); + hdata.push_back(n); + hdata.push_back(static_cast(canon->sorted_canon_clauses.size())); + for (const auto& cv : canon->sorted_canon_clauses) { + hdata.push_back(static_cast(cv.size())); + for (uint32_t idx : cv) hdata.push_back(idx); + } + for (uint32_t i = 0; i < n; ++i) hdata.push_back(canon->canon_is_indep[i]); + delete[] data; + data_size = static_cast(hdata.size()); + data = new uint32_t[data_size]; + std::memcpy(data, hdata.data(), data_size * sizeof(uint32_t)); + return canon->hash; + } + is_canonical_ = false; // first, generate hashkey, and compute max diff for cls and vars uint32_t max_var_diff = 0; uint32_t v = *comp.vars_begin(); @@ -188,6 +217,7 @@ class DiffPackedComp { private: uint32_t* data = nullptr; // the packed data uint32_t data_size = 0; // the size of the packed data in uint32_t units + bool is_canonical_ = false; // true when data stores canonical clause representation }; } diff --git a/src/comp_types/hashed_comp.hpp b/src/comp_types/hashed_comp.hpp index bfccdc15..f62b4633 100644 --- a/src/comp_types/hashed_comp.hpp +++ b/src/comp_types/hashed_comp.hpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include "chibihash64.h" #include "common.hpp" #include "comp.hpp" +#include "canon_info.hpp" namespace GanakInt { @@ -34,7 +35,9 @@ class HashedComp { HashedComp(const HashedComp&) = default; HashedComp& operator=(const HashedComp&) = default; HashedComp(HashedComp&&) noexcept = default; - static uint64_t set_comp(const Comp& comp, const uint64_t hash_seed, const BPCSizes& /*bpc*/) { + static uint64_t set_comp(const Comp& comp, const uint64_t hash_seed, const BPCSizes& /*bpc*/, + const CanonInfo* canon = nullptr) { + if (canon && canon->valid) return canon->hash; return chibihash64(comp.get_raw_data(), comp.get_size()*4, hash_seed); } bool equals(const HashedComp&) const { diff --git a/src/counter_config.hpp b/src/counter_config.hpp index 20ff4f2d..12deeffc 100644 --- a/src/counter_config.hpp +++ b/src/counter_config.hpp @@ -115,6 +115,13 @@ struct CounterConfiguration { uint64_t seed = 0; double delta = 0.2; + // Weisfeiler-Lehman canonical component hashing. + // Components with nVars <= this threshold get a structure-invariant hash and + // (in DiffPackedComp mode) a canonical equality representation, so that + // structurally isomorphic components hit the same cache entry regardless of + // their variable numbering. Set to 0 to disable. + uint32_t wl_canonize_threshold = 25; + double appmc_timeout = -1; double appmc_epsilon = 0.8; }; diff --git a/src/main.cpp b/src/main.cpp index 0a873b43..0cd69a37 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -187,6 +187,7 @@ void add_ganak_options() add_arg("--epsilon", conf.appmc_epsilon, fc_double, "AppMC epsilon"); add_arg("--chronobt", conf.do_chronobt, fc_int, "ChronoBT. SAT must be DISABLED or this will fail"); add_arg("--prob", conf.do_probabilistic_hashing, fc_int, "Use probabilistic hashing. When set to 0, we are not running in probabilistic mode, but in deterministic mode, i.e. delta is 0 in Ganak mode (not in case we switch to ApproxMC mode via --appmct)"); + add_arg("--wlcanon", conf.wl_canonize_threshold, fc_int, "WL canonical component hashing threshold: components with nVars <= this get a structure-invariant hash so isomorphic components hit the same cache entry. Set to 0 to disable."); // Arjun options add_arg("--arjun", do_arjun, fc_int, "Use arjun"); diff --git a/src/statistics.cpp b/src/statistics.cpp index 8f9e52e6..2c852218 100644 --- a/src/statistics.cpp +++ b/src/statistics.cpp @@ -162,6 +162,13 @@ void DataAndStatistics::print_short(const Counter* counter, const std::unique_pt << get_avg_cache_store_sz() << " / " << get_avg_cache_store_size()); + verb_print(1, "WL canon: computed/skipped/hits " + << setw(7) << wl_canon_computed << " / " + << setw(7) << wl_canon_skipped << " / " + << setw(7) << wl_canon_hits + << " (canon hit rate: " + << setprecision(3) << safe_div(wl_canon_hits * 100.0, wl_canon_computed) << "%)"); + if (conf.verb >= 2) counter->get_cache()->debug_mem_data(); verb_print(1, ""); } diff --git a/src/statistics.hpp b/src/statistics.hpp index a809f3b3..d22f773c 100644 --- a/src/statistics.hpp +++ b/src/statistics.hpp @@ -131,6 +131,11 @@ class DataAndStatistics { uint64_t sum_cache_hit_sizes = 0; uint64_t sum_cache_store_sizes = 0; + // WL canonical hashing stats + uint64_t wl_canon_computed = 0; // comps with valid WL CanonInfo (size <= threshold) + uint64_t wl_canon_skipped = 0; // comps skipped (size > threshold or threshold==0) + uint64_t wl_canon_hits = 0; // cache hits on lookups that used WL canonical hash + // Components uint64_t comp_sorts = 0; uint64_t comp_sizes = 0;