diff --git a/CLAUDE.md b/CLAUDE.md index 754af8b7..b56ccde3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,10 +18,10 @@ Always disable ccache and build the `ganak` target: CCACHE_DISABLE=1 cmake --build build --target ganak -j8 2>&1 | tail -20 ``` -The build directory is `build/`. If it doesn't exist or CMake needs to be re-run: +The build directory is `build/`. Go to that directory, and run: ``` -mkdir -p build && cd build && cmake .. +./build_norm.sh ``` Common build options: diff --git a/CMakeLists.txt b/CMakeLists.txt index c2fcb37d..681f2d4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,6 +355,21 @@ if(POLICY CMP0169) endif() +# ---- cadical / cadiback (transitive deps of arjun/cryptominisat) ---- +# These are not used directly by ganak, but arjun's exported target lists +# them in its INTERFACE_LINK_LIBRARIES. Import them here so CMake resolves +# them to actual library paths rather than bare -l flags. +set(cadical_DIR "" CACHE PATH "cadical build dir containing cadicalConfig.cmake") +if(cadical_DIR AND NOT TARGET cadical) + find_package(cadical CONFIG REQUIRED HINTS "${cadical_DIR}") + message(STATUS "cadical: using pre-built at ${cadical_DIR}") +endif() +set(cadiback_DIR "" CACHE PATH "cadiback build dir containing cadibackConfig.cmake") +if(cadiback_DIR AND NOT TARGET cadiback) + find_package(cadiback CONFIG REQUIRED HINTS "${cadiback_DIR}") + message(STATUS "cadiback: using pre-built at ${cadiback_DIR}") +endif() + # ---- CryptoMiniSat5 ---- set(cryptominisat5_DIR "" CACHE PATH "cryptominisat5 install/build prefix (contains lib/cmake/cryptominisat5/). 
Auto-resolved if empty.") if(NOT TARGET cryptominisat5) diff --git a/EMSCRIPTEN.md b/EMSCRIPTEN.md index 08c7f2b1..96b494b1 100644 --- a/EMSCRIPTEN.md +++ b/EMSCRIPTEN.md @@ -15,6 +15,13 @@ emmake make -j16 emmake make install ``` +NOTE: after the above, you need to "make distclean" and also to configure GMP, run: +``` +./configure --enable-cxx --enable-static --enable-shared CC="gcc -std=gnu17" +``` +because newest GCC complains + + Useful commands for building stuff like MPFR and MPIR for Emscripten: ``` EMCONFIGURE_JS=1 emconfigure ./configure ABI=64 --disable-assembly CFLAGS=-m64 CXXFLAGS=-m64 LDFLAGS=-m64 --enable-cxx diff --git a/README.md b/README.md index e829e62a..dbff06ab 100644 --- a/README.md +++ b/README.md @@ -263,30 +263,17 @@ simplification, and then through Ganak's counting, finally, we combine these two systems' counts to get the final count. ## Supported Weights -Ganak supports many different weights: -- For counting over integers, the default mode `--mode 0` works. Here, you can - even run approximate counting after some timeout, with e.g `--appmct 1000` - which will automatically switch over to approximate counting after 1000s of - preprocessing and Ganak. -- For counting over rationals (i.e. infinite precision weighted counting), you - can use `--mode 1` which will give you a precise count, without _any_ - floating point errors. -- For counting over the complex rationals field, you need to specify the weight of the - literal as a complex number and pass the `--mode 2` flag. For example, if you - want to give the weight 1/2 + 4i for literal 9, you can specify the weight as - `c p weight 9 1/2 4 0`. -- For counting over polynomials over a finite field, you can use `--mode 3` - which will give you a polynomial count. The weight of the literal is given as - `c p weight 9 1/2*x0+4x1+2 0` which means the weight of literal 9 is - `1/2*x_0 + 4*x_1+2`. 
In this mode you MUST specify the number of polynomial - variables via `--npolyvars N` -- For parity counting, you can use the `--mode 4` flag. This will - count the number of solutions modulo 2 -- For counting over integers modulo a prime, you can use `--mode 5 --prime X`, - where X is the prime. -- For counting over the complex numbers, but using floats instead of infinite - precision rationals, you can use `--mode 6`, and specify the weights as - per `--mode 2`. + +| Mode | Flag | Field | Weight format example | Notes | +|------|------|-------|-----------------------|-------| +| 0 | `--mode 0` | Integer | `c p weight 1 5 0` | Default; supports `--appmct` | +| 1 | `--mode 1` | Rational (exact) | `c p weight 1 1/4 0` | No floating-point error | +| 2 | `--mode 2` | Complex rational | `c p weight 9 1/2+4i 0` | Must give both parts: `a+bi` or `a-bi` | +| 3 | `--mode 3` | Polynomial over rationals | `c p weight 9 1/2*x0+4*x1+2 0` | Requires `--npolyvars N` | +| 4 | `--mode 4` | Parity (mod 2) | `c p weight 1 1 0` | | +| 5 | `--mode 5` | Integer mod prime | `c p weight 1 3 0` | Requires `--prime X` | +| 6 | `--mode 6` | Complex float (MPFR) | `c p weight 9 1/2+4i 0` | Must give both parts: `a+bi` or `a-bi`; see `--mpfrprec` | +| 7 | `--mode 7` | Real float (MPFR) | `c p weight 1 0.3 0` | See `--mpfrprec` | You can also write your own field by implementing the `Field` and `FieldGen` interfaces. Absolutely _any_ field will work, and it's as easy as implementing diff --git a/scripts/build_clang.sh b/scripts/build_clang.sh index 3ee8936e..fe45747a 100755 --- a/scripts/build_clang.sh +++ b/scripts/build_clang.sh @@ -6,7 +6,7 @@ rm -rf sharp* rm -rf Make* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." 
&& pwd)" cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ -Dsbva_DIR="${SAT_DIR}/sbva/build" \ diff --git a/scripts/build_clang_analyze.sh b/scripts/build_clang_analyze.sh index cd2b4941..b28ec921 100755 --- a/scripts/build_clang_analyze.sh +++ b/scripts/build_clang_analyze.sh @@ -6,7 +6,7 @@ rm -rf sharp* rm -rf Make* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" cmake -DANALYZE=ON -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ -Dsbva_DIR="${SAT_DIR}/sbva/build" \ diff --git a/scripts/build_debug.sh b/scripts/build_debug.sh index 7e3a3409..a518558f 100755 --- a/scripts/build_debug.sh +++ b/scripts/build_debug.sh @@ -9,7 +9,7 @@ rm -rf sharp* rm -rf Make* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" cmake -DCMAKE_BUILD_TYPE=Debug \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ -Dsbva_DIR="${SAT_DIR}/sbva/build" \ diff --git a/scripts/build_emscripten.sh b/scripts/build_emscripten.sh index 5fff5bcf..86dd7b23 100755 --- a/scripts/build_emscripten.sh +++ b/scripts/build_emscripten.sh @@ -13,7 +13,7 @@ rm -rf Makefile rm -rf rjun-src rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" emcmake cmake -DCMAKE_INSTALL_PREFIX=$EMINSTALL -DENABLE_TESTING=OFF \ -Dcadical_DIR="${SAT_DIR}/cadical/build" \ -Dcadiback_DIR="${SAT_DIR}/cadiback" \ diff --git a/scripts/build_norm.sh b/scripts/build_norm.sh index a4e70baa..ad45d932 100755 --- a/scripts/build_norm.sh +++ b/scripts/build_norm.sh @@ -14,16 +14,16 @@ rm -rf CM* rm -rf cmake* rm -rf deps rm -rf _deps -SOLVERS_DIR="$(cd "$(dirname "$0")/../.." && pwd)" -echo "solvers dir: $SOLVERS_DIR" +SAT_DIR="$(cd "$(dirname "$0")/../.." 
&& pwd)" +echo "solvers dir: $SAT_DIR" cmake -DENABLE_TESTING=ON \ - -Dcadical_DIR="${SOLVERS_DIR}/cadical/build" \ - -Dcadiback_DIR="${SOLVERS_DIR}/cadiback/build" \ - -Dcryptominisat5_DIR="${SOLVERS_DIR}/cryptominisat/build" \ - -Dsbva_DIR="${SOLVERS_DIR}/sbva/build" \ - -Dtreedecomp_DIR="${SOLVERS_DIR}/treedecomp/build" \ - -Darjun_DIR="${SOLVERS_DIR}/arjun/build" \ - -Dapproxmc_DIR="${SOLVERS_DIR}/approxmc/build" \ + -Dcadical_DIR="${SAT_DIR}/cadical/build" \ + -Dcadiback_DIR="${SAT_DIR}/cadiback/build" \ + -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ + -Dsbva_DIR="${SAT_DIR}/sbva/build" \ + -Dtreedecomp_DIR="${SAT_DIR}/treedecomp/build" \ + -Darjun_DIR="${SAT_DIR}/arjun/build" \ + -Dapproxmc_DIR="${SAT_DIR}/approxmc/build" \ .. make -j$(nproc) make test diff --git a/scripts/build_release.sh b/scripts/build_release.sh index e747f6e3..1c1009c3 100755 --- a/scripts/build_release.sh +++ b/scripts/build_release.sh @@ -6,7 +6,7 @@ rm -rf sharp* rm -rf Make* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" cmake -DCMAKE_BUILD_TYPE=Release \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ -Dsbva_DIR="${SAT_DIR}/sbva/build" \ diff --git a/scripts/build_sanitize.sh b/scripts/build_sanitize.sh index 777a924e..b35df86c 100755 --- a/scripts/build_sanitize.sh +++ b/scripts/build_sanitize.sh @@ -8,7 +8,7 @@ rm -rf sharp* rm -rf Make* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" CC=clang CXX=clang++ cmake -DSANITIZE=ON \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ -Dsbva_DIR="${SAT_DIR}/sbva/build" \ diff --git a/scripts/build_static.sh b/scripts/build_static.sh index 97d813dc..0d23ee14 100755 --- a/scripts/build_static.sh +++ b/scripts/build_static.sh @@ -15,7 +15,7 @@ rm -rf .cmake rm -rf lib* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." 
&& pwd)" cmake -DBUILD_SHARED_LIBS=OFF \ -DGMPXX_LIBRARY=/usr/local/lib/libgmpxx.a \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ diff --git a/scripts/build_static_release.sh b/scripts/build_static_release.sh index faa49f99..9f3c5026 100755 --- a/scripts/build_static_release.sh +++ b/scripts/build_static_release.sh @@ -15,7 +15,7 @@ rm -rf .cmake rm -rf lib* rm -rf deps rm -rf _deps -SAT_DIR=$(cd ../.. && pwd) +SAT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF \ -DGMPXX_LIBRARY=/usr/local/lib/libgmpxx.a \ -Dcryptominisat5_DIR="${SAT_DIR}/cryptominisat/build" \ diff --git a/scripts/cache_miss_bucket_summary.py b/scripts/cache_miss_bucket_summary.py index 5b949a65..8338293d 100755 --- a/scripts/cache_miss_bucket_summary.py +++ b/scripts/cache_miss_bucket_summary.py @@ -105,7 +105,7 @@ def main(): total_rows, total_solved = cur.fetchone() con.close() - headers = ["metric"] + [b[0] for b in buckets] + headers = ["cache miss rate"] + [b[0] for b in buckets] title = f"Cache Miss Rate Bucket Summary — {args.dirname}" print(f"\n{BLUE}{title}{RESET}") for call in calls: diff --git a/scripts/data/analyze_preproc.py b/scripts/data/analyze_preproc.py new file mode 100644 index 00000000..3f78a224 --- /dev/null +++ b/scripts/data/analyze_preproc.py @@ -0,0 +1,88 @@ +import sqlite3 +import os +os.chdir("/home/soos/development/sat_solvers/ganak/build/data") +con = sqlite3.connect("data.sqlite3") +cur = con.cursor() + +cur.execute(""" +SELECT name, + COUNT(DISTINCT fname) as n_cnfs, + SUM(CASE WHEN delta_irred_long_lits < 0 THEN 1 ELSE 0 END) as invoc_lits, + SUM(CASE WHEN delta_free_vars < 0 THEN 1 ELSE 0 END) as invoc_vars, + -SUM(delta_irred_long_lits) as sum_lits, + -SUM(delta_free_vars) as sum_vars, + COUNT(*) as n_invoc +FROM preproc +GROUP BY name +ORDER BY sum_lits DESC +""") +rows = cur.fetchall() +print(f"{'step':<30} {'n_cnfs':>7} {'%inv_lits':>9} {'%inv_vars':>9} {'sum_lits':>12} {'sum_vars':>12} 
{'n_invoc':>8}") +print("-"*104) +for r in rows: + name, n_cnfs, invoc_lits, invoc_vars, sum_lits, sum_vars, n_invoc = r + pct_lits = 100*invoc_lits/n_invoc if n_invoc else 0 + pct_vars = 100*invoc_vars/n_invoc if n_invoc else 0 + print(f"{name:<30} {n_cnfs:>7} {pct_lits:>8.0f}% {pct_vars:>8.0f}% {sum_lits:>12,} {sum_vars:>12,} {n_invoc:>8,}") + +cur.execute("SELECT -SUM(delta_irred_long_lits), -SUM(delta_free_vars) FROM preproc") +t = cur.fetchone() +print(f"\nTOTAL: lits={t[0]:,} vars={t[1]:,}") + +print("\n\n--- Per-CNF distribution of lits removed (steps that remove lits) ---") +cur.execute(""" +SELECT p.fname, + -SUM(p.delta_irred_long_lits) as total_lits_removed, + -SUM(p.delta_free_vars) as total_vars_removed, + d.ganak_time +FROM preproc p +JOIN data d ON d.fname = p.fname AND d.dirname = p.dirname +WHERE d.ganak_time IS NOT NULL +GROUP BY p.dirname, p.fname +ORDER BY total_lits_removed DESC +LIMIT 20 +""") +rows2 = cur.fetchall() +print(f"{'fname':<50} {'lits_removed':>13} {'vars_removed':>13} {'time':>8}") +for r in rows2: + print(f"{r[0][-48:]:<50} {r[1]:>13,} {r[2]:>13,} {r[3]:>8.2f}") + +print("\n\n--- Steps that are ALWAYS active (invoc_pct_lits > 80%) vs rarely active ---") +cur.execute(""" +SELECT name, + COUNT(*) as n_invoc, + -SUM(delta_irred_long_lits) as sum_lits, + ROUND(100.0 * SUM(CASE WHEN delta_irred_long_lits < 0 THEN 1 ELSE 0 END) / COUNT(*), 1) as pct_lits +FROM preproc +GROUP BY name +HAVING sum_lits > 0 +ORDER BY pct_lits DESC +""") +rows3 = cur.fetchall() +print(f"{'step':<30} {'n_invoc':>8} {'sum_lits':>12} {'%active':>8}") +for r in rows3: + print(f"{r[0]:<30} {r[1]:>8,} {r[2]:>12,} {r[3]:>7.1f}%") + +print("\n\n--- occ-bve deep dive: bins added ---") +cur.execute(""" +SELECT -SUM(delta_irred_bins), SUM(delta_irred_bins), COUNT(*) +FROM preproc WHERE name = 'occ-bve' +""") +r = cur.fetchone() +print(f"delta_bins total={r[0]:,} sum={r[1]:,} n={r[2]:,}") + +print("\n\n--- What's removing the most lits ON AVERAGE per invocation? 
---") +cur.execute(""" +SELECT name, + COUNT(*) as n_invoc, + -SUM(delta_irred_long_lits) as sum_lits, + ROUND(-1.0*SUM(delta_irred_long_lits)/COUNT(*), 1) as avg_lits_per_invoc, + ROUND(-1.0*SUM(delta_free_vars)/COUNT(*), 1) as avg_vars_per_invoc +FROM preproc +GROUP BY name +ORDER BY avg_lits_per_invoc DESC +""") +rows4 = cur.fetchall() +print(f"{'step':<30} {'n_invoc':>8} {'sum_lits':>12} {'avg_lits/inv':>13} {'avg_vars/inv':>13}") +for r in rows4: + print(f"{r[0]:<30} {r[1]:>8,} {r[2]:>12,} {r[3]:>13,.1f} {r[4]:>13,.1f}") diff --git a/scripts/data/create_graphs_ganak.py b/scripts/data/create_graphs_ganak.py index e4898a65..0a86f6a1 100755 --- a/scripts/data/create_graphs_ganak.py +++ b/scripts/data/create_graphs_ganak.py @@ -10,11 +10,12 @@ import nbformat as nbf BLUE = "\033[94m" +GREEN = "\033[92m" RED = "\033[91m" RESET = "\033[0m" -def convert_to_cactus(fname, fname2): +def convert_to_cdf(fname, fname2): with open(fname, "r") as f: times = sorted(float(line.split()[0]) for line in f) @@ -116,7 +117,7 @@ def build_csv_data(todo, matched_dirs, only_calls, not_calls, not_versions, fnam os.unlink("gencsv.sqlite") fname2 = fname + ".gnuplotdata" - num_solved = convert_to_cactus(fname, fname2) + num_solved = convert_to_cdf(fname, fname2) fname2_s.append([fname2, call, ver[:10], num_solved, dir]) table_todo.append([dir, ver]) return fname2_s, table_todo @@ -243,25 +244,30 @@ def print_instance_stats_table(table_todo, fname_like, verbose=False): ("irred_cls", "irr_cls"), ] - union_parts = [] - for dir, ver in table_todo: - parts = [f"replace('{dir}','out-ganak-mc','') as dirname"] - for col, alias in metrics: - parts.append(f"{_median_subquery(col, dir, ver, fname_like)} as med_{alias}") - parts.append(f"{_avg_subquery(col, dir, ver, fname_like)} as avg_{alias}") - count_sq = (f"(SELECT COUNT(*) FROM data WHERE dirname='{dir}'" - f" AND ganak_ver='{ver}'{fname_like})") - parts.append(f"{count_sq} as n_inst") - union_parts.append("SELECT " + ", ".join(parts)) - - 
query = "\nUNION ALL\n".join(union_parts) - if verbose: - print(f" Instance stats query ({len(table_todo)} rows)") - with open("gen_table.sqlite", "w") as f: - f.write(".mode table\n") - f.write(query + "\n") - os.system("sqlite3 data.sqlite3 < gen_table.sqlite") - os.unlink("gen_table.sqlite") + for min_nvars, label in [(None, "all"), (20, "nvars>=20")]: + nvars_filter = f" and new_nvars >= {min_nvars}" if min_nvars is not None else "" + fl = fname_like + nvars_filter + + union_parts = [] + for dir, ver in table_todo: + parts = [f"replace('{dir}','out-ganak-mc','') as dirname"] + for col, alias in metrics: + parts.append(f"{_median_subquery(col, dir, ver, fl)} as med_{alias}") + parts.append(f"{_avg_subquery(col, dir, ver, fl)} as avg_{alias}") + count_sq = (f"(SELECT COUNT(*) FROM data WHERE dirname='{dir}'" + f" AND ganak_ver='{ver}'{fl})") + parts.append(f"{count_sq} as n_inst") + union_parts.append("SELECT " + ", ".join(parts)) + + query = "\nUNION ALL\n".join(union_parts) + print(f"\n [{label}]") + if verbose: + print(f" Instance stats query ({len(table_todo)} rows)") + with open("gen_table.sqlite", "w") as f: + f.write(".mode table\n") + f.write(query + "\n") + os.system("sqlite3 data.sqlite3 < gen_table.sqlite") + os.unlink("gen_table.sqlite") def print_preproc_diffs(table_todo, fname_like, verbose=False): @@ -478,8 +484,8 @@ def gp_str(s): with open(gp_file, "w") as f: for term, out in [ - ('pdfcairo size 15cm,15cm', pdf_file), - ('pngcairo size 600,600', png_file), + ('pdfcairo size 52cm,40cm background "#d0d0d0"', pdf_file), + ('pngcairo size 600,600 background "#d0d0d0"', png_file), ]: f.write(f'set terminal {term}\n') f.write(f'set output "{out}"\n') @@ -495,11 +501,7 @@ def gp_str(s): f.write(f'plot "{dat_file}" using (bin($1,binwidth)):(1) smooth freq with boxes lc rgb "blue"\n\n') os.system(f"gnuplot {gp_file}") - - if os.path.exists(png_file): - with open(png_file, "rb") as fh: - img_b64 = base64.b64encode(fh.read()).decode() - 
print(f"\033]1337;File=inline=1;width=600px;height=600px:{img_b64}\a") + _display_png(png_file) def print_sigabrt_files(table_todo, fname_like): @@ -536,6 +538,974 @@ def print_sigabrt_files(table_todo, fname_like): print(sep) +def _preproc_has_data(matched_dirs): + """Return True if the preproc table exists and has data for any of the matched dirs.""" + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + try: + cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='preproc'") + if not cur.fetchone(): + return False + if not matched_dirs: + return False + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + cur.execute(f"SELECT COUNT(*) FROM preproc WHERE dirname IN ({dirs_sql})") + count = cur.fetchone()[0] + return count > 0 + except Exception: + return False + finally: + con.close() + + +def _preproc_step_stats(con, dirs_sql, where_extra="", per_cnf=False): + """Fetch all preproc delta rows and compute per-step stats including medians in Python. + + If per_cnf=True, first sum all occurrences within each (dirname, fname) before computing + the median across CNFs — giving the total effect per CNF per step. 
+ + Returns dict: name -> {n, n_active, med_lits, med_bins, med_cls, med_vars, + med_step_num} + """ + from collections import defaultdict + cur = con.cursor() + cur.execute( + f"SELECT name, dirname, fname, delta_irred_bins, delta_irred_long_cls," + f" delta_irred_long_lits, delta_free_vars, step_num" + f" FROM preproc WHERE dirname IN ({dirs_sql}){where_extra}" + ) + + if per_cnf: + # Accumulate per (name, dirname, fname), then collect per name + # cnf_totals[name][(dirname, fname)] = {'bins': int, 'cls': int, 'lits': int, 'vars': int, 'snums': list} + cnf_totals: dict = {} + for name, dirname, fname, d_bins, d_cls, d_lits, d_vars, snum in cur.fetchall(): + key = (dirname, fname) + if name not in cnf_totals: + cnf_totals[name] = {} + if key not in cnf_totals[name]: + cnf_totals[name][key] = {'bins': 0, 'cls': 0, 'lits': 0, 'vars': 0, 'snums': []} + t = cnf_totals[name][key] + t['bins'] += d_bins if d_bins is not None else 0 + t['cls'] += d_cls if d_cls is not None else 0 + t['lits'] += d_lits if d_lits is not None else 0 + t['vars'] += d_vars if d_vars is not None else 0 + if snum is not None: + t['snums'].append(snum) + groups = defaultdict(lambda: {'bins': [], 'cls': [], 'lits': [], 'vars': [], 'snums': []}) + for name, cnf_dict in cnf_totals.items(): + g = groups[name] + for t in cnf_dict.values(): + g['bins'].append(t['bins']) + g['cls'].append(t['cls']) + g['lits'].append(t['lits']) + g['vars'].append(t['vars']) + g['snums'].extend(t['snums']) + else: + groups = defaultdict(lambda: {'bins': [], 'cls': [], 'lits': [], 'vars': [], 'snums': []}) + for name, _, _, d_bins, d_cls, d_lits, d_vars, snum in cur.fetchall(): + g = groups[name] + g['bins'].append(d_bins if d_bins is not None else 0) + g['cls'].append(d_cls if d_cls is not None else 0) + g['lits'].append(d_lits if d_lits is not None else 0) + g['vars'].append(d_vars if d_vars is not None else 0) + if snum is not None: + g['snums'].append(snum) + + def median(vals): + s = sorted(vals) + return s[len(s) // 
2] if s else 0 + + result = {} + for name, g in groups.items(): + lits = g['lits'] + vars_ = g['vars'] + result[name] = { + 'n': len(lits), + 'med_bins': median(g['bins']), + 'med_cls': median(g['cls']), + 'med_lits': median(lits), + 'med_vars': median(vars_), + 'med_step_num': median(g['snums']) if g['snums'] else 0, + } + return result + + +def _print_table(headers, str_rows): + widths = [max(len(h), max((len(r[i]) for r in str_rows), default=0)) + for i, h in enumerate(headers)] + sep = "+-" + "-+-".join("-" * w for w in widths) + "-+" + fmt = "| " + " | ".join(f"{{:<{w}}}" for w in widths) + " |" + print(sep) + print(fmt.format(*headers)) + print(sep) + for row in str_rows: + print(fmt.format(*row)) + print(sep) + + +def _preproc_file_label(matched_dirs): + """Return a filesystem-safe label for chart filenames, derived from the dirname.""" + if len(matched_dirs) == 1: + return re.sub(r'[^a-zA-Z0-9_-]', '_', matched_dirs[0])[-50:] + return "combined" + + +def _preproc_has_step_time(matched_dirs): + """Return True if step_time column exists and has data for matched dirs.""" + try: + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + cur.execute(f"SELECT COUNT(*) FROM preproc WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL") + count = cur.fetchone()[0] + con.close() + return count > 0 + except Exception: + return False + + +def print_preproc_delta_table(matched_dirs, verbose=False): + """Per-step total contribution: SUM of each delta across all CNFs, sorted by lits impact.""" + if not _preproc_has_data(matched_dirs): + return + + has_time = _preproc_has_step_time(matched_dirs) + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + COUNT(*) as n_calls, + SUM(delta_irred_long_lits) as sum_d_lits, + SUM(delta_irred_long_cls) as sum_d_cls, + SUM(delta_irred_bins) as sum_d_bins, + 
SUM(delta_free_vars) as sum_d_vars, + ROUND(100.0 * SUM(CASE WHEN delta_irred_long_lits < 0 + THEN 1 ELSE 0 END) / COUNT(*), 0) as pct_invoc_red_lits, + ROUND(100.0 * SUM(CASE WHEN delta_free_vars < 0 + THEN 1 ELSE 0 END) / COUNT(*), 0) as pct_invoc_red_vars, + ROUND(SUM(step_time), 2) as total_step_s + FROM preproc WHERE dirname IN ({dirs_sql}) + GROUP BY name + ORDER BY sum_d_lits ASC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + title = "Preprocessing step total contribution (sum across all invocations, sorted by lits removed)" + print(f"\n{BLUE}{title}{RESET}") + print(" (n_calls = total invocations; d_lits/d_cls/d_bins/d_fvars = raw delta," + " negative = removed; %calls_lit = % of calls that reduced lits;" + " %calls_vars = % of calls that removed a var; total_s = wall time)") + + if has_time: + headers = ["step", "n_calls", "d_lits", "d_long_cls", "d_bin_cls", "d_fvars", "%calls_lit", "%calls_vars", "total_s"] + str_rows = [ + (str(r[0]), str(r[1]), str(int(r[2] or 0)), str(int(r[3] or 0)), + str(int(r[4] or 0)), str(int(r[5] or 0)), f"{int(r[6] or 0)}%", + f"{int(r[7] or 0)}%", + f"{r[8]:.1f}" if r[8] is not None else "N/A") + for r in rows + ] + else: + headers = ["step", "n_calls", "d_lits", "d_long_cls", "d_bin_cls", "d_fvars", "%calls_lit", "%calls_vars"] + str_rows = [ + (str(r[0]), str(r[1]), str(int(r[2] or 0)), str(int(r[3] or 0)), + str(int(r[4] or 0)), str(int(r[5] or 0)), f"{int(r[6] or 0)}%", + f"{int(r[7] or 0)}%") + for r in rows + ] + _print_table(headers, str_rows) + + +def print_preproc_step_efficiency(matched_dirs): + """Step efficiency: total lits/vars removed (SUM), % of invocations that do anything, + and average removal per active invocation. 
Sorted by total lits removed.""" + if not _preproc_has_data(matched_dirs): + return + + has_time = _preproc_has_step_time(matched_dirs) + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + COUNT(*) AS n_calls, + -SUM(delta_irred_long_lits) AS lits_rmvd, + -SUM(delta_irred_long_cls) AS cls_rmvd, + -SUM(delta_free_vars) AS fvars_rmvd, + SUM(CASE WHEN delta_irred_long_lits < 0 THEN 1 ELSE 0 END) AS calls_lit, + SUM(CASE WHEN delta_irred_long_cls < 0 THEN 1 ELSE 0 END) AS calls_cls, + SUM(CASE WHEN delta_free_vars < 0 THEN 1 ELSE 0 END) AS calls_var, + ROUND(SUM(step_time), 2) AS total_s + FROM preproc + WHERE dirname IN ({dirs_sql}) + GROUP BY name + HAVING lits_rmvd > 0 OR fvars_rmvd > 0 + ORDER BY lits_rmvd DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + title = "Preprocessing step efficiency (sum across all invocations)" + print(f"\n{BLUE}{title}{RESET}") + print(" (lits_rmvd = irred long lits removed; fvars_rmvd = free vars freed;" + " %lits_act/%cls_act/%vars_act = % of calls with nonzero removal;" + " lits/act, cls/act, vars/act = avg removed per active call)") + + if has_time: + headers = ["step", "n_calls", "lits_rmvd", "fvars_rmvd", "%lits_act", "%cls_act", "%vars_act", + "lits/act", "cls/act", "vars/act", "total_s"] + str_rows = [] + for name, n_calls, lits_rmvd, cls_rmvd, fvars_rmvd, calls_lit, calls_cls, calls_var, total_s in rows: + avg_lits = int(lits_rmvd / calls_lit) if calls_lit else 0 + avg_cls = int(cls_rmvd / calls_cls) if calls_cls else 0 + avg_vars = int(fvars_rmvd / calls_var) if calls_var else 0 + pct_lits = f"{100*calls_lit//n_calls}%" if n_calls else "0%" + pct_cls = f"{100*calls_cls//n_calls}%" if n_calls else "0%" + pct_vars = f"{100*calls_var//n_calls}%" if n_calls else "0%" + str_rows.append((name, str(n_calls), f"{lits_rmvd:,}", f"{fvars_rmvd:,}", + pct_lits, pct_cls, pct_vars, + f"{avg_lits:,}", 
f"{avg_cls:,}", f"{avg_vars:,}", + f"{total_s:.1f}" if total_s is not None else "N/A")) + else: + headers = ["step", "n_calls", "lits_rmvd", "fvars_rmvd", "%lits_act", "%cls_act", "%vars_act", + "lits/act", "cls/act", "vars/act"] + str_rows = [] + for name, n_calls, lits_rmvd, cls_rmvd, fvars_rmvd, calls_lit, calls_cls, calls_var, _ in rows: + avg_lits = int(lits_rmvd / calls_lit) if calls_lit else 0 + avg_cls = int(cls_rmvd / calls_cls) if calls_cls else 0 + avg_vars = int(fvars_rmvd / calls_var) if calls_var else 0 + pct_lits = f"{100*calls_lit//n_calls}%" if n_calls else "0%" + pct_cls = f"{100*calls_cls//n_calls}%" if n_calls else "0%" + pct_vars = f"{100*calls_var//n_calls}%" if n_calls else "0%" + str_rows.append((name, str(n_calls), f"{lits_rmvd:,}", f"{fvars_rmvd:,}", + pct_lits, pct_cls, pct_vars, + f"{avg_lits:,}", f"{avg_cls:,}", f"{avg_vars:,}")) + _print_table(headers, str_rows) + + +def print_preproc_time_breakdown(matched_dirs): + """Time breakdown: total seconds and % of preprocessing time per step, sorted by time.""" + if not _preproc_has_data(matched_dirs) or not _preproc_has_step_time(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + COUNT(*) AS n_calls, + ROUND(SUM(step_time), 2) AS total_s, + ROUND(AVG(step_time) * 1000, 1) AS avg_ms, + -SUM(delta_irred_long_lits) AS lits_rmvd, + -SUM(delta_irred_long_cls) AS cls_rmvd, + -SUM(delta_free_vars) AS fvars_rmvd, + SUM(IFNULL(delta_elimed_vars, 0)) AS elim_vars, + SUM(IFNULL(delta_units, 0)) AS units_found + FROM preproc + WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL + GROUP BY name + ORDER BY total_s DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + total_t = sum(r[2] for r in rows if r[2] is not None) + + title = "Preprocessing time breakdown per step (sorted by time)" + print(f"\n{BLUE}{title}{RESET}") + print(f" Total 
preprocessing time: {total_t:.1f}s across {len(matched_dirs)} dirs") + print(" (n_calls = total invocations; lits_rmvd = irred long lits removed;" + " cls_rmvd = irred long clauses removed; fvars_rmvd = free vars freed;" + " elim_vars = vars eliminated by BVE;" + " units = unit clauses found; lits/s, cls/s = removed per second of step time)") + + headers = ["step", "n_calls", "total_s", "%time", "avg_ms", "lits_rmvd", "cls_rmvd", + "fvars_rmvd", "elim_vars", "units", "lits/s", "cls/s"] + str_rows = [] + for name, n_calls, total_s, avg_ms, lits_rmvd, cls_rmvd, fvars_rmvd, elim_vars, units_found in rows: + pct = f"{100.0 * total_s / total_t:.1f}%" if total_t else "0%" + lits_per_s = int(max(lits_rmvd, 0) / total_s) if total_s and total_s > 0 else 0 + cls_per_s = int(max(cls_rmvd, 0) / total_s) if total_s and total_s > 0 else 0 + str_rows.append(( + name, + str(n_calls), + f"{total_s:.1f}", + pct, + f"{avg_ms:.1f}", + f"{max(lits_rmvd, 0):,}", + f"{max(cls_rmvd, 0):,}", + f"{max(fvars_rmvd, 0):,}", + f"{elim_vars:,}" if elim_vars else "0", + f"{units_found:,}" if units_found else "0", + f"{lits_per_s:,}", + f"{cls_per_s:,}", + )) + _print_table(headers, str_rows) + + +def print_preproc_noop_waste(matched_dirs): + """No-op waste: time spent in steps that do absolutely nothing. 
Potential optimization target.""" + if not _preproc_has_data(matched_dirs) or not _preproc_has_step_time(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + COUNT(*) AS n_total, + SUM(CASE WHEN delta_irred_long_lits = 0 AND delta_free_vars = 0 + AND IFNULL(delta_elimed_vars, 0) = 0 + AND IFNULL(delta_replaced_vars, 0) = 0 + AND IFNULL(delta_units, 0) = 0 + THEN 1 ELSE 0 END) AS n_noop, + ROUND(SUM(CASE WHEN delta_irred_long_lits = 0 AND delta_free_vars = 0 + AND IFNULL(delta_elimed_vars, 0) = 0 + AND IFNULL(delta_replaced_vars, 0) = 0 + AND IFNULL(delta_units, 0) = 0 + THEN step_time ELSE 0 END), 2) AS wasted_s, + ROUND(SUM(step_time), 2) AS total_s + FROM preproc + WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL + GROUP BY name + HAVING n_noop > 0 + ORDER BY wasted_s DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + total_wasted = sum(r[3] for r in rows if r[3] is not None) # r[3] = wasted_s + title = "No-op waste: time in steps that change nothing (potential optimization targets)" + print(f"\n{BLUE}{title}{RESET}") + print(f" Total wasted time: {total_wasted:.1f}s") + + headers = ["step", "n_total", "n_noop", "%noop", "wasted_s", "total_s", "%wasted_of_step"] + str_rows = [] + for name, n_total, n_noop, wasted_s, total_s in rows: + pct_noop = f"{100 * n_noop // n_total}%" if n_total else "0%" + pct_wasted = f"{100.0 * wasted_s / total_s:.0f}%" if total_s else "0%" + str_rows.append((name, str(n_total), str(n_noop), pct_noop, + f"{wasted_s:.1f}", f"{total_s:.1f}", pct_wasted)) + _print_table(headers, str_rows) + + +def print_preproc_extended_vars(matched_dirs): + """Extended var analysis: elim_vars, replaced_vars, units found per step.""" + if not _preproc_has_data(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = 
con.cursor() + cur.execute(f""" + SELECT name, + COUNT(*) AS n_active, + SUM(IFNULL(delta_elimed_vars, 0)) AS sum_elim, + SUM(IFNULL(delta_replaced_vars, 0)) AS sum_repl, + SUM(IFNULL(delta_units, 0)) AS sum_units, + ROUND(SUM(step_time), 2) AS total_s + FROM preproc + WHERE dirname IN ({dirs_sql}) + AND (IFNULL(delta_elimed_vars,0) != 0 + OR IFNULL(delta_replaced_vars,0) != 0 + OR IFNULL(delta_units,0) != 0) + GROUP BY name + ORDER BY (sum_elim + sum_repl + sum_units) DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + has_time = _preproc_has_step_time(matched_dirs) + title = "Extended variable analysis: elim/replaced/unit propagations per step" + print(f"\n{BLUE}{title}{RESET}") + + print(" (n_active = invocations where step changed at least one of elim/repl/units; " + "total_s = time across those active invocations only)") + if has_time: + headers = ["step", "n_active", "elim_vars", "repl_vars", "units_found", "total_s"] + str_rows = [(name, str(n), f"{e:,}", f"{r:,}", f"{u:,}", + f"{t:.1f}" if t is not None else "N/A") + for name, n, e, r, u, t in rows] + else: + headers = ["step", "n_active", "elim_vars", "repl_vars", "units_found"] + str_rows = [(name, str(n), f"{e:,}", f"{r:,}", f"{u:,}") + for name, n, e, r, u, _ in rows] + _print_table(headers, str_rows) + + +def print_step_predecessor_effectiveness(matched_dirs, step="must-scc-vrepl"): + """For a given step, show how effective it is depending on which step preceded it. 
+ Rows are sorted by total lits removed DESC; cumulative columns show what fraction + of the step's total work is explained by the top-N predecessor contexts.""" + if not _preproc_has_data(matched_dirs): + return + # Only meaningful if prev_step column exists (newer log format) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + # Check that at least some rows have a non-'none' prev_step + cur.execute(f"SELECT COUNT(*) FROM preproc WHERE dirname IN ({dirs_sql})" + f" AND name='{step}' AND prev_step != 'none'") + if cur.fetchone()[0] == 0: + con.close() + return + + cur.execute(f""" + SELECT prev_step, + COUNT(*) AS n, + -SUM(delta_irred_long_lits) AS sum_lits, + -SUM(delta_free_vars) AS sum_vars, + ROUND(AVG(-delta_irred_long_lits), 1) AS avg_lits, + ROUND(AVG(-delta_free_vars), 1) AS avg_vars, + ROUND(100.0 * SUM(CASE WHEN delta_irred_long_lits < 0 + OR delta_free_vars < 0 THEN 1 ELSE 0 END) + / COUNT(*), 1) AS pct_active + FROM preproc + WHERE dirname IN ({dirs_sql}) AND name='{step}' + GROUP BY prev_step + ORDER BY sum_lits DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + total_lits = sum(max(r[2] or 0, 0) for r in rows) + total_vars = sum(max(r[3] or 0, 0) for r in rows) + total_n = sum(r[1] for r in rows) + + title = f"'{step}' effectiveness by predecessor step (sorted by lits removed, with cumulative)" + print(f"\n{BLUE}{title}{RESET}") + print(f" Total across all predecessors: {total_n:,} invocations, " + f"{total_lits:,} lits removed, {total_vars:,} vars removed") + + print(" (lits_rmvd/fvars_rmvd = total removed in that predecessor context;" + " avg_lit/var = per-call average; %active = % of calls with any effect;" + " cum_lits%/cum_vars% = cumulative share of step's total work)") + headers = ["prev_step", "n", "%n", "lits_rmvd", "fvars_rmvd", + "avg_lit", "avg_var", "%active", "cum_lits%", "cum_vars%"] + cum_lits = 0 + cum_vars = 0 + str_rows = [] + for 
prev, n, s_lits, s_vars, a_lits, a_vars, pct_act in rows: + s_lits = max(s_lits or 0, 0) + s_vars = max(s_vars or 0, 0) + cum_lits += s_lits + cum_vars += s_vars + pct_n = f"{100 * n // total_n}%" if total_n else "0%" + cum_lits_p = f"{100 * cum_lits // total_lits}%" if total_lits else "0%" + cum_vars_p = f"{100 * cum_vars // total_vars}%" if total_vars else "0%" + str_rows.append(( + str(prev), + str(n), + pct_n, + f"{s_lits:,}", + f"{s_vars:,}", + f"{a_lits:.1f}", + f"{a_vars:.1f}", + f"{pct_act:.1f}%", + cum_lits_p, + cum_vars_p, + )) + _print_table(headers, str_rows) + + +def preproc_time_chart(matched_dirs): + """Chart: time breakdown bar chart — total seconds per step, colored by effect type.""" + if not _preproc_has_data(matched_dirs) or not _preproc_has_step_time(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + ROUND(SUM(step_time), 2) AS total_s, + ROUND(SUM(CASE WHEN delta_irred_long_lits = 0 AND delta_free_vars = 0 + AND IFNULL(delta_elimed_vars,0) = 0 + AND IFNULL(delta_replaced_vars,0) = 0 + AND IFNULL(delta_units,0) = 0 + THEN step_time ELSE 0 END), 2) AS wasted_s + FROM preproc + WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL + GROUP BY name + ORDER BY total_s DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + lbl = _preproc_file_label(matched_dirs) + dat_file = f"preproc_time_{lbl}.dat" + pdf_file = f"preproc_time_{lbl}.pdf" + png_file = f"preproc_time_{lbl}.png" + gp_file = f"preproc_time_{lbl}.gnuplot" + + n = len(rows) + with open(dat_file, "w") as f: + f.write("# step useful_s wasted_s\n") + for name, total_s, wasted_s in rows: + useful_s = (total_s or 0) - (wasted_s or 0) + f.write(f'"{name}"\t{useful_s:.2f}\t{wasted_s:.2f}\n') + + width_cm = max(22, n * 2.2) + + with open(gp_file, "w") as f: + for term, out in [ + (f'pdfcairo size {width_cm:.0f}cm,40cm background "#d0d0d0"', 
pdf_file), + (f'pngcairo size {int(width_cm * 40)},640 background "#d0d0d0"', png_file), + ]: + f.write(f'set terminal {term}\n') + f.write(f'set output "{out}"\n') + f.write(f'set title "Preprocessing time per step (useful vs wasted/no-op time)\\n{lbl}"\n') + f.write('set ylabel "Time (seconds)"\n') + f.write('set yrange [0:*]\n') + f.write('set grid ytics\n') + f.write('set key top right\n') + f.write('set style fill solid 0.8 border -1\n') + f.write('set style data histograms\n') + f.write('set style histogram rowstacked\n') + f.write('set boxwidth 0.6\n') + f.write('set xtics rotate by -45 right\n') + f.write('set bmargin 10\n') + f.write(f'plot "{dat_file}" using 2:xtic(1) lc rgb "steelblue" title "useful time",\\\n') + f.write(f' "{dat_file}" using 3 lc rgb "red" title "no-op waste"\n\n') + + title = "Preproc time chart: useful vs no-op time per step" + print(f"\n{BLUE}{title}{RESET}") + print(f" PDF: {pdf_file} PNG: {png_file}") + _gnuplot_run(gp_file, png_file) + + +def preproc_efficiency_chart(matched_dirs): + """Chart: lits removed per second for each step (efficiency ratio).""" + if not _preproc_has_data(matched_dirs) or not _preproc_has_step_time(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + -SUM(delta_irred_long_lits) AS sum_lits, + ROUND(SUM(step_time), 2) AS total_s + FROM preproc + WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL AND step_time > 0 + GROUP BY name + HAVING sum_lits > 0 AND total_s > 0 + ORDER BY (sum_lits / total_s) DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + lbl = _preproc_file_label(matched_dirs) + dat_file = f"preproc_eff_{lbl}.dat" + pdf_file = f"preproc_eff_{lbl}.pdf" + png_file = f"preproc_eff_{lbl}.png" + gp_file = f"preproc_eff_{lbl}.gnuplot" + + n = len(rows) + with open(dat_file, "w") as f: + f.write("# idx lits_per_s step\n") + for i, (name, lits, 
total_s) in enumerate(rows): + lits_per_s = (lits or 0) / total_s if total_s else 0 + f.write(f"{i+1}\t{lits_per_s:.0f}\t{name}\n") + + xtics = ", ".join(f'"{name}" {i+1}' for i, (name, _, __) in enumerate(rows)) + width_cm = max(20, n * 2.2) + + with open(gp_file, "w") as f: + for term, out in [ + (f'pdfcairo size {width_cm:.0f}cm,40cm background "#d0d0d0"', pdf_file), + (f'pngcairo size {int(width_cm * 40)},640 background "#d0d0d0"', png_file), + ]: + f.write(f'set terminal {term}\n') + f.write(f'set output "{out}"\n') + f.write(f'set title "Preprocessing efficiency: lits removed per second of CPU time\\n{lbl}"\n') + f.write('set ylabel "Lits removed per second"\n') + f.write('set yrange [0:*]\n') + f.write(f'set xrange [0.5:{n + 0.5}]\n') + f.write('set grid ytics\n') + f.write('set key off\n') + f.write('set style fill solid 0.8 border -1\n') + f.write('set boxwidth 0.6\n') + f.write(f'set xtics ({xtics}) rotate by -45 left\n') + f.write('set bmargin 10\n') + f.write(f'plot "{dat_file}" using ($1):2 with boxes lc rgb "steelblue" notitle\n\n') + + title = "Preproc efficiency chart: lits removed per second per step" + print(f"\n{BLUE}{title}{RESET}") + print(f" PDF: {pdf_file} PNG: {png_file}") + _gnuplot_run(gp_file, png_file) + + +def preproc_time_pie_chart(matched_dirs): + """Pie chart: share of total preprocessing wall time per step. 
+ Steps with <1% share are merged into 'other'.""" + if not _preproc_has_data(matched_dirs) or not _preproc_has_step_time(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, ROUND(SUM(step_time), 2) AS total_s + FROM preproc + WHERE dirname IN ({dirs_sql}) AND step_time IS NOT NULL + GROUP BY name + ORDER BY total_s DESC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + grand_total = sum(r[1] for r in rows if r[1]) + if grand_total <= 0: + return + + # Merge steps < 1% into "other" + kept, other_s = [], 0.0 + for name, total_s in rows: + pct = 100.0 * (total_s or 0) / grand_total + if pct >= 1.0: + kept.append((name, total_s or 0)) + else: + other_s += total_s or 0 + if other_s > 0: + kept.append(("other", other_s)) + + lbl = _preproc_file_label(matched_dirs) + dat_file = f"preproc_timepie_{lbl}.dat" + pdf_file = f"preproc_timepie_{lbl}.pdf" + png_file = f"preproc_timepie_{lbl}.png" + gp_file = f"preproc_timepie_{lbl}.gnuplot" + + with open(dat_file, "w") as f: + f.write("# step seconds pct\n") + for name, s in kept: + pct = 100.0 * s / grand_total + f.write(f'"{name}"\t{s:.2f}\t{pct:.1f}\n') + + # gnuplot pie chart via filled circles — gnuplot doesn't have native pie, + # so we use a parametric polar approach with filled boxes as wedges. + # We write an explicit set of 'object circle' / wedge commands per slice. 
+ n_slices = len(kept) + # Assign gnuplot line colors cyclically from a palette + colors = ["#4e79a7","#f28e2b","#e15759","#76b7b2","#59a14f", + "#edc948","#b07aa1","#ff9da7","#9c755f","#bab0ac", + "#d37295","#fabfd2","#8cd17d","#b6992d","#499894"] + + with open(gp_file, "w") as f: + for term, out, sz in [ + (f'pdfcairo size 52cm,40cm background "#d0d0d0"', pdf_file, ""), + (f'pngcairo size 800,640 background "#d0d0d0"', png_file, ""), + ]: + f.write(f'set terminal {term}\n') + f.write(f'set output "{out}"\n') + f.write(f'set title "Preprocessing time breakdown (total {grand_total:.0f}s)\\n{lbl}"\n') + f.write('set size ratio -1\n') + f.write('unset border\n') + f.write('unset tics\n') + f.write('unset key\n') + f.write('set xrange [-1.5:2.5]\n') + f.write('set yrange [-1.3:1.3]\n') + # Draw wedges as filled polygons + angle = 0.0 + label_lines = [] + for i, (name, s) in enumerate(kept): + pct = s / grand_total + end_angle = angle + pct * 2 * math.pi + color = colors[i % len(colors)] + # Build polygon points for the wedge (center + arc) + steps = max(3, int(pct * 60)) + arc_pts = [] + for k in range(steps + 1): + a = angle + (end_angle - angle) * k / steps + arc_pts.append(f"{math.cos(a):.4f},{math.sin(a):.4f}") + poly = "0,0 to " + " to ".join(arc_pts) + " to 0,0" + f.write(f'set object {i+1} polygon from {poly} ' + f'fc rgb "{color}" fs solid 0.85 border lc rgb "white" lw 0.5\n') + # Label at midpoint of arc + mid = (angle + end_angle) / 2 + r_label = 0.65 + lx = r_label * math.cos(mid) + ly = r_label * math.sin(mid) + pct_str = f"{pct*100:.1f}%" + label_lines.append(f'set label "{name}\\n{pct_str}" at {lx:.3f},{ly:.3f} center font ",8"\n') + # Legend entry at right + lx2 = 1.25 + ly2 = 0.95 - i * 0.13 + f.write(f'set object {n_slices+i+1} rect from {lx2-0.05},{ly2-0.04} to {lx2+0.05},{ly2+0.04} ' + f'fc rgb "{color}" fs solid 0.85 border lc rgb "grey"\n') + label_lines.append(f'set label "{name} ({pct*100:.1f}%)" at {lx2+0.08},{ly2} left font ",9"\n') + angle = 
end_angle + for ll in label_lines: + f.write(ll) + f.write('plot NaN notitle\n\n') + + title = "Preproc time pie chart: share of total preprocessing time per step" + print(f"\n{BLUE}{title}{RESET}") + print(f" PDF: {pdf_file} PNG: {png_file}") + _gnuplot_run(gp_file, png_file) + + +def _display_png(png_file): + if os.path.exists(png_file): + with open(png_file, "rb") as fh: + img_b64 = base64.b64encode(fh.read()).decode() + w, h = _png_dimensions(png_file) + print(f"\033]1337;File=inline=1;width={w}px;height={h}px:{img_b64}\a") + + +def _png_dimensions(png_file): + """Read width/height from PNG header (bytes 16-24).""" + try: + with open(png_file, "rb") as fh: + fh.seek(16) + w = int.from_bytes(fh.read(4), "big") + h = int.from_bytes(fh.read(4), "big") + return w, h + except Exception: + return 800, 600 + + +def _gnuplot_run(gp_file, png_file): + """Run gnuplot and display the result inline.""" + os.system(f"gnuplot {gp_file}") + _display_png(png_file) + + +def preproc_share_chart(matched_dirs): + """Chart A: what fraction of total lits/vars removal each step accounts for. + Shows two horizontal 100%-normalised bars side by side (lits and vars). 
+ Steps with <1% share in both are merged into 'other'.""" + if not _preproc_has_data(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + cur.execute(f""" + SELECT name, + -SUM(delta_irred_long_lits) as sum_lits, + -SUM(delta_free_vars) as sum_vars + FROM preproc + WHERE dirname IN ({dirs_sql}) + GROUP BY name + ORDER BY sum_lits DESC + """) + rows = cur.fetchall() + con.close() + + total_lits = sum(max(r[1], 0) for r in rows) + total_vars = sum(max(r[2], 0) for r in rows) + if total_lits == 0 and total_vars == 0: + return + + # Keep steps with >= 1% share in either metric; merge rest into "other" + kept, other_lits, other_vars = [], 0, 0 + for name, lits, vars_ in rows: + lits = max(lits, 0); vars_ = max(vars_, 0) + pct_l = 100.0 * lits / total_lits if total_lits else 0 + pct_v = 100.0 * vars_ / total_vars if total_vars else 0 + if pct_l >= 1.0 or pct_v >= 1.0: + kept.append((name, lits, vars_)) + else: + other_lits += lits; other_vars += vars_ + if other_lits > 0 or other_vars > 0: + kept.append(("other", other_lits, other_vars)) + + # Sort kept by lits% descending so largest bar is on the left + kept.sort(key=lambda r: r[1], reverse=True) + n = len(kept) + width_cm = max(20, n * 2.0) + + lbl = _preproc_file_label(matched_dirs) + dat_file = f"preproc_share_{lbl}.dat" + pdf_file = f"preproc_share_{lbl}.pdf" + png_file = f"preproc_share_{lbl}.png" + gp_file = f"preproc_share_{lbl}.gnuplot" + + with open(dat_file, "w") as f: + f.write("# idx pct_lits pct_vars step_name\n") + for i, (name, lits, vars_) in enumerate(kept): + pl = 100.0 * lits / total_lits if total_lits else 0 + pv = 100.0 * vars_ / total_vars if total_vars else 0 + f.write(f"{i+1}\t{pl:.1f}\t{pv:.1f}\t{name}\n") + + xtics = ", ".join(f'"{name}" {i+1}' for i, (name, _, __) in enumerate(kept)) + with open(gp_file, "w") as f: + for term, out in [ + (f'pdfcairo size {width_cm:.0f}cm,40cm background "#d0d0d0"', 
pdf_file), + (f'pngcairo size {int(width_cm * 40)},640 background "#d0d0d0"', png_file), + ]: + f.write(f'set terminal {term}\n') + f.write(f'set output "{out}"\n') + f.write(f'set title "Share of total preprocessing work per step\\n{lbl}"\n') + f.write('set ylabel "% of all lits / vars removed across all CNFs"\n') + f.write('set yrange [0:100]\n') + f.write(f'set xrange [0.5:{n + 0.5}]\n') + f.write('set grid ytics\n') + f.write('set key top right\n') + f.write('set style fill solid 0.7 border -1\n') + f.write('set boxwidth 0.3\n') + f.write(f'set xtics ({xtics}) rotate by -45 left\n') + f.write('set bmargin 10\n') + f.write(f'plot "{dat_file}" using ($1-0.18):2 with boxes lc rgb "steelblue" title "% lits removed",\\\n') + f.write(f' "{dat_file}" using ($1+0.18):3 with boxes lc rgb "dark-green" title "% vars removed"\n\n') + + title = "Preproc share chart: % of total lits/vars removed per step" + print(f"\n{BLUE}{title}{RESET}") + print(f" PDF: {pdf_file} PNG: {png_file}") + _gnuplot_run(gp_file, png_file) + + + +def preproc_cumulative_chart(matched_dirs): + """Chart C: cumulative SUM of lits/vars removed across pipeline stages (ordered by avg position). 
+ Each step is one horizontal tick; lines show how the running total grows.""" + if not _preproc_has_data(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + cur = con.cursor() + + # Per-step: sum removed and average pipeline position (avg step_num) + cur.execute(f""" + SELECT name, + -SUM(delta_irred_long_lits) as sum_lits, + -SUM(delta_irred_long_cls) as sum_cls, + -SUM(delta_irred_bins) as sum_bins, + -SUM(delta_free_vars) as sum_vars, + AVG(step_num) as avg_pos + FROM preproc + WHERE dirname IN ({dirs_sql}) + GROUP BY name + ORDER BY avg_pos ASC + """) + rows = cur.fetchall() + con.close() + + if not rows: + return + + lbl = _preproc_file_label(matched_dirs) + dat_file = f"preproc_cumul_{lbl}.dat" + pdf_file = f"preproc_cumul_{lbl}.pdf" + png_file = f"preproc_cumul_{lbl}.png" + gp_file = f"preproc_cumul_{lbl}.gnuplot" + + # Normalise to millions for readability + cum_lits = cum_cls = cum_vars = 0.0 + with open(dat_file, "w") as f: + f.write("# i cum_lits_M cum_cls_M cum_vars_M step_name\n") + f.write("0\t0.0\t0.0\t0.0\tstart\n") + for i, (name, s_lits, s_long_cls, s_bins, s_vars, _pos) in enumerate(rows): + cum_lits += max(s_lits, 0) / 1e6 + cum_cls += max((s_long_cls or 0) + (s_bins or 0), 0) / 1e6 + cum_vars += max(s_vars, 0) / 1e6 + f.write(f"{i+1}\t{cum_lits:.3f}\t{cum_cls:.3f}\t{cum_vars:.3f}\t{name}\n") + + n = len(rows) + height_cm = max(16, n * 1.1) + + ytics_parts = ['"start" 0'] + [f'"{name}" {i+1}' for i, (name, *_) in enumerate(rows)] + ytics_str = ", ".join(ytics_parts) + + with open(gp_file, "w") as f: + for term, out in [ + (f'pdfcairo size 52cm,{height_cm:.0f}cm background "#d0d0d0"', pdf_file), + (f'pngcairo size 1200,{int(height_cm * 50)} background "#d0d0d0"', png_file), + ]: + f.write(f'set terminal {term}\n') + f.write(f'set output "{out}"\n') + f.write(f'set title "Cumulative preprocessing effect across pipeline (total across all CNFs)\\n{lbl}"\n') + f.write('set 
xlabel "Cumulative total removed (millions)"\n') + f.write('set ylabel ""\n') + f.write(f'set yrange [-0.5:{n + 0.5}]\n') + f.write('set xrange [0:*]\n') + f.write('set grid xtics\n') + f.write('set key top left\n') + f.write(f'set ytics ({ytics_str})\n') + f.write(f'plot "{dat_file}" using 2:1 with linespoints lc rgb "steelblue" lw 2 pt 7 ps 1 title "lits removed",\\\n') + f.write(f' "{dat_file}" using 3:1 with linespoints lc rgb "dark-orange" lw 2 pt 5 ps 1 title "cls removed (bin+long)",\\\n') + f.write(f' "{dat_file}" using 4:1 with linespoints lc rgb "dark-green" lw 2 pt 9 ps 1 title "vars removed"\n\n') + + title = f"Preproc cumulative chart (all steps, n={n}, ordered by pipeline position)" + print(f"\n{BLUE}{title}{RESET}") + print(f" PDF: {pdf_file} PNG: {png_file}") + _gnuplot_run(gp_file, png_file) + + +def print_preproc_per_step_detail(matched_dirs, verbose=False): + """Top steps by total absolute lits impact (all occurrences), sorted by median lits reduction.""" + if not _preproc_has_data(matched_dirs): + return + + dirs_sql = ",".join("'" + d + "'" for d in matched_dirs) + con = sqlite3.connect("data.sqlite3") + stats = _preproc_step_stats(con, dirs_sql) + + # Also need n_active_any (active on any metric) — fetch from DB + cur = con.cursor() + cur.execute( + f"SELECT name," + f" ROUND(100.0 * SUM(CASE WHEN delta_irred_long_lits != 0 OR delta_irred_bins != 0" + f" OR delta_free_vars != 0 THEN 1 ELSE 0 END) / COUNT(*), 1)" + f" FROM preproc WHERE dirname IN ({dirs_sql}) GROUP BY name" + ) + pct_active = {row[0]: row[1] for row in cur.fetchall()} + con.close() + + if not stats: + return + + # Sort by median lits reduction (most reduction first) + ordered = sorted(stats.items(), key=lambda kv: kv[1]['med_lits'])[:10] + + title = "Top steps by median lits reduction (all occurrences)" + print(f"\n{BLUE}{title}{RESET}") + + headers = ["step", "n", "med_d_lits", "med_d_bins", "med_d_vars", "%active"] + str_rows = [ + (name, + str(s['n']), + str(s['med_lits']), 
+ str(s['med_bins']), + str(s['med_vars']), + str(pct_active.get(name, 0))) + for name, s in ordered + ] + _print_table(headers, str_rows) + + def print_errored_files(matched_dirs): if not matched_dirs: return @@ -628,8 +1598,8 @@ def gp_str(s): with open(gp_file, "w") as f: for term, out in [ - (f'pdfcairo size 15cm,15cm', pdf_file), - (f'pngcairo size 600,600', png_file), + (f'pdfcairo size 15cm,15cm background "#d0d0d0"', pdf_file), + (f'pngcairo size 600,600 background "#d0d0d0"', png_file), ]: f.write(f'set terminal {term}\n') f.write(f'set output "{out}"\n') @@ -650,12 +1620,7 @@ def gp_str(s): print(f" PDF: {pdf_file} PNG: {png_file}") os.system(f"gnuplot {gp_file}") - - # Display PNG inline (wezterm / iTerm2 inline-image protocol) - if os.path.exists(png_file): - with open(png_file, "rb") as fh: - img_b64 = base64.b64encode(fh.read()).decode() - print(f"\033]1337;File=inline=1;width=600px;height=600px:{img_b64}\a") + _display_png(png_file) con.close() @@ -684,8 +1649,8 @@ def plot_lines(): with open(gnuplotfn, "w") as f: for term, out in [ - ('pdfcairo size 15cm,15cm', pdf_file), - ('pngcairo size 600,600', png_file), + ('pdfcairo size 15cm,15cm background "#d0d0d0"', pdf_file), + ('pngcairo size 600,600 background "#d0d0d0"', png_file), ]: f.write(f'set terminal {term}\n') f.write(f'set output "{out}"\n') @@ -862,20 +1827,26 @@ def create_notebook(dirs): # "out-ganak-mccomp2324-1140184-", # no dodgy cache, check SBVA # "out-ganak-mccomp2324-1146702-", # sbva checks, oracle mult checks # "out-ganak-mccomp2324-1152658-1", # fixing bug, testing pura and oraclemult - "out-ganak-mccomp2324-1193808-0", # fixing counting bug, adding new cube extend system, fixing cube counting (and maybe effectiveness) + # "out-ganak-mccomp2324-1193808-0", # fixing counting bug, adding new cube extend system, fixing cube counting (and maybe effectiveness) # "out-ganak-mccomp2324-1229753-0", # lots of bug fixes, beauty changes with Claude, etc # "out-ganak-mccomp2324-1231407-0", # 
the same as above but without (most) of the Claude improvements # "out-ganak-mccomp2324-1247484-0", ## new shrinking, fixing arjun SLOW_DEBUG, improved propagation idea from CaDiCaL, improved 3-tier clause database. Also undoing only 2 particular Claude changes (propagation and --prob 0 non-zeroing of data) # "out-ganak-mccomp2324-1250247-", # CMS cleanup, oracle improvements, fix parsing issue (?) of CNF header -- weird + one of them is a binary with: reason-side bumping and lbd update, evsids - "out-ganak-mccomp2324-1256426-0", # fixing oracle, mostly, and also header parsing more lax - "out-ganak-mccomp2324-1261017-0", # Different order in Arjun -] -only_dirs = [ - "mei-march-2026-1239767-1", # gpmc - # "mei-march-2026-1239767-0", # ganak old - # "mei-march-2026-1269673-0", # ganak release, but WRONG SED - "mei-march-2026-1274973-0", # ganak release, new SED + # "out-ganak-mccomp2324-1256426-0", # fixing oracle, mostly, and also header parsing more lax + # "out-ganak-mccomp2324-1261017-0", # Different order in Arjun + # "out-ganak-mccomp2324-1279568-0", # release + # "out-ganak-mccomp2324-1281478-0", # more data + "out-ganak-mccomp2324-1282000-0", # new preproc + # "out-ganak-mccomp2324-1282412-0", # new preproc v2 + # "out-ganak-mccomp2324-1286085-", # 4 new puura orders + # "out-ganak-mccomp2324-1286556-", # 4 new puura orders, and wl-based cache compact ] +# only_dirs = [ +# "mei-march-2026-1239767-1", # gpmc +# # "mei-march-2026-1239767-0", # ganak old +# # "mei-march-2026-1269673-0", # ganak release, but WRONG SED +# "mei-march-2026-1274973-0", # ganak release, new SED +# ] # not_calls = ["--nvarscutoffcache 20", "--nvarscutoffcache 3"] # not_calls = ["--satsolver 0"] @@ -903,6 +1874,8 @@ def main(): parser.add_argument("--fname", nargs="+", metavar="PATTERN", default=[], help="Filter by fname pattern(s), e.g. 
--fname '%%track1%%' '%%track3%%'") + parser.add_argument("--nopreproc", action="store_true", + help="Skip all preprocessing tables and graphs (preproc table)") args = parser.parse_args() if args.fname: @@ -939,7 +1912,25 @@ def main(): print("Printing median tables...") print_median_tables(table_todo, fname_like, args.verbose) print_instance_stats_table(table_todo, fname_like, args.verbose) - print_preproc_diffs(table_todo, fname_like, args.verbose) + if not args.nopreproc: + print_preproc_diffs(table_todo, fname_like, args.verbose) + + # Preprocessing step analysis — one block per directory + for d in matched_dirs: + one = [d] + print(f"\n{GREEN}{'='*70}{RESET}") + print(f"{GREEN} Preprocessing analysis --- {d}{RESET}") + print(f"{GREEN}{'='*70}{RESET}") + print_preproc_delta_table(one, args.verbose) + print_preproc_step_efficiency(one) + + print_preproc_time_breakdown(one) + print_preproc_noop_waste(one) + print_preproc_extended_vars(one) + print_step_predecessor_effectiveness(one, step="must-scc-vrepl") + preproc_cumulative_chart(one) + preproc_time_pie_chart(one) + unique_dirs = list(dict.fromkeys(d for d, _ in table_todo)) for dir1, dir2 in itertools.combinations(unique_dirs, 2): print_two_dir_diffs(dir1, dir2, fname_like, args.verbose) @@ -959,11 +1950,7 @@ def main(): console_title = f"CDF: instances counted vs. 
solve time" print(f"\n{BLUE}{console_title}{RESET}") print(f" PDF: {pdf_file} PNG: {png_file}") - - if os.path.exists(png_file): - with open(png_file, "rb") as fh: - img_b64 = base64.b64encode(fh.read()).decode() - print(f"\033]1337;File=inline=1;width=600px;height=600px:{img_b64}\a") + _display_png(png_file) if __name__ == "__main__": diff --git a/scripts/data/ganak.sqlite b/scripts/data/ganak.sqlite index 28b4ecd7..6c639f18 100644 --- a/scripts/data/ganak.sqlite +++ b/scripts/data/ganak.sqlite @@ -86,3 +86,66 @@ UPDATE data SET irred_bin = NULL WHERE irred_bin = ''; UPDATE data SET irred_long = NULL WHERE irred_long = ''; UPDATE data SET irred_tri = NULL WHERE irred_tri = ''; UPDATE data SET irred_cls = NULL WHERE irred_cls = ''; + +DROP TABLE IF EXISTS preproc; +CREATE TABLE preproc ( + dirname STRING NOT NULL, + fname STRING NOT NULL, + step_num INT NOT NULL, + occurrence INT NOT NULL, + name STRING NOT NULL, + prev_step STRING NOT NULL DEFAULT 'none', + before_irred_bins INT, + before_irred_long_cls INT, + before_irred_long_lits INT, + before_free_vars INT, + after_irred_bins INT, + after_irred_long_cls INT, + after_irred_long_lits INT, + after_free_vars INT, + delta_irred_bins INT, + delta_irred_long_cls INT, + delta_irred_long_lits INT, + delta_free_vars INT, + -- Extended fields (present only in newer log format) + before_elimed_vars INT, + before_replaced_vars INT, + before_units INT, + before_mem_MB INT, + after_elimed_vars INT, + after_replaced_vars INT, + after_units INT, + after_mem_MB INT, + delta_elimed_vars INT, + delta_replaced_vars INT, + delta_units INT, + delta_mem_MB INT, + step_time FLOAT +); +. mode csv +. 
import -skip 1 preproc.csv preproc +UPDATE preproc SET before_irred_bins = NULL WHERE before_irred_bins = ''; +UPDATE preproc SET before_irred_long_cls = NULL WHERE before_irred_long_cls = ''; +UPDATE preproc SET before_irred_long_lits = NULL WHERE before_irred_long_lits = ''; +UPDATE preproc SET before_free_vars = NULL WHERE before_free_vars = ''; +UPDATE preproc SET after_irred_bins = NULL WHERE after_irred_bins = ''; +UPDATE preproc SET after_irred_long_cls = NULL WHERE after_irred_long_cls = ''; +UPDATE preproc SET after_irred_long_lits = NULL WHERE after_irred_long_lits = ''; +UPDATE preproc SET after_free_vars = NULL WHERE after_free_vars = ''; +UPDATE preproc SET delta_irred_bins = NULL WHERE delta_irred_bins = ''; +UPDATE preproc SET delta_irred_long_cls = NULL WHERE delta_irred_long_cls = ''; +UPDATE preproc SET delta_irred_long_lits = NULL WHERE delta_irred_long_lits = ''; +UPDATE preproc SET delta_free_vars = NULL WHERE delta_free_vars = ''; +UPDATE preproc SET before_elimed_vars = NULL WHERE before_elimed_vars = ''; +UPDATE preproc SET before_replaced_vars = NULL WHERE before_replaced_vars = ''; +UPDATE preproc SET before_units = NULL WHERE before_units = ''; +UPDATE preproc SET before_mem_MB = NULL WHERE before_mem_MB = ''; +UPDATE preproc SET after_elimed_vars = NULL WHERE after_elimed_vars = ''; +UPDATE preproc SET after_replaced_vars = NULL WHERE after_replaced_vars = ''; +UPDATE preproc SET after_units = NULL WHERE after_units = ''; +UPDATE preproc SET after_mem_MB = NULL WHERE after_mem_MB = ''; +UPDATE preproc SET delta_elimed_vars = NULL WHERE delta_elimed_vars = ''; +UPDATE preproc SET delta_replaced_vars = NULL WHERE delta_replaced_vars = ''; +UPDATE preproc SET delta_units = NULL WHERE delta_units = ''; +UPDATE preproc SET delta_mem_MB = NULL WHERE delta_mem_MB = ''; +UPDATE preproc SET step_time = NULL WHERE step_time = ''; diff --git a/scripts/data/get_data_ganak.py b/scripts/data/get_data_ganak.py index 934bfb12..f6265dee 100755 --- 
a/scripts/data/get_data_ganak.py +++ b/scripts/data/get_data_ganak.py @@ -5,6 +5,7 @@ import decimal import glob import os +import re import string import subprocess import sys @@ -179,7 +180,7 @@ def parse_ganak_output(fname): # Unconditional status checks if "ERROR" in line or "assertion fail" in line.lower(): result["errored"] = 1 - if "bad_alloc" in line: + if "Cannot allocate memory" in line or "bad_alloc" in line: result["mem_out"] = 1 if line.startswith("s SATISFIABLE") or line.startswith("s UNSATISFIABLE"): result["not_solved"] = False @@ -284,6 +285,88 @@ def parse_ganak_output(fname): return result + +_SIMP_STATS_RE = re.compile( + r'c o \[simp-stats\] (BEFORE|AFTER) (\S+) ' + r'irred_bins (\d+) irred_long_cls (\d+) irred_long_lits (\d+) free_vars (\d+)' + r'(?:\s+elimed_vars (\d+) replaced_vars (\d+) units (\d+) mem_MB (\d+) T:\s*([\d.]+))?' +) + + +def parse_simp_stats(fname): + """Parse [simp-stats] BEFORE/AFTER pairs from a ganak output file. + Returns a list of dicts, one per matched BEFORE/AFTER pair. 
+ + New fields (from extended log format): + elimed_vars, replaced_vars, units, mem_MB, step_time, prev_step + """ + # pending[name] -> list of full parsed tuples waiting for matching AFTER + pending = {} + step_counts = {} # name -> number of completed pairs so far + records = [] + last_completed_step = "none" # tracks prev_step + + with open(fname, "r") as f: + for line in f: + line = line.replace("[0m", "").replace("\x1b", "").strip() + m = _SIMP_STATS_RE.search(line) + if not m: + continue + kind = m.group(1) + name = m.group(2) + # Core fields (always present) + core = (int(m.group(3)), int(m.group(4)), int(m.group(5)), int(m.group(6))) + # Extended fields (present in newer logs) + ext = None + if m.group(7) is not None: + ext = (int(m.group(7)), int(m.group(8)), int(m.group(9)), + int(m.group(10)), float(m.group(11))) + + if kind == "BEFORE": + pending.setdefault(name, []).append((core, ext)) + elif kind == "AFTER": + if pending.get(name): + before_core, before_ext = pending[name].pop(0) + occurrence = step_counts.get(name, 0) + step_counts[name] = occurrence + 1 + step_num = len(records) + rec = { + "step_num": step_num, + "occurrence": occurrence, + "name": name, + "prev_step": last_completed_step, + "before_irred_bins": before_core[0], + "before_irred_long_cls": before_core[1], + "before_irred_long_lits": before_core[2], + "before_free_vars": before_core[3], + "after_irred_bins": core[0], + "after_irred_long_cls": core[1], + "after_irred_long_lits": core[2], + "after_free_vars": core[3], + "delta_irred_bins": core[0] - before_core[0], + "delta_irred_long_cls": core[1] - before_core[1], + "delta_irred_long_lits": core[2] - before_core[2], + "delta_free_vars": core[3] - before_core[3], + } + # Extended fields + if ext is not None and before_ext is not None: + rec["before_elimed_vars"] = before_ext[0] + rec["before_replaced_vars"] = before_ext[1] + rec["before_units"] = before_ext[2] + rec["before_mem_MB"] = before_ext[3] + rec["after_elimed_vars"] = ext[0] + 
rec["after_replaced_vars"] = ext[1] + rec["after_units"] = ext[2] + rec["after_mem_MB"] = ext[3] + rec["delta_elimed_vars"] = ext[0] - before_ext[0] + rec["delta_replaced_vars"] = ext[1] - before_ext[1] + rec["delta_units"] = ext[2] - before_ext[2] + rec["delta_mem_MB"] = ext[3] - before_ext[3] + rec["step_time"] = ext[4] - before_ext[4] + records.append(rec) + last_completed_step = name + return records + + def main(): parser = argparse.ArgumentParser(description="Parse ganak output files into CSV") parser.add_argument("--files", default="out-ganak*/*cnf*", @@ -324,6 +407,9 @@ def main(): if f.endswith(".out_ganak") or f.endswith(".out"): files[base]["solver"] = "ganak" files[base].update(parse_ganak_output(f)) + simp = parse_simp_stats(f) + if simp: + files[base]["simp_stats"] = simp if ".out_approxmc" in f: files[base]["solver"] = "approxmc" files[base]["solverver"] = approxmc_version(f) @@ -442,6 +528,63 @@ def g(d, key): g(f, "irred_cls"), ]) + preproc_cols = [ + "dirname", "fname", "step_num", "occurrence", "name", "prev_step", + "before_irred_bins", "before_irred_long_cls", "before_irred_long_lits", "before_free_vars", + "after_irred_bins", "after_irred_long_cls", "after_irred_long_lits", "after_free_vars", + "delta_irred_bins", "delta_irred_long_cls", "delta_irred_long_lits", "delta_free_vars", + # Extended fields (newer log format) + "before_elimed_vars", "before_replaced_vars", "before_units", "before_mem_MB", + "after_elimed_vars", "after_replaced_vars", "after_units", "after_mem_MB", + "delta_elimed_vars", "delta_replaced_vars", "delta_units", "delta_mem_MB", + "step_time", + ] + + def g_rec(rec, key): + v = rec.get(key) + return "" if v is None else v + + with open("preproc.csv", "w", newline="") as out: + writer = csv.writer(out) + writer.writerow(preproc_cols) + for _, f in files.items(): + if "simp_stats" not in f: + continue + for rec in f["simp_stats"]: + writer.writerow([ + f["dirname"], + f["fname"], + rec["step_num"], + rec["occurrence"], + 
rec["name"], + rec.get("prev_step", "none"), + rec["before_irred_bins"], + rec["before_irred_long_cls"], + rec["before_irred_long_lits"], + rec["before_free_vars"], + rec["after_irred_bins"], + rec["after_irred_long_cls"], + rec["after_irred_long_lits"], + rec["after_free_vars"], + rec["delta_irred_bins"], + rec["delta_irred_long_cls"], + rec["delta_irred_long_lits"], + rec["delta_free_vars"], + g_rec(rec, "before_elimed_vars"), + g_rec(rec, "before_replaced_vars"), + g_rec(rec, "before_units"), + g_rec(rec, "before_mem_MB"), + g_rec(rec, "after_elimed_vars"), + g_rec(rec, "after_replaced_vars"), + g_rec(rec, "after_units"), + g_rec(rec, "after_mem_MB"), + g_rec(rec, "delta_elimed_vars"), + g_rec(rec, "delta_replaced_vars"), + g_rec(rec, "delta_units"), + g_rec(rec, "delta_mem_MB"), + g_rec(rec, "step_time"), + ]) + with open("ganak.sqlite") as sql_f: subprocess.run(["sqlite3", "data.sqlite3"], stdin=sql_f, check=True) diff --git a/src/main.cpp b/src/main.cpp index c67ab3b8..5af1278a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -552,6 +552,12 @@ void run_weighted_counter(Ganak& counter, const ArjunNS::SimplifiedCNF& cnf, con print_log(od->val); mpfr_printf("c s exact quadruple float %.8Re\n", od->val); } + } else if (mode == 3) { + cout << "c s exact poly " << *cnt << endl; + } else if (mode == 4) { + cout << "c s exact parity " << *cnt << endl; + } else if (mode == 5) { + cout << "c s exact modprime " << *cnt << endl; } if (counter.get_is_approximate()) { cout << "c s pac guarantees epsilon: " << conf.appmc_epsilon << " delta: " << conf.delta << endl; diff --git a/src/mcomplex.hpp b/src/mcomplex.hpp index 00bc9b07..701b8e70 100644 --- a/src/mcomplex.hpp +++ b/src/mcomplex.hpp @@ -27,30 +27,32 @@ THE SOFTWARE. #include #include +// Only accepted format: a+bi or a-bi (spaces around '+'/'-' are optional). +// Both the real part and the imaginary part must always be present. 
+// Examples: "1/2+4i", "1/2 + 4i", "1/2-4i", "-1/2+4i" +// The weight string must end with the DIMACS terminator " 0". inline bool parse_complex_mpq(const std::string& str, ArjunNS::FMpq& real_out, ArjunNS::FMpq& imag_out, const uint32_t line_no) { uint32_t at = 0; if (!real_out.parse_mpq(str, at, line_no)) return false; ArjunNS::FMpq::skip_whitespace(str, at); - if (at < str.size()) { - if (str[at] == '+' || str[at] == '-') { - bool pos = (str[at] == '+'); - at++; - if (!imag_out.parse_mpq(str, at, line_no)) return false; - ArjunNS::FMpq::skip_whitespace(str, at); - if (at < str.size() && str[at] == 'i') { - at++; - } else { - std::cerr << "ERROR: Expected 'i' at position " << at << " in line " << line_no << std::endl; - return false; - } - if (!pos) imag_out *= ArjunNS::FMpq(-1); - } else { - // Space-separated format: "real imag" (e.g. "2 0") - if (!imag_out.parse_mpq(str, at, line_no)) return false; - } + if (at >= str.size() || (str[at] != '+' && str[at] != '-')) { + std::cerr << "ERROR: complex weight requires both real and imaginary parts (a+bi or a-bi)," + << " missing '+' or '-' at line " << line_no << std::endl; + return false; + } + bool pos = (str[at] == '+'); + at++; + ArjunNS::FMpq::skip_whitespace(str, at); + if (!imag_out.parse_mpq(str, at, line_no)) return false; + ArjunNS::FMpq::skip_whitespace(str, at); + if (at >= str.size() || str[at] != 'i') { + std::cerr << "ERROR: Expected 'i' after imaginary part at line " << line_no << std::endl; + return false; } - return true; + at++; + if (!pos) imag_out *= ArjunNS::FMpq(-1); + return ArjunNS::FMpq::check_end_of_weight(str, at, line_no); } class FComplex final : public CMSat::Field { diff --git a/tests/cnf-files/mode0.cnf b/tests/cnf-files/mode0.cnf new file mode 100644 index 00000000..8eedd3c6 --- /dev/null +++ b/tests/cnf-files/mode0.cnf @@ -0,0 +1,5 @@ +c c RUN: %solver --mode 0 %s | %OutputCheck %s +p cnf 2 1 +1 2 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact arb int 3$ diff --git 
a/tests/cnf-files/mode1.cnf b/tests/cnf-files/mode1.cnf new file mode 100644 index 00000000..c1f7804f --- /dev/null +++ b/tests/cnf-files/mode1.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 1 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 1/3 0 +c p weight -1 2/3 0 +c p weight 2 1/4 0 +c p weight -2 3/4 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact arb frac 1/2$ diff --git a/tests/cnf-files/mode2_invalid_pureim.cnf b/tests/cnf-files/mode2_invalid_pureim.cnf new file mode 100644 index 00000000..50fe7916 --- /dev/null +++ b/tests/cnf-files/mode2_invalid_pureim.cnf @@ -0,0 +1,7 @@ +c c RUN: not %solver --mode 2 %s 2>&1 | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 4i 0 +c p weight -1 3i 0 +c c CHECK: ERROR.*complex weight requires diff --git a/tests/cnf-files/mode2_invalid_purereal.cnf b/tests/cnf-files/mode2_invalid_purereal.cnf new file mode 100644 index 00000000..6aff7ac7 --- /dev/null +++ b/tests/cnf-files/mode2_invalid_purereal.cnf @@ -0,0 +1,7 @@ +c c RUN: not %solver --mode 2 %s 2>&1 | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 5 0 +c p weight -1 3 0 +c c CHECK: ERROR.*complex weight requires diff --git a/tests/cnf-files/mode2_invalid_spacesepar.cnf b/tests/cnf-files/mode2_invalid_spacesepar.cnf new file mode 100644 index 00000000..e5d80680 --- /dev/null +++ b/tests/cnf-files/mode2_invalid_spacesepar.cnf @@ -0,0 +1,7 @@ +c c RUN: not %solver --mode 2 %s 2>&1 | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 4 3 0 +c p weight -1 2 1 0 +c c CHECK: ERROR.*complex weight requires diff --git a/tests/cnf-files/mode2_valid.cnf b/tests/cnf-files/mode2_valid.cnf new file mode 100644 index 00000000..1a15c033 --- /dev/null +++ b/tests/cnf-files/mode2_valid.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 2 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 1/2+1i 0 +c p weight -1 1/2-1i 0 +c p weight 2 1+0i 0 +c p weight -2 1+0i 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact arb frac 3/2 \+ 1i$ diff 
--git a/tests/cnf-files/mode3.cnf b/tests/cnf-files/mode3.cnf new file mode 100644 index 00000000..34db215a --- /dev/null +++ b/tests/cnf-files/mode3.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 3 --npolyvars 2 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 x0+1 0 +c p weight -1 x0 0 +c p weight 2 x1+1 0 +c p weight -2 x1 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact poly 3\*x0\*x1 \+ 2\*x0 \+ 2\*x1 \+ 1$ diff --git a/tests/cnf-files/mode4.cnf b/tests/cnf-files/mode4.cnf new file mode 100644 index 00000000..c598ce4e --- /dev/null +++ b/tests/cnf-files/mode4.cnf @@ -0,0 +1,5 @@ +c c RUN: %solver --mode 4 %s | %OutputCheck %s +p cnf 2 1 +1 2 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact parity 1$ diff --git a/tests/cnf-files/mode5.cnf b/tests/cnf-files/mode5.cnf new file mode 100644 index 00000000..af32b371 --- /dev/null +++ b/tests/cnf-files/mode5.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 5 --prime 7 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 3 0 +c p weight -1 2 0 +c p weight 2 4 0 +c p weight -2 1 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact modprime 2 mod 7$ diff --git a/tests/cnf-files/mode6.cnf b/tests/cnf-files/mode6.cnf new file mode 100644 index 00000000..ad4287bd --- /dev/null +++ b/tests/cnf-files/mode6.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 6 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 1/2+1i 0 +c p weight -1 1/2-1i 0 +c p weight 2 1+0i 0 +c p weight -2 1+0i 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact quadruple float 1\.50000000e\+00 \+ 1\.00000000e\+00i$ diff --git a/tests/cnf-files/mode7.cnf b/tests/cnf-files/mode7.cnf new file mode 100644 index 00000000..fc63da3a --- /dev/null +++ b/tests/cnf-files/mode7.cnf @@ -0,0 +1,10 @@ +c c RUN: %solver --mode 7 %s | %OutputCheck %s +p cnf 2 1 +c t wmc +1 2 0 +c p weight 1 1/3 0 +c p weight -1 2/3 0 +c p weight 2 1/4 0 +c p weight -2 3/4 0 +c c CHECK: ^s SATISFIABLE$ +c c CHECK: ^c s exact quadruple float 
5\.00000000e\-01$