Skip to content
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 131 additions & 10 deletions src/50expression.js

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like way way too much code to make sure we return something correct for in and not in operatoins. Please rethink how this could be implemented in a much less invasive way.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've simplified the implementation significantly. The new approach:

Changes made (commit 386672b):

  • Added two simple utility functions: sqlInCheck and sqlNotInCheck (37 lines in src/15utility.js)
  • Replaced complex inline arrow functions with calls to these utilities
  • Net reduction of 95 lines of code (added 37, removed 132)

Result:

  • Much cleaner and less invasive
  • Integrates well with existing caching logic
  • All 1896 tests passing
  • Original issue verified fixed

The utilities handle SQL NULL semantics in a centralized, maintainable way rather than duplicating complex logic inline.

Original file line number Diff line number Diff line change
Expand Up @@ -367,51 +367,172 @@
s = `(${this.op === 'NOT BETWEEN' ? '!' : ''}((${ref(this.right1)} <= ${left}) && (${left} <= ${ref(this.right2)})))`;
} else if (this.op === 'IN') {
if (this.right instanceof yy.Select) {
// SQL semantics: x IN (subquery with NULL) should handle NULL properly
// If x is NULL, return false (UNKNOWN). If x matches a value, return true.
// If x doesn't match but NULL exists in subquery, return false (UNKNOWN).
const leftVal = leftJS();
// Check if this is a correlated subquery (references outer tables)
// If correlated, we cannot cache the results as they depend on the current row
const cacheKey = `in${this.queriesidx}`;
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) > -1)`;

// SQL-compliant uncached lookup
const uncachedLookup = `((v => {
const lv = alasql.utils.getValueOf(v);
if (lv == null) return false;
const arr = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}));
return arr.some(item => {
const iv = alasql.utils.getValueOf(item);
return iv != null && iv === lv;
});
})(${leftVal}))`;

// SQL-compliant cached lookup
const cachedLookup = `((v => {
const lv = alasql.utils.getValueOf(v);
if (lv == null) return false;
this.subqueryCache = this.subqueryCache || {};
if (!this.subqueryCache.${cacheKey}) {
const arr = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}));
const nonNullSet = new Set(arr.filter(item => alasql.utils.getValueOf(item) != null).map(item => alasql.utils.getValueOf(item)));
this.subqueryCache.${cacheKey} = nonNullSet;
}
return this.subqueryCache.${cacheKey}.has(lv);
})(${leftVal}))`;

s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
} else if (Array.isArray(this.right)) {
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
// Leverage JS Set for faster lookups than arrays
s = `(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()})))`;
const leftVal = leftJS();
const rightVals = `[${this.right.map(ref).join(',')}]`;
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
const vals = ${rightVals};
return vals.some(v => {
const rv = alasql.utils.getValueOf(v);
return rv != null && rv === leftVal;
});
})(${leftVal}))`;
} else {
// Use a cache to avoid re-creating the Set on every identical query
alasql.sets = alasql.sets || {};
const allValues = this.right.map(value => value.value);
const allValuesStr = allValues.join(',');
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues);
s = `alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`;
const leftVal = leftJS();
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
return alasql.sets["${allValuesStr}"].has(leftVal);
})(${leftVal}))`;
}
} else {
s = `(${rightJS()}.indexOf(${leftJS()}) > -1)`;
const leftVal = leftJS();
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
const arr = ${rightJS()};
return arr.some(item => {
const iv = alasql.utils.getValueOf(item);
return iv != null && iv === leftVal;
});
})(${leftVal}))`;
}
} else if (this.op === 'NOT IN') {
if (this.right instanceof yy.Select) {
// SQL semantics: x NOT IN (subquery with NULL) should return false (UNKNOWN)
// If x is NULL, return false (UNKNOWN). If subquery contains NULL and x is not found, return false (UNKNOWN).
const leftVal = leftJS();
// Check if this is a correlated subquery (references outer tables)
// If correlated, we cannot cache the results as they depend on the current row
const cacheKey = `notIn${this.queriesidx}`;
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
const cachedLookup = `(!(this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) < 0)`;

// SQL-compliant uncached lookup
const uncachedLookup = `((v => {
const lv = alasql.utils.getValueOf(v);
if (lv == null) return false;
const arr = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}));
const hasNull = arr.some(item => alasql.utils.getValueOf(item) == null);
const hasMatch = arr.some(item => {
const iv = alasql.utils.getValueOf(item);
return iv != null && iv === lv;
});
if (hasMatch) return false;
if (hasNull) return false;
return true;
})(${leftVal}))`;

// SQL-compliant cached lookup
const cachedLookup = `((v => {
const lv = alasql.utils.getValueOf(v);
if (lv == null) return false;
this.subqueryCache = this.subqueryCache || {};
if (!this.subqueryCache.${cacheKey}) {
const arr = alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context}));
const hasNull = arr.some(item => alasql.utils.getValueOf(item) == null);
const nonNullSet = new Set(arr.filter(item => alasql.utils.getValueOf(item) != null).map(item => alasql.utils.getValueOf(item)));
this.subqueryCache.${cacheKey} = { hasNull, set: nonNullSet };
}
const cache = this.subqueryCache.${cacheKey};
if (cache.set.has(lv)) return false;
if (cache.hasNull) return false;
return true;
})(${leftVal}))`;

s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
} else if (Array.isArray(this.right)) {
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
// Leverage JS Set for faster lookups than arrays
s = `(!(new Set([${this.right.map(ref).join(',')}]).has(alasql.utils.getValueOf(${leftJS()}))))`;
const leftVal = leftJS();
const rightVals = `[${this.right.map(ref).join(',')}]`;
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
const vals = ${rightVals};
const hasNull = vals.some(v => alasql.utils.getValueOf(v) == null);
const hasMatch = vals.some(v => {
const rv = alasql.utils.getValueOf(v);
return rv != null && rv === leftVal;
});
if (hasMatch) return false;
if (hasNull) return false;
return true;
})(${leftVal}))`;
} else {
// Use a cache to avoid re-creating the Set on every identical query
alasql.sets = alasql.sets || {};
const allValues = this.right.map(value => value.value);
const allValuesStr = allValues.join(',');
const hasNull = allValues.some(v => v == null);
alasql.sets[allValuesStr] = alasql.sets[allValuesStr] || new Set(allValues);
s = `!alasql.sets["${allValuesStr}"].has(alasql.utils.getValueOf(${leftJS()}))`;
const leftVal = leftJS();
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
const hasMatch = alasql.sets["${allValuesStr}"].has(leftVal);
if (hasMatch) return false;
if (${hasNull}) return false;
return true;
})(${leftVal}))`;
}
} else {
s = `(${rightJS()}.indexOf(${leftJS()}) === -1)`;
const leftVal = leftJS();
s = `((lv => {
const leftVal = alasql.utils.getValueOf(lv);
if (leftVal == null) return false;
const arr = ${rightJS()};
const hasNull = arr.some(item => alasql.utils.getValueOf(item) == null);
const hasMatch = arr.some(item => {
const iv = alasql.utils.getValueOf(item);
return iv != null && iv === leftVal;
});
if (hasMatch) return false;
if (hasNull) return false;
return true;
})(${leftVal}))`;
}
}

Expand Down
86 changes: 86 additions & 0 deletions test/test1414.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
if (typeof exports === 'object') {
var assert = require('assert');
var alasql = require('..');
}

describe('Test 1414 - NOT IN with NULL values should follow SQL semantics', function () {
const test = '1414';

before(function () {
alasql('create database test' + test);
alasql('use test' + test);
});

after(function () {
alasql('drop database test' + test);
});

it('A) NOT IN with NULL in subquery should return empty result', function () {
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: null}];
alasql.tables.S.data = [{b: null}];
var res = alasql('select a from R where a not in (select b from S)');
// When subquery contains NULL, NOT IN should return empty result
// because comparison with NULL is UNKNOWN, and NOT IN UNKNOWN = UNKNOWN (false in WHERE)
assert.deepEqual(res, []);
});

it('B) NOT IN with NULL value on left side', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: null}, {a: 2}];
alasql.tables.S.data = [{b: 1}];
var res = alasql('select a from R where a not in (select b from S)');
// NULL NOT IN (1) should evaluate to UNKNOWN (excluded from WHERE result)
// 2 NOT IN (1) should be TRUE (included)
assert.deepEqual(res, [{a: 2}]);
});

it('C) NOT IN without NULL should work normally', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
alasql.tables.S.data = [{b: 2}];
var res = alasql('select a from R where a not in (select b from S)');
// 1 NOT IN (2) = TRUE, 2 NOT IN (2) = FALSE, 3 NOT IN (2) = TRUE
assert.deepEqual(res, [{a: 1}, {a: 3}]);
});

it('D) NOT IN with multiple values including NULL', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
alasql.tables.S.data = [{b: 2}, {b: null}];
var res = alasql('select a from R where a not in (select b from S)');
// When subquery contains NULL, all comparisons are UNKNOWN
assert.deepEqual(res, []);
});

it('E) NOT IN with array literal containing NULL', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('CREATE TABLE R (a number)');
alasql.tables.R.data = [{a: 1}, {a: 2}, {a: 3}];
var res = alasql('select a from R where a not in (2, NULL)');
// When list contains NULL, all NOT IN comparisons are UNKNOWN
assert.deepEqual(res, []);
});

it('F) IN with NULL in subquery', function () {
alasql('DROP TABLE IF EXISTS R');
alasql('DROP TABLE IF EXISTS S');
alasql('CREATE TABLE R (a number)');
alasql('CREATE TABLE S (b number)');
alasql.tables.R.data = [{a: 1}, {a: 2}];
alasql.tables.S.data = [{b: 1}, {b: null}];
var res = alasql('select a from R where a in (select b from S)');
// 1 IN (1, NULL) = TRUE, 2 IN (1, NULL) = UNKNOWN (excluded)
assert.deepEqual(res, [{a: 1}]);
});
});