-
Notifications
You must be signed in to change notification settings - Fork 691
Implement LEAD, LAG, FIRST_VALUE, LAST_VALUE window functions #2444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 3 commits
e4088e8
0029d50
6532331
d39981d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -570,6 +570,46 @@ yy.Select.prototype.compileSelectGroup0 = function (query) { | |
| if (col.funcid && col.funcid.toUpperCase() === 'GROUP_ROW_NUMBER') { | ||
| query.grouprownums.push({as: col.as, columnIndex: 0}); // Track which column to use for grouping | ||
| } | ||
|
|
||
| // Detect positional window functions: LEAD, LAG, FIRST_VALUE, LAST_VALUE | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets move the detection into the parser so we dont pollute the areas of concern. Please consider how we can add lead, lag, first value, last value to the grammar in alasqlparser.jison (use |
||
| if (col.funcid) { | ||
| var fid = col.funcid.toUpperCase(); | ||
| if (fid === 'LEAD' || fid === 'LAG' || fid === 'FIRST_VALUE' || fid === 'LAST_VALUE') { | ||
| var wfConfig = { | ||
| funcid: fid, | ||
| as: col.as, | ||
| expressionColumnId: col.args && col.args[0] ? col.args[0].columnid : null, | ||
| offset: col.args && col.args[1] ? col.args[1].value : 1, | ||
| defaultValue: | ||
| col.args && col.args[2] | ||
| ? col.args[2].value != null | ||
| ? col.args[2].value | ||
| : col.args[2].op === '-' && col.args[2].right | ||
| ? -col.args[2].right.value | ||
| : null | ||
| : null, | ||
| partitionColumns: | ||
| col.over && col.over.partition | ||
| ? col.over.partition.map(function (p) { | ||
| return p.columnid || p.toString(); | ||
| }) | ||
| : [], | ||
| orderColumns: | ||
| col.over && col.over.order | ||
| ? col.over.order.map(function (o) { | ||
| return { | ||
| columnid: | ||
| o.expression && o.expression.columnid | ||
| ? o.expression.columnid | ||
| : o.columnid || o.toString(), | ||
| direction: o.direction || 'ASC', | ||
| }; | ||
| }) | ||
| : [], | ||
| }; | ||
| query.windowfns.push(wfConfig); | ||
| } | ||
| } | ||
| // console.log("colas:",colas); | ||
| // } | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -250,6 +250,18 @@ stdlib.ROW_NUMBER = function () { | |
| stdlib.GROUP_ROW_NUMBER = function () { | ||
| return '1'; | ||
| }; | ||
| stdlib.LEAD = function () { | ||
| return 'undefined'; | ||
| }; | ||
| stdlib.LAG = function () { | ||
| return 'undefined'; | ||
| }; | ||
| stdlib.FIRST_VALUE = function () { | ||
| return 'undefined'; | ||
| }; | ||
| stdlib.LAST_VALUE = function () { | ||
| return 'undefined'; | ||
| }; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its not great to have functions intended to not do anything other than being a flag for code to operate on. Lets move the functionality to the jison parser. |
||
|
|
||
| stdlib.SQRT = function (s) { | ||
| return 'Math.sqrt(' + s + ')'; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,238 @@ | ||
| if (typeof exports === 'object') { | ||
| var assert = require('assert'); | ||
| var alasql = require('..'); | ||
| } | ||
|
|
||
| describe('Test 2409 - LEAD/LAG/FIRST_VALUE/LAST_VALUE Window Functions', function () { | ||
| var data = [ | ||
| {dept: 'Sales', emp: 'Alice', salary: 1000}, | ||
| {dept: 'Sales', emp: 'Bob', salary: 1200}, | ||
| {dept: 'Sales', emp: 'Carol', salary: 1500}, | ||
| {dept: 'IT', emp: 'Dave', salary: 2000}, | ||
| {dept: 'IT', emp: 'Eve', salary: 2500}, | ||
| ]; | ||
|
|
||
| // --- LEAD tests --- | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please make sure the tests makes sense without comments
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see now its because you are bundling across lead lag first last and so on. I propose making a level of So instead of the comment on this line its something like |
||
|
|
||
| it('1. LEAD basic - next row value', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary) AS next_salary FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| assert.strictEqual(res[0].next_salary, 1200); | ||
| assert.strictEqual(res[1].next_salary, 1500); | ||
| assert.strictEqual(res[4].next_salary, null); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please seek to compare the output from alasql with the complete output json object so we provide the total output expected as one object. |
||
| }); | ||
|
|
||
| it('2. LEAD with explicit offset', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LEAD(salary, 2) OVER (ORDER BY salary) AS next2_salary FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| assert.strictEqual(res[0].next2_salary, 1500); | ||
| assert.strictEqual(res[1].next2_salary, 2000); | ||
| assert.strictEqual(res[3].next2_salary, null); | ||
| assert.strictEqual(res[4].next2_salary, null); | ||
| }); | ||
|
|
||
| it('3. LEAD with custom default', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LEAD(salary, 1, 0) OVER (ORDER BY salary) AS next_salary FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| assert.strictEqual(res[4].next_salary, 0); | ||
| assert.strictEqual(res[0].next_salary, 1200); | ||
| }); | ||
|
|
||
| it('4. LEAD with PARTITION BY', function () { | ||
| var res = alasql( | ||
| 'SELECT dept, emp, salary, LEAD(salary) OVER (PARTITION BY dept ORDER BY salary) AS next_salary FROM ? ORDER BY dept, salary', | ||
| [data] | ||
| ); | ||
| // IT partition: Dave(2000), Eve(2500) | ||
| var it = res.filter(function (r) { | ||
| return r.dept === 'IT'; | ||
| }); | ||
| assert.strictEqual(it[0].next_salary, 2500); | ||
| assert.strictEqual(it[1].next_salary, null); | ||
|
|
||
| // Sales partition: Alice(1000), Bob(1200), Carol(1500) | ||
| var sales = res.filter(function (r) { | ||
| return r.dept === 'Sales'; | ||
| }); | ||
| assert.strictEqual(sales[0].next_salary, 1200); | ||
| assert.strictEqual(sales[1].next_salary, 1500); | ||
| assert.strictEqual(sales[2].next_salary, null); | ||
| }); | ||
|
|
||
| // --- LAG tests --- | ||
|
|
||
| it('5. LAG basic - previous row value', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LAG(salary) OVER (ORDER BY salary) AS prev_salary FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| assert.strictEqual(res[0].prev_salary, null); | ||
| assert.strictEqual(res[1].prev_salary, 1000); | ||
| assert.strictEqual(res[4].prev_salary, 2000); | ||
| }); | ||
|
|
||
| it('6. LAG with offset and default', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LAG(salary, 2, -1) OVER (ORDER BY salary) AS prev2_salary FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| assert.strictEqual(res[0].prev2_salary, -1); | ||
| assert.strictEqual(res[1].prev2_salary, -1); | ||
| assert.strictEqual(res[2].prev2_salary, 1000); | ||
| assert.strictEqual(res[3].prev2_salary, 1200); | ||
| }); | ||
|
|
||
| it('7. LAG with PARTITION BY', function () { | ||
| var res = alasql( | ||
| 'SELECT dept, emp, salary, LAG(salary) OVER (PARTITION BY dept ORDER BY salary) AS prev_salary FROM ? ORDER BY dept, salary', | ||
| [data] | ||
| ); | ||
| var it = res.filter(function (r) { | ||
| return r.dept === 'IT'; | ||
| }); | ||
| assert.strictEqual(it[0].prev_salary, null); | ||
| assert.strictEqual(it[1].prev_salary, 2000); | ||
|
|
||
| var sales = res.filter(function (r) { | ||
| return r.dept === 'Sales'; | ||
| }); | ||
| assert.strictEqual(sales[0].prev_salary, null); | ||
| assert.strictEqual(sales[1].prev_salary, 1000); | ||
| assert.strictEqual(sales[2].prev_salary, 1200); | ||
| }); | ||
|
|
||
| // --- FIRST_VALUE tests --- | ||
|
|
||
| it('8. FIRST_VALUE basic', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, FIRST_VALUE(salary) OVER (ORDER BY salary) AS first_sal FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| for (var i = 0; i < res.length; i++) { | ||
| assert.strictEqual(res[i].first_sal, 1000); | ||
| } | ||
| }); | ||
|
|
||
| it('9. FIRST_VALUE with PARTITION BY', function () { | ||
| var res = alasql( | ||
| 'SELECT dept, emp, salary, FIRST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary) AS first_sal FROM ? ORDER BY dept, salary', | ||
| [data] | ||
| ); | ||
| var it = res.filter(function (r) { | ||
| return r.dept === 'IT'; | ||
| }); | ||
| assert.strictEqual(it[0].first_sal, 2000); | ||
| assert.strictEqual(it[1].first_sal, 2000); | ||
|
|
||
| var sales = res.filter(function (r) { | ||
| return r.dept === 'Sales'; | ||
| }); | ||
| assert.strictEqual(sales[0].first_sal, 1000); | ||
| assert.strictEqual(sales[2].first_sal, 1000); | ||
| }); | ||
|
|
||
| it('10. FIRST_VALUE with column name reference', function () { | ||
| var res = alasql( | ||
| 'SELECT dept, emp, FIRST_VALUE(emp) OVER (PARTITION BY dept ORDER BY salary) AS first_emp FROM ? ORDER BY dept, salary', | ||
| [data] | ||
| ); | ||
| var it = res.filter(function (r) { | ||
| return r.dept === 'IT'; | ||
| }); | ||
| assert.strictEqual(it[0].first_emp, 'Dave'); | ||
| assert.strictEqual(it[1].first_emp, 'Dave'); | ||
| }); | ||
|
|
||
| // --- LAST_VALUE tests --- | ||
|
|
||
| it('11. LAST_VALUE basic', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LAST_VALUE(salary) OVER (ORDER BY salary) AS last_sal FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| for (var i = 0; i < res.length; i++) { | ||
| assert.strictEqual(res[i].last_sal, 2500); | ||
| } | ||
| }); | ||
|
|
||
| it('12. LAST_VALUE with PARTITION BY', function () { | ||
| var res = alasql( | ||
| 'SELECT dept, emp, salary, LAST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary) AS last_sal FROM ? ORDER BY dept, salary', | ||
| [data] | ||
| ); | ||
| var it = res.filter(function (r) { | ||
| return r.dept === 'IT'; | ||
| }); | ||
| assert.strictEqual(it[0].last_sal, 2500); | ||
| assert.strictEqual(it[1].last_sal, 2500); | ||
|
|
||
| var sales = res.filter(function (r) { | ||
| return r.dept === 'Sales'; | ||
| }); | ||
| assert.strictEqual(sales[0].last_sal, 1500); | ||
| assert.strictEqual(sales[2].last_sal, 1500); | ||
| }); | ||
|
|
||
| // --- Edge cases --- | ||
|
|
||
| it('13. Null values in column', function () { | ||
| var dataWithNulls = [ | ||
| {id: 1, val: 10}, | ||
| {id: 2, val: null}, | ||
| {id: 3, val: 30}, | ||
| ]; | ||
| var res = alasql('SELECT id, val, LAG(val) OVER (ORDER BY id) AS prev_val FROM ? ORDER BY id', [ | ||
| dataWithNulls, | ||
| ]); | ||
| assert.strictEqual(res[0].prev_val, null); | ||
| assert.strictEqual(res[1].prev_val, 10); | ||
| assert.strictEqual(res[2].prev_val, null); // null from the data row | ||
| }); | ||
|
|
||
| it('14. Offset exceeds partition size', function () { | ||
| var smallData = [ | ||
| {id: 1, val: 100}, | ||
| {id: 2, val: 200}, | ||
| ]; | ||
| var res = alasql( | ||
| 'SELECT id, val, LEAD(val, 5) OVER (ORDER BY id) AS far_ahead FROM ? ORDER BY id', | ||
| [smallData] | ||
| ); | ||
| assert.strictEqual(res[0].far_ahead, null); | ||
| assert.strictEqual(res[1].far_ahead, null); | ||
| }); | ||
|
|
||
| it('15. Multiple window functions in one query', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary) AS next_sal, LAG(salary) OVER (ORDER BY salary) AS prev_sal, FIRST_VALUE(salary) OVER (ORDER BY salary) AS first_sal, LAST_VALUE(salary) OVER (ORDER BY salary) AS last_sal FROM ? ORDER BY salary', | ||
| [data] | ||
| ); | ||
| // First row | ||
| assert.strictEqual(res[0].prev_sal, null); | ||
| assert.strictEqual(res[0].next_sal, 1200); | ||
| assert.strictEqual(res[0].first_sal, 1000); | ||
| assert.strictEqual(res[0].last_sal, 2500); | ||
| // Last row | ||
| assert.strictEqual(res[4].prev_sal, 2000); | ||
| assert.strictEqual(res[4].next_sal, null); | ||
| assert.strictEqual(res[4].first_sal, 1000); | ||
| assert.strictEqual(res[4].last_sal, 2500); | ||
| }); | ||
|
|
||
| it('16. DESC ordering', function () { | ||
| var res = alasql( | ||
| 'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary DESC) AS next_sal FROM ? ORDER BY salary DESC', | ||
| [data] | ||
| ); | ||
| // DESC order: 2500, 2000, 1500, 1200, 1000 | ||
| assert.strictEqual(res[0].next_sal, 2000); | ||
| assert.strictEqual(res[1].next_sal, 1500); | ||
| assert.strictEqual(res[4].next_sal, null); | ||
| }); | ||
| }); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice way to structure this functionality