Skip to content

Commit b3f62d6

Browse files
committed
Typed workflows
Signed-off-by: Ben Sherman <bentshermann@gmail.com>
1 parent 432fce8 commit b3f62d6

File tree

3 files changed

+227
-119
lines changed

3 files changed

+227
-119
lines changed

src/main/java/nextflow/script/control/TupleOpResolver.java renamed to src/main/java/nextflow/script/control/DataflowOpResolver.java

Lines changed: 75 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,19 @@
1515
*/
1616
package nextflow.script.control;
1717

18-
import java.util.Collections;
18+
import java.lang.reflect.Modifier;
19+
import java.util.ArrayList;
1920
import java.util.List;
20-
import java.util.Map;
2121

2222
import nextflow.script.types.Bag;
2323
import nextflow.script.types.Channel;
24+
import nextflow.script.types.Record;
2425
import nextflow.script.types.Tuple;
26+
import nextflow.script.types.TypesEx;
2527
import nextflow.script.types.Value;
2628
import org.codehaus.groovy.ast.ClassHelper;
2729
import org.codehaus.groovy.ast.ClassNode;
30+
import org.codehaus.groovy.ast.FieldNode;
2831
import org.codehaus.groovy.ast.GenericsType;
2932
import org.codehaus.groovy.ast.MethodNode;
3033
import org.codehaus.groovy.ast.expr.Expression;
@@ -37,16 +40,17 @@
3740
*
3841
* @author Ben Sherman <bentshermann@gmail.com>
3942
*/
40-
class TupleOpResolver {
43+
class DataflowOpResolver {
4144

4245
private static final ClassNode BAG_TYPE = ClassHelper.makeCached(Bag.class);
4346
private static final ClassNode CHANNEL_TYPE = ClassHelper.makeCached(Channel.class);
47+
private static final ClassNode RECORD_TYPE = ClassHelper.makeCached(Record.class);
4448
private static final ClassNode TUPLE_TYPE = ClassHelper.makeCached(Tuple.class);
4549
private static final ClassNode VALUE_TYPE = ClassHelper.makeCached(Value.class);
4650

4751
/**
48-
* Resolve the return type of dataflow operators that tranform
49-
* tuples, such as `combine`, `groupTuple`, and `join`.
52+
* Resolve the return type of dataflow operators where applicable,
53+
* such as `combine`, `groupBy`, and `join`.
5054
*
5155
* @param lhsType
5256
* @param method
@@ -58,7 +62,7 @@ public ClassNode apply(ClassNode lhsType, MethodNode method, List<Expression> ar
5862
if( "combine".equals(name) )
5963
return applyCombine(lhsType, arguments);
6064

61-
if( "groupTuple".equals(name) )
65+
if( "groupBy".equals(name) )
6266
return applyGroupBy(lhsType, arguments);
6367

6468
if( "join".equals(name) )
@@ -71,122 +75,109 @@ public ClassNode apply(ClassNode lhsType, MethodNode method, List<Expression> ar
7175
* Resolve the result type of a `combine` operation in terms of the left
7276
* and right operands.
7377
*
74-
* Given arguments of type `(L1, L2, ..., Lm)` and `R`, `combine`
75-
* produces a tuple of type `(L1, L2, ..., Lm, R).
76-
*
77-
* When the `by` option is specified, `combine` produces the same result
78-
* type as `join`.
78+
* Given arguments of type `L` and `R`, `combine` produces a tuple of type
79+
* `(L, R)`. If `L` and/or `R` are tuples, they are flattened into the resulting
80+
* tuple.
7981
*
8082
* @param lhsType
8183
* @param arguments
8284
*/
8385
private ClassNode applyCombine(ClassNode lhsType, List<Expression> arguments) {
84-
if( !TUPLE_TYPE.equals(lhsType) )
85-
return ClassHelper.dynamicType();
86-
87-
var namedArgs = namedArgs(arguments);
88-
if( namedArgs.containsKey("by") )
89-
return applyJoin(lhsType, arguments);
90-
91-
var argType = getType(arguments.get(arguments.size() - 1));
92-
var rhsType = dataflowElementType(argType);
86+
if( arguments.size() == 1 && arguments.get(0) instanceof NamedArgumentListExpression nale )
87+
return applyCombineNamedArgs(lhsType, nale);
88+
var rhsType = dataflowElementType(getType(arguments.get(0)));
89+
var componentTypes = new ArrayList<ClassNode>();
90+
if( !combineTupleOrValue(componentTypes, lhsType) )
91+
return channelTupleType(null);
92+
if( !combineTupleOrValue(componentTypes, rhsType) )
93+
return channelTupleType(null);
94+
var gts = componentTypes.stream()
95+
.map(cn -> new GenericsType(cn))
96+
.toArray(GenericsType[]::new);
97+
return channelTupleType(gts);
98+
}
9399

94-
var lgts = lhsType.getGenericsTypes();
95-
if( lgts == null || lgts.length == 0 )
100+
private ClassNode applyCombineNamedArgs(ClassNode lhsType, NamedArgumentListExpression nale) {
101+
if( !RECORD_TYPE.equals(lhsType) )
96102
return ClassHelper.dynamicType();
103+
var rhsType = new ClassNode(Record.class);
104+
for( var entry : nale.getMapEntryExpressions() ) {
105+
var name = entry.getKeyExpression().getText();
106+
var value = entry.getValueExpression();
107+
var valueType = dataflowValueType(getType(value));
108+
var fn = new FieldNode(name, Modifier.PUBLIC, valueType, rhsType, null);
109+
fn.setDeclaringClass(rhsType);
110+
rhsType.addField(fn);
111+
}
112+
var elementType = recordSumType(lhsType, rhsType);
113+
return makeType(CHANNEL_TYPE, elementType);
114+
}
97115

98-
var gts = new GenericsType[lgts.length + 1];
99-
for( int i = 0; i < lgts.length; i++ )
100-
gts[i] = lgts[i];
101-
gts[lgts.length] = new GenericsType(rhsType);
116+
private static ClassNode dataflowValueType(ClassNode type) {
117+
if( CHANNEL_TYPE.equals(type) )
118+
return ClassHelper.dynamicType();
119+
if( VALUE_TYPE.equals(type) )
120+
return elementType(type);
121+
return type;
122+
}
102123

103-
return channelTupleType(gts);
124+
private boolean combineTupleOrValue(List<ClassNode> componentTypes, ClassNode type) {
125+
if( TUPLE_TYPE.equals(type) ) {
126+
var gts = type.getGenericsTypes();
127+
if( gts == null && gts.length == 0 )
128+
return false;
129+
for( int i = 0; i < gts.length; i++ )
130+
componentTypes.add(gts[i].getType());
131+
}
132+
else {
133+
componentTypes.add(type);
134+
}
135+
return true;
104136
}
105137

106138
/**
107-
* Resolve the result type of a `groupTuple` operation.
139+
* Resolve the result type of a `groupBy` operation.
108140
*
109-
* Given source tuples of type `(K, V1, V2, ..., Vn)`,
110-
* `groupTuple` produces a tuple of type `(K, Bag<V1>, Bag<V2>, ..., Bag<Vn>)`.
141+
* Given source tuples of type `(K, N, V)` or `(K, V)`,
142+
* `groupBy` produces a tuple of type `(K, Bag<V>)`.
111143
*
112144
* @param lhsType
113145
* @param arguments
114146
*/
115147
private ClassNode applyGroupBy(ClassNode lhsType, List<Expression> arguments) {
116148
if( !TUPLE_TYPE.equals(lhsType) )
117149
return ClassHelper.dynamicType();
118-
119-
var namedArgs = namedArgs(arguments);
120-
if( namedArgs.containsKey("by") )
121-
return ClassHelper.dynamicType();
122-
123150
var lgts = lhsType.getGenericsTypes();
124-
if( lgts == null || lgts.length == 0 )
151+
if( lgts == null || !(lgts.length == 2 || lgts.length == 3) )
125152
return ClassHelper.dynamicType();
126-
127-
// TODO: group on index specified by `by` option
128-
// TODO: skip if `by` option isn't a single integer
129-
var gts = new GenericsType[lgts.length];
130-
gts[0] = lgts[0];
131-
for( int i = 1; i < lgts.length; i++ ) {
132-
var groupType = makeType(BAG_TYPE, lgts[i].getType());
133-
gts[i] = new GenericsType(groupType);
134-
}
135-
153+
var keyType = lgts[0].getType();
154+
var valueType = lgts[lgts.length - 1].getType();
155+
var gts = new GenericsType[] {
156+
new GenericsType(keyType),
157+
new GenericsType(makeType(BAG_TYPE, valueType))
158+
};
136159
return channelTupleType(gts);
137160
}
138161

139162
/**
140163
* Resolve the result type of a `join` operation in terms of the left
141164
* and right operands.
142165
*
143-
* Given tuples of type `(K, L1, L2, ..., Lm)` and `(K, R1, R2, ..., Rn)`,
144-
* `join` produces a tuple of type `(K, L1, L2, ..., Lm, R1, R2, ..., Rn).
166+
* Given two matching records R1 and R2, `join` produces R1 + R2.
145167
*
146168
* @param lhsType
147169
* @param arguments
148170
*/
149171
private ClassNode applyJoin(ClassNode lhsType, List<Expression> arguments) {
150-
if( !TUPLE_TYPE.equals(lhsType) )
151-
return ClassHelper.dynamicType();
152-
153-
var namedArgs = namedArgs(arguments);
154-
if( namedArgs.containsKey("by") )
172+
if( !RECORD_TYPE.equals(lhsType) )
155173
return ClassHelper.dynamicType();
156-
157174
var argType = getType(arguments.get(arguments.size() - 1));
158175
var rhsType = dataflowElementType(argType);
159-
if( !TUPLE_TYPE.equals(rhsType) )
176+
if( !RECORD_TYPE.equals(rhsType) )
160177
return ClassHelper.dynamicType();
161-
162-
var lgts = lhsType.getGenericsTypes();
163-
var rgts = rhsType.getGenericsTypes();
164-
if( lgts == null || lgts.length == 0 || rgts == null || rgts.length == 0 )
165-
return ClassHelper.dynamicType();
166-
167-
// TODO: join on index specified by `by` option
168-
// TODO: skip if `by` option isn't a single integer
169-
var gts = new GenericsType[lgts.length + rgts.length - 1];
170-
for( int i = 0; i < lgts.length; i++ )
171-
gts[i] = lgts[i];
172-
for( int i = 1; i < rgts.length; i++ )
173-
gts[lgts.length + i - 1] = rgts[i];
174-
175-
return channelTupleType(gts);
176-
}
177-
178-
private static Map<String,Expression> namedArgs(List<Expression> args) {
179-
return args.size() > 0 && args.get(0) instanceof NamedArgumentListExpression nale
180-
? Map.ofEntries(
181-
nale.getMapEntryExpressions().stream()
182-
.map((entry) -> {
183-
var name = entry.getKeyExpression().getText();
184-
var value = entry.getValueExpression();
185-
return Map.entry(name, value);
186-
})
187-
.toArray(Map.Entry[]::new)
188-
)
189-
: Collections.emptyMap();
178+
// TODO: report error if `by` field is not in both records
179+
var elementType = recordSumType(lhsType, rhsType);
180+
return makeType(CHANNEL_TYPE, elementType);
190181
}
191182

192183
private static ClassNode dataflowElementType(ClassNode type) {

src/main/java/nextflow/script/control/TypeCheckingVisitorEx.java

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import nextflow.script.ast.FunctionNode;
3030
import nextflow.script.ast.OutputNode;
3131
import nextflow.script.ast.ProcessNode;
32-
import nextflow.script.ast.ProcessNodeV1;
3332
import nextflow.script.ast.ProcessNodeV2;
3433
import nextflow.script.ast.RecordNode;
3534
import nextflow.script.ast.ScriptNode;
@@ -259,15 +258,6 @@ private void visitProcessTopics(Statement block) {
259258
}
260259
}
261260

262-
@Override
263-
public void visitProcessV1(ProcessNodeV1 node) {
264-
// don't try to type-check input/output directives
265-
visitProcessDirectives(node.directives);
266-
visit(node.when);
267-
visit(node.exec);
268-
visit(node.stub);
269-
}
270-
271261
@Override
272262
public void visitFunction(FunctionNode node) {
273263
// visit parameters and code
@@ -292,7 +282,7 @@ public void visitOutput(OutputNode node) {
292282
var call = asMethodCallX(stmt);
293283
if( checkPublishStatements(call, elementType) )
294284
continue;
295-
super.visitMethodCallExpression(call);
285+
visitMethodCallExpression(call);
296286
}
297287
}
298288

@@ -462,6 +452,7 @@ public void visitMethodCallExpression(MethodCallExpression node) {
462452
node.putNodeMetaData(ASTNodeMarker.INFERRED_TYPE, dummyMethod.getReturnType());
463453

464454
checkOperatorCall(node);
455+
checkWorkflowCall(node);
465456
}
466457
else if( node.getNodeMetaData(ASTNodeMarker.METHOD_TARGET) instanceof MethodNode mn ) {
467458
var parameters = mn.getParameters();
@@ -534,8 +525,7 @@ private void checkSpreadMethodCall(MethodCallExpression node) {
534525
}
535526

536527
/**
537-
* Resolve the return type of operators that transform tuples.
538-
* such as `combine`, `groupTuple`, and `join`.
528+
* Resolve the return type of dataflow operators where applicable.
539529
*
540530
* @param node
541531
*/
@@ -547,16 +537,63 @@ private void checkOperatorCall(MethodCallExpression node) {
547537
if( !CHANNEL_TYPE.equals(receiverType) )
548538
return;
549539

550-
var lhsType = elementType(receiverType);
551540
var method = (MethodNode) node.getNodeMetaData(ASTNodeMarker.METHOD_TARGET);
541+
if( findAnnotation(method, Deprecated.class).isPresent() )
542+
addSoftError("Operator `" + method.getName() + "` is discouraged from use with static typing", node);
543+
544+
var lhsType = elementType(receiverType);
552545
var arguments = asMethodCallArguments(node);
553-
var resultType = new TupleOpResolver().apply(lhsType, method, arguments);
546+
var resultType = new DataflowOpResolver().apply(lhsType, method, arguments);
554547
if( ClassHelper.isDynamicTyped(resultType) )
555548
return;
556549

557550
node.putNodeMetaData(ASTNodeMarker.INFERRED_TYPE, resultType);
558551
}
559552

553+
/**
554+
* Resolve the return type of a workflow based on the declared outputs.
555+
*
556+
* When the workflow declares a single output, the return type contains only
557+
* that type. When the workflow declares multiple outputs, the return type is
558+
* a Record containing each named output.
559+
*
560+
* @param node
561+
*/
562+
private void checkWorkflowCall(MethodCallExpression node) {
563+
var mn = (MethodNode) node.getNodeMetaData(ASTNodeMarker.METHOD_TARGET);
564+
if( mn instanceof WorkflowNode wn ) {
565+
var resultType = workflowOutputType(wn.emits);
566+
node.putNodeMetaData(ASTNodeMarker.INFERRED_TYPE, resultType);
567+
}
568+
}
569+
570+
private static ClassNode workflowOutputType(Statement block) {
571+
var emits = asBlockStatements(block);
572+
if( emits.size() == 1 ) {
573+
var first = emits.get(0);
574+
var emit = ((ExpressionStatement) first).getExpression();
575+
return workflowEmitType(getType(emit));
576+
}
577+
var cn = new ClassNode(Record.class);
578+
for( var stmt : emits ) {
579+
var emit = ((ExpressionStatement) stmt).getExpression();
580+
var emitName = outputTarget(emit).getName();
581+
var emitType = workflowEmitType(getType(emit));
582+
var fn = new FieldNode(emitName, Modifier.PUBLIC, emitType, cn, null);
583+
fn.setDeclaringClass(cn);
584+
cn.addField(fn);
585+
}
586+
return cn;
587+
}
588+
589+
private static ClassNode workflowEmitType(ClassNode innerType) {
590+
if( ClassHelper.isDynamicTyped(innerType) )
591+
return innerType;
592+
if( CHANNEL_TYPE.equals(innerType) || VALUE_TYPE.equals(innerType) )
593+
return innerType;
594+
return makeType(VALUE_TYPE, innerType);
595+
}
596+
560597
/**
561598
* Check the arguments of an invalid method call and report appropriate
562599
* errors for each invalid argument.
@@ -650,8 +687,8 @@ private void visitClosureArguments(ClassNode receiverType, List<Expression> argu
650687
* - When a process is called with a Channel argument, the output is wrapped
651688
* in a Channel, otherwise it is wrapped in a Value.
652689
*
653-
* - When a process declares a single output expression, the return type contains
654-
* only that type (e.g. T -> Channel<T>). When a process declares named outputs,
690+
* - When a process declares a single output, the return type contains only that
691+
* type (e.g. T -> Channel<T>). When a process declares multiple outputs,
655692
* the return type is a Record where each field is wrapped in the appropriate
656693
* dataflow type.
657694
*
@@ -684,10 +721,6 @@ private boolean checkProcessCall(MethodCallExpression node) {
684721
var resultType = processOutputType(dataflowType, ((ProcessNodeV2) mn).outputs);
685722
node.putNodeMetaData(ASTNodeMarker.INFERRED_TYPE, resultType);
686723

687-
var methodVariable = currentWorkflow.getVariableScope().getReferencedClassVariable(mn.getName());
688-
if( methodVariable instanceof PropertyNode pn )
689-
pn.getType().getField("out").setType(resultType);
690-
691724
return true;
692725
}
693726

@@ -702,8 +735,7 @@ private static ClassNode processOutputType(ClassNode dataflowType, Statement blo
702735
if( outputs.size() == 1 ) {
703736
var first = outputs.get(0);
704737
var output = ((ExpressionStatement) first).getExpression();
705-
if( outputTarget(output) == null )
706-
return processEmitType(dataflowType, getType(output));
738+
return processEmitType(dataflowType, getType(output));
707739
}
708740
var cn = new ClassNode(Record.class);
709741
for( var stmt : outputs ) {

0 commit comments

Comments
 (0)