Loop - 13 vs 16
The next section compares an older version of the operator to a newer one after both definitions are converted into markdown text. Green means an addition to the newer version, red means a deletion. Anything else is unchanged.
- Loop13 → Loop16 +8 -8
Loop13 → Loop16
RENAMED
@@ -1 +1 @@
|
|
1
1
|
Generic Looping construct. This loop has multiple termination conditions:
|
2
2
|
1) Trip count. Iteration count specified at runtime. Set by
|
3
3
|
specifying the input M. Optional. Set to empty string to omit.
|
4
4
|
Note that a static trip count (specified at graph construction time) can be
|
5
5
|
specified by passing in a constant node for input M.
|
6
6
|
2) Loop termination condition. This is an input to the op that determines
|
7
7
|
whether to run the first iteration and also a loop-carried dependency for
|
8
8
|
the body graph. The body graph must yield a value for the condition variable,
|
9
9
|
whether this input is provided or not.
|
10
10
|
This table summarizes the operating modes of this operator with equivalent
|
11
11
|
C-style code:
|
12
|
-
|
12
|
+
Operator inputs defined as (max_trip_count, condition_var).
|
13
|
-
|
13
|
+
* input ("", ""):
|
14
14
|
for (int i=0; ; ++i) {
|
15
15
|
cond = ... // Note this value is ignored, but is required in the body
|
16
16
|
}
|
17
|
-
|
17
|
+
* input ("", cond) // Note this is analogous to a while loop
|
18
18
|
bool cond = ...;
|
19
19
|
for (int i=0; cond; ++i) {
|
20
20
|
cond = ...;
|
21
21
|
}
|
22
|
-
|
22
|
+
* input ("", 1) // Note this is analogous to a do-while loop
|
23
23
|
bool cond = true
|
24
24
|
for (int i=0; cond; ++i) {
|
25
25
|
cond = ...;
|
26
26
|
}
|
27
|
-
|
27
|
+
* input (trip_count, "") // Note this is analogous to a for loop
|
28
28
|
int trip_count = ...
|
29
29
|
for (int i=0; i < trip_count; ++i) {
|
30
30
|
cond = ...; // ignored
|
31
31
|
}
|
32
|
-
|
32
|
+
* input (trip_count, cond)
|
33
33
|
int trip_count = ...;
|
34
34
|
bool cond = ...;
|
35
35
|
for (int i=0; i < trip_count && cond; ++i) {
|
36
36
|
cond = ...;
|
37
37
|
}
|
38
38
|
*Sample usage - cond as well as trip count*
|
39
39
|
graph predict-net {
|
40
40
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
41
41
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
42
42
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
43
43
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
44
44
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
45
45
|
return
|
46
46
|
}
|
47
47
|
graph body-net (
|
48
48
|
%i[INT32, scalar] // iteration number
|
49
49
|
%keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
|
50
50
|
%b_in[INT32, scalar] // incoming value of loop-carried-dependency b
|
51
51
|
) {
|
52
52
|
%my_local = Add(%a, %b_in)
|
53
53
|
%b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
|
54
54
|
%keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
|
55
55
|
%user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
|
56
56
|
return %keepgoing_out, %b_out, %user_defined_val
|
57
57
|
}
|
58
58
|
*Sample equivalent C code*
|
59
59
|
{
|
60
60
|
/* User-defined code (enclosing scope) */
|
61
61
|
int a = 3, b = 6;
|
62
62
|
bool keepgoing = true; // Analogous to input cond
|
63
63
|
/* End user-defined code */
|
64
64
|
/* Implicitly-defined code */
|
65
65
|
const int max_trip_count = 10; // Analogous to input M
|
66
66
|
int user_defined_vals[]; // Imagine this is resizable
|
67
67
|
/* End implicitly-defined code */
|
68
68
|
/* initialize loop-carried variables and scan-output variables */
|
69
69
|
bool keepgoing_out = keepgoing
|
70
70
|
int b_out = b
|
71
71
|
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
72
72
|
/* Implicitly-defined code: bind actual parameter values
|
73
73
|
to formal parameter variables of loop-body */
|
74
74
|
bool keepgoing_in = keepgoing_out;
|
75
75
|
int b_in = b_out;
|
76
76
|
/* User-defined code (loop body) */
|
77
77
|
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
78
78
|
b_out = a - b_in;
|
79
79
|
keepgoing_out = my_local > b_out;
|
80
80
|
user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
81
81
|
/* End user-defined code */
|
82
82
|
/* Implicitly-defined code */
|
83
83
|
user_defined_vals[i] = user_defined_val // accumulate scan-output values
|
84
84
|
}
|
85
85
|
// int t = my_local; // Can't do this. my_local is not accessible here.
|
86
86
|
// The values below are bound to the output variables of the loop and therefore accessible
|
87
87
|
// b_out; user_defined_vals; keepgoing_out;
|
88
88
|
}
|
89
89
|
There are several things of note in this code snippet:
|
90
90
|
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
91
91
|
be referenced in the inputs of the loop.
|
92
92
|
2) Any values computed in the loop body that need to be used in a subsequent
|
93
93
|
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
94
94
|
consisting of an input variable (e.g., b_in) and an output variable (e.g., b_out).
|
95
95
|
These are referred to as loop-carried dependences. The loop operation node
|
96
96
|
supplies the input value of the input variable for the first iteration, and
|
97
97
|
returns the output value of the output variable produced by the final
|
98
98
|
iteration.
|
99
99
|
3) Scan_output variables are used to implicitly concatenate values computed across
|
100
100
|
all the iterations. In the above example, the values of user_defined_val computed
|
101
101
|
over all iterations are concatenated and returned as the value of user_defined_vals
|
102
102
|
after the loop.
|
103
103
|
4) Values created in the body cannot be accessed in the enclosing scope,
|
104
104
|
except using the mechanism described above.
|
105
105
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
106
106
|
(See Step 3 here for an example:
|
107
107
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
108
108
|
Frontends should emit multi-layer RNNs as a series of Loop operators (with
|
109
109
|
time being the inner looping dimension), with each successive layer consuming
|
110
110
|
the scan_outputs from the previous layer, possibly going through several
|
111
111
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
112
112
|
The inputs and outputs of the subgraph (produced by the loop node) are matched by order rather than by name. The implementation will figure out the names based on this order.
|
113
113
|
### Attributes
|
114
114
|
* **body - GRAPH** (required) :
|
115
115
|
The graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop carried dependencies...). It has 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...). Each scan_output is created by concatenating the value of the specified output value at the end of each iteration of the loop. It is an error if the dimensions or data type of these scan_outputs change across loop iterations.
|
116
116
|
### Inputs
|
117
117
|
Between 2 and 2147483647 inputs.
|
118
118
|
- **M** (optional, heterogeneous) - **I**:
|
119
119
|
A maximum trip-count for the loop specified at runtime. Optional. Pass empty string to skip.
|
120
120
|
- **cond** (optional, heterogeneous) - **B**:
|
121
121
|
A boolean termination condition. Optional. Pass empty string to skip.
|
122
122
|
- **v_initial** (variadic) - **V**:
|
123
123
|
The initial values of any loop-carried dependencies (values that change across loop iterations)
|
124
124
|
### Outputs
|
125
125
|
Between 1 and 2147483647 outputs.
|
126
126
|
- **v_final_and_scan_outputs** (variadic) - **V**:
|
127
127
|
Final N loop carried dependency values then K scan_outputs. Scan outputs must be Tensors.
|
128
128
|
### Type Constraints
|
129
|
-
* **V** in ( seq(tensor(bool)), seq(tensor(complex128)), seq(tensor(complex64)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(complex128), tensor(complex64), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8) ):
|
129
|
+
* **V** in ( optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(complex128))), optional(seq(tensor(complex64))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(complex128)), optional(tensor(complex64)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8)), seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(complex128)), seq(tensor(complex64)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(complex128), tensor(complex64), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8) ):
|
130
|
-
All Tensor and Sequence types
|
130
|
+
All Tensor, Sequence(Tensor), Optional(Tensor), and Optional(Sequence(Tensor)) types up to IRv4.
|
131
131
|
* **I** in ( tensor(int64) ):
|
132
132
|
tensor of int64, which should be a scalar.
|
133
133
|
* **B** in ( tensor(bool) ):
|
134
134
|
tensor of bool, which should be a scalar.
|