1
+ import { createHash } from "crypto" ;
1
2
import type { SummaryEvent } from "./log-summary" ;
2
3
3
4
/**
 * Accumulated statistics for a single pipeline (one sequence of RA steps)
 * belonging to a predicate.
 */
export interface PipelineSummary {
  /** The RA steps making up the pipeline. */
  steps: string[];
  /** Total counts for each step in the RA array, across all iterations */
  counts: number[];
  /** Digest computed from the pipeline's RA steps. */
  hash: string;
}
8
10
9
11
/**
@@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog {
26
28
*/
27
29
names : string [ ] ;
28
30
31
+ /** RA hash of the `i`th predicate event */
32
+ raHashes : string [ ] ;
33
+
29
34
/** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
30
35
timeCosts : number [ ] ;
31
36
@@ -52,41 +57,51 @@ export interface PerformanceComparisonDataFromLog {
52
57
* All the pipeline runs seen for the `i`th predicate from the `names` array.
53
58
*/
54
59
pipelineSummaryList : Array < Record < string , PipelineSummary > > ;
60
+
61
+ /** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
62
+ dependencyLists : number [ ] [ ] ;
55
63
}
56
64
57
65
export class PerformanceOverviewScanner {
58
/**
 * Results accumulated so far. `getPredicateIndex` appends one entry to
 * `names`, `raHashes`, `timeCosts`, `tupleCosts`, `iterationCounts`,
 * `evaluationCounts`, `pipelineSummaryList` and `dependencyLists` each time it
 * allocates a new index, so those arrays always have the same length.
 * `cacheHitIndices` and `sentinelEmptyIndices` hold indices into them.
 */
private readonly data: PerformanceComparisonDataFromLog = {
  names: [],
  raHashes: [],
  timeCosts: [],
  tupleCosts: [],
  cacheHitIndices: [],
  sentinelEmptyIndices: [],
  pipelineSummaryList: [],
  evaluationCounts: [],
  iterationCounts: [],
  dependencyLists: [],
};

/** Maps an RA hash to the index allocated for it by `getPredicateIndex`. */
private readonly raToIndex = new Map<string, number>();

/**
 * Maps an SCC hash to the index of the first predicate recorded for that SCC,
 * which then serves as the SCC's representative when linking dependencies.
 */
private readonly mainHashToRepr = new Map<string, number>();

/**
 * Maps a predicate name to the index of the most recent computed event seen
 * with that name; consulted to resolve `cached_`-prefixed predicates.
 */
private readonly nameToIndex = new Map<string, number>();
69
81
70
/**
 * Returns the index associated with the given RA hash, allocating a fresh
 * index (and a zero-initialised slot in every per-predicate array of
 * `this.data`) the first time the hash is seen.
 *
 * @param name Predicate name; only recorded when a new index is allocated.
 * @param ra RA hash identifying the predicate evaluation; this is the lookup key.
 * @returns The existing or newly allocated index.
 */
private getPredicateIndex(name: string, ra: string): number {
  let index = this.raToIndex.get(ra);
  if (index === undefined) {
    // Indices are handed out densely in order of first appearance, so the
    // current map size is the next free slot.
    index = this.raToIndex.size;
    this.raToIndex.set(ra, index);
    const {
      names,
      raHashes,
      timeCosts,
      tupleCosts,
      iterationCounts,
      evaluationCounts,
      pipelineSummaryList,
      dependencyLists,
    } = this.data;
    // Keep all per-predicate arrays the same length as the index space.
    names.push(name);
    raHashes.push(ra);
    timeCosts.push(0);
    tupleCosts.push(0);
    iterationCounts.push(0);
    evaluationCounts.push(0);
    pipelineSummaryList.push({});
    dependencyLists.push([]);
  }
  return index;
}
@@ -96,46 +111,63 @@ export class PerformanceOverviewScanner {
96
111
}
97
112
98
113
onEvent ( event : SummaryEvent ) : void {
99
- if (
100
- event . completionType !== undefined &&
101
- event . completionType !== "SUCCESS"
102
- ) {
114
+ const { completionType, evaluationStrategy, predicateName, raHash } = event ;
115
+ if ( completionType !== undefined && completionType !== "SUCCESS" ) {
103
116
return ; // Skip any evaluation that wasn't successful
104
117
}
105
118
106
- switch ( event . evaluationStrategy ) {
107
- case "EXTENSIONAL" :
119
+ switch ( evaluationStrategy ) {
120
+ case "EXTENSIONAL" : {
121
+ break ;
122
+ }
108
123
case "COMPUTED_EXTENSIONAL" : {
124
+ if ( predicateName . startsWith ( "cached_" ) ) {
125
+ // Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
126
+ // The raHash of this event may appear in a CACHE_HIT event in the other event log. The dependency
127
+ // we're adding here is needed in order to associate the original predicate with such a cache hit.
128
+ const originalName = predicateName . substring ( "cached_" . length ) ;
129
+ const originalIndex = this . nameToIndex . get ( originalName ) ;
130
+ if ( originalIndex != null ) {
131
+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
132
+ this . data . dependencyLists [ index ] . push ( originalIndex ) ;
133
+ }
134
+ }
109
135
break ;
110
136
}
111
137
case "CACHE_HIT" :
112
138
case "CACHACA" : {
113
139
// Record a cache hit, but only if the predicate has not been seen before.
114
140
// We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
115
- if ( ! this . nameToIndex . has ( event . predicateName ) ) {
141
+ if ( ! this . raToIndex . has ( raHash ) ) {
116
142
this . data . cacheHitIndices . push (
117
- this . getPredicateIndex ( event . predicateName ) ,
143
+ this . getPredicateIndex ( predicateName , raHash ) ,
118
144
) ;
119
145
}
120
146
break ;
121
147
}
122
148
case "SENTINEL_EMPTY" : {
123
- this . data . sentinelEmptyIndices . push (
124
- this . getPredicateIndex ( event . predicateName ) ,
125
- ) ;
149
+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
150
+ this . data . sentinelEmptyIndices . push ( index ) ;
151
+ const sentinelIndex = this . raToIndex . get ( event . sentinelRaHash ) ;
152
+ if ( sentinelIndex != null ) {
153
+ this . data . dependencyLists [ index ] . push ( sentinelIndex ) ; // needed for matching up cache hits
154
+ }
126
155
break ;
127
156
}
128
157
case "COMPUTE_RECURSIVE" :
129
158
case "COMPUTE_SIMPLE" :
159
+ case "NAMED_LOCAL" :
130
160
case "IN_LAYER" : {
131
- const index = this . getPredicateIndex ( event . predicateName ) ;
161
+ const index = this . getPredicateIndex ( predicateName , raHash ) ;
162
+ this . nameToIndex . set ( predicateName , index ) ;
132
163
let totalTime = 0 ;
133
164
let totalTuples = 0 ;
134
- if ( event . evaluationStrategy !== "IN_LAYER ") {
165
+ if ( evaluationStrategy === "COMPUTE_SIMPLE ") {
135
166
totalTime += event . millis ;
136
167
} else {
137
- // IN_LAYER events do no record of their total time.
138
- // Make a best-effort estimate by adding up the positive iteration times (they can be negative).
168
+ // Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
169
+ // Note that for COMPUTE_RECURSIVE the "millis" field contains the total time of the SCC, not just that predicate,
170
+ // but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
139
171
for ( const millis of event . predicateIterationMillis ?? [ ] ) {
140
172
if ( millis > 0 ) {
141
173
totalTime += millis ;
@@ -148,13 +180,16 @@ export class PerformanceOverviewScanner {
148
180
iterationCounts,
149
181
evaluationCounts,
150
182
pipelineSummaryList,
183
+ dependencyLists,
151
184
} = this . data ;
152
185
const pipelineSummaries = pipelineSummaryList [ index ] ;
186
+ const dependencyList = dependencyLists [ index ] ;
153
187
for ( const { counts, raReference } of event . pipelineRuns ?? [ ] ) {
154
188
// Get or create the pipeline summary for this RA
155
189
const pipelineSummary = ( pipelineSummaries [ raReference ] ??= {
156
190
steps : event . ra [ raReference ] ,
157
191
counts : counts . map ( ( ) => 0 ) ,
192
+ hash : getPipelineHash ( event . ra [ raReference ] ) ,
158
193
} ) ;
159
194
const { counts : totalTuplesPerStep } = pipelineSummary ;
160
195
for ( let i = 0 , length = counts . length ; i < length ; ++ i ) {
@@ -169,6 +204,25 @@ export class PerformanceOverviewScanner {
169
204
totalTuplesPerStep [ i ] += count ;
170
205
}
171
206
}
207
+ for ( const dependencyHash of Object . values ( event . dependencies ?? { } ) ) {
208
+ const dependencyIndex = this . raToIndex . get ( dependencyHash ) ;
209
+ if ( dependencyIndex != null ) {
210
+ dependencyList . push ( dependencyIndex ) ;
211
+ }
212
+ }
213
+ // For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
214
+ const sccHash =
215
+ event . mainHash ??
216
+ ( evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null ) ;
217
+ if ( sccHash != null ) {
218
+ const mainIndex = this . mainHashToRepr . get ( sccHash ) ;
219
+ if ( mainIndex == null ) {
220
+ this . mainHashToRepr . set ( sccHash , index ) ;
221
+ } else {
222
+ dependencyLists [ index ] . push ( mainIndex ) ;
223
+ dependencyLists [ mainIndex ] . push ( index ) ;
224
+ }
225
+ }
172
226
timeCosts [ index ] += totalTime ;
173
227
tupleCosts [ index ] += totalTuples ;
174
228
iterationCounts [ index ] += event . pipelineRuns ?. length ?? 0 ;
@@ -178,3 +232,11 @@ export class PerformanceOverviewScanner {
178
232
}
179
233
}
180
234
}
235
+
236
/**
 * Computes a digest identifying a pipeline by its RA steps.
 *
 * The steps are fed to the hash back to back, with no separator between them,
 * so the result depends only on their concatenation.
 *
 * @param steps The RA steps of the pipeline, in order.
 * @returns The MD5 digest of the concatenated steps, base64-encoded.
 */
function getPipelineHash(steps: string[]): string {
  const md5 = createHash("md5");
  for (const step of steps) {
    // Use update()/digest() consistently rather than mixing the stream API
    // (write) with digest().
    md5.update(step);
  }
  return md5.digest("base64");
}
0 commit comments