Skip to content

Commit c0261ec

Browse files
authored
Merge pull request #4018 from asgerf/asgerf/compare-fixes
Compare performance: Detect 'shadowed' cache hits and a few other fixes
2 parents fd73c06 + bfbf5a4 commit c0261ec

File tree

3 files changed

+257
-59
lines changed

3 files changed

+257
-59
lines changed

extensions/ql-vscode/src/log-insights/log-summary.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ type EvaluationStrategy =
1616
| "EXTENSIONAL"
1717
| "SENTINEL_EMPTY"
1818
| "CACHACA"
19-
| "CACHE_HIT";
19+
| "CACHE_HIT"
20+
| "NAMED_LOCAL";
2021

2122
interface SummaryEventBase {
2223
evaluationStrategy: EvaluationStrategy;
@@ -28,6 +29,8 @@ interface SummaryEventBase {
2829

2930
interface ResultEventBase extends SummaryEventBase {
3031
resultSize: number;
32+
dependencies?: { [key: string]: string };
33+
mainHash?: string;
3134
}
3235

3336
export interface ComputeSimple extends ResultEventBase {
@@ -60,6 +63,15 @@ export interface InLayer extends ResultEventBase {
6063
predicateIterationMillis: number[];
6164
}
6265

66+
interface NamedLocal extends ResultEventBase {
67+
evaluationStrategy: "NAMED_LOCAL";
68+
deltaSizes: number[];
69+
ra: Ra;
70+
pipelineRuns: PipelineRun[];
71+
queryCausingWork?: string;
72+
predicateIterationMillis: number[];
73+
}
74+
6375
interface ComputedExtensional extends ResultEventBase {
6476
evaluationStrategy: "COMPUTED_EXTENSIONAL";
6577
queryCausingWork?: string;
@@ -92,4 +104,5 @@ export type SummaryEvent =
92104
| Extensional
93105
| SentinelEmpty
94106
| Cachaca
95-
| CacheHit;
107+
| CacheHit
108+
| NamedLocal;

extensions/ql-vscode/src/log-insights/performance-comparison.ts

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import { createHash } from "crypto";
12
import type { SummaryEvent } from "./log-summary";
23

34
export interface PipelineSummary {
45
steps: string[];
56
/** Total counts for each step in the RA array, across all iterations */
67
counts: number[];
8+
hash: string;
79
}
810

911
/**
@@ -26,6 +28,9 @@ export interface PerformanceComparisonDataFromLog {
2628
*/
2729
names: string[];
2830

31+
/** RA hash of the `i`th predicate event */
32+
raHashes: string[];
33+
2934
/** Number of milliseconds spent evaluating the `i`th predicate from the `names` array. */
3035
timeCosts: number[];
3136

@@ -52,41 +57,51 @@ export interface PerformanceComparisonDataFromLog {
5257
* All the pipeline runs seen for the `i`th predicate from the `names` array.
5358
*/
5459
pipelineSummaryList: Array<Record<string, PipelineSummary>>;
60+
61+
/** All dependencies of the `i`th predicate from the `names` array, encoded as a list of indices in `names`. */
62+
dependencyLists: number[][];
5563
}
5664

5765
export class PerformanceOverviewScanner {
58-
private readonly nameToIndex = new Map<string, number>();
5966
private readonly data: PerformanceComparisonDataFromLog = {
6067
names: [],
68+
raHashes: [],
6169
timeCosts: [],
6270
tupleCosts: [],
6371
cacheHitIndices: [],
6472
sentinelEmptyIndices: [],
6573
pipelineSummaryList: [],
6674
evaluationCounts: [],
6775
iterationCounts: [],
76+
dependencyLists: [],
6877
};
78+
private readonly raToIndex = new Map<string, number>();
79+
private readonly mainHashToRepr = new Map<string, number>();
80+
private readonly nameToIndex = new Map<string, number>();
6981

70-
private getPredicateIndex(name: string): number {
71-
const { nameToIndex } = this;
72-
let index = nameToIndex.get(name);
82+
private getPredicateIndex(name: string, ra: string): number {
83+
let index = this.raToIndex.get(ra);
7384
if (index === undefined) {
74-
index = nameToIndex.size;
75-
nameToIndex.set(name, index);
85+
index = this.raToIndex.size;
86+
this.raToIndex.set(ra, index);
7687
const {
7788
names,
89+
raHashes,
7890
timeCosts,
7991
tupleCosts,
8092
iterationCounts,
8193
evaluationCounts,
8294
pipelineSummaryList,
95+
dependencyLists,
8396
} = this.data;
8497
names.push(name);
98+
raHashes.push(ra);
8599
timeCosts.push(0);
86100
tupleCosts.push(0);
87101
iterationCounts.push(0);
88102
evaluationCounts.push(0);
89103
pipelineSummaryList.push({});
104+
dependencyLists.push([]);
90105
}
91106
return index;
92107
}
@@ -96,46 +111,63 @@ export class PerformanceOverviewScanner {
96111
}
97112

98113
onEvent(event: SummaryEvent): void {
99-
if (
100-
event.completionType !== undefined &&
101-
event.completionType !== "SUCCESS"
102-
) {
114+
const { completionType, evaluationStrategy, predicateName, raHash } = event;
115+
if (completionType !== undefined && completionType !== "SUCCESS") {
103116
return; // Skip any evaluation that wasn't successful
104117
}
105118

106-
switch (event.evaluationStrategy) {
107-
case "EXTENSIONAL":
119+
switch (evaluationStrategy) {
120+
case "EXTENSIONAL": {
121+
break;
122+
}
108123
case "COMPUTED_EXTENSIONAL": {
124+
if (predicateName.startsWith("cached_")) {
125+
// Add a dependency from a cached COMPUTED_EXTENSIONAL to the predicate with the actual contents.
126+
// The raHash of the this event may appear in a CACHE_HIT event in the other event log. The dependency
127+
// we're adding here is needed in order to associate the original predicate with such a cache hit.
128+
const originalName = predicateName.substring("cached_".length);
129+
const originalIndex = this.nameToIndex.get(originalName);
130+
if (originalIndex != null) {
131+
const index = this.getPredicateIndex(predicateName, raHash);
132+
this.data.dependencyLists[index].push(originalIndex);
133+
}
134+
}
109135
break;
110136
}
111137
case "CACHE_HIT":
112138
case "CACHACA": {
113139
// Record a cache hit, but only if the predicate has not been seen before.
114140
// We're mainly interested in the reuse of caches from an earlier query run as they can distort comparisons.
115-
if (!this.nameToIndex.has(event.predicateName)) {
141+
if (!this.raToIndex.has(raHash)) {
116142
this.data.cacheHitIndices.push(
117-
this.getPredicateIndex(event.predicateName),
143+
this.getPredicateIndex(predicateName, raHash),
118144
);
119145
}
120146
break;
121147
}
122148
case "SENTINEL_EMPTY": {
123-
this.data.sentinelEmptyIndices.push(
124-
this.getPredicateIndex(event.predicateName),
125-
);
149+
const index = this.getPredicateIndex(predicateName, raHash);
150+
this.data.sentinelEmptyIndices.push(index);
151+
const sentinelIndex = this.raToIndex.get(event.sentinelRaHash);
152+
if (sentinelIndex != null) {
153+
this.data.dependencyLists[index].push(sentinelIndex); // needed for matching up cache hits
154+
}
126155
break;
127156
}
128157
case "COMPUTE_RECURSIVE":
129158
case "COMPUTE_SIMPLE":
159+
case "NAMED_LOCAL":
130160
case "IN_LAYER": {
131-
const index = this.getPredicateIndex(event.predicateName);
161+
const index = this.getPredicateIndex(predicateName, raHash);
162+
this.nameToIndex.set(predicateName, index);
132163
let totalTime = 0;
133164
let totalTuples = 0;
134-
if (event.evaluationStrategy !== "IN_LAYER") {
165+
if (evaluationStrategy === "COMPUTE_SIMPLE") {
135166
totalTime += event.millis;
136167
} else {
137-
// IN_LAYER events do no record of their total time.
138-
// Make a best-effort estimate by adding up the positive iteration times (they can be negative).
168+
// Make a best-effort estimate of the total time by adding up the positive iteration times (they can be negative).
169+
// Note that for COMPUTE_RECURSIVE the "millis" field contain the total time of the SCC, not just that predicate,
170+
// but we don't have a good way to show that in the UI, so we rely on the accumulated iteration times.
139171
for (const millis of event.predicateIterationMillis ?? []) {
140172
if (millis > 0) {
141173
totalTime += millis;
@@ -148,13 +180,16 @@ export class PerformanceOverviewScanner {
148180
iterationCounts,
149181
evaluationCounts,
150182
pipelineSummaryList,
183+
dependencyLists,
151184
} = this.data;
152185
const pipelineSummaries = pipelineSummaryList[index];
186+
const dependencyList = dependencyLists[index];
153187
for (const { counts, raReference } of event.pipelineRuns ?? []) {
154188
// Get or create the pipeline summary for this RA
155189
const pipelineSummary = (pipelineSummaries[raReference] ??= {
156190
steps: event.ra[raReference],
157191
counts: counts.map(() => 0),
192+
hash: getPipelineHash(event.ra[raReference]),
158193
});
159194
const { counts: totalTuplesPerStep } = pipelineSummary;
160195
for (let i = 0, length = counts.length; i < length; ++i) {
@@ -169,6 +204,25 @@ export class PerformanceOverviewScanner {
169204
totalTuplesPerStep[i] += count;
170205
}
171206
}
207+
for (const dependencyHash of Object.values(event.dependencies ?? {})) {
208+
const dependencyIndex = this.raToIndex.get(dependencyHash);
209+
if (dependencyIndex != null) {
210+
dependencyList.push(dependencyIndex);
211+
}
212+
}
213+
// For predicates in the same SCC, add two-way dependencies with an arbitrary SCC member
214+
const sccHash =
215+
event.mainHash ??
216+
(evaluationStrategy === "COMPUTE_RECURSIVE" ? raHash : null);
217+
if (sccHash != null) {
218+
const mainIndex = this.mainHashToRepr.get(sccHash);
219+
if (mainIndex == null) {
220+
this.mainHashToRepr.set(sccHash, index);
221+
} else {
222+
dependencyLists[index].push(mainIndex);
223+
dependencyLists[mainIndex].push(index);
224+
}
225+
}
172226
timeCosts[index] += totalTime;
173227
tupleCosts[index] += totalTuples;
174228
iterationCounts[index] += event.pipelineRuns?.length ?? 0;
@@ -178,3 +232,11 @@ export class PerformanceOverviewScanner {
178232
}
179233
}
180234
}
235+
236+
function getPipelineHash(steps: string[]) {
237+
const md5 = createHash("md5");
238+
for (const step of steps) {
239+
md5.write(step);
240+
}
241+
return md5.digest("base64");
242+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy