Show the Stage ID and Task ID that corresponds to the max metric
digraph G {
0 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 0<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 962.0: task 1399))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 962.0: task 1399))<br>remote merged blocks fetched: 0<br>records read: 0<br>local bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 962.0: task 1399))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>merged fetch fallback count: 0<br>local blocks read: 0<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>number of partitions: 16<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 962.0: task 1399))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 962.0: task 1399))"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))";
2 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 0"];
}
3 [labelType="html" label="<b>SortAggregate</b><br><br>number of output rows: 2"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: total (min, med, max (stageId: taskId))\n8.1 s (119 ms, 157 ms, 248 ms (stage 961.0: task 1394))";
5 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>peak memory total (min, med, max (stageId: taskId))<br>7.1 MiB (64.0 KiB, 64.0 KiB, 2.1 MiB (stage 961.0: task 1357))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))"];
}
6 [labelType="html" label="<b>StateStoreSave</b><br><br>number of shuffle partitions: 50<br>number of removed state rows: 0<br>data returned from Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>number of total state rows: 7<br>number of state store instances: 50<br>memory used by state total (min, med, max (stageId: taskId))<br>27.1 KiB (432.0 B, 432.0 B, 1824.0 B (stage 961.0: task 1357))<br>count of cache hit on states cache in provider: 800<br>number of output rows: 2<br>estimated size of state only on current version total (min, med, max (stageId: taskId))<br>10.4 KiB (104.0 B, 104.0 B, 936.0 B (stage 961.0: task 1372))<br>number of rows which are dropped by watermark: 0<br>data sent to Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>count of cache miss on states cache in provider: 0<br>time to commit changes total (min, med, max (stageId: taskId))<br>7.8 s (105 ms, 155 ms, 219 ms (stage 961.0: task 1395))<br>time to remove total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>number of updated state rows: 2<br>time to update total (min, med, max (stageId: taskId))<br>235 ms (0 ms, 1 ms, 134 ms (stage 961.0: task 1394))<br>number of output rows: 0"];
7 [labelType="html" label="<b>SortAggregate</b><br><br>number of output rows: 2"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: total (min, med, max (stageId: taskId))\n104 ms (0 ms, 1 ms, 36 ms (stage 961.0: task 1357))";
9 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>peak memory total (min, med, max (stageId: taskId))<br>7.1 MiB (64.0 KiB, 64.0 KiB, 2.1 MiB (stage 961.0: task 1357))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))"];
}
10 [labelType="html" label="<b>StateStoreRestore</b><br><br>number of output rows: 3"];
11 [labelType="html" label="<b>SortAggregate</b><br><br>number of output rows: 2"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n133 ms (0 ms, 1 ms, 29 ms (stage 961.0: task 1357))";
13 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>1 ms (0 ms, 0 ms, 1 ms (stage 961.0: task 1357))<br>peak memory total (min, med, max (stageId: taskId))<br>7.1 MiB (64.0 KiB, 64.0 KiB, 2.1 MiB (stage 961.0: task 1357))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))"];
}
14 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>4 ms (0 ms, 0 ms, 1 ms (stage 960.0: task 1345))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>remote merged blocks fetched: 0<br>records read: 3<br>local bytes read total (min, med, max (stageId: taskId))<br>780.0 B (0.0 B, 0.0 B, 544.0 B (stage 961.0: task 1357))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>merged fetch fallback count: 0<br>local blocks read: 3<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>1928.0 B (0.0 B, 0.0 B, 656.0 B (stage 960.0: task 1347))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>number of partitions: 50<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>780.0 B (0.0 B, 0.0 B, 272.0 B (stage 960.0: task 1347))"];
15 [labelType="html" label="<b>SortAggregate</b><br><br>number of output rows: 3"];
subgraph cluster16 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n2.2 s (0 ms, 0 ms, 569 ms (stage 960.0: task 1346))";
17 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 961.0: task 1351))<br>peak memory total (min, med, max (stageId: taskId))<br>6.2 MiB (0.0 B, 0.0 B, 2.1 MiB (stage 960.0: task 1347))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 961.0: task 1351))"];
18 [labelType="html" label="<br><b>Project</b><br><br>"];
}
19 [labelType="html" label="<br><b>EventTimeWatermark</b><br><br>"];
subgraph cluster20 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.2 s (0 ms, 0 ms, 553 ms (stage 960.0: task 1346))";
21 [labelType="html" label="<br><b>Project</b><br><br>"];
}
22 [labelType="html" label="<br><b>Project</b><br><br>"];
23 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 3"];
24 [labelType="html" label="<b>MicroBatchScan</b><br><br>number of output rows: 4<br>estimated number of fetched offsets out of range: 0<br>number of data loss error: 0"];
2->0;
3->2;
5->3;
6->5;
7->6;
9->7;
10->9;
11->10;
13->11;
14->13;
15->14;
17->15;
18->17;
19->18;
21->19;
22->21;
23->22;
24->23;
}
Project [data#23.title AS title#28, data#23.url AS url#29, data#23.uuid AS uuid#30, data#23.inventory_code AS inventory_code#32, data#23.created_at AS created_at#33, data#23.date AS date#41]
Filter ((((((isnotnull(value#8) AND NOT (RLIKE(from_json(StructField(agent,StringType,true), cast(value#8 as string), Some(Etc/UTC)).agent, Yeti|compatible|googlebot|google\.com\/bot\.html) <=> true)) AND (cast(from_json(StructField(date,StringType,true), cast(value#8 as string), Some(Etc/UTC)).date as date) = cast(from_utc_timestamp(2025-12-06 00:04:00.024, Asia/Seoul) as date))) AND (from_json(StructField(ad_type,StringType,true), cast(value#8 as string), Some(Etc/UTC)).ad_type = request)) AND isnotnull(from_json(StructField(title,StringType,true), cast(value#8 as string), Some(Etc/UTC)).title)) AND NOT (from_json(StructField(title,StringType,true), cast(value#8 as string), Some(Etc/UTC)).title = )) AND isnotnull(from_json(StructField(url,StringType,true), cast(value#8 as string), Some(Etc/UTC)).url))
MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan