digraph G {
0 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 337.0: task 494))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 365.0: task 528))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 365.0: task 528))<br>remote merged blocks fetched: 0<br>records read: 1<br>local bytes read total (min, med, max (stageId: taskId))<br>144.0 B (0.0 B, 0.0 B, 144.0 B (stage 353.0: task 524))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 365.0: task 528))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 365.0: task 528))<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>128.0 B (0.0 B, 0.0 B, 128.0 B (stage 337.0: task 494))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 365.0: task 528))<br>number of partitions: 16<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 365.0: task 528))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 365.0: task 528))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>144.0 B (0.0 B, 0.0 B, 144.0 B (stage 337.0: task 494))"];
1 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>8.3 s (127 ms, 159 ms, 234 ms (stage 337.0: task 495))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>number of sort fallback tasks: 0"];
2 [labelType="html" label="<b>StateStoreSave</b><br><br>number of shuffle partitions: 50<br>number of removed state rows: 0<br>data returned from Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>number of total state rows: 1<br>number of state store instances: 50<br>memory used by state total (min, med, max (stageId: taskId))<br>21.6 KiB (432.0 B, 432.0 B, 968.0 B (stage 337.0: task 494))<br>count of cache hit on states cache in provider: 200<br>number of output rows: 1<br>estimated size of state only on current version total (min, med, max (stageId: taskId))<br>5.4 KiB (104.0 B, 104.0 B, 448.0 B (stage 337.0: task 494))<br>number of rows which are dropped by watermark: 0<br>data sent to Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>count of cache miss on states cache in provider: 0<br>time to commit changes total (min, med, max (stageId: taskId))<br>8.1 s (81 ms, 158 ms, 233 ms (stage 337.0: task 495))<br>time to remove total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 337.0: task 469))<br>number of updated state rows: 1<br>time to update total (min, med, max (stageId: taskId))<br>116 ms (0 ms, 0 ms, 116 ms (stage 337.0: task 494))<br>number of output rows: 0"];
3 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>27 ms (0 ms, 0 ms, 27 ms (stage 337.0: task 494))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>number of sort fallback tasks: 0"];
4 [labelType="html" label="<b>StateStoreRestore</b><br><br>number of output rows: 2"];
5 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>11 ms (0 ms, 0 ms, 11 ms (stage 337.0: task 494))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>number of sort fallback tasks: 0"];
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>3 ms (0 ms, 0 ms, 2 ms (stage 336.0: task 468))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 337.0: task 469))<br>remote merged blocks fetched: 0<br>records read: 2<br>local bytes read total (min, med, max (stageId: taskId))<br>254.0 B (0.0 B, 0.0 B, 254.0 B (stage 337.0: task 494))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 337.0: task 469))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>merged fetch fallback count: 0<br>local blocks read: 2<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>264.0 B (0.0 B, 0.0 B, 136.0 B (stage 336.0: task 467))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>number of partitions: 50<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 337.0: task 469))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 337.0: task 469))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>254.0 B (0.0 B, 0.0 B, 132.0 B (stage 336.0: task 467))"];
7 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 2<br>time in aggregation build total (min, med, max (stageId: taskId))<br>1.2 s (0 ms, 0 ms, 591 ms (stage 336.0: task 467))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 336.0: task 467))<br>number of sort fallback tasks: 0"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n1.2 s (0 ms, 0 ms, 591 ms (stage 336.0: task 467))";
9 [labelType="html" label="<br><b>Project</b><br><br>"];
}
10 [labelType="html" label="<br><b>EventTimeWatermark</b><br><br>"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n1.2 s (0 ms, 0 ms, 592 ms (stage 336.0: task 468))";
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<br><b>Project</b><br><br>"];
14 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 2"];
15 [labelType="html" label="<b>MicroBatchScan</b><br><br>number of output rows: 2<br>estimated number of fetched offsets out of range: 0<br>number of data loss error: 0"];
1->0;
2->1;
3->2;
4->3;
5->4;
6->5;
7->6;
9->7;
10->9;
12->10;
13->12;
14->13;
15->14;
}
16
Exchange RoundRobinPartitioning(16), REPARTITION_BY_NUM, [plan_id=1909]
ObjectHashAggregate(keys=[host#717, date#41], functions=[sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), collect_set(domain#40, 0, 0)])
StateStoreSave [host#717, date#41], state info [ checkpoint = file:/app/checkpoint/2025_12_06/referrals_watermark3/state, runId = 184404a1-7672-47f3-81a6-842372b55bc2, opId = 0, ver = 2, numPartitions = 50], Update, 0, 1765035446000, 2
ObjectHashAggregate(keys=[host#717, date#41], functions=[merge_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), merge_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), merge_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), merge_collect_set(domain#40, 0, 0)])
StateStoreRestore [host#717, date#41], state info [ checkpoint = file:/app/checkpoint/2025_12_06/referrals_watermark3/state, runId = 184404a1-7672-47f3-81a6-842372b55bc2, opId = 0, ver = 2, numPartitions = 50], 2
ObjectHashAggregate(keys=[host#717, date#41], functions=[merge_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), merge_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), merge_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), merge_collect_set(domain#40, 0, 0)])
Exchange hashpartitioning(host#717, date#41, 50), ENSURE_REQUIREMENTS, [plan_id=1903]
ObjectHashAggregate(keys=[host#717, date#41], functions=[partial_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), partial_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), partial_collect_set(domain#40, 0, 0)])
Project [ad_type#26, domain#40, date#41, regexp_extract(reference_link#34, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1) AS host#717]
WholeStageCodegen (2)
EventTimeWatermark created_at#33: timestamp, 15 minutes
Project [data#23.ad_type AS ad_type#26, data#23.created_at AS created_at#33, data#23.reference_link AS reference_link#34, data#23.domain AS domain#40, data#23.date AS date#41]
WholeStageCodegen (1)
Project [from_json(StructField(ip,StringType,true), StructField(ad_type,StringType,true), StructField(bot,IntegerType,true), StructField(title,StringType,true), StructField(url,StringType,true), StructField(uuid,StringType,true), StructField(keyword,StringType,true), StructField(inventory_code,IntegerType,true), StructField(created_at,TimestampType,true), StructField(reference_link,StringType,true), StructField(agent,StringType,true), StructField(referral_type,StringType,true), StructField(tracking_code,StringType,true), StructField(sub_id,StringType,true), StructField(click_type,StringType,true), StructField(domain,StringType,true), StructField(date,StringType,true), StructField(hour,IntegerType,true), StructField(inventory_ad_type,IntegerType,true), StructField(inventory_type_id,IntegerType,true), StructField(is_except_uuid,IntegerType,true), StructField(is_block_uuid,IntegerType,true), StructField(is_except_ip,IntegerType,true), StructField(is_block_ip,IntegerType,true), ... 8 more fields) AS data#23]
Filter ((((((isnotnull(value#8) AND NOT (RLIKE(from_json(StructField(agent,StringType,true), cast(value#8 as string), Some(Etc/UTC)).agent, Yeti|compatible|googlebot|google\.com\/bot\.html) <=> true)) AND (cast(from_json(StructField(date,StringType,true), cast(value#8 as string), Some(Etc/UTC)).date as date) = cast(from_utc_timestamp(2025-12-05 22:18:00.014, Asia/Seoul) as date))) AND isnotnull(regexp_extract(from_json(StructField(reference_link,StringType,true), cast(value#8 as string), Some(Etc/UTC)).reference_link, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1))) AND NOT RLIKE(regexp_extract(from_json(StructField(reference_link,StringType,true), cast(value#8 as string), Some(Etc/UTC)).reference_link, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1), .*\.googlesyndication\.com$)) AND isnotnull(from_json(StructField(domain,StringType,true), cast(value#8 as string), Some(Etc/UTC)).domain)) AND NOT RLIKE(from_json(StructField(domain,StringType,true), cast(value#8 as string), Some(Etc/UTC)).domain, .*\.googlesyndication\.com$))
MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan