digraph G {
0 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>1 ms (0 ms, 0 ms, 1 ms (stage 577.0: task 835))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 614.0: task 867))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 614.0: task 867))<br>remote merged blocks fetched: 0<br>records read: 1<br>local bytes read total (min, med, max (stageId: taskId))<br>130.0 B (0.0 B, 0.0 B, 130.0 B (stage 578.0: task 855))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 614.0: task 867))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 614.0: task 867))<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>104.0 B (0.0 B, 0.0 B, 104.0 B (stage 577.0: task 835))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 614.0: task 867))<br>number of partitions: 16<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 614.0: task 867))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 614.0: task 867))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>130.0 B (0.0 B, 0.0 B, 130.0 B (stage 577.0: task 835))"];
1 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>7.3 s (116 ms, 147 ms, 185 ms (stage 577.0: task 824))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of sort fallback tasks: 0"];
2 [labelType="html" label="<b>StateStoreSave</b><br><br>number of shuffle partitions: 50<br>number of removed state rows: 0<br>data returned from Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of total state rows: 3<br>number of state store instances: 50<br>memory used by state total (min, med, max (stageId: taskId))<br>22.1 KiB (432.0 B, 432.0 B, 808.0 B (stage 577.0: task 830))<br>count of cache hit on states cache in provider: 400<br>number of output rows: 1<br>estimated size of state only on current version total (min, med, max (stageId: taskId))<br>6.0 KiB (104.0 B, 104.0 B, 448.0 B (stage 577.0: task 830))<br>number of rows which are dropped by watermark: 0<br>data sent to Python workers total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>count of cache miss on states cache in provider: 0<br>time to commit changes total (min, med, max (stageId: taskId))<br>7.2 s (87 ms, 147 ms, 184 ms (stage 577.0: task 823))<br>time to remove total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 577.0: task 809))<br>number of updated state rows: 1<br>time to update total (min, med, max (stageId: taskId))<br>69 ms (0 ms, 0 ms, 69 ms (stage 577.0: task 835))<br>number of output rows: 0"];
3 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>8 ms (0 ms, 0 ms, 8 ms (stage 577.0: task 835))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of sort fallback tasks: 0"];
4 [labelType="html" label="<b>StateStoreRestore</b><br><br>number of output rows: 1"];
5 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>14 ms (0 ms, 0 ms, 14 ms (stage 577.0: task 835))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of sort fallback tasks: 0"];
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>1 ms (0 ms, 0 ms, 1 ms (stage 576.0: task 804))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 577.0: task 809))<br>remote merged blocks fetched: 0<br>records read: 1<br>local bytes read total (min, med, max (stageId: taskId))<br>146.0 B (0.0 B, 0.0 B, 146.0 B (stage 577.0: task 835))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 577.0: task 809))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>144.0 B (0.0 B, 0.0 B, 144.0 B (stage 576.0: task 804))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of partitions: 50<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 577.0: task 809))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>146.0 B (0.0 B, 0.0 B, 146.0 B (stage 576.0: task 804))"];
7 [labelType="html" label="<b>ObjectHashAggregate</b><br><br>number of output rows: 1<br>time in aggregation build total (min, med, max (stageId: taskId))<br>584 ms (0 ms, 0 ms, 584 ms (stage 576.0: task 804))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 577.0: task 809))<br>number of sort fallback tasks: 0"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n584 ms (0 ms, 0 ms, 584 ms (stage 576.0: task 804))";
9 [labelType="html" label="<br><b>Project</b><br><br>"];
}
10 [labelType="html" label="<br><b>EventTimeWatermark</b><br><br>"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n584 ms (0 ms, 0 ms, 584 ms (stage 576.0: task 804))";
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<br><b>Project</b><br><br>"];
14 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1"];
15 [labelType="html" label="<b>MicroBatchScan</b><br><br>number of output rows: 1<br>estimated number of fetched offsets out of range: 0<br>number of data loss error: 0"];
1->0;
2->1;
3->2;
4->3;
5->4;
6->5;
7->6;
9->7;
10->9;
12->10;
13->12;
14->13;
15->14;
}
16
Exchange RoundRobinPartitioning(16), REPARTITION_BY_NUM, [plan_id=3165]
ObjectHashAggregate(keys=[host#717, date#41], functions=[sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), collect_set(domain#40, 0, 0)])
StateStoreSave [host#717, date#41], state info [ checkpoint = file:/app/checkpoint/2025_12_06/referrals_watermark3/state, runId = 184404a1-7672-47f3-81a6-842372b55bc2, opId = 0, ver = 4, numPartitions = 50], Update, 1765036357000, 1765037036000, 2
ObjectHashAggregate(keys=[host#717, date#41], functions=[merge_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), merge_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), merge_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), merge_collect_set(domain#40, 0, 0)])
StateStoreRestore [host#717, date#41], state info [ checkpoint = file:/app/checkpoint/2025_12_06/referrals_watermark3/state, runId = 184404a1-7672-47f3-81a6-842372b55bc2, opId = 0, ver = 4, numPartitions = 50], 2
ObjectHashAggregate(keys=[host#717, date#41], functions=[merge_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), merge_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), merge_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), merge_collect_set(domain#40, 0, 0)])
Exchange hashpartitioning(host#717, date#41, 50), ENSURE_REQUIREMENTS, [plan_id=3159]
ObjectHashAggregate(keys=[host#717, date#41], functions=[partial_sum(CASE WHEN (ad_type#26 = request) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (ad_type#26 = impression) THEN 1 ELSE 0 END), partial_sum(CASE WHEN NOT ad_type#26 IN (impression,request) THEN 1 ELSE 0 END), partial_collect_set(domain#40, 0, 0)])
Project [ad_type#26, domain#40, date#41, regexp_extract(reference_link#34, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1) AS host#717]
WholeStageCodegen (2)
EventTimeWatermark created_at#33: timestamp, 15 minutes
Project [data#23.ad_type AS ad_type#26, data#23.created_at AS created_at#33, data#23.reference_link AS reference_link#34, data#23.domain AS domain#40, data#23.date AS date#41]
WholeStageCodegen (1)
Project [from_json(StructField(ip,StringType,true), StructField(ad_type,StringType,true), StructField(bot,IntegerType,true), StructField(title,StringType,true), StructField(url,StringType,true), StructField(uuid,StringType,true), StructField(keyword,StringType,true), StructField(inventory_code,IntegerType,true), StructField(created_at,TimestampType,true), StructField(reference_link,StringType,true), StructField(agent,StringType,true), StructField(referral_type,StringType,true), StructField(tracking_code,StringType,true), StructField(sub_id,StringType,true), StructField(click_type,StringType,true), StructField(domain,StringType,true), StructField(date,StringType,true), StructField(hour,IntegerType,true), StructField(inventory_ad_type,IntegerType,true), StructField(inventory_type_id,IntegerType,true), StructField(is_except_uuid,IntegerType,true), StructField(is_block_uuid,IntegerType,true), StructField(is_except_ip,IntegerType,true), StructField(is_block_ip,IntegerType,true), ... 8 more fields) AS data#23]
Filter ((((((isnotnull(value#8) AND NOT (RLIKE(from_json(StructField(agent,StringType,true), cast(value#8 as string), Some(Etc/UTC)).agent, Yeti|compatible|googlebot|google\.com\/bot\.html) <=> true)) AND (cast(from_json(StructField(date,StringType,true), cast(value#8 as string), Some(Etc/UTC)).date as date) = cast(from_utc_timestamp(2025-12-05 23:04:00.022, Asia/Seoul) as date))) AND isnotnull(regexp_extract(from_json(StructField(reference_link,StringType,true), cast(value#8 as string), Some(Etc/UTC)).reference_link, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1))) AND NOT RLIKE(regexp_extract(from_json(StructField(reference_link,StringType,true), cast(value#8 as string), Some(Etc/UTC)).reference_link, ^(?:https?:\/\/)?(?:www\.)?([^\/:]+), 1), .*\.googlesyndication\.com$)) AND isnotnull(from_json(StructField(domain,StringType,true), cast(value#8 as string), Some(Etc/UTC)).domain)) AND NOT RLIKE(from_json(StructField(domain,StringType,true), cast(value#8 as string), Some(Etc/UTC)).domain, .*\.googlesyndication\.com$))
MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan