with event as (
select
aid,
data_type,
lat,
lon,
time,
time_part,
lat_part,
lon_part
from(
select
aid,
data_type,
lat,
lon,
time,
floor(unix_timestamp(time)/10) as time_part,
floor(lat/0.001) as lat_part,
floor(lon/0.001) as lon_part,
row_number() over(partition by aid,time,location order by time) as rn
from test where data_type in ('man','woman')
) total
where rn=1
),
source_pre as(
select
aid,
data_type,
lat,
lon,
time,
time_part,
ad.lat_part as lat_part,
ad.lon_part as lon_part
from (
select
aid,
data_type,
lat,
lon,
time,
time_part,
lat_part,
lon_part
from event
where data_type='man'
) source_original
lateral view explode(split(concat(lat_part-1,',',lat_part,',',lat_part+1),',')) ad
as lat_part
lateral view explode(split(concat(lon_part-1,',',lon_part,',',lon_part+1),',')) ad
as lon_part
),
target as (
select
aid,
data_type,
lat,
lon,
time,
cast(time_part as string) as time_part,
cast(lat_part as string) as lat_part,
cast(lon_part as string) as lon_part
from event where data_type='women'
),
source as (
select
aid,
data_type,
lat,
lon,
time,
ad.time_part as time_part,
lat_part,
lon_part
from (
select
aid,
data_type,
lat,
lon,
time,
time_part,
lat_part,
lon_part
from source_pre left semi join target
on source_pre.lat_part=target.lat_part and source_pre.lon_part=target.lon_part
)source_original
lateral view explode(split(concat(time_part-1,',',time_part,',',time_part+1),',')) ad
as time_part
)
select
source_aid,
target_aid
from (select
abs(unix_timestamp(source.time)-unix_timestamp(target.time)) as time_diff,
round(6378138*2*asin(sqrt(pow(sin((source.lat*pi()/180-target.lat*pi()/180)/2),2)+cos(source.lat*pi()/180)*cos(target.lat*pi()/180)* pow(sin((source.lon*pi()/180-target.lon*pi()/180)/2),2)))) as site_diff,
source.aid as source_aid,
target.aid as target_aid
from source join target
on source.lon_part=target.lon_part and source.lat_part=target.lat_part and source.time_part=target.time_part
) diff
where time_diff<=10 and site_diff<=150
评论