import vaex
import numpy as np
from .utils import _ensure_strings_from_expressions, _ensure_string_from_expression, as_flat_array
from .dataframe import docsubst
[docs]class DataFrameAccessorGeo(object):
"""Geometry/geographic helper methods
Example:
>>> df_xyz = df.geo.spherical2cartesian(df.longitude, df.latitude, df.distance)
>>> df_xyz.x.mean()
"""
[docs] def __init__(self, df):
self.df = df
[docs] @docsubst
def spherical2cartesian(self, alpha, delta, distance, xname="x", yname="y", zname="z",
propagate_uncertainties=False,
center=[0, 0, 0], radians=False, inplace=False):
"""Convert spherical to cartesian coordinates.
:param alpha:
:param delta: polar angle, ranging from the -90 (south pole) to 90 (north pole)
:param distance: radial distance, determines the units of x, y and z
:param xname:
:param yname:
:param zname:
:param propagate_uncertainties: {propagate_uncertainties}
:param center:
:param radians:
:return: New dataframe (in inplace is False) with new x,y,z columns
"""
df = self.df if inplace else self.df.copy()
alpha = df._expr(alpha)
delta = df._expr(delta)
distance = df._expr(distance)
if not radians:
df.add_variable('pi', np.pi)
alpha = alpha * df._expr('pi')/180
delta = delta * df._expr('pi')/180
# TODO: use sth like .optimize by default to get rid of the +0 ?
if center[0]:
df[xname] = np.cos(alpha) * np.cos(delta) * distance + center[0]
else:
df[xname] = np.cos(alpha) * np.cos(delta) * distance
if center[1]:
df[yname] = np.sin(alpha) * np.cos(delta) * distance + center[1]
else:
df[yname] = np.sin(alpha) * np.cos(delta) * distance
if center[2]:
df[zname] = np.sin(delta) * distance + center[2]
else:
df[zname] = np.sin(delta) * distance
if propagate_uncertainties:
df.propagate_uncertainties([df[xname], df[yname], df[zname]])
return df
[docs] def cartesian2spherical(self, x="x", y="y", z="z", alpha="l", delta="b", distance="distance", radians=False, center=None, center_name="solar_position", inplace=False):
"""Convert cartesian to spherical coordinates.
:param x:
:param y:
:param z:
:param alpha:
:param delta: name for polar angle, ranges from -90 to 90 (or -pi to pi when radians is True).
:param distance:
:param radians:
:param center:
:param center_name:
:return:
"""
df = self.df if inplace else self.df.copy()
df.add_variable('pi', np.pi)
transform = "" if radians else "*180./pi"
if center is not None:
df.add_variable(center_name, center)
if center is not None and center[0] != 0:
x = "({x} - {center_name}[0])".format(**locals())
if center is not None and center[1] != 0:
y = "({y} - {center_name}[1])".format(**locals())
if center is not None and center[2] != 0:
z = "({z} - {center_name}[2])".format(**locals())
df.add_virtual_column(distance, "sqrt({x}**2 + {y}**2 + {z}**2)".format(**locals()))
# df.add_virtual_column(alpha, "((arctan2({y}, {x}) + 2*pi) % (2*pi)){transform}".format(**locals()))
df.add_virtual_column(alpha, "arctan2({y}, {x}){transform}".format(**locals()))
df.add_virtual_column(delta, "(-arccos({z}/{distance})+pi/2){transform}".format(**locals()))
return df
[docs] def cartesian_to_polar(self, x="x", y="y", radius_out="r_polar", azimuth_out="phi_polar",
propagate_uncertainties=False,
radians=False, inplace=False):
"""Convert cartesian to polar coordinates
:param x: expression for x
:param y: expression for y
:param radius_out: name for the virtual column for the radius
:param azimuth_out: name for the virtual column for the azimuth angle
:param propagate_uncertainties: {propagate_uncertainties}
:param radians: if True, azimuth is in radians, defaults to degrees
:return:
"""
df = self.df if inplace else self.df.copy()
x = df._expr(x)
y = df._expr(y)
if radians:
to_degrees = ""
else:
df.add_variable('pi', np.pi)
to_degrees = "*180/pi"
r = np.sqrt(x**2 + y**2)
df[radius_out] = r
phi = np.arctan2(y, x)
if not radians:
phi = phi * 180/np.pi
df[azimuth_out] = phi
if propagate_uncertainties:
df.propagate_uncertainties([df[radius_out], df[azimuth_out]])
return df
[docs] @docsubst
def velocity_polar2cartesian(self, x='x', y='y', azimuth=None, vr='vr_polar', vazimuth='vphi_polar', vx_out='vx', vy_out='vy', propagate_uncertainties=False, inplace=False):
""" Convert cylindrical polar velocities to Cartesian.
:param x:
:param y:
:param azimuth: Optional expression for the azimuth in degrees , may lead to a better performance when given.
:param vr:
:param vazimuth:
:param vx_out:
:param vy_out:
:param propagate_uncertainties: {propagate_uncertainties}
"""
df = self.df if inplace else self.df.copy()
x = df._expr(x)
y = df._expr(y)
vr = df._expr(vr)
vazimuth = df._expr(vazimuth)
if azimuth is not None:
azimuth = df._expr(azimuth)
azimuth = np.deg2rad(azimuth)
else:
azimuth = np.arctan2(y, x)
azimuth = df._expr(azimuth)
df[vx_out] = vr * np.cos(azimuth) - vazimuth * np.sin(azimuth)
df[vy_out] = vr * np.sin(azimuth) + vazimuth * np.cos(azimuth)
if propagate_uncertainties:
df.propagate_uncertainties([df[vx_out], df[vy_out]])
return df
[docs] @docsubst
def velocity_cartesian2polar(self, x="x", y="y", vx="vx", radius_polar=None, vy="vy", vr_out="vr_polar", vazimuth_out="vphi_polar",
propagate_uncertainties=False, inplace=False):
"""Convert cartesian to polar velocities.
:param x:
:param y:
:param vx:
:param radius_polar: Optional expression for the radius, may lead to a better performance when given.
:param vy:
:param vr_out:
:param vazimuth_out:
:param propagate_uncertainties: {propagate_uncertainties}
:return:
"""
df = self.df if inplace else self.df.copy()
x = df._expr(x)
y = df._expr(y)
vx = df._expr(vx)
vy = df._expr(vy)
if radius_polar is None:
radius_polar = np.sqrt(x**2 + y**2)
radius_polar = df._expr(radius_polar)
df[vr_out] = (x*vx + y*vy) / radius_polar
df[vazimuth_out] = (x*vy - y*vx) / radius_polar
if propagate_uncertainties:
df.propagate_uncertainties([df[vr_out], df[vazimuth_out]])
return df
[docs] def velocity_cartesian2spherical(self, x="x", y="y", z="z", vx="vx", vy="vy", vz="vz", vr="vr", vlong="vlong", vlat="vlat", distance=None, inplace=False):
"""Convert velocities from a cartesian to a spherical coordinate system
TODO: uncertainty propagation
:param x: name of x column (input)
:param y: y
:param z: z
:param vx: vx
:param vy: vy
:param vz: vz
:param vr: name of the column for the radial velocity in the r direction (output)
:param vlong: name of the column for the velocity component in the longitude direction (output)
:param vlat: name of the column for the velocity component in the latitude direction, positive points to the north pole (output)
:param distance: Expression for distance, if not given defaults to sqrt(x**2+y**2+z**2), but if this column already exists, passing this expression may lead to a better performance
:return:
"""
# see http://www.astrosurf.com/jephem/library/li110spherCart_en.htm
df = self.df if inplace else self.df.copy()
if distance is None:
distance = "sqrt({x}**2+{y}**2+{z}**2)".format(**locals())
df.add_virtual_column(vr, "({x}*{vx}+{y}*{vy}+{z}*{vz})/{distance}".format(**locals()))
df.add_virtual_column(vlong, "-({vx}*{y}-{x}*{vy})/sqrt({x}**2+{y}**2)".format(**locals()))
df.add_virtual_column(vlat, "-({z}*({x}*{vx}+{y}*{vy}) - ({x}**2+{y}**2)*{vz})/( {distance}*sqrt({x}**2+{y}**2) )".format(**locals()))
return df
[docs] def project_aitoff(self, alpha, delta, x, y, radians=True, inplace=False):
"""Add aitoff (https://en.wikipedia.org/wiki/Aitoff_projection) projection
:param alpha: azimuth angle
:param delta: polar angle
:param x: output name for x coordinate
:param y: output name for y coordinate
:param radians: input and output in radians (True), or degrees (False)
:return:
"""
df = self.df if inplace else self.df.copy()
transform = "" if radians else "*pi/180."
aitoff_alpha = "__aitoff_alpha_%s_%s" % (alpha, delta)
# sanatize
aitoff_alpha = re.sub("[^a-zA-Z_]", "_", aitoff_alpha)
df.add_virtual_column(aitoff_alpha, "arccos(cos({delta}{transform})*cos({alpha}{transform}/2))".format(**locals()))
df.add_virtual_column(x, "2*cos({delta}{transform})*sin({alpha}{transform}/2)/sinc({aitoff_alpha}/pi)/pi".format(**locals()))
df.add_virtual_column(y, "sin({delta}{transform})/sinc({aitoff_alpha}/pi)/pi".format(**locals()))
return df
[docs] def project_gnomic(self, alpha, delta, alpha0=0, delta0=0, x="x", y="y", radians=False, postfix="", inplace=False):
"""Adds a gnomic projection to the DataFrame"""
df = self.df if inplace else self.df.copy()
df.add_variable('pi', np.pi)
if not radians:
alpha = "pi/180.*%s" % alpha
delta = "pi/180.*%s" % delta
alpha0 = alpha0 * np.pi / 180
delta0 = delta0 * np.pi / 180
transform = "" if radians else "*180./pi"
# aliases
ra = alpha
dec = delta
ra_center = alpha0
dec_center = delta0
gnomic_denominator = 'sin({dec_center}) * tan({dec}) + cos({dec_center}) * cos({ra} - {ra_center})'.format(**locals())
denominator_name = 'gnomic_denominator' + postfix
xi = 'sin({ra} - {ra_center})/{denominator_name}{transform}'.format(**locals())
eta = '(cos({dec_center}) * tan({dec}) - sin({dec_center}) * cos({ra} - {ra_center}))/{denominator_name}{transform}'.format(**locals())
df.add_virtual_column(denominator_name, gnomic_denominator)
df.add_virtual_column(x, xi)
df.add_virtual_column(y, eta)
return df
[docs] def rotation_2d(self, x, y, xnew, ynew, angle_degrees, propagate_uncertainties=False, inplace=False):
"""Rotation in 2d.
:param str x: Name/expression of x column
:param str y: idem for y
:param str xnew: name of transformed x column
:param str ynew:
:param float angle_degrees: rotation in degrees, anti clockwise
:return:
"""
df = self.df if inplace else self.df.copy()
x = _ensure_string_from_expression(x)
y = _ensure_string_from_expression(y)
theta = np.radians(angle_degrees)
matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
m = matrix_name = x + "_" + y + "_rot"
for i in range(2):
for j in range(2):
df.set_variable(matrix_name + "_%d%d" % (i, j), matrix[i, j].item())
df[xnew] = df._expr("{m}_00 * {x} + {m}_01 * {y}".format(**locals()))
df[ynew] = df._expr("{m}_10 * {x} + {m}_11 * {y}".format(**locals()))
if propagate_uncertainties:
df.propagate_uncertainties([df[xnew], df[ynew]])
return df
[docs] def bearing(self, lon1, lat1, lon2, lat2, bearing="bearing", inplace=False):
"""Calculates a bearing, based on http://www.movable-type.co.uk/scripts/latlong.html"""
df = self.df if inplace else self.df.copy()
df.add_variable('pi', np.pi)
lon1 = "(pickup_longitude * pi / 180)"
lon2 = "(dropoff_longitude * pi / 180)"
lat1 = "(pickup_latitude * pi / 180)"
lat2 = "(dropoff_latitude * pi / 180)"
p1 = lat1
p2 = lat2
l1 = lon1
l2 = lon2
# from http://www.movable-type.co.uk/scripts/latlong.html
expr = "arctan2(sin({l2}-{l1}) * cos({p2}), cos({p1})*sin({p2}) - sin({p1})*cos({p2})*cos({l2}-{l1}))" \
.format(**locals())
df.add_virtual_column(bearing, expr)
return df
[docs] @docsubst
def inside_which_polygons(self, x, y, pxss, pyss=None, any=True):
"""Find in which set of polygons (0 based index) a point resides.
If any=True, it will be the first matching polygon set index, if any=False, it will
be the first index that matches all polygons in the set.
>>> import vaex
>>> import numpy as np
>>> df = vaex.from_arrays(x=[1, 2, 3], y=[2, 3, 4])
>>> px = np.array([1.5, 2.5, 2.5, 1.5])
>>> py = np.array([2.5, 2.5, 3.5, 3.5])
>>> polygonA = [px, py]
>>> polygonB = [px + 1, py + 1]
>>> pxs = [[polygonA, polygonB], [polygonA]]
>>> df['polygon_index'] = df.geo.inside_which_polygons(df.x, df.y, pxs, any=True)
>>> df
# x y polygon_index
0 1 2 --
1 2 3 0
2 3 4 0
>>> df['polygon_index'] = df.geo.inside_which_polygons(df.x, df.y, pxs, any=False)
>>> df
# x y polygon_index
0 1 2 --
1 2 3 1
2 3 4 --
:param x: {expression_one}
:param y: {expression_one}
:param px: list of N ndarrays with x coordinates for the polygon, N is the number of polygons
:param px: list of N ndarrays with y coordinates for the polygon, if None, the shape of the ndarrays
of the last dimention of the x arrays should be 2 (i.e. have the x and y coordinates)
:param any: test if point it in any polygon (logically or), or all polygons (logically and)
:return: Expression, 0 based index to which polygon the point belongs (or missing/masked value)
"""
if pyss is None:
list_of_polygons = pxss
# polygon is a list of vertices shape: (N, 2)
list_of_polygons = [[as_flat_array(np.asarray(polygon), np.float64) for polygon in polygons]
for polygons in list_of_polygons]
pxss = [[as_flat_array(polygon[0]) for polygon in polygons] for polygons in list_of_polygons]
pyss = [[as_flat_array(polygon[1]) for polygon in polygons] for polygons in list_of_polygons]
else:
pxss = [[as_flat_array(np.asarray(pxs), np.float64) for pxs in polygons_x]
for polygons_x in pxss]
pyss = [[as_flat_array(np.asarray(pys), np.float64) for pys in polygons_y]
for polygons_y in pyss]
N = len(pxss)
# iterate over polygon sets
meanxss = []
meanyss = []
radiii = []
for region_index in range(N):
pxs = pxss[region_index]
pys = pyss[region_index]
meanxs = []
meanys = []
radii = []
for polygons_index in range(len(pxs)):
meanx = pxs[polygons_index].mean()
meany = pys[polygons_index].mean()
radius = np.sqrt((meanx - pxs[polygons_index])**2 + (meany - pys[polygons_index])**2).max()
meanxs.append(meanx)
meanys.append(meany)
radii.append(radius)
meanxss.append(meanxs)
meanyss.append(meanys)
radiii.append(radii)
# this method is simply a wrapper around the true function, which also wants to have the
# radii, which are precalculated for performance reasons
return self.df.func.geo_inside_which_polygons(x, y, pxss, pyss, meanxss, meanyss, radiii, any=any)
@vaex.register_function(df_accessor=DataFrameAccessorGeo, name='inside_polygon')
def geo_inside_polygon(x, y, px, py):
"""Test if points defined by x and y are inside the polygon px, py
Example:
>>> import vaex
>>> import numpy as np
>>> df = vaex.from_arrays(x=[1, 2, 3], y=[2, 3, 4])
>>> px = np.array([1.5, 2.5, 2.5, 1.5])
>>> py = np.array([2.5, 2.5, 3.5, 3.5])
>>> df['inside'] = df.geo.inside_polygon(df.x, df.y, px, py)
>>> df
# x y inside
0 1 2 False
1 2 3 True
2 3 4 False
:param x: {expression_one}
:param y: {expression_one}
:param px: list of x coordinates for the polygon
:param px: list of y coordinates for the polygon
:return: Expression, which is true if point is inside, else false.
"""
x = vaex.array_types.to_numpy(x)
y = vaex.array_types.to_numpy(y)
x = as_flat_array(x, np.float64)
y = as_flat_array(y, np.float64)
px = as_flat_array(np.asarray(px), np.float64)
py = as_flat_array(np.asarray(py), np.float64)
mask = np.zeros(len(x), dtype=bool)
meanx = px.mean()
meany = py.mean()
radius = np.sqrt((meanx - px)**2 + (meany - py)**2).max()
vaex.vaexfast.pnpoly(px, py, x, y, mask, meanx, meany, radius)
return mask
@docsubst
@vaex.register_function(df_accessor=DataFrameAccessorGeo, name='inside_polygons')
def geo_inside_polygons(x, y, pxs, pys, any=True):
"""Test if points defined by x and y are inside all or any of the the polygons px, py
Example:
>>> import vaex
>>> import numpy as np
>>> df = vaex.from_arrays(x=[1, 2, 3], y=[2, 3, 4])
>>> px = np.array([1.5, 2.5, 2.5, 1.5])
>>> py = np.array([2.5, 2.5, 3.5, 3.5])
>>> df['inside'] = df.geo.inside_polygons(df.x, df.y, [px, px + 1], [py, py + 1], any=True)
>>> df
# x y inside
0 1 2 False
1 2 3 True
2 3 4 True
:param x: {expression_one}
:param y: {expression_one}
:param pxs: list of N ndarrays with x coordinates for the polygon, N is the number of polygons
:param pxs: list of N ndarrays with y coordinates for the polygon
:param any: return true if in any polygon, or all polygons
:return: Expression , which is true if point is inside, else false.
"""
x = vaex.array_types.to_numpy(x)
y = vaex.array_types.to_numpy(y)
x = as_flat_array(x, np.float64)
y = as_flat_array(y, np.float64)
mask = np.zeros(len(x), dtype=bool)
N = len(pxs)
submask = np.zeros(len(x), dtype=bool)
if N > 0:
pxs = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pxs]
pys = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pys]
for i in range(0, N):
px = as_flat_array(pxs[i], np.float64)
py = as_flat_array(pys[i], np.float64)
meanx = px.mean()
meany = py.mean()
radius = np.sqrt((meanx - px)**2 + (meany - py)**2).max()
vaex.vaexfast.pnpoly(px, py, x, y, submask if i > 0 else mask, meanx, meany, radius)
if i > 0:
if any:
mask = mask | submask
else:
mask = mask & submask
return mask
@docsubst
@vaex.register_function(df_accessor=DataFrameAccessorGeo, name='inside_which_polygon')
def geo_inside_which_polygon(x, y, pxs, pys):
"""Find in which polygon (0 based index) a point resides
Example:
>>> import vaex
>>> import numpy as np
>>> df = vaex.from_arrays(x=[1, 2, 3], y=[2, 3, 4])
>>> px = np.array([1.5, 2.5, 2.5, 1.5])
>>> py = np.array([2.5, 2.5, 3.5, 3.5])
>>> df['polygon_index'] = df.geo.inside_which_polygon(df.x, df.y, [px, px + 1], [py, py + 1])
>>> df
# x y polygon_index
0 1 2 --
1 2 3 0
2 3 4 1
:param x: {expression_one}
:param y: {expression_one}
:param px: list of N ndarrays with x coordinates for the polygon, N is the number of polygons
:param px: list of N ndarrays with y coordinates for the polygon
:return: Expression, 0 based index to which polygon the point belongs (or missing/masked value)
"""
x = vaex.array_types.to_numpy(x)
y = vaex.array_types.to_numpy(y)
x = as_flat_array(x, np.float64)
y = as_flat_array(y, np.float64)
# list do not get unwrapped
pxs = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pxs]
pys = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pys]
pxs = as_flat_array(np.asarray(pxs), np.float64)
pys = as_flat_array(np.asarray(pys), np.float64)
polygon_indices = np.zeros(len(x), dtype=np.int32)
polygon_mask = np.ones(len(x), dtype=bool)
polygon_indices = np.ma.array(polygon_indices, mask=polygon_mask)
indices = np.arange(len(x), dtype=np.uint32)
inside_mask = np.zeros(len(x), dtype=bool)
meanx = pxs.mean()
meany = pys.mean()
radius = np.sqrt((meanx - pxs)**2 + (meany - pys)**2).max()
N = len(pxs)
# mask = mask | submask
for i in range(N):
vaex.vaexfast.pnpoly(pxs[i], pys[i], x, y, inside_mask, meanx, meany, radius)
# mark all points that are inside with the index of the polygon
polygon_indices.data[indices[inside_mask]] = i
polygon_indices.mask[indices[inside_mask]] = False
# now remove all values that found a matching polygon
x = x[~inside_mask]
y = y[~inside_mask]
# and keep track of where they point to in the polygon_indices array
indices = indices[~inside_mask]
# trim mask
inside_mask = inside_mask[:len(x)]
if len(x) == 0:
break
return polygon_indices
@docsubst
@vaex.register_function()
def geo_inside_which_polygons(x, y, pxss, pyss, meanxss, meanyss, radiii, any):
# real implementation of geo.inside_which_polygon
x = vaex.array_types.to_numpy(x)
y = vaex.array_types.to_numpy(y)
x = as_flat_array(x, np.float64)
y = as_flat_array(y, np.float64)
polygon_indices = np.zeros(len(x), dtype=np.int32)
polygon_mask = np.ones(len(x), dtype=bool)
polygon_indices = np.ma.array(polygon_indices, mask=polygon_mask)
indices = np.arange(len(x), dtype=np.uint32)
inside_mask = np.zeros(len(x), dtype=bool)
N = len(pxss)
for i in range(N):
pxs = pxss[i]
pys = pyss[i]
pxs = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pxs]
pys = [vaex.arrow.numpy_dispatch.unwrap(k) for k in pys]
meanxs = meanxss[i]
meanys = meanyss[i]
radii = radiii[i]
M = len(pxs)
if M > 0:
vaex.vaexfast.pnpoly(pxs[0], pys[0], x, y, inside_mask, meanxs[0], meanys[0], radii[0])
if M > 1:
inside_sub_mask = np.zeros(len(x), dtype=bool)
for j in range(1, M):
vaex.vaexfast.pnpoly(pxs[j], pys[j], x, y, inside_sub_mask, meanxs[j], meanys[j], radii[j])
if any:
inside_mask = inside_mask | inside_sub_mask
else:
inside_mask = inside_mask & inside_sub_mask
# mark all points that are inside with the index of the polygon
polygon_indices.data[indices[inside_mask]] = i
polygon_indices.mask[indices[inside_mask]] = False
# no remove all values that found a matching polygon
x = x[~inside_mask]
y = y[~inside_mask]
# and keep track of where they point to in the polygon_indices array
indices = indices[~inside_mask]
# trim mask
inside_mask = inside_mask[:len(x)]
if len(x) == 0:
break
return polygon_indices