Vaex server¶
Why¶
There are various cases where the calculations and/or aggregations need to happen on a different computer than where the (aggregated) data is needed. For instance, when making a dashboard, the dashboard server might not be powerful enough for the calculations. Another example is where the client lives in a different process, such as a browser.
Starting the dataframe server¶
Use our server first
You can skip running your own server and first try out using https://dataframe.vaex.io
The vaex (web) server can be started from the command line like:
$ vaex server --port 8082 /data/taxi/yellow_taxi_2012.hdf5 gaia=/data/gaia/gaia-edr3-x-ps1.hdf5
INFO:MainThread:vaex.server:yellow_taxi_2012: http://0.0.0.0:8082/dataset/yellow_taxi_2012 for REST or ws://0.0.0.0:8082/yellow_taxi_2012 for websocket
INFO:MainThread:vaex.server:gaia: http://0.0.0.0:8082/dataset/gaia for REST or ws://0.0.0.0:8082/gaia for websocket
INFO: Started server process [617048]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8082 (Press CTRL+C to quit)
Pass files on the command line, or query help by passing the --help
flag.
Python API¶
When the client is a Python program, the easiest API is the remote dataframe in the vaex
package itself. This does not use the REST API, but communicates over a websocket for low-latency, bi-directional communication.
import vaex
# Open the dataframe through the remote server (vaex+wss:// URL); the data itself is kept remote.
df = vaex.open('vaex+wss://dataframe.vaex.io/example')
# Only the result of the aggregation is sent over the wire, not the underlying column.
df.x.mean()
This means you can use almost all features of a normal (local) Vaex dataframe, without having to download the data.
REST API¶
When the client is non-Python, or when you want to avoid the vaex
dependency, the REST API can be used.
A Vaex server is running at dataframe.vaex.io
and its API documentation can be browsed at https://dataframe.vaex.io/docs
Some endpoints can easily be queried using curl:
$ curl -i https://dataframe.vaex.io/histogram/example/x\?shape\=16
HTTP/1.1 200 OK
Server: nginx/1.18.0 (Ubuntu)
Date: Thu, 01 Apr 2021 11:23:16 GMT
Content-Type: application/json
Content-Length: 430
Connection: keep-alive
x-process-time: 0.03632664680480957
x-data-passes: 2
{"dataset_id":"example","centers":[-71.61332178115845,-58.57391309738159,-45.534504413604736,-32.49509572982788,-19.455687046051025,-6.41627836227417,6.6231303215026855,19.66253900527954,32.7019476890564,45.74135637283325,58.78076505661011,71.82017374038696,84.85958242416382,97.89899110794067,110.93839979171753,123.97780847549438],"values":[3.0,0.0,3.0,917.0,13706.0,154273.0,147171.0,12963.0,960.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0]}
The POST
method may be more convenient from a JavaScript client or when using an HTTP library.
Python using requests¶
Requests is an easy to use HTTP library.
import requests

# JSON body for the POST /heatmap endpoint: a 2-d binned count of the
# expressions `l` (x axis) and `b` (y axis) of the `gaia-dr2` dataset.
data = {
    'dataset_id': 'gaia-dr2',
    'expression_x': 'l',
    'expression_y': 'b',
    'filter': None,          # no row filter
    'virtual_columns': [],   # no extra derived columns
    'min_x': 0,
    'max_x': 360,
    'min_y': -90,
    'max_y': 90,
    'shape_x': 512,
    'shape_y': 256,
}
# `json=data` makes requests serialise the dict and send it as the request body.
response = requests.post('https://dataframe.vaex.io/heatmap', json=data)
# Check the HTTP status *before* decoding the body, so that a failed request
# is reported by this assertion instead of by a JSON decoding error (or by a
# silently accepted error payload).
assert response.status_code == 200, 'oops, something went wrong'
response.json()
{'dataset_id': 'gaia-dr2',
'centers_x': [22.5, 67.5, 112.5, 157.5, 202.5, 247.5, 292.5, 337.5],
'centers_y': [-67.5, -22.5, 22.5, 67.5],
'values': [[3508786.0,
2711710.0,
2287021.0,
2042114.0,
2009057.0,
2448207.0,
3716951.0,
3644323.0],
[250883466.0,
100064757.0,
49538929.0,
28273970.0,
30521201.0,
53214391.0,
159460735.0,
251170124.0],
[166984543.0,
110774989.0,
43475771.0,
31343345.0,
31584354.0,
44061582.0,
108436851.0,
189699927.0],
[3388522.0,
2848641.0,
2221241.0,
1997993.0,
1941090.0,
2215271.0,
3061986.0,
3387287.0]]}
// Request body for POST /heatmap: bin expression `l` against `b` of the
// `gaia-dr2` dataset on a 512 x 256 grid, without filter or virtual columns.
const inputData = {
  dataset_id: 'gaia-dr2',
  expression_x: 'l',
  expression_y: 'b',
  filter: null,
  virtual_columns: [],
  min_x: 0,
  max_x: 360,
  min_y: -90,
  max_y: 90,
  shape: [512, 256],
};
// The payload is sent as a JSON string in the request body.
const requestOptions = {
  method: 'POST',
  body: JSON.stringify(inputData),
};
const result = await fetch("https://dataframe.vaex.io/heatmap", requestOptions);
// Decode the JSON response and show it in the console.
const data = await result.json();
console.log(data);
{dataset_id: "gaia-dr2", centers_x: Array(512), centers_y: Array(256), values: Array(256)}
centers_x: (512) [0.3515625, 1.0546875, 1.7578125, 2.4609375, 3.1640625, 3.8671875, 4.5703125, 5.2734375, 5.9765625, 6.6796875, 7.3828125, 8.0859375, 8.7890625, 9.4921875, 10.1953125, 10.8984375, 11.6015625, 12.3046875, 13.0078125, 13.7109375, 14.4140625, 15.1171875, 15.8203125, 16.5234375, 17.2265625, 17.9296875, 18.6328125, 19.3359375, 20.0390625, 20.7421875, 21.4453125, 22.1484375, 22.8515625, 23.5546875, 24.2578125, 24.9609375, 25.6640625, 26.3671875, 27.0703125, 27.7734375, 28.4765625, 29.1796875, 29.8828125, 30.5859375, 31.2890625, 31.9921875, 32.6953125, 33.3984375, 34.1015625, 34.8046875, 35.5078125, 36.2109375, 36.9140625, 37.6171875, 38.3203125, 39.0234375, 39.7265625, 40.4296875, 41.1328125, 41.8359375, 42.5390625, 43.2421875, 43.9453125, 44.6484375, 45.3515625, 46.0546875, 46.7578125, 47.4609375, 48.1640625, 48.8671875, 49.5703125, 50.2734375, 50.9765625, 51.6796875, 52.3828125, 53.0859375, 53.7890625, 54.4921875, 55.1953125, 55.8984375, 56.6015625, 57.3046875, 58.0078125, 58.7109375, 59.4140625, 60.1171875, 60.8203125, 61.5234375, 62.2265625, 62.9296875, 63.6328125, 64.3359375, 65.0390625, 65.7421875, 66.4453125, 67.1484375, 67.8515625, 68.5546875, 69.2578125, 69.9609375, …]
centers_y: (256) [-89.6484375, -88.9453125, -88.2421875, -87.5390625, -86.8359375, -86.1328125, -85.4296875, -84.7265625, -84.0234375, -83.3203125, -82.6171875, -81.9140625, -81.2109375, -80.5078125, -79.8046875, -79.1015625, -78.3984375, -77.6953125, -76.9921875, -76.2890625, -75.5859375, -74.8828125, -74.1796875, -73.4765625, -72.7734375, -72.0703125, -71.3671875, -70.6640625, -69.9609375, -69.2578125, -68.5546875, -67.8515625, -67.1484375, -66.4453125, -65.7421875, -65.0390625, -64.3359375, -63.6328125, -62.9296875, -62.2265625, -61.5234375, -60.8203125, -60.1171875, -59.4140625, -58.7109375, -58.0078125, -57.3046875, -56.6015625, -55.8984375, -55.1953125, -54.4921875, -53.7890625, -53.0859375, -52.3828125, -51.6796875, -50.9765625, -50.2734375, -49.5703125, -48.8671875, -48.1640625, -47.4609375, -46.7578125, -46.0546875, -45.3515625, -44.6484375, -43.9453125, -43.2421875, -42.5390625, -41.8359375, -41.1328125, -40.4296875, -39.7265625, -39.0234375, -38.3203125, -37.6171875, -36.9140625, -36.2109375, -35.5078125, -34.8046875, -34.1015625, -33.3984375, -32.6953125, -31.9921875, -31.2890625, -30.5859375, -29.8828125, -29.1796875, -28.4765625, -27.7734375, -27.0703125, -26.3671875, -25.6640625, -24.9609375, -24.2578125, -23.5546875, -22.8515625, -22.1484375, -21.4453125, -20.7421875, -20.0390625, …]
dataset_id: "gaia-dr2"
values: (256) [ …]
__proto__: Object
Example using plotly.js¶
Combining the previous with the plotly.js library we can make an interactive plot:
Sky map¶
First, make sure we have a div
<div id="plotlyHeatmap"></div>
Then load the data, and plot it using plotly.js:
/**
 * Request body for the sky-map heatmap: expression `l` (x, 0..360) against
 * expression `b` (y, -90..90) of the `gaia-dr2` dataset on a 512 x 256 grid.
 *
 * The object is passed to `plot()`; a zoom handler may later overwrite the
 * `min_*` / `max_*` fields in place.
 */
const skyMapInput = {
  dataset_id: 'gaia-dr2',
  expression_x: 'l',
  expression_y: 'b',
  // Derived columns made available to the expressions and the filter.
  virtual_columns: {
    distance: '1/parallax',
  },
  // No row filter. (This used to read `this.filter`, but at the top level
  // `this` is `undefined` in a module and the global object in a classic
  // script, so it either threw or picked up an unrelated global.)
  filter: null,
  min_x: 0,
  max_x: 360,
  min_y: -90,
  max_y: 90,
  shape: [512, 256],
};
/**
 * POST a heatmap request to the Vaex server and return the decoded response.
 *
 * @param {object} heatmapInput - Request body; it is serialised with
 *   `JSON.stringify` and sent as-is.
 * @returns {Promise<object>} The parsed JSON body of the response.
 * @throws {Error} If the server answers with a non-2xx status, so callers do
 *   not mistake an error payload for heatmap data.
 */
async function loadData(heatmapInput) {
  // NOTE(review): this targets the `dataframe-dev` host while the other
  // examples use `dataframe.vaex.io` - confirm which one is intended.
  const response = await fetch('https://dataframe-dev.vaex.io/heatmap', {
    method: 'POST',
    body: JSON.stringify(heatmapInput),
  });
  if (!response.ok) {
    throw new Error(`Heatmap request failed: ${response.status} ${response.statusText}`);
  }
  return response.json();
}
/**
 * Draw (or update) a Plotly heatmap from a heatmap response.
 *
 * @param {string} elementId - Id of the element handed to `Plotly.react`.
 * @param {object} data - Heatmap response; `centers_x`, `centers_y` and
 *   `values` are read, and `expression_x` / `expression_y` are used as axis
 *   titles.
 * @param {boolean} log - When truthy, every value is passed through
 *   `Math.log1p` before plotting.
 * @param {object} [xaxis] - Extra x-axis layout options; they override the
 *   default title because they are spread last.
 * @param {object} [yaxis] - Extra y-axis layout options, same rule.
 * @returns {*} Whatever `Plotly.react` returns (a promise, per the Plotly
 *   docs), so callers can `await` the render. Previously nothing was
 *   returned and awaiting this function waited for nothing.
 */
function plotData(elementId, data, log, xaxis, yaxis) {
  // log1p keeps empty bins at 0 while compressing the large counts.
  const z = log
    ? data.values.map((row) => row.map((count) => Math.log1p(count)))
    : data.values;
  const trace = {
    x: data.centers_x,
    y: data.centers_y,
    z,
    type: 'heatmap',
    colorscale: 'plasma',
    // Ask Plotly to transpose `z` before drawing.
    transpose: true,
  };
  const layout = {
    xaxis: {
      title: {
        text: data.expression_x,
      },
      ...xaxis,
    },
    yaxis: {
      title: {
        text: data.expression_y,
      },
      ...yaxis,
    },
  };
  return Plotly.react(elementId, [trace], layout);
}
/**
 * Fetch a heatmap from the server and draw it with log-scaled values.
 *
 * @param {string} elementId - Id of the element to draw into.
 * @param {object} heatmapInput - Request body forwarded to `loadData`.
 * @param {object} [xaxis] - Extra x-axis layout options for `plotData`.
 * @param {object} [yaxis] - Extra y-axis layout options for `plotData`.
 * @returns {Promise<void>}
 */
async function plot(elementId, heatmapInput, xaxis, yaxis) {
  // Values are always plotted on a log scale by this helper.
  const useLogScale = true;
  const response = await loadData(heatmapInput);
  await plotData(elementId, response, useLogScale, xaxis, yaxis);
}
// Start the initial sky-map render into the `plotlyHeatmap` div. `plot` is
// async, so this returns a promise that is not awaited here.
plot('plotlyHeatmap', skyMapInput);
Adding an event handler will refine the data when we zoom in:
/**
 * Re-fetch and re-draw the heatmap whenever the user zooms or pans it.
 *
 * The handler mutates `heatmapInput` in place (`min_x`, `max_x`, `min_y`,
 * `max_y`) and then calls `plot` again with the updated bounds.
 *
 * Must be called after the element has been plotted: the `.on` method used
 * here is expected to be provided by Plotly on the graph element.
 *
 * @param {string} elementId - Id of the plotted element.
 * @param {object} heatmapInput - Request body whose bounds follow the view.
 */
function addZoomHandler(elementId, heatmapInput) {
  // Bounds at install time; restored when an axis is reset to autorange.
  const initialBounds = {
    min_x: heatmapInput.min_x,
    max_x: heatmapInput.max_x,
    min_y: heatmapInput.min_y,
    max_y: heatmapInput.max_y,
  };
  document.getElementById(elementId).on('plotly_relayout', async (e) => {
    // A relayout event only carries the keys that changed: zooming one axis
    // omits the other, and a reset reports `<axis>.autorange` instead of a
    // range. Only touch what the event actually reports, so the remaining
    // bounds are never overwritten with `undefined`.
    let boundsChanged = false;
    for (const axis of ['x', 'y']) {
      const low = e[`${axis}axis.range[0]`];
      const high = e[`${axis}axis.range[1]`];
      if (low !== undefined && high !== undefined) {
        heatmapInput[`min_${axis}`] = low;
        heatmapInput[`max_${axis}`] = high;
        boundsChanged = true;
      } else if (e[`${axis}axis.autorange`]) {
        heatmapInput[`min_${axis}`] = initialBounds[`min_${axis}`];
        heatmapInput[`max_${axis}`] = initialBounds[`max_${axis}`];
        boundsChanged = true;
      }
    }
    // Unrelated layout changes (e.g. switching the drag mode) need no refetch.
    if (!boundsChanged) {
      return;
    }
    try {
      // and plot again
      await plot(elementId, heatmapInput);
    } catch (err) {
      // Nothing awaits an event handler, so report the failure here instead
      // of leaving an unhandled rejection.
      console.error('Failed to refresh heatmap after zoom:', err);
    }
  });
}
CMD¶
We can now easily add a second heatmap
<div id="plotlyHeatmapCMD"></div>
And plot a different heatmap (a color-magnitude diagram) on this div.
// Request body for the second heatmap (the color-magnitude diagram):
// `phot_bp_mean_mag-phot_rp_mean_mag` on x against the virtual column `M_g`
// on y, restricted by `filter`.
const cmdInput = {
  dataset_id: 'gaia-dr2',
  expression_x: 'phot_bp_mean_mag-phot_rp_mean_mag',
  expression_y: 'M_g',
  // `M_g` is built from `distance`, which is itself a virtual column.
  virtual_columns: {
    distance: '1/parallax',
    M_g: 'phot_g_mean_mag-(5*log10(distance)+10)',
  },
  filter: '((pmra**2+pmdec**2)<100)&(parallax_over_error>10)&(abs(b)>20)',
  min_x: -1,
  max_x: 5,
  // The y bounds are given high-to-low (15 down to -5).
  min_y: 15,
  max_y: -5,
  // NOTE(review): the other JavaScript requests pass `shape: [nx, ny]` -
  // confirm the server also accepts `shape_x` / `shape_y`.
  shape_x: 256,
  shape_y: 256,
};
// Draw the CMD heatmap first and attach the zoom handler afterwards, since
// the handler is registered on the already-plotted element.
// The arrow function used to be defined without ever being called, so
// nothing was plotted; it is now invoked immediately and failures are
// reported instead of being left as an unhandled rejection.
(async () => {
  await plot('plotlyHeatmapCMD', cmdInput);
  addZoomHandler('plotlyHeatmapCMD', cmdInput);
})().catch((err) => {
  console.error('Failed to render the CMD heatmap:', err);
});