{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dask\n", "\n", "If you want to try out this notebook with a live Python kernel, use [mybinder](https://mybinder.org/):\n", "\n", "![Binder](https://mybinder.org/badge_logo.svg)\n", "\n", "## Dask.array\n", "A vaex dataframe can be lazily converted to a [dask.array](https://docs.dask.org/en/latest/array.html) using [DataFrame.to_dask_array](../api.rst#vaex.dataframe.DataFrame.to_dask_array)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# x y z vx vy vz E L Lz FeH
0 -0.7774707672.10626292 1.93743467 53.276722 288.386047 -95.2649078-121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518
1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444-56.3121033-100819.91406251435.1839599609375-828.7567749023438 -1.788735491591229
2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161-100559.96093751039.2989501953125920.802490234375 -0.7618109022478798
3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016-58.9777031-70174.8515625 2441.724853515625 1183.5899658203125 -1.5208778422936413
4 0.243441463 -0.822781682-0.206593871-311.742371-238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361
... ... ... ... ... ... ... ... ... ... ...
329,9953.76883793 4.66251659 -4.42904139 107.432999 -2.1377129617.5130272 -119687.3203125746.8833618164062 -508.96484375 -1.6499842518381402
329,9969.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.4336036247720836
329,997-1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236-127.541473-112580.359375 1182.436279296875 115.58557891845703 -1.9306227597361942
329,998-14.2985935 -5.51750422 -8.65472317 110.221558 -31.392559186.2726822 -74862.90625 1324.59265136718751057.017333984375 -1.225019818838568
329,99910.5450506 -8.86106777 -4.65835428 -2.10541415-27.61088563.80799961 -95361.765625 351.0955505371094 -309.81439208984375-2.5689636894079477
" ], "text/plain": [ "# x y z vx vy vz E L Lz FeH\n", "0 -0.777470767 2.10626292 1.93743467 53.276722 288.386047 -95.2649078 -121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518\n", "1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444 -56.3121033 -100819.9140625 1435.1839599609375 -828.7567749023438 -1.788735491591229\n", "2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161 -100559.9609375 1039.2989501953125 920.802490234375 -0.7618109022478798\n", "3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016 -58.9777031 -70174.8515625 2441.724853515625 1183.5899658203125 -1.5208778422936413\n", "4 0.243441463 -0.822781682 -0.206593871 -311.742371 -238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361\n", "... ... ... ... ... ... ... ... ... ... ...\n", "329,995 3.76883793 4.66251659 -4.42904139 107.432999 -2.13771296 17.5130272 -119687.3203125 746.8833618164062 -508.96484375 -1.6499842518381402\n", "329,996 9.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.4336036247720836\n", "329,997 -1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236 -127.541473 -112580.359375 1182.436279296875 115.58557891845703 -1.9306227597361942\n", "329,998 -14.2985935 -5.51750422 -8.65472317 110.221558 -31.3925591 86.2726822 -74862.90625 1324.5926513671875 1057.017333984375 -1.225019818838568\n", "329,999 10.5450506 -8.86106777 -4.65835428 -2.10541415 -27.6108856 3.80799961 -95361.765625 351.0955505371094 -309.81439208984375 -2.5689636894079477" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import vaex\n", "df = vaex.example()\n", "df" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Array Chunk
Bytes 7.92 MB 7.92 MB
Shape (330000, 3) (330000, 3)
Count 2 Tasks 1 Chunks
Type float64 numpy.ndarray
\n", "
\n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 3\n", " 330000\n", "\n", "
" ], "text/plain": [ "dask.array" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# convert a set of columns in the dataframe to a 2d dask array\n", "A = df[['x', 'y', 'z']].to_dask_array()\n", "A" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Array Chunk
Bytes 2.64 MB 2.64 MB
Shape (330000,) (330000,)
Count 11 Tasks 1 Chunks
Type float64 numpy.ndarray
\n", "
\n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 330000\n", " 1\n", "\n", "
" ], "text/plain": [ "dask.array" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import dask.array as da\n", "# lazily compute with dask\n", "r = da.sqrt(A[:,0]**2 + A[:,1]**2 + A[:,2]**2)\n", "r" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# materialize the data\n", "r_computed = r.compute()\n", "r_computed" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
# x y z vx vy vz E L Lz FeH r
0 -0.7774707672.10626292 1.93743467 53.276722 288.386047 -95.2649078-121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518 2.9655450396553587
1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444-56.3121033-100819.91406251435.1839599609375-828.7567749023438 -1.788735491591229 5.77829281049018
2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161-100559.96093751039.2989501953125920.802490234375 -0.76181090224787986.99079603950256
3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016-58.9777031-70174.8515625 2441.724853515625 1183.5899658203125 -1.52087784229364139.431842752707537
4 0.243441463 -0.822781682-0.206593871-311.742371-238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361 0.8825613121347967
... ... ... ... ... ... ... ... ... ... ... ...
329,9953.76883793 4.66251659 -4.42904139 107.432999 -2.1377129617.5130272 -119687.3203125746.8833618164062 -508.96484375 -1.64998425183814027.453831761514681
329,9969.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.433603624772083615.398412491068198
329,997-1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236-127.541473-112580.359375 1182.436279296875 115.58557891845703 -1.93062275973619428.864250273925633
329,998-14.2985935 -5.51750422 -8.65472317 110.221558 -31.392559186.2726822 -74862.90625 1324.59265136718751057.017333984375 -1.225019818838568 17.601047186042507
329,99910.5450506 -8.86106777 -4.65835428 -2.10541415-27.61088563.80799961 -95361.765625 351.0955505371094 -309.81439208984375-2.568963689407947714.540181524970293
" ], "text/plain": [ "# x y z vx vy vz E L Lz FeH r\n", "0 -0.777470767 2.10626292 1.93743467 53.276722 288.386047 -95.2649078 -121238.171875 831.0799560546875 -336.426513671875 -2.309227609164518 2.9655450396553587\n", "1 3.77427316 2.23387194 3.76209331 252.810791 -69.9498444 -56.3121033 -100819.9140625 1435.1839599609375 -828.7567749023438 -1.788735491591229 5.77829281049018\n", "2 1.3757627 -6.3283844 2.63250017 96.276474 226.440201 -34.7527161 -100559.9609375 1039.2989501953125 920.802490234375 -0.7618109022478798 6.99079603950256\n", "3 -7.06737804 1.31737781 -6.10543537 204.968842 -205.679016 -58.9777031 -70174.8515625 2441.724853515625 1183.5899658203125 -1.5208778422936413 9.431842752707537\n", "4 0.243441463 -0.822781682 -0.206593871 -311.742371 -238.41217 186.824127 -144138.75 374.8164367675781 -314.5353088378906 -2.655341358427361 0.8825613121347967\n", "... ... ... ... ... ... ... ... ... ... ... ...\n", "329,995 3.76883793 4.66251659 -4.42904139 107.432999 -2.13771296 17.5130272 -119687.3203125 746.8833618164062 -508.96484375 -1.6499842518381402 7.453831761514681\n", "329,996 9.17409325 -8.87091351 -8.61707687 32.0 108.089264 179.060638 -68933.8046875 2395.633056640625 1275.490234375 -1.4336036247720836 15.398412491068198\n", "329,997 -1.14041007 -8.4957695 2.25749826 8.46711349 -38.2765236 -127.541473 -112580.359375 1182.436279296875 115.58557891845703 -1.9306227597361942 8.864250273925633\n", "329,998 -14.2985935 -5.51750422 -8.65472317 110.221558 -31.3925591 86.2726822 -74862.90625 1324.5926513671875 1057.017333984375 -1.225019818838568 17.601047186042507\n", "329,999 10.5450506 -8.86106777 -4.65835428 -2.10541415 -27.6108856 3.80799961 -95361.765625 351.0955505371094 -309.81439208984375 -2.5689636894079477 14.540181524970293" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# put it back in the dataframe\n", "df['r'] = r_computed\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }