import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

import DefaultLayout from "/opt/build/repo/src/templates/pageTemplate.js";
import Quote from "../../components/text-decorations/quote";
import Information from "../../components/text-decorations/information";
import Note from "../../components/text-decorations/note";
// Page frontmatter exposed to importers of this module (empty for this page).
export const _frontmatter = {};
// Layout component that wraps the rendered page content.
const MDXLayout = DefaultLayout;
// Props spread onto the layout before any caller-supplied props.
const layoutProps = { _frontmatter };
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">



    <p>{`Dask was developed to scale libraries such as Pandas, NumPy, Scikit-Learn, etc. It can help you scale beyond a single machine. Because Dask has a familiar API, it's easier to scale your work with minimal code rewriting, saving you time.`}</p>
    <p>{`You can deploy Dask in-house, on the cloud or HPC super-computers. It supports encryption and authentication using TSL/SSL certificates.`}</p>
    <Quote mdxType="Quote">
 <p>It is resilient and can handle the failure of worker nodes gracefully and is elastic, and so can take advantage of new nodes added on-the-fly.</p>
 <a href="https://docs.dask.org/en/latest/why.html">Dask docs - Why Dask?</a>
    </Quote>
    <h2 {...{
      "id": "scaling-down-to-a-single-computer",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h2" {...{
        "href": "#scaling-down-to-a-single-computer",
        "aria-label": "scaling down to a single computer permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Scaling down to a single computer`}</h2>
    <p>{`Now you might be thinking that Dask is only suited for big and expensive clusters. The great news is that Dask is also suitable to use in a single computer. `}</p>
    <p>{`Our computers have become more powerful, having access to multi-core CPUs, large amounts of RAM and Nvme SSD drives. This means that you can use large datasets and use the data however you want with your computer.`}</p>
    <Quote mdxType="Quote">
  <p>Dask can enable efficient parallel computations on single machines by leveraging their multi-core CPUs and streaming data efficiently from disk. It can run on a distributed cluster, but it doesn’t have to.</p>
  <a href="https://docs.dask.org/en/latest/why.html">Dask docs - Why Dask?</a>
    </Quote>
    <p>{`You can run single-machine schedulers that are light, require no setup and can run in the same process as the user session. Dask is also good at finding ways to avoid using too much memory.`}</p>
    <h2 {...{
      "id": "schedulers-and-workers",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h2" {...{
        "href": "#schedulers-and-workers",
        "aria-label": "schedulers and workers permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Schedulers and Workers`}</h2>
    <p>{`You can run a distributed scheduler in a single machine without the need to create a cluster or connect to the cloud.`}</p>
    <p>{`To start your scheduler you can use the CLI command`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-shell"
      }}>{`dask-scheduler
`}</code></pre>
    <p>{`This will give you a scheduler address that will look like: `}<inlineCode parentName="p">{`tcp://<ip address>:<port>`}</inlineCode>{` and a dashboard at  `}<inlineCode parentName="p">{`http://<ip adress>:<port>`}</inlineCode>{`, if running locally it will use localhost and port 8786 by default.`}</p>
    <p>{`To create a worker you can open a new terminal window and use the CLI command`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-shell"
      }}>{`dask-worker tcp://<ip adress>:<port>
`}</code></pre>
    <p>{`You will see on your dask scheduler that a new connection was established. You can now open more terminal windows to create more workers.`}</p>
    <p>{`You might be wondering, why this is relevant. Well, you can use SSH to create workers on a different machine. So if you have a desktop and a laptop, you can run the scheduler and some workers on the desktop and add more workers from the laptop.`}</p>
    <p>{`You can start an SHH connection with the CLI command:`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-shell"
      }}>{`dask-ssh <ip address> <ip address>
`}</code></pre>
    <Information mdxType="Information">
Read more info on the Dask docs on <a href="https://docs.dask.org/en/latest/setup/ssh.html">how to setup an ssh connection</a>.
    </Information>
    <h2 {...{
      "id": "comparing-dask-with-pandas",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h2" {...{
        "href": "#comparing-dask-with-pandas",
        "aria-label": "comparing dask with pandas permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Comparing Dask with Pandas`}</h2>
    <p>{`As mentioned before Dask uses a familiar API which means that is easy to start using if you already know other libraries like Pandas or Numpy. `}</p>
    <p>{`Let's have a look at how you can create a DataFrame with Pandas and then Dask:`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-python"
      }}>{`import pandas as pd
import dask.dataframe as dd

# Create a pandas dataframe
df = pd.read_csv('2015-01-01.csv')

# Create a dask dataframe
df = dd.read_csv('2015-01-01.csv')

`}</code></pre>
    <Note mdxType="Note">
  You can see this example on the official <a href="https://docs.dask.org/en/latest/">Dask docs</a>.
    </Note>
    <p>{`There are some differences that are worth to mention:`}</p>
    <ul>
      <li parentName="ul">{`In Dask you need to run `}<inlineCode parentName="li">{`.compute()`}</inlineCode>{` to get a result back because data is lazyloaded`}
        <ul parentName="li">
          <li parentName="ul">{`For example to calculate the mean of a dataframe you need to run: `}<inlineCode parentName="li">{`df.groupby(df.user\\_id).value.mean().compute()`}</inlineCode></li>
        </ul>
      </li>
      <li parentName="ul">{`You can load multiple files to your dataframe by using `}<inlineCode parentName="li">{`*.csv`}</inlineCode>
        <ul parentName="li">
          <li parentName="ul">{`For example: `}<inlineCode parentName="li">{`df = dd.read_csv('2015-*-*.csv')`}</inlineCode></li>
        </ul>
      </li>
    </ul>
    <p>{`As you can see, the Dask API is familiar enough that you can use the same code with some small changes.`}</p>
    <h2 {...{
      "id": "how-to-install-dask",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h2" {...{
        "href": "#how-to-install-dask",
        "aria-label": "how to install dask permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`How to install Dask`}</h2>
    <p>{`You have three ways to install dask:`}</p>
    <ul>
      <li parentName="ul">{`Using Anaconda`}
        <ul parentName="li">
          <li parentName="ul"><inlineCode parentName="li">{`conda install dask`}</inlineCode></li>
        </ul>
      </li>
      <li parentName="ul">{`Pip`}
        <ul parentName="li">
          <li parentName="ul"><inlineCode parentName="li">{`python -m pip install "dask\\[complete\\]"`}</inlineCode></li>
        </ul>
      </li>
      <li parentName="ul">{`From source`}
        <ul parentName="li">
          <li parentName="ul"><inlineCode parentName="li">{`git clone https://github.com/dask/dask.git && cd dask && python -m pip install`}</inlineCode></li>
        </ul>
      </li>
    </ul>
    <p>{`Have a look at the docs on `}<a parentName="p" {...{
        "href": "https://docs.dask.org/en/latest/install.html"
      }}>{`how to install Dask`}</a>{` for a better in-depth explanation on how to install Dask, since the docs show how you can install just some things.`}</p>
    <h2 {...{
      "id": "watch-an-example",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h2" {...{
        "href": "#watch-an-example",
        "aria-label": "watch an example permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Watch an example`}</h2>
    <p>{`I'd recommend you to watch this quick video explaining how to setup Dask on your machine.`}</p>
    <iframe className="center" width="560" height="315" src="https://www.youtube.com/embed/TQM9zIBzNBo" frameBorder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowFullScreen />
    <hr></hr>
    <p><strong parentName="p">{`References:`}</strong></p>
    <ul>
      <li parentName="ul"><a parentName="li" {...{
          "href": "https://docs.dask.org/en/latest/install.html"
        }}>{`Dask - Install Dask `}</a></li>
      <li parentName="ul"><a parentName="li" {...{
          "href": "https://docs.dask.org/en/latest/setup/cli.html"
        }}>{`Dask - CLI commands`}</a></li>
      <li parentName="ul"><a parentName="li" {...{
          "href": "https://docs.dask.org/en/latest/setup/ssh.html"
        }}>{`Dask - Setting up SSH`}</a></li>
      <li parentName="ul"><a parentName="li" {...{
          "href": "https://www.youtube.com/watch?v=TQM9zIBzNBo"
        }}>{`Video - Dask setup: Introduction`}</a></li>
    </ul>

    </MDXLayout>;
}
;
// Tag the component with a static `isMDXComponent` flag.
// NOTE(review): presumably read by the MDX runtime/tooling to recognise
// MDX-generated components — confirm against the consumer.
MDXContent.isMDXComponent = true;
      