import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

import DefaultLayout from "/opt/build/repo/src/templates/projectsTemplate.js";
// Layout component that wraps the rendered page content.
const MDXLayout = DefaultLayout;
// Frontmatter exported alongside the page; it is an empty object here.
export const _frontmatter = {};
// Props spread onto the layout on every render, ahead of the caller's props.
const layoutProps = { _frontmatter };
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">


    <p>{`NLTK is a powerful library that allows you to work with human language data. With it you can process text for classification, tokenization, stemming, tagging and parsing. Since I was taking the Datacamp course - `}<em parentName="p">{`Natural Language Processing Fundamentals in Python`}</em>{` and was learning about NLTK I decided to use this library to create a Tweet classification script to be used on a term.`}</p>
    <h1 {...{
      "id": "the-good",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#the-good",
        "aria-label": "the good permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`The Good`}</h1>
    <ul>
      <li parentName="ul">{`Can deal with repeated tweets on a subject`}</li>
      <li parentName="ul">{`Once the classifier is trained the script can run the classification quickly`}</li>
      <li parentName="ul">{`It provides a good overall feeling about a subject`}</li>
      <li parentName="ul">{`It saves the searched tweets in a file so it can be used instead of querying the Twitter database again`}</li>
    </ul>
    <h1 {...{
      "id": "the-not-so-good",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#the-not-so-good",
        "aria-label": "the not so good permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`The Not So Good`}</h1>
    <ul>
      <li parentName="ul">{`NLTK is a bit slow - training the classifier is a bit slow and could be improved further`}</li>
      <li parentName="ul">{`The method used for the classifier needs a large amount of data to increase the accuracy of the classifier`}</li>
      <li parentName="ul">{`Due to the data used only a rating of "Good" or "Bad" is returned`}</li>
      <li parentName="ul">{`Bag of words used - the classifier is unable to categorise sarcasm and double negatives properly`}</li>
    </ul>
    <h1 {...{
      "id": "how-it-all-came-to-be",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#how-it-all-came-to-be",
        "aria-label": "how it all came to be permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`How it all came to be`}</h1>
    <p>{`One of the Pybites challenges was to create a Twitter sentiment analysis to get a ratting about a subject. After the analysis is done the searched term would get a rating for Good, Bad, Neutral.`}</p>
    <p>{`Pybites solution uses the textblob library, but since I was learning about NLTK at the time I have decided to use this library and implement my version of a classifier. Working on this exercise was extremely fun and made me learn a lot.`}</p>
    <h1 {...{
      "id": "info",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#info",
        "aria-label": "info permalink",
        "className": "anchor before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "focusable": "false",
          "height": "16",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "16"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Info`}</h1>
    <p><strong parentName="p">{`GitHub repo:`}</strong>{` `}<a parentName="p" {...{
        "href": "https://github.com/FabioRosado/tweetnalytic"
      }}>{`https://github.com/FabioRosado/tweetnalytic`}</a></p>
    <p><strong parentName="p">{`Pybites challenge 07:`}</strong>{` `}<a parentName="p" {...{
        "href": "https://pybit.es/codechallenge07.html"
      }}>{`https://pybit.es/codechallenge07.html`}</a></p>
    <p><strong parentName="p">{`Image credits:`}</strong>{` `}<a parentName="p" {...{
        "href": "https://unsplash.com/photos/FumjLlfuvhg"
      }}>{`Unsplash`}</a></p>

    </MDXLayout>;
}
// Flag the default export as an MDX component.
MDXContent.isMDXComponent = true;
      