{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas en 10 minutes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div id=\"my_id_menu_nb\">run previous cell, wait for 2 seconds</div>\n",
       "<script>\n",
       "function repeat_indent_string(n){\n",
       "    var a = \"\" ;\n",
       "    for ( ; n > 0 ; --n)\n",
       "        a += \"    \";\n",
       "    return a;\n",
       "}\n",
       "// look up into all sections and builds an automated menu //\n",
       "var update_menu_string = function(begin, lfirst, llast, sformat, send, keep_item, begin_format, end_format) {\n",
       "    var anchors = document.getElementsByClassName(\"section\");\n",
       "    if (anchors.length == 0) {\n",
       "        anchors = document.getElementsByClassName(\"text_cell_render rendered_html\");\n",
       "    }\n",
       "    var i,t;\n",
       "    var text_menu = begin;\n",
       "    var text_memo = \"<pre>\\nlength:\" + anchors.length + \"\\n\";\n",
       "    var ind = \"\";\n",
       "    var memo_level = 1;\n",
       "    var href;\n",
       "    var tags = [];\n",
       "    var main_item = 0;\n",
       "    var format_open = 0;\n",
       "    for (i = 0; i <= llast; i++)\n",
       "        tags.push(\"h\" + i);\n",
       "\n",
       "    for (i = 0; i < anchors.length; i++) {\n",
       "        text_memo += \"**\" + anchors[i].id + \"--\\n\";\n",
       "\n",
       "        var child = null;\n",
       "        for(t = 0; t < tags.length; t++) {\n",
       "            var r = anchors[i].getElementsByTagName(tags[t]);\n",
       "            if (r.length > 0) {\n",
       "child = r[0];\n",
       "break;\n",
       "            }\n",
       "        }\n",
       "        if (child == null) {\n",
       "            text_memo += \"null\\n\";\n",
       "            continue;\n",
       "        }\n",
       "        if (anchors[i].hasAttribute(\"id\")) {\n",
       "            // when converted in RST\n",
       "            href = anchors[i].id;\n",
       "            text_memo += \"#1-\" + href;\n",
       "            // passer à child suivant (le chercher)\n",
       "        }\n",
       "        else if (child.hasAttribute(\"id\")) {\n",
       "            // in a notebook\n",
       "            href = child.id;\n",
       "            text_memo += \"#2-\" + href;\n",
       "        }\n",
       "        else {\n",
       "            text_memo += \"#3-\" + \"*\" + \"\\n\";\n",
       "            continue;\n",
       "        }\n",
       "        var title = child.textContent;\n",
       "        var level = parseInt(child.tagName.substring(1,2));\n",
       "\n",
       "        text_memo += \"--\" + level + \"?\" + lfirst + \"--\" + title + \"\\n\";\n",
       "\n",
       "        if ((level < lfirst) || (level > llast)) {\n",
       "            continue ;\n",
       "        }\n",
       "        if (title.endsWith('¶')) {\n",
       "            title = title.substring(0,title.length-1).replace(\"<\", \"&lt;\")\n",
       "         .replace(\">\", \"&gt;\").replace(\"&\", \"&amp;\");\n",
       "        }\n",
       "        if (title.length == 0) {\n",
       "            continue;\n",
       "        }\n",
       "\n",
       "        while (level < memo_level) {\n",
       "            text_menu += end_format + \"</ul>\\n\";\n",
       "            format_open -= 1;\n",
       "            memo_level -= 1;\n",
       "        }\n",
       "        if (level == lfirst) {\n",
       "            main_item += 1;\n",
       "        }\n",
       "        if (keep_item != -1 && main_item != keep_item + 1) {\n",
       "            // alert(main_item + \" - \" + level + \" - \" + keep_item);\n",
       "            continue;\n",
       "        }\n",
       "        while (level > memo_level) {\n",
       "            text_menu += \"<ul>\\n\";\n",
       "            memo_level += 1;\n",
       "        }\n",
       "        text_menu += repeat_indent_string(level-2);\n",
       "        text_menu += begin_format + sformat.replace(\"__HREF__\", href).replace(\"__TITLE__\", title);\n",
       "        format_open += 1;\n",
       "    }\n",
       "    while (1 < memo_level) {\n",
       "        text_menu += end_format + \"</ul>\\n\";\n",
       "        memo_level -= 1;\n",
       "        format_open -= 1;\n",
       "    }\n",
       "    text_menu += send;\n",
       "    //text_menu += \"\\n\" + text_memo;\n",
       "\n",
       "    while (format_open > 0) {\n",
       "        text_menu += end_format;\n",
       "        format_open -= 1;\n",
       "    }\n",
       "    return text_menu;\n",
       "};\n",
       "var update_menu = function() {\n",
       "    var sbegin = \"\";\n",
       "    var sformat = '<a href=\"#__HREF__\">__TITLE__</a>';\n",
       "    var send = \"\";\n",
       "    var begin_format = '<li>';\n",
       "    var end_format = '</li>';\n",
       "    var keep_item = -1;\n",
       "    var text_menu = update_menu_string(sbegin, 2, 4, sformat, send, keep_item,\n",
       "       begin_format, end_format);\n",
       "    var menu = document.getElementById(\"my_id_menu_nb\");\n",
       "    menu.innerHTML=text_menu;\n",
       "};\n",
       "window.setTimeout(update_menu,2000);\n",
       "            </script>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " #Pour intégrer les graphes à votre notebook, il suffit de faire\n",
    "%matplotlib inline\n",
    "\n",
    "from jyquickhelper import add_notebook_menu\n",
    "add_notebook_menu()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On importe générallement les librairies suivantes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Création d'objets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On créé une 'Series' en lui passant une liste de valeurs, en laissant pandas créer un index d'entiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    3.0\n",
       "2    5.0\n",
       "3    NaN\n",
       "4    6.0\n",
       "5    8.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series([1,3,5,np.nan,6,8])\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On créé un DataFrame en passant un array numpy, avec un index sur sur une date et des colonnes labellisées"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
      "               '2013-01-05', '2013-01-06'],\n",
      "              dtype='datetime64[ns]', freq='D')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>-1.184356</td>\n",
       "      <td>-0.723850</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>0.749447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.358945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>-0.047994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>-1.270130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.190966</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>-0.582905</td>\n",
       "      <td>0.818028</td>\n",
       "      <td>-0.259498</td>\n",
       "      <td>0.305170</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2013-01-01 -1.184356 -0.723850  0.391051  0.749447\n",
       "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
       "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
       "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n",
       "2013-01-05  0.605311  1.339097  0.313692  1.190966\n",
       "2013-01-06 -0.582905  0.818028 -0.259498  0.305170"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = pd.date_range('20130101', periods=6)\n",
    "print(dates)\n",
    "\n",
    "df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut également créer un DataFrame en passant un dictionnaire d'objets qui peut être converti en sorte de série."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>test</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>train</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>test</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>train</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B    C  D      E    F\n",
       "0  1.0 2013-01-02  1.0  3   test  foo\n",
       "1  1.0 2013-01-02  1.0  3  train  foo\n",
       "2  1.0 2013-01-02  1.0  3   test  foo\n",
       "3  1.0 2013-01-02  1.0  3  train  foo"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = pd.DataFrame({ 'A' : 1.,\n",
    "'B' : pd.Timestamp('20130102'),\n",
    "'C' : pd.Series(1,index=list(range(4)),dtype='float32'),\n",
    "'D' : np.array([3] * 4,dtype='int32'),\n",
    "'E' : pd.Categorical([\"test\",\"train\",\"test\",\"train\"]),\n",
    "'F' : 'foo' })\n",
    "\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Chaque colonne a son propre dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A           float64\n",
       "B    datetime64[ns]\n",
       "C           float32\n",
       "D             int32\n",
       "E          category\n",
       "F            object\n",
       "dtype: object"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut afficher les premières lignes et les dernières"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   A         B         C         D\n",
      "2013-01-01 -1.184356 -0.723850  0.391051  0.749447\n",
      "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
      "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
      "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n",
      "2013-01-05  0.605311  1.339097  0.313692  1.190966\n",
      "                   A         B         C         D\n",
      "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
      "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
      "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n",
      "2013-01-05  0.605311  1.339097  0.313692  1.190966\n",
      "2013-01-06 -0.582905  0.818028 -0.259498  0.305170\n"
     ]
    }
   ],
   "source": [
    "print(df.head())\n",
    "print(df.tail())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut afficher l'index, les colonnes et les données numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
      "               '2013-01-05', '2013-01-06'],\n",
      "              dtype='datetime64[ns]', freq='D')\n",
      "Index(['A', 'B', 'C', 'D'], dtype='object')\n",
      "[[-1.18435607 -0.72384974  0.39105091  0.74944703]\n",
      " [ 0.02750198  0.24202818  0.93718807  0.35894525]\n",
      " [ 0.12108336  1.63369769 -0.13407011 -0.04799366]\n",
      " [ 1.24757372  0.10276272 -0.25473099 -1.27013015]\n",
      " [ 0.60531084  1.33909665  0.3136916   1.19096551]\n",
      " [-0.58290532  0.81802772 -0.25949756  0.30516996]]\n"
     ]
    }
   ],
   "source": [
    "print(df.index)\n",
    "print(df.columns)\n",
    "print(df.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "La méthode describe permet d'afficher un résumé des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.039035</td>\n",
       "      <td>0.568627</td>\n",
       "      <td>0.165605</td>\n",
       "      <td>0.214401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.856924</td>\n",
       "      <td>0.869988</td>\n",
       "      <td>0.472291</td>\n",
       "      <td>0.841637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.184356</td>\n",
       "      <td>-0.723850</td>\n",
       "      <td>-0.259498</td>\n",
       "      <td>-1.270130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.430303</td>\n",
       "      <td>0.137579</td>\n",
       "      <td>-0.224566</td>\n",
       "      <td>0.040297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.074293</td>\n",
       "      <td>0.530028</td>\n",
       "      <td>0.089811</td>\n",
       "      <td>0.332058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.484254</td>\n",
       "      <td>1.208829</td>\n",
       "      <td>0.371711</td>\n",
       "      <td>0.651822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>1.190966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              A         B         C         D\n",
       "count  6.000000  6.000000  6.000000  6.000000\n",
       "mean   0.039035  0.568627  0.165605  0.214401\n",
       "std    0.856924  0.869988  0.472291  0.841637\n",
       "min   -1.184356 -0.723850 -0.259498 -1.270130\n",
       "25%   -0.430303  0.137579 -0.224566  0.040297\n",
       "50%    0.074293  0.530028  0.089811  0.332058\n",
       "75%    0.484254  1.208829  0.371711  0.651822\n",
       "max    1.247574  1.633698  0.937188  1.190966"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut faire la transposée, trier en fonction d'un axe ou des valeurs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06\n",
      "A   -1.184356    0.027502    0.121083    1.247574    0.605311   -0.582905\n",
      "B   -0.723850    0.242028    1.633698    0.102763    1.339097    0.818028\n",
      "C    0.391051    0.937188   -0.134070   -0.254731    0.313692   -0.259498\n",
      "D    0.749447    0.358945   -0.047994   -1.270130    1.190966    0.305170\n"
     ]
    }
   ],
   "source": [
    "print(df.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>D</th>\n",
       "      <th>C</th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>0.749447</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>-0.723850</td>\n",
       "      <td>-1.184356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.358945</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.027502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>-0.047994</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>0.121083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>-1.270130</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>1.247574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>1.190966</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.605311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>0.305170</td>\n",
       "      <td>-0.259498</td>\n",
       "      <td>0.818028</td>\n",
       "      <td>-0.582905</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   D         C         B         A\n",
       "2013-01-01  0.749447  0.391051 -0.723850 -1.184356\n",
       "2013-01-02  0.358945  0.937188  0.242028  0.027502\n",
       "2013-01-03 -0.047994 -0.134070  1.633698  0.121083\n",
       "2013-01-04 -1.270130 -0.254731  0.102763  1.247574\n",
       "2013-01-05  1.190966  0.313692  1.339097  0.605311\n",
       "2013-01-06  0.305170 -0.259498  0.818028 -0.582905"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_index(axis=1, ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>-1.184356</td>\n",
       "      <td>-0.723850</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>0.749447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>-1.270130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.358945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>-0.582905</td>\n",
       "      <td>0.818028</td>\n",
       "      <td>-0.259498</td>\n",
       "      <td>0.305170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.190966</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>-0.047994</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2013-01-01 -1.184356 -0.723850  0.391051  0.749447\n",
       "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n",
       "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
       "2013-01-06 -0.582905  0.818028 -0.259498  0.305170\n",
       "2013-01-05  0.605311  1.339097  0.313692  1.190966\n",
       "2013-01-03  0.121083  1.633698 -0.134070 -0.047994"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(by='B')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Selection des données"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Getting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selection d'une colonne (équivalent à df.A)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-01-01   -1.184356\n",
      "2013-01-02    0.027502\n",
      "2013-01-03    0.121083\n",
      "2013-01-04    1.247574\n",
      "2013-01-05    0.605311\n",
      "2013-01-06   -0.582905\n",
      "Freq: D, Name: A, dtype: float64\n",
      "                   A         B         C         D\n",
      "2013-01-01 -1.184356 -0.723850  0.391051  0.749447\n",
      "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
      "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
      "                   A         B         C         D\n",
      "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
      "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
      "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n"
     ]
    }
   ],
   "source": [
    "print(df['A'])\n",
    "print(df[0:3])\n",
    "print(df['20130102':'20130104'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selection par Label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "En utilisant un label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -1.184356\n",
       "B   -0.723850\n",
       "C    0.391051\n",
       "D    0.749447\n",
       "Name: 2013-01-01 00:00:00, dtype: float64"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[dates[0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selection de plusieurs axes par label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>-1.184356</td>\n",
       "      <td>-0.723850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>-0.582905</td>\n",
       "      <td>0.818028</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2013-01-01 -1.184356 -0.723850\n",
       "2013-01-02  0.027502  0.242028\n",
       "2013-01-03  0.121083  1.633698\n",
       "2013-01-04  1.247574  0.102763\n",
       "2013-01-05  0.605311  1.339097\n",
       "2013-01-06 -0.582905  0.818028"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[:,['A','B']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Avec le label slicing, les deux points de terminaisons sont INCLUS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2013-01-02  0.027502  0.242028\n",
       "2013-01-03  0.121083  1.633698\n",
       "2013-01-04  1.247574  0.102763"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['20130102':'20130104', ['A','B']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtenir une valeur scalaire"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.184356071097634"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[dates[0],'A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Acces plus rapide (méthode équivalente à la précédente)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.184356071097634"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.at[dates[0],'A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selection par position"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Integer : "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    1.247574\n",
       "B    0.102763\n",
       "C   -0.254731\n",
       "D   -1.270130\n",
       "Name: 2013-01-04 00:00:00, dtype: float64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tranches d'entiers, similaire à numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2013-01-04  1.247574  0.102763\n",
       "2013-01-05  0.605311  1.339097"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[3:5,0:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Par liste d'entiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.937188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>-0.134070</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>0.313692</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         C\n",
       "2013-01-02  0.027502  0.937188\n",
       "2013-01-03  0.121083 -0.134070\n",
       "2013-01-05  0.605311  0.313692"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[[1,2,4],[0,2]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Découpage de ligne explicite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.358945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>-0.047994</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
       "2013-01-03  0.121083  1.633698 -0.134070 -0.047994"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[1:3,:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtenir une valeur explicitement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.24202817975998675"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[1,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Acces rapide au scalaire"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.24202817975998675"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iat[1,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Indexation booléenne"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "En utilisant une valeur sur une colonne : "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.358945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>-0.047994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>-1.270130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.190966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
       "2013-01-03  0.121083  1.633698 -0.134070 -0.047994\n",
       "2013-01-04  1.247574  0.102763 -0.254731 -1.270130\n",
       "2013-01-05  0.605311  1.339097  0.313692  1.190966"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.A > 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Opérateur where : "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>0.749447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>0.358945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.190966</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.818028</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.305170</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2013-01-01       NaN       NaN  0.391051  0.749447\n",
       "2013-01-02  0.027502  0.242028  0.937188  0.358945\n",
       "2013-01-03  0.121083  1.633698       NaN       NaN\n",
       "2013-01-04  1.247574  0.102763       NaN       NaN\n",
       "2013-01-05  0.605311  1.339097  0.313692  1.190966\n",
       "2013-01-06       NaN  0.818028       NaN  0.305170"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df > 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pour filter, on utilise la méthode isin()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   A         B         C         D      E\n",
      "2013-01-01 -1.184356 -0.723850  0.391051  0.749447    one\n",
      "2013-01-02  0.027502  0.242028  0.937188  0.358945    one\n",
      "2013-01-03  0.121083  1.633698 -0.134070 -0.047994    two\n",
      "2013-01-04  1.247574  0.102763 -0.254731 -1.270130  three\n",
      "2013-01-05  0.605311  1.339097  0.313692  1.190966   four\n",
      "2013-01-06 -0.582905  0.818028 -0.259498  0.305170  three\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>-0.047994</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>1.190966</td>\n",
       "      <td>four</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D     E\n",
       "2013-01-03  0.121083  1.633698 -0.134070 -0.047994   two\n",
       "2013-01-05  0.605311  1.339097  0.313692  1.190966  four"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.copy()\n",
    "df2['E'] = ['one', 'one','two','three','four','three']\n",
    "print(df2)\n",
    "\n",
    "df2[df2['E'].isin(['two','four'])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Ajouter / modifier valeurs / colonnes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ajouter une nouvelle colonne automatiquement aligne les données par index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-01-02    1\n",
      "2013-01-03    2\n",
      "2013-01-04    3\n",
      "2013-01-05    4\n",
      "2013-01-06    5\n",
      "2013-01-07    6\n",
      "Freq: D, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))\n",
    "print(s1)\n",
    "df['F'] = s1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modifier une valeur par label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.at[dates[0],'A'] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[dates[0],'A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modifier une valeur par position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.iat[0,1] = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Modifier une valeur en assignant un tableau numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.loc[:,'D'] = np.array([5] * len(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>0.605311</td>\n",
       "      <td>1.339097</td>\n",
       "      <td>0.313692</td>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>-0.582905</td>\n",
       "      <td>0.818028</td>\n",
       "      <td>-0.259498</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F\n",
       "2013-01-01  0.000000  0.000000  0.391051  5  NaN\n",
       "2013-01-02  0.027502  0.242028  0.937188  5  1.0\n",
       "2013-01-03  0.121083  1.633698 -0.134070  5  2.0\n",
       "2013-01-04  1.247574  0.102763 -0.254731  5  3.0\n",
       "2013-01-05  0.605311  1.339097  0.313692  5  4.0\n",
       "2013-01-06 -0.582905  0.818028 -0.259498  5  5.0"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Gérer les données manquantes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pandas utilise le type np.nan pour représenter les valeurs manquantes. Ce n'est pas codé pour faire des calculs."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Reindex permet de changer/ajouter/supprimer les index d'un axe. Cette fonction retourne une copie des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2013-01-01  0.000000  0.000000  0.391051  5  NaN  1.0\n",
       "2013-01-02  0.027502  0.242028  0.937188  5  1.0  1.0\n",
       "2013-01-03  0.121083  1.633698 -0.134070  5  2.0  NaN\n",
       "2013-01-04  1.247574  0.102763 -0.254731  5  3.0  NaN"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])\n",
    "df1.loc[dates[0]:dates[1],'E'] = 1\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pour supprimer les lignes contenant des NaN :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2013-01-02  0.027502  0.242028  0.937188  5  1.0  1.0"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.dropna(how='any')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remplacement des valeurs manquantes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>0.937188</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.121083</td>\n",
       "      <td>1.633698</td>\n",
       "      <td>-0.134070</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.247574</td>\n",
       "      <td>0.102763</td>\n",
       "      <td>-0.254731</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2013-01-01  0.000000  0.000000  0.391051  5  5.0  1.0\n",
       "2013-01-02  0.027502  0.242028  0.937188  5  1.0  1.0\n",
       "2013-01-03  0.121083  1.633698 -0.134070  5  2.0  5.0\n",
       "2013-01-04  1.247574  0.102763 -0.254731  5  3.0  5.0"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.fillna(value=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtenir le masque de booléen de l'emplacement des nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                A      B      C      D      F      E\n",
       "2013-01-01  False  False  False  False   True  False\n",
       "2013-01-02  False  False  False  False  False  False\n",
       "2013-01-03  False  False  False  False  False   True\n",
       "2013-01-04  False  False  False  False  False   True"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.isnull(df1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Opérations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Les opérations excluent généralement les données manquantes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A    0.236427\n",
      "B    0.689269\n",
      "C    0.165605\n",
      "D    5.000000\n",
      "F    3.000000\n",
      "dtype: float64\n",
      "2013-01-01    1.347763\n",
      "2013-01-02    1.441344\n",
      "2013-01-03    1.724142\n",
      "2013-01-04    1.819121\n",
      "2013-01-05    2.251620\n",
      "2013-01-06    1.995125\n",
      "Freq: D, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(df.mean())\n",
    "print(df.mean(1)) #Autre axe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Situation avec des objets de dimmension différentes. En plus, pandas va automatiquement étendre la donnée sur la dimension spécifiée"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2013-01-01    NaN\n",
      "2013-01-02    NaN\n",
      "2013-01-03    1.0\n",
      "2013-01-04    3.0\n",
      "2013-01-05    5.0\n",
      "2013-01-06    NaN\n",
      "Freq: D, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>-0.878917</td>\n",
       "      <td>0.633698</td>\n",
       "      <td>-1.134070</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>-1.752426</td>\n",
       "      <td>-2.897237</td>\n",
       "      <td>-3.254731</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>-4.394689</td>\n",
       "      <td>-3.660903</td>\n",
       "      <td>-4.686308</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C    D    F\n",
       "2013-01-01       NaN       NaN       NaN  NaN  NaN\n",
       "2013-01-02       NaN       NaN       NaN  NaN  NaN\n",
       "2013-01-03 -0.878917  0.633698 -1.134070  4.0  1.0\n",
       "2013-01-04 -1.752426 -2.897237 -3.254731  2.0  0.0\n",
       "2013-01-05 -4.394689 -3.660903 -4.686308  0.0 -1.0\n",
       "2013-01-06       NaN       NaN       NaN  NaN  NaN"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)\n",
    "\n",
    "print(s)\n",
    "\n",
    "df.sub(s, axis='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on method sub in module pandas.core.ops:\n",
      "\n",
      "sub(other, axis='columns', level=None, fill_value=None) method of pandas.core.frame.DataFrame instance\n",
      "    Get Subtraction of dataframe and other, element-wise (binary operator `sub`).\n",
      "    \n",
      "    Equivalent to ``dataframe - other``, but with support to substitute a fill_value\n",
      "    for missing data in one of the inputs. With reverse version, `rsub`.\n",
      "    \n",
      "    Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to\n",
      "    arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`.\n",
      "    \n",
      "    Parameters\n",
      "    ----------\n",
      "    other : scalar, sequence, Series, or DataFrame\n",
      "        Any single or multiple element data structure, or list-like object.\n",
      "    axis : {0 or 'index', 1 or 'columns'}\n",
      "        Whether to compare by the index (0 or 'index') or columns\n",
      "        (1 or 'columns'). For Series input, axis to match Series index on.\n",
      "    level : int or label\n",
      "        Broadcast across a level, matching Index values on the\n",
      "        passed MultiIndex level.\n",
      "    fill_value : float or None, default None\n",
      "        Fill existing missing (NaN) values, and any new element needed for\n",
      "        successful DataFrame alignment, with this value before computation.\n",
      "        If data in both corresponding DataFrame locations is missing\n",
      "        the result will be missing.\n",
      "    \n",
      "    Returns\n",
      "    -------\n",
      "    DataFrame\n",
      "        Result of the arithmetic operation.\n",
      "    \n",
      "    See Also\n",
      "    --------\n",
      "    DataFrame.add : Add DataFrames.\n",
      "    DataFrame.sub : Subtract DataFrames.\n",
      "    DataFrame.mul : Multiply DataFrames.\n",
      "    DataFrame.div : Divide DataFrames (float division).\n",
      "    DataFrame.truediv : Divide DataFrames (float division).\n",
      "    DataFrame.floordiv : Divide DataFrames (integer division).\n",
      "    DataFrame.mod : Calculate modulo (remainder after division).\n",
      "    DataFrame.pow : Calculate exponential power.\n",
      "    \n",
      "    Notes\n",
      "    -----\n",
      "    Mismatched indices will be unioned together.\n",
      "    \n",
      "    Examples\n",
      "    --------\n",
      "    >>> df = pd.DataFrame({'angles': [0, 3, 4],\n",
      "    ...                    'degrees': [360, 180, 360]},\n",
      "    ...                   index=['circle', 'triangle', 'rectangle'])\n",
      "    >>> df\n",
      "               angles  degrees\n",
      "    circle          0      360\n",
      "    triangle        3      180\n",
      "    rectangle       4      360\n",
      "    \n",
      "    Add a scalar with operator version which return the same\n",
      "    results.\n",
      "    \n",
      "    >>> df + 1\n",
      "               angles  degrees\n",
      "    circle          1      361\n",
      "    triangle        4      181\n",
      "    rectangle       5      361\n",
      "    \n",
      "    >>> df.add(1)\n",
      "               angles  degrees\n",
      "    circle          1      361\n",
      "    triangle        4      181\n",
      "    rectangle       5      361\n",
      "    \n",
      "    Divide by constant with reverse version.\n",
      "    \n",
      "    >>> df.div(10)\n",
      "               angles  degrees\n",
      "    circle        0.0     36.0\n",
      "    triangle      0.3     18.0\n",
      "    rectangle     0.4     36.0\n",
      "    \n",
      "    >>> df.rdiv(10)\n",
      "                 angles   degrees\n",
      "    circle          inf  0.027778\n",
      "    triangle   3.333333  0.055556\n",
      "    rectangle  2.500000  0.027778\n",
      "    \n",
      "    Subtract a list and Series by axis with operator version.\n",
      "    \n",
      "    >>> df - [1, 2]\n",
      "               angles  degrees\n",
      "    circle         -1      358\n",
      "    triangle        2      178\n",
      "    rectangle       3      358\n",
      "    \n",
      "    >>> df.sub([1, 2], axis='columns')\n",
      "               angles  degrees\n",
      "    circle         -1      358\n",
      "    triangle        2      178\n",
      "    rectangle       3      358\n",
      "    \n",
      "    >>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']),\n",
      "    ...        axis='index')\n",
      "               angles  degrees\n",
      "    circle         -1      359\n",
      "    triangle        2      179\n",
      "    rectangle       3      359\n",
      "    \n",
      "    Multiply a DataFrame of different shape with operator version.\n",
      "    \n",
      "    >>> other = pd.DataFrame({'angles': [0, 3, 4]},\n",
      "    ...                      index=['circle', 'triangle', 'rectangle'])\n",
      "    >>> other\n",
      "               angles\n",
      "    circle          0\n",
      "    triangle        3\n",
      "    rectangle       4\n",
      "    \n",
      "    >>> df * other\n",
      "               angles  degrees\n",
      "    circle          0      NaN\n",
      "    triangle        9      NaN\n",
      "    rectangle      16      NaN\n",
      "    \n",
      "    >>> df.mul(other, fill_value=0)\n",
      "               angles  degrees\n",
      "    circle          0      0.0\n",
      "    triangle        9      0.0\n",
      "    rectangle      16      0.0\n",
      "    \n",
      "    Divide by a MultiIndex by level.\n",
      "    \n",
      "    >>> df_multindex = pd.DataFrame({'angles': [0, 3, 4, 4, 5, 6],\n",
      "    ...                              'degrees': [360, 180, 360, 360, 540, 720]},\n",
      "    ...                             index=[['A', 'A', 'A', 'B', 'B', 'B'],\n",
      "    ...                                    ['circle', 'triangle', 'rectangle',\n",
      "    ...                                     'square', 'pentagon', 'hexagon']])\n",
      "    >>> df_multindex\n",
      "                 angles  degrees\n",
      "    A circle          0      360\n",
      "      triangle        3      180\n",
      "      rectangle       4      360\n",
      "    B square          4      360\n",
      "      pentagon        5      540\n",
      "      hexagon         6      720\n",
      "    \n",
      "    >>> df.div(df_multindex, level=1, fill_value=0)\n",
      "                 angles  degrees\n",
      "    A circle        NaN      1.0\n",
      "      triangle      1.0      1.0\n",
      "      rectangle     1.0      1.0\n",
      "    B square        0.0      0.0\n",
      "      pentagon      0.0      0.0\n",
      "      hexagon       0.0      0.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(df.sub)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>-0.878917</td>\n",
       "      <td>0.633698</td>\n",
       "      <td>-1.134070</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>-1.752426</td>\n",
       "      <td>-2.897237</td>\n",
       "      <td>-3.254731</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>-4.394689</td>\n",
       "      <td>-3.660903</td>\n",
       "      <td>-4.686308</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C    D    F\n",
       "2013-01-01       NaN       NaN       NaN  NaN  NaN\n",
       "2013-01-02       NaN       NaN       NaN  NaN  NaN\n",
       "2013-01-03 -0.878917  0.633698 -1.134070  4.0  1.0\n",
       "2013-01-04 -1.752426 -2.897237 -3.254731  2.0  0.0\n",
       "2013-01-05 -4.394689 -3.660903 -4.686308  0.0 -1.0\n",
       "2013-01-06       NaN       NaN       NaN  NaN  NaN"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sub(s, axis='index')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Apply"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Appliquer des foncitons aux données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2013-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.391051</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-02</th>\n",
       "      <td>0.027502</td>\n",
       "      <td>0.242028</td>\n",
       "      <td>1.328239</td>\n",
       "      <td>10</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-03</th>\n",
       "      <td>0.148585</td>\n",
       "      <td>1.875726</td>\n",
       "      <td>1.194169</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-04</th>\n",
       "      <td>1.396159</td>\n",
       "      <td>1.978489</td>\n",
       "      <td>0.939438</td>\n",
       "      <td>20</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-05</th>\n",
       "      <td>2.001470</td>\n",
       "      <td>3.317585</td>\n",
       "      <td>1.253129</td>\n",
       "      <td>25</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013-01-06</th>\n",
       "      <td>1.418565</td>\n",
       "      <td>4.135613</td>\n",
       "      <td>0.993632</td>\n",
       "      <td>30</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C   D     F\n",
       "2013-01-01  0.000000  0.000000  0.391051   5   NaN\n",
       "2013-01-02  0.027502  0.242028  1.328239  10   1.0\n",
       "2013-01-03  0.148585  1.875726  1.194169  15   3.0\n",
       "2013-01-04  1.396159  1.978489  0.939438  20   6.0\n",
       "2013-01-05  2.001470  3.317585  1.253129  25  10.0\n",
       "2013-01-06  1.418565  4.135613  0.993632  30  15.0"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.apply(np.cumsum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    1.830479\n",
       "B    1.633698\n",
       "C    1.196686\n",
       "D    0.000000\n",
       "F    4.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.apply((lambda x: x.max() - x.min()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Histogramme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    3\n",
      "1    2\n",
      "2    3\n",
      "3    3\n",
      "4    5\n",
      "5    3\n",
      "6    4\n",
      "7    0\n",
      "8    1\n",
      "9    2\n",
      "dtype: int64\n",
      "3    4\n",
      "2    2\n",
      "0    1\n",
      "1    1\n",
      "4    1\n",
      "5    1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(0, 7, size=10))\n",
    "print(s)\n",
    "print(s.value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Methodes String"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Les séries sont équipées de méthodes pour traiter les strings avec l'attribut str qui rend facile la manipulation de chaque élémen d'un tableau. On utilise régulièrement des expressions régulières."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       a\n",
       "1       b\n",
       "2       c\n",
       "3    aaba\n",
       "4    baca\n",
       "5     NaN\n",
       "6    caba\n",
       "7     dog\n",
       "8     cat\n",
       "dtype: object"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])\n",
    "s.str.lower()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regrouper"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Concaténation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pandas fournit des methodes pour facilement combiner des Series, DatFrame et des Panel objets avec des types variés de set logique pour les indexes et des fonctionnalités d'algèbre dans le cas de jointure / regroupement"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut concaténer des objets pandas avec concat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0  0.069013 -0.040383 -0.607478 -1.286906\n",
      "1  1.270398 -0.523822  0.858627  0.751044\n",
      "2  1.480067 -0.005535  0.244562 -0.224817\n",
      "3 -0.730252  0.188720  0.105386 -0.600060\n",
      "4 -0.844170  1.736848  0.190962  0.451052\n",
      "5  0.181550  0.136675  0.892205  0.701368\n",
      "6 -0.485632 -0.894081  0.532846 -1.228718\n",
      "7  0.324976 -1.670890 -0.082170 -0.383374\n",
      "8  0.589956 -0.859584  0.737789  1.193208\n",
      "9  1.124989 -1.301511  2.010070  0.593592\n"
     ]
    }
   ],
   "source": [
    "df = pd.DataFrame(np.random.randn(10, 4))\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pieces = [df[:3], df[3:7], df[7:]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.069013</td>\n",
       "      <td>-0.040383</td>\n",
       "      <td>-0.607478</td>\n",
       "      <td>-1.286906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.270398</td>\n",
       "      <td>-0.523822</td>\n",
       "      <td>0.858627</td>\n",
       "      <td>0.751044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.480067</td>\n",
       "      <td>-0.005535</td>\n",
       "      <td>0.244562</td>\n",
       "      <td>-0.224817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.730252</td>\n",
       "      <td>0.188720</td>\n",
       "      <td>0.105386</td>\n",
       "      <td>-0.600060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.844170</td>\n",
       "      <td>1.736848</td>\n",
       "      <td>0.190962</td>\n",
       "      <td>0.451052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.181550</td>\n",
       "      <td>0.136675</td>\n",
       "      <td>0.892205</td>\n",
       "      <td>0.701368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-0.485632</td>\n",
       "      <td>-0.894081</td>\n",
       "      <td>0.532846</td>\n",
       "      <td>-1.228718</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.324976</td>\n",
       "      <td>-1.670890</td>\n",
       "      <td>-0.082170</td>\n",
       "      <td>-0.383374</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.589956</td>\n",
       "      <td>-0.859584</td>\n",
       "      <td>0.737789</td>\n",
       "      <td>1.193208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.124989</td>\n",
       "      <td>-1.301511</td>\n",
       "      <td>2.010070</td>\n",
       "      <td>0.593592</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3\n",
       "0  0.069013 -0.040383 -0.607478 -1.286906\n",
       "1  1.270398 -0.523822  0.858627  0.751044\n",
       "2  1.480067 -0.005535  0.244562 -0.224817\n",
       "3 -0.730252  0.188720  0.105386 -0.600060\n",
       "4 -0.844170  1.736848  0.190962  0.451052\n",
       "5  0.181550  0.136675  0.892205  0.701368\n",
       "6 -0.485632 -0.894081  0.532846 -1.228718\n",
       "7  0.324976 -1.670890 -0.082170 -0.383374\n",
       "8  0.589956 -0.859584  0.737789  1.193208\n",
       "9  1.124989 -1.301511  2.010070  0.593592"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(pieces)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Jointures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On peut merger à la manière de requete SQL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   key  lval\n",
      "0  foo     1\n",
      "1  foo     2\n",
      "   key  rval\n",
      "0  foo     4\n",
      "1  foo     5\n",
      "   key  lval  rval\n",
      "0  foo     1     4\n",
      "1  foo     1     5\n",
      "2  foo     2     4\n",
      "3  foo     2     5\n"
     ]
    }
   ],
   "source": [
    "left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})\n",
    "right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})\n",
    "\n",
    "print(left)\n",
    "print(right)\n",
    "\n",
    "print(pd.merge(left, right, on='key'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Append"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.080456</td>\n",
       "      <td>0.981786</td>\n",
       "      <td>0.880026</td>\n",
       "      <td>1.144814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.665425</td>\n",
       "      <td>-0.577014</td>\n",
       "      <td>1.265051</td>\n",
       "      <td>-0.515127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.342626</td>\n",
       "      <td>-0.635112</td>\n",
       "      <td>1.558737</td>\n",
       "      <td>1.070910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.236688</td>\n",
       "      <td>-1.178135</td>\n",
       "      <td>0.371970</td>\n",
       "      <td>0.268590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.478343</td>\n",
       "      <td>0.015331</td>\n",
       "      <td>0.835794</td>\n",
       "      <td>0.553637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-1.115995</td>\n",
       "      <td>-0.086228</td>\n",
       "      <td>-0.959499</td>\n",
       "      <td>-1.168225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.615278</td>\n",
       "      <td>-0.550461</td>\n",
       "      <td>-0.328022</td>\n",
       "      <td>0.753208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-0.308374</td>\n",
       "      <td>1.024291</td>\n",
       "      <td>-1.437537</td>\n",
       "      <td>1.189244</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "0 -1.080456  0.981786  0.880026  1.144814\n",
       "1 -1.665425 -0.577014  1.265051 -0.515127\n",
       "2  0.342626 -0.635112  1.558737  1.070910\n",
       "3 -0.236688 -1.178135  0.371970  0.268590\n",
       "4 -0.478343  0.015331  0.835794  0.553637\n",
       "5 -1.115995 -0.086228 -0.959499 -1.168225\n",
       "6  0.615278 -0.550461 -0.328022  0.753208\n",
       "7 -0.308374  1.024291 -1.437537  1.189244"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.080456</td>\n",
       "      <td>0.981786</td>\n",
       "      <td>0.880026</td>\n",
       "      <td>1.144814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.665425</td>\n",
       "      <td>-0.577014</td>\n",
       "      <td>1.265051</td>\n",
       "      <td>-0.515127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.342626</td>\n",
       "      <td>-0.635112</td>\n",
       "      <td>1.558737</td>\n",
       "      <td>1.070910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.236688</td>\n",
       "      <td>-1.178135</td>\n",
       "      <td>0.371970</td>\n",
       "      <td>0.268590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.478343</td>\n",
       "      <td>0.015331</td>\n",
       "      <td>0.835794</td>\n",
       "      <td>0.553637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-1.115995</td>\n",
       "      <td>-0.086228</td>\n",
       "      <td>-0.959499</td>\n",
       "      <td>-1.168225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.615278</td>\n",
       "      <td>-0.550461</td>\n",
       "      <td>-0.328022</td>\n",
       "      <td>0.753208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-0.308374</td>\n",
       "      <td>1.024291</td>\n",
       "      <td>-1.437537</td>\n",
       "      <td>1.189244</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-0.236688</td>\n",
       "      <td>-1.178135</td>\n",
       "      <td>0.371970</td>\n",
       "      <td>0.268590</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "0 -1.080456  0.981786  0.880026  1.144814\n",
       "1 -1.665425 -0.577014  1.265051 -0.515127\n",
       "2  0.342626 -0.635112  1.558737  1.070910\n",
       "3 -0.236688 -1.178135  0.371970  0.268590\n",
       "4 -0.478343  0.015331  0.835794  0.553637\n",
       "5 -1.115995 -0.086228 -0.959499 -1.168225\n",
       "6  0.615278 -0.550461 -0.328022  0.753208\n",
       "7 -0.308374  1.024291 -1.437537  1.189244\n",
       "8 -0.236688 -1.178135  0.371970  0.268590"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = df.iloc[3]\n",
    "df.append(s, ignore_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Groupement"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Le regroupement comprend les étapes suivantes :\n",
    "   * Séparation de la donnée en groupes\n",
    "   * Appliquer une fonction a chaque group indépendamment\n",
    "   * Combiner les resultats dans une structure de données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',\n",
    "'foo', 'bar', 'foo', 'foo'],\n",
    "'B' : ['one', 'one', 'two', 'three',\n",
    "'two', 'two', 'one', 'three'],\n",
    "'C' : np.random.randn(8),\n",
    "'D' : np.random.randn(8)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>foo</td>\n",
       "      <td>one</td>\n",
       "      <td>0.438443</td>\n",
       "      <td>0.479237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>bar</td>\n",
       "      <td>one</td>\n",
       "      <td>-0.457745</td>\n",
       "      <td>-0.363322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>foo</td>\n",
       "      <td>two</td>\n",
       "      <td>0.684435</td>\n",
       "      <td>-0.421288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>bar</td>\n",
       "      <td>three</td>\n",
       "      <td>-0.620241</td>\n",
       "      <td>0.267481</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>foo</td>\n",
       "      <td>two</td>\n",
       "      <td>-0.032591</td>\n",
       "      <td>2.801863</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>bar</td>\n",
       "      <td>two</td>\n",
       "      <td>0.878103</td>\n",
       "      <td>-0.064529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>foo</td>\n",
       "      <td>one</td>\n",
       "      <td>-0.000896</td>\n",
       "      <td>-1.695125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>foo</td>\n",
       "      <td>three</td>\n",
       "      <td>1.341618</td>\n",
       "      <td>-0.165415</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A      B         C         D\n",
       "0  foo    one  0.438443  0.479237\n",
       "1  bar    one -0.457745 -0.363322\n",
       "2  foo    two  0.684435 -0.421288\n",
       "3  bar  three -0.620241  0.267481\n",
       "4  foo    two -0.032591  2.801863\n",
       "5  bar    two  0.878103 -0.064529\n",
       "6  foo    one -0.000896 -1.695125\n",
       "7  foo  three  1.341618 -0.165415"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Groupement et somme des groupes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>bar</th>\n",
       "      <td>-0.199883</td>\n",
       "      <td>-0.160370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>foo</th>\n",
       "      <td>2.431011</td>\n",
       "      <td>0.999271</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            C         D\n",
       "A                      \n",
       "bar -0.199883 -0.160370\n",
       "foo  2.431011  0.999271"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('A').sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Groupement de multiple colonnes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">bar</th>\n",
       "      <th>one</th>\n",
       "      <td>-0.457745</td>\n",
       "      <td>-0.363322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>-0.620241</td>\n",
       "      <td>0.267481</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.878103</td>\n",
       "      <td>-0.064529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">foo</th>\n",
       "      <th>one</th>\n",
       "      <td>0.437547</td>\n",
       "      <td>-1.215888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>1.341618</td>\n",
       "      <td>-0.165415</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.651845</td>\n",
       "      <td>2.380574</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  C         D\n",
       "A   B                        \n",
       "bar one   -0.457745 -0.363322\n",
       "    three -0.620241  0.267481\n",
       "    two    0.878103 -0.064529\n",
       "foo one    0.437547 -1.215888\n",
       "    three  1.341618 -0.165415\n",
       "    two    0.651845  2.380574"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['A','B']).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Reformation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Stack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <th>second</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">bar</th>\n",
       "      <th>one</th>\n",
       "      <td>-0.347244</td>\n",
       "      <td>-0.780456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-0.394013</td>\n",
       "      <td>-0.561465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">baz</th>\n",
       "      <th>one</th>\n",
       "      <td>1.055322</td>\n",
       "      <td>2.620028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-1.443417</td>\n",
       "      <td>0.514155</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     A         B\n",
       "first second                    \n",
       "bar   one    -0.347244 -0.780456\n",
       "      two    -0.394013 -0.561465\n",
       "baz   one     1.055322  2.620028\n",
       "      two    -1.443417  0.514155"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',\n",
    "'foo', 'foo', 'qux', 'qux'],\n",
    "['one', 'two', 'one', 'two',\n",
    "'one', 'two', 'one', 'two']]))\n",
    "\n",
    "index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])\n",
    "df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])\n",
    "df2 = df[:4]\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "La méthode stack() compresses un level dans les colonnes du dataframe\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "first  second   \n",
       "bar    one     A   -0.347244\n",
       "               B   -0.780456\n",
       "       two     A   -0.394013\n",
       "               B   -0.561465\n",
       "baz    one     A    1.055322\n",
       "               B    2.620028\n",
       "       two     A   -1.443417\n",
       "               B    0.514155\n",
       "dtype: float64"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stacked = df2.stack()\n",
    "stacked"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Avec une 'stacked' dataframe ou série, l'opération inverse est unstack()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "interpreter": {
   "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
  },
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}