Unverified Commit fc7a990c authored by Joachim Krois's avatar Joachim Krois Committed by GitHub
Browse files

Delete SWC-2018-02-22-Applied Data Analysis I - Bascis.ipynb

parent 75bd5425
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Applied Data Analysis I"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* _function_ $\\to$ `OBJECT = pd.function_name(arg1, arg2, ...)`\n",
"* _method_ $\\to$ `OBJECT.method_name(arg1, arg2, ...)`\n",
"* _attribute_ $\\to$ `OBJECT.attribute` $\\qquad$ _Note that an attribute is accessed without parentheses_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# The `pandas` library"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Like `numpy`, but with labeled rows and columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`pandas` provides two core objects: the one-dimensional `pd.Series` and the two-dimensional `pd.DataFrame`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"\n",
"## The `pd.Series` object\n",
"\n",
"* _function_ $\\to$ `OBJECT = pd.function_name(arg1, arg2, ...)`"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"??pd.Series"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"`pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)` \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3, -8, -8, -4, 7, 9, 0, -9, -10, 7, 5, -1, -10,\n",
" 4, -10, 5, 9, 4, -6, -10, 6, -6, 7, -7, -8, -3])"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from numpy import random\n",
"random.seed(123)\n",
"my_data = random.randint(low=-10, high=10, size=26,)\n",
"my_data"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 3\n",
"1 -8\n",
"2 -8\n",
"3 -4\n",
"4 7\n",
"5 9\n",
"6 0\n",
"7 -9\n",
"8 -10\n",
"9 7\n",
"10 5\n",
"11 -1\n",
"12 -10\n",
"13 4\n",
"14 -10\n",
"15 5\n",
"16 9\n",
"17 4\n",
"18 -6\n",
"19 -10\n",
"20 6\n",
"21 -6\n",
"22 7\n",
"23 -7\n",
"24 -8\n",
"25 -3\n",
"Name: my_pandas_series, dtype: int32"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series(data=my_data, name=\"my_pandas_series\")\n",
"s"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Element-wise arithmetic**"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.3\n",
"1 -0.8\n",
"2 -0.8\n",
"3 -0.4\n",
"4 0.7\n",
"5 0.9\n",
"6 0.0\n",
"7 -0.9\n",
"8 -1.0\n",
"9 0.7\n",
"10 0.5\n",
"11 -0.1\n",
"12 -1.0\n",
"13 0.4\n",
"14 -1.0\n",
"15 0.5\n",
"16 0.9\n",
"17 0.4\n",
"18 -0.6\n",
"19 -1.0\n",
"20 0.6\n",
"21 -0.6\n",
"22 0.7\n",
"23 -0.7\n",
"24 -0.8\n",
"25 -0.3\n",
"Name: my_pandas_series, dtype: float64"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s*0.1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"\n",
"### `pd.Series` attributes\n",
"\n",
"* _attribute_ $\\to$ `OBJECT.attribute` $\\qquad$ _Note that an attribute is accessed without parentheses_"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int32')"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=26, step=1)"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Selection and slicing by index"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-8"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[2]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 -8\n",
"3 -4\n",
"4 7\n",
"5 9\n",
"Name: my_pandas_series, dtype: int32"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[2:6]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Challenge: \n",
"> Change the index to (arbitrary) letters of the alphabet"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import string\n",
"letters = string.ascii_uppercase\n",
"letters"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 3\n",
"B -8\n",
"C -8\n",
"D -4\n",
"E 7\n",
"F 9\n",
"G 0\n",
"H -9\n",
"I -10\n",
"J 7\n",
"K 5\n",
"L -1\n",
"M -10\n",
"N 4\n",
"O -10\n",
"P 5\n",
"Q 9\n",
"R 4\n",
"S -6\n",
"T -10\n",
"U 6\n",
"V -6\n",
"W 7\n",
"X -7\n",
"Y -8\n",
"Z -3\n",
"Name: my_pandas_series, dtype: int32"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use one uppercase letter per value as the index label\n",
"# (26 letters for the 26 entries of `s`).\n",
"s.index = list(letters)\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',\n",
" 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],\n",
" dtype='object')"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.index"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-8"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[\"C\"]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"C -8\n",
"D -4\n",
"E 7\n",
"F 9\n",
"G 0\n",
"H -9\n",
"I -10\n",
"J 7\n",
"K 5\n",
"Name: my_pandas_series, dtype: int32"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s[\"C\":\"K\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"### `pd.Series` methods\n",
"* _method_ $\\to$ `OBJECT.method_name(arg1, arg2, ...)`"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 3\n",
"B -8\n",
"C -8\n",
"D -4\n",
"E 7\n",
"F 9\n",
"G 0\n",
"H -9\n",
"I -10\n",
"J 7\n",
"K 5\n",
"L -1\n",
"M -10\n",
"N 4\n",
"O -10\n",
"P 5\n",
"Q 9\n",
"R 4\n",
"S -6\n",
"T -10\n",
"U 6\n",
"V -6\n",
"W 7\n",
"X -7\n",
"Y -8\n",
"Z -3\n",
"Name: my_pandas_series, dtype: int32"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-34"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.sum()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.3076923076923077"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.mean()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.max()"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-10"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.min()"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-2.0"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.median()"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-2.0"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.quantile(q=0.5)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.25 -8.0\n",