Commit 003ec220 authored by Maximilian Dolling
Browse files

changed dependencies handling

parent 4f1269a2
Pipeline #11929 passed with stage
in 40 seconds
pandas==1.0.5 pandas == 1.0.5
matplotlib==3.2.2 matplotlib == 3.2.2
\ No newline at end of file \ No newline at end of file
...@@ -7,10 +7,10 @@ from datetime import date ...@@ -7,10 +7,10 @@ from datetime import date
from os import makedirs from os import makedirs
from pathlib import Path from pathlib import Path
import matplotlib.pyplot as plt import matplotlib.pyplot
import pandas as pd import pandas
plt.style.use("ggplot") matplotlib.pyplot.style.use("ggplot")
_ASTRONAUT_DATA = "data/astronauts.json" _ASTRONAUT_DATA = "data/astronauts.json"
_OUTPUT_PATH = "results" _OUTPUT_PATH = "results"
...@@ -18,7 +18,7 @@ _OUTPUT_PATH = "results" ...@@ -18,7 +18,7 @@ _OUTPUT_PATH = "results"
## ##
# Data preparation functions # Data preparation functions
## ##
def prepare_data_set(data_frame: pd.DataFrame) -> pd.DataFrame: def prepare_data_set(data_frame: pandas.DataFrame) -> pandas.DataFrame:
""" """
Prepares the raw data by: Prepares the raw data by:
- dropping NaN's - dropping NaN's
...@@ -37,9 +37,9 @@ def prepare_data_set(data_frame: pd.DataFrame) -> pd.DataFrame: ...@@ -37,9 +37,9 @@ def prepare_data_set(data_frame: pd.DataFrame) -> pd.DataFrame:
# Set pandas dtypes for columns with date or time # Set pandas dtypes for columns with date or time
data_frame = data_frame.dropna(subset=["time_in_space"]) data_frame = data_frame.dropna(subset=["time_in_space"])
data_frame["time_in_space"] = data_frame["time_in_space"].astype(int) data_frame["time_in_space"] = data_frame["time_in_space"].astype(int)
data_frame["time_in_space"] = pd.to_timedelta(data_frame["time_in_space"], unit="m") data_frame["time_in_space"] = pandas.to_timedelta(data_frame["time_in_space"], unit="m")
data_frame["birthdate"] = pd.to_datetime(data_frame["birthdate"]) data_frame["birthdate"] = pandas.to_datetime(data_frame["birthdate"])
data_frame["date_of_death"] = pd.to_datetime(data_frame["date_of_death"]) data_frame["date_of_death"] = pandas.to_datetime(data_frame["date_of_death"])
data_frame.sort_values("birthdate", inplace=True) data_frame.sort_values("birthdate", inplace=True)
# Calculate extra columns from the original data # Calculate extra columns from the original data
...@@ -76,12 +76,12 @@ def is_alive(date_of_death) -> bool: ...@@ -76,12 +76,12 @@ def is_alive(date_of_death) -> bool:
Returns: Returns:
bool bool
""" """
if pd.isnull(date_of_death): if pandas.isnull(date_of_death):
return True return True
return False return False
def calculate_age(born: pd.Timestamp) -> int: def calculate_age(born: pandas.Timestamp) -> int:
""" """
Calculates an age from a date. Calculates an age from a date.
...@@ -92,14 +92,14 @@ def calculate_age(born: pd.Timestamp) -> int: ...@@ -92,14 +92,14 @@ def calculate_age(born: pd.Timestamp) -> int:
int int
""" """
if not isinstance(born, pd.Timestamp): if not isinstance(born, pandas.Timestamp):
raise TypeError(f'expected {pd.Timestamp}, got {type(born)}') raise TypeError(f'expected {pandas.Timestamp}, got {type(born)}')
today = date.today() today = date.today()
return today.year - born.year - ((today.month, today.day) < (born.month, born.day)) return today.year - born.year - ((today.month, today.day) < (born.month, born.day))
def died_with_age(row: pd.Series): def died_with_age(row: pandas.Series):
""" """
Calculates an age from a birthdate and date_of_death. Calculates an age from a birthdate and date_of_death.
...@@ -109,7 +109,7 @@ def died_with_age(row: pd.Series): ...@@ -109,7 +109,7 @@ def died_with_age(row: pd.Series):
Returns: Returns:
int int
""" """
if pd.isnull(row["date_of_death"]): if pandas.isnull(row["date_of_death"]):
return None return None
born = row["birthdate"] born = row["birthdate"]
today = row["date_of_death"] today = row["date_of_death"]
...@@ -141,7 +141,7 @@ def create_age_histogram(age_data_frame, died_data_frame): ...@@ -141,7 +141,7 @@ def create_age_histogram(age_data_frame, died_data_frame):
in the categories 'age at dead' and 'age alive'. in the categories 'age at dead' and 'age alive'.
""" """
fig, axs = plt.subplots(1, 1) fig, axs = matplotlib.pyplot.subplots(1, 1)
axs.hist( axs.hist(
[died_data_frame["died_with_age"], age_data_frame["age"]], [died_data_frame["died_with_age"], age_data_frame["age"]],
bins=70, bins=70,
...@@ -160,16 +160,16 @@ def create_age_boxplot(age_data_frame, died_data_frame): ...@@ -160,16 +160,16 @@ def create_age_boxplot(age_data_frame, died_data_frame):
in the categories dead and alive. in the categories dead and alive.
""" """
fig, axs = plt.subplots(1, 1) fig, axs = matplotlib.pyplot.subplots(1, 1)
axs.boxplot([died_data_frame["died_with_age"], age_data_frame["age"]]) axs.boxplot([died_data_frame["died_with_age"], age_data_frame["age"]])
axs.set_title("Age distribution; Dead vs. Alive astronauts") axs.set_title("Age distribution; Dead vs. Alive astronauts")
axs.set_xlabel("Category") axs.set_xlabel("Category")
plt.setp(axs, xticks=[1, 2], xticklabels=["Dead", "Alive"]) matplotlib.pyplot.setp(axs, xticks=[1, 2], xticklabels=["Dead", "Alive"])
axs.set_ylabel("Age") axs.set_ylabel("Age")
save(fig, "boxplot.png") save(fig, "boxplot.png")
def save(fig: plt.Figure, filename: str): def save(fig: matplotlib.pyplot.Figure, filename: str):
""" """
Saves a matplotlib Figure to a file. It overwrites existing files with the same filename. Saves a matplotlib Figure to a file. It overwrites existing files with the same filename.
...@@ -185,7 +185,7 @@ def perform_analysis(): ...@@ -185,7 +185,7 @@ def perform_analysis():
# Set up directory structure and preprocess data # Set up directory structure and preprocess data
makedirs(_OUTPUT_PATH, exist_ok=True) makedirs(_OUTPUT_PATH, exist_ok=True)
data_frame = pd.read_json(Path(_ASTRONAUT_DATA).resolve()) data_frame = pandas.read_json(Path(_ASTRONAUT_DATA).resolve())
data_frame = prepare_data_set(data_frame) data_frame = prepare_data_set(data_frame)
# Male humans in space # Male humans in space
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment