"""Homework 1 command-line tasks: descriptive statistics for a salary column."""
import math

import click
from dotenv import load_dotenv
import pandas as pd

from data.main import paths, connect


@click.group()
def cli():
    """Command group for the homework tasks."""


def _geometric_mean(values):
    """Return the geometric mean of a non-empty list of non-negative numbers.

    The mean is computed as ``exp(mean(log(x)))`` instead of taking the n-th
    root of the raw product: the product of even a few dozen salary-sized
    numbers exceeds the float range, which makes the naive form raise
    ``OverflowError`` (ints) or collapse to ``inf`` (floats).

    Raises:
        ValueError: if any value is negative (``math.log`` domain error).
    """
    # A single zero forces the product, and therefore the mean, to zero;
    # handle it up front because log(0) is undefined.
    if any(value == 0 for value in values):
        return 0.0
    return math.exp(math.fsum(math.log(value) for value in values) / len(values))


def _median(ordered):
    """Return the median of a non-empty, ascending-sorted list.

    For an even number of items the two central values are averaged; for an
    odd number the single central value is returned unchanged.
    """
    middle = len(ordered) // 2
    if len(ordered) % 2:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2


@cli.command('hw1:simple')
def simple_mean():
    """Print descriptive statistics for the salary data of homework 1.

    Part one reads the pipe-separated ``hw/q1.csv`` under ``paths('data')``
    and prints, for its ``salary`` column: mean, frequency-weighted mean,
    geometric mean, median, mode, population variance and standard deviation,
    per-row z-scores, coefficient of variation, and first/third quartiles.

    Part two reads ``hw/a1_q2.csv`` (columns ``median`` and ``mean``) and
    prints ``3 * median - 2 * mean`` for every row as an estimate of the mode.

    Raises:
        click.ClickException: if ``q1.csv`` contains no rows.
    """
    data = (
        pd.read_csv(paths('data') / 'hw' / 'q1.csv', sep="|")
        .sort_values('salary')
        .reset_index(drop=True)
    )
    # Work on the salary column only, so extra (possibly non-numeric) columns
    # in the CSV cannot break the arithmetic below.
    salary = data['salary']
    ordered = salary.tolist()  # ascending, plain Python scalars
    n = len(ordered)
    if n == 0:
        raise click.ClickException("hw/q1.csv contains no salary rows")

    mean = sum(ordered) / n
    print(f"mean: {mean:.1f}")

    # Number of occurrences of each distinct salary, indexed by salary
    # (groupby sorts the keys ascending).
    count = data.groupby('salary')['salary'].count()
    weighted_mean = sum(value * freq for value, freq in count.items()) / n
    print(f"weighted: {weighted_mean:.1f}")

    geometric = _geometric_mean(ordered)
    print(f"geometric: {geometric:.1f}")

    median = _median(ordered)
    print(f"median: {median}")

    # On ties, max() keeps the first key it sees, i.e. the smallest salary.
    counts = dict(count.items())
    mode = max(counts, key=counts.get)
    print(f"mode: {mode}")

    # Population variance (divides by n, not n - 1).
    variance = sum((value - mean) ** 2 for value in ordered) / n
    print(f"variance: {variance:.1f}")

    std = math.sqrt(variance)
    print(f"std: {std:.2f}")

    # Kept in pandas so a zero standard deviation yields NaN instead of
    # raising ZeroDivisionError.
    standardized = ((salary - mean) / std).round(2)
    z_scores = list(zip(ordered, standardized.tolist()))
    print(f"z_scores: {z_scores}")

    coeff_v = std / mean * 100
    print(f"coeff. of var.: {coeff_v:.2f}%")

    # Quartiles are taken positionally from the sorted values (no
    # interpolation between neighbouring rows).
    q_1 = ordered[n // 4]
    print(f"first quartile: {q_1}")

    q_3 = ordered[(n // 4) * 3]
    print(f"third quartile: {q_3}")

    # Part two: estimate the mode of each row from its median and mean.
    summary = pd.read_csv(paths('data') / 'hw' / 'a1_q2.csv', sep="|")
    estimated_mode = (3 * summary['median']) - (2 * summary['mean'])
    print(f"mode: {estimated_mode.values}")


if __name__ == "__main__":
    load_dotenv()
    cli()