add hw assignment 1
This commit is contained in:
parent
81f4f37c9d
commit
7cb5b392d1
|
@ -0,0 +1,62 @@
|
||||||
|
import click
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from data.main import paths, connect
|
||||||
|
import pandas as pd
|
||||||
|
import math
|
||||||
|
|
||||||
|
@click.group()
def cli():
    # Root command group: it performs no work of its own and exists only so
    # that sub-commands can be registered on it via @cli.command(...).
    pass
|
||||||
|
|
||||||
|
def _geometric_mean(values):
    """Return the geometric mean of a non-empty sequence of numbers >= 0.

    The result is computed as exp(mean(log(v))) rather than by multiplying
    all values together: the running product of a few dozen salary-sized
    numbers exceeds the float range, which makes ``product ** (1 / n)``
    raise OverflowError (ints) or evaluate to inf (floats).

    Raises:
        ValueError: if any value is negative (the geometric mean is
            undefined in that case).
    """
    if any(v < 0 for v in values):
        raise ValueError("geometric mean is undefined for negative values")
    if any(v == 0 for v in values):
        # A single zero factor makes the whole product zero.
        return 0.0
    return math.exp(math.fsum(math.log(v) for v in values) / len(values))


def _median(ordered):
    """Return the median of an already sorted, non-empty sequence.

    For an even number of items the two middle values are averaged.
    """
    mid = len(ordered) // 2
    if len(ordered) % 2:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2


@cli.command('hw1:simple')
def simple_mean():
    """Print descriptive statistics for the homework 1 data sets."""
    data = (
        pd.read_csv(paths('data') / 'hw' / 'q1.csv', sep="|")
        .sort_values('salary')
        .reset_index(drop=True)
    )
    # Work on the salary column only, as plain Python scalars in ascending
    # order; any other columns in the CSV are ignored.
    salaries = data.salary.tolist()
    n = len(salaries)
    if n == 0:
        raise click.ClickException("q1.csv contains no salary rows")

    mean = sum(salaries) / n
    print(f"mean: {mean:.1f}")

    # Frequency of each distinct salary; groupby yields keys in ascending order.
    frequencies = dict(data.groupby('salary')['salary'].count().items())
    weighted_mean = sum(value * freq for value, freq in frequencies.items()) / n
    print(f"weighted: {weighted_mean:.1f}")

    geometric = _geometric_mean(salaries)
    print(f"geometric: {geometric:.1f}")

    median = _median(salaries)
    print(f"median: {median}")

    # max() keeps the first maximal key, so ties resolve to the smallest salary.
    mode = max(frequencies, key=frequencies.get)
    print(f"mode: {mode}")

    # Population variance (divides by n, not n - 1).
    variance = sum((s - mean) ** 2 for s in salaries) / n
    print(f"variance: {variance:.1f}")

    std = math.sqrt(variance)
    print(f"std: {std:.2f}")

    # Computed on the salary Series (not the whole frame) so that non-numeric
    # columns in the CSV cannot break the arithmetic.
    z_values = ((data.salary - mean) / std).round(2)
    z_scores = list(zip(salaries, z_values))
    print(f"z_scores: {z_scores}")

    coeff_v = std / mean * 100
    print(f"coeff. of var.: {coeff_v:.2f}%")

    # Quartiles by simple position in the sorted data (no interpolation).
    quarter = n // 4
    q_1 = salaries[quarter]
    print(f"first quartile: {q_1}")

    q_3 = salaries[quarter * 3]
    print(f"third quartile: {q_3}")

    # Second data set: estimate the mode of each row from its median and mean
    # using the empirical relation mode = 3 * median - 2 * mean.
    summary = pd.read_csv(paths('data') / 'hw' / 'a1_q2.csv', sep="|")
    empirical_mode = 3 * summary['median'] - 2 * summary['mean']
    print(f"mode: {empirical_mode.values}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run load_dotenv() first so whatever it loads is in
    # place before the click group dispatches to a command.
    load_dotenv()
    cli()
|
Loading…
Reference in New Issue