-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPandasHelper.py
62 lines (52 loc) · 2.12 KB
/
PandasHelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
class PandasDataFrameHelper:
    """Static helpers for building, measuring and normalizing pandas DataFrames."""

    @staticmethod
    def analyzeNums4Rows(df):
        """Return the number of rows of *df* (``df.shape[0]``)."""
        return df.shape[0]

    @staticmethod
    def analyzeNums4Cols(df):
        """Return the number of columns of *df* (``df.shape[1]``)."""
        return df.shape[1]

    @staticmethod
    def parseObjectList2DataFrame(objList, obj2DictFunc, desig_col_list=None):
        """Convert an iterable of objects into a DataFrame.

        Args:
            objList: Iterable of arbitrary objects.
            obj2DictFunc: Callable mapping one object to a dict of
                ``column name -> value``.
            desig_col_list: Optional list of column names to extract. When
                ``None``, the keys of the first converted dict are used.

        Returns:
            A ``pandas.DataFrame`` with one row per object, or ``None`` when
            *objList* yields no objects.
        """
        dict_list = [obj2DictFunc(obj) for obj in objList]
        return PandasDataFrameHelper.parseDictList2DataFrame(dict_list, desig_col_list)

    @staticmethod
    def parseDictList2DataFrame(dictList, desig_col_list=None):
        """Convert a list of dicts into a DataFrame, one row per dict.

        Args:
            dictList: Sized sequence of dicts (``column name -> value``).
                It is not modified.
            desig_col_list: Optional list of column names to extract. When
                ``None``, the keys of ``dictList[0]`` are used. A key missing
                from a given dict produces ``None`` in that row.

        Returns:
            A ``pandas.DataFrame`` whose columns follow *desig_col_list*, or
            ``None`` when *dictList* is ``None`` or empty.
        """
        if dictList is None or len(dictList) == 0:
            return None
        if desig_col_list is None:
            desig_col_list = PandasDataFrameHelper.__obtainDictColList(dictList)
        # Build every column exactly once; a name repeated in desig_col_list
        # simply maps to the same values instead of growing a ragged column.
        col_values_dict = {
            desig_col: [dictInfo.get(desig_col) for dictInfo in dictList]
            for desig_col in desig_col_list
        }
        return pd.DataFrame(col_values_dict)

    @staticmethod
    def __obtainDictColList(dictList):
        """Return the keys of the first dict in *dictList* as the column list.

        The first element is only read, never removed: popping it would drop
        the first record from the resulting DataFrame and mutate the caller's
        list.
        """
        return list(dictList[0])

    @staticmethod
    def processDataNormalizationByMean(df, desig_col_list):
        """Standardize the given columns: ``(x - mean) / std`` per column.

        The operation is performed IN PLACE: the selected columns of *df* are
        overwritten and the very same DataFrame object is returned.

        Args:
            df: DataFrame to normalize.
            desig_col_list: List of column names to normalize.

        Returns:
            *df* itself, with the selected columns replaced.

        Note:
            ``DataFrame.std()`` is called with its defaults (pandas uses the
            sample standard deviation, ``ddof=1``). A column with zero
            standard deviation is divided by zero and will not produce
            finite values.
        """
        desig_col_df = df[desig_col_list]
        df[desig_col_list] = (desig_col_df - desig_col_df.mean()) / desig_col_df.std()
        return df

    @staticmethod
    def processDataNormalizationByMaxMin(df, desig_col_list):
        """Min-max scale the given columns: ``(x - min) / (max - min)``.

        The operation is performed IN PLACE: the selected columns of *df* are
        overwritten and the very same DataFrame object is returned.

        Args:
            df: DataFrame to normalize.
            desig_col_list: List of column names to normalize.

        Returns:
            *df* itself, with the selected columns replaced.

        Note:
            A column whose max equals its min is divided by zero and will
            not produce finite values.
        """
        desig_col_df = df[desig_col_list]
        col_min = desig_col_df.min()  # computed once, used twice below
        df[desig_col_list] = (desig_col_df - col_min) / (desig_col_df.max() - col_min)
        return df