-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjoin_answers.py
35 lines (23 loc) · 861 Bytes
/
join_answers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding: utf-8 -*-
"""Join the training data with its answers and save the result, together with the test data, to an HDF5 store."""
from data_io import DataIO
import pandas as pd
from os.path import join as path_join
# Load the project settings; every path below is built under `dio.data_dir`.
dio = DataIO("Settings_submission.json")
raw_dir = path_join(dio.data_dir, "data", "raw")

# Read all inputs up front so that a missing or malformed CSV fails before
# the output store is opened.
training_data = pd.read_csv(path_join(raw_dir, "train.csv"))
training_answers = pd.read_csv(path_join(raw_dir, "train_answers.csv"))
# The path components must be joined into one path: passing them to
# read_csv as separate positional arguments makes pandas treat "data" as
# the separator and the data directory itself as the file to read.
test = pd.read_csv(path_join(raw_dir, "test.csv"))

# Attach the answers to the training rows via the shared 'writer' column.
train_with_answers = pd.merge(training_data, training_answers, on='writer')

# Both tables are written to a single HDF5 file under data/processed.
store = pd.HDFStore(
    path_join(dio.data_dir, "data", "processed", "train_w_answers.h5")
)
try:
    store["train_w_answers"] = train_with_answers
    store["test"] = test
finally:
    # Always release the file handle, even if one of the writes fails.
    store.close()