-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_parquets.py
32 lines (31 loc) · 1 KB
/
parse_parquets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pandas as pd
import os
# Directory holding the raw per-chunk parquet files.
parquets_dir = "zkevm_txs"  # "zkevm_txs", "arb_txs"
# Destination of the merged, column-reduced table.
output_path = "parsed_sample_zkevm.parquet"


def read_parquet_any_engine(file_path: str) -> pd.DataFrame:
    """Read one parquet file, retrying with fastparquet if the first try fails.

    The first attempt uses pandas' default engine selection. Any ordinary
    error triggers a second attempt that explicitly requests fastparquet.

    Raises:
        Exception: whatever the fastparquet attempt raises when both fail.
    """
    try:
        return pd.read_parquet(file_path)
    except Exception:
        # Retry explicitly with fastparquet before giving up on this file.
        return pd.read_parquet(file_path, engine="fastparquet")


def load_frames(directory: str) -> list:
    """Load every readable parquet file located directly inside *directory*.

    Entries that are not regular files (e.g. sub-directories) are skipped.
    A file that neither engine can read is not fatal: its path is printed
    and loading continues with the next file.

    Returns:
        A list of DataFrames, one per successfully read file, in sorted
        filename order so that repeated runs produce the same row order.
    """
    frames = []
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue
        try:
            frames.append(read_parquet_any_engine(file_path))
        except Exception:
            # Report the unreadable file and keep going (best effort).
            print(file_path)
    return frames


def write_output(frame: pd.DataFrame, path: str) -> None:
    """Write *frame* to *path* as parquet, preferring fastparquet.

    If the fastparquet write fails for any ordinary reason, the write is
    retried with pyarrow.
    """
    try:
        frame.to_parquet(path, engine="fastparquet", index=False, append=False)
    except Exception:
        # `append` is a fastparquet-only option; pandas forwards unknown
        # keywords to the pyarrow writer, which rejects it, so it must not
        # be passed on this path.
        frame.to_parquet(path, engine="pyarrow", index=False)


def main(directory: str = parquets_dir, destination: str = output_path) -> None:
    """Merge all parquet files in *directory* and save from/to/value columns.

    Raises:
        SystemExit: if no parquet file in *directory* could be read.
        KeyError: if the merged table lacks a "from", "to" or "value" column.
    """
    frames = load_frames(directory)
    if not frames:
        # pd.concat raises an opaque ValueError on an empty list; fail clearly.
        raise SystemExit(f"no readable parquet files found in {directory!r}")

    final = pd.concat(frames)
    # final.drop(columns=["type"], inplace=True) # saving errors, drop for now
    final = final[["from", "to", "value"]]
    print("before:", len(final))
    # NOTE: the filter below is disabled, so "before" and "after" are equal.
    # Re-enabling it requires filtering before the column selection above,
    # because that selection drops the "input" column.
    # final = final.loc[final["input"] == "0x"]
    print("after:", len(final))
    # final.to_csv("parsed_sample_zkevm.csv", index=False)
    write_output(final, destination)


if __name__ == "__main__":
    main()