Skip to content

Commit

Permalink
select regex upd
Browse files Browse the repository at this point in the history
Select columns using word-boundary regex matching instead of plain string substitution. This stays robust when one column name is a substring of another.
  • Loading branch information
agalitsyna authored Oct 25, 2022
1 parent 3ba2423 commit 1e8d518
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions pairtools/lib/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ def evaluate_stream(
for i, col in enumerate(column_names):
if col in TYPES:
col_type = TYPES[col]
condition = condition.replace(col, "{}(COLS[{}])".format(col_type, i))
condition = re.sub(r"\b%s\b" % col , "{}(COLS[{}])".format(col_type, i), condition)
#condition.replace(col, "{}(COLS[{}])".format(col_type, i))
else:
condition = condition.replace(col, "COLS[{}]".format(i))
condition = re.sub(r"\b%s\b" % col, "COLS[{}]".format(i), condition)
#condition = condition.replace(col, "COLS[{}]".format(i))

# Compile the filtering expression:
match_func = compile(condition, "<string>", "eval")
Expand Down Expand Up @@ -121,7 +123,8 @@ def evaluate_df(df, condition, type_cast=(), startup_code=None, engine="pandas")
else:
# Set up the columns indexing
for i, col in enumerate(df.columns):
condition = condition.replace(col, "COLS[{}]".format(i))
condition = re.sub(r"\b%s\b" % col, "COLS[{}]".format(i), condition)
#condition = condition.replace(col, "COLS[{}]".format(i))

filter_passed_output = []
match_func = compile(condition, "<string>", "eval")
Expand Down

0 comments on commit 1e8d518

Please sign in to comment.