Skip to content

Commit

Permalink
Release YDF 0.4.3 and TF-DF 1.9.1
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 631693889
  • Loading branch information
achoum authored and copybara-github committed May 8, 2024
1 parent 12f70f5 commit ca91a9a
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 24 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 1.9.1 - 2024-05-07

### Fix

- Solve dependency collision of YDF Proto between PYDF and TF-DF.

## 1.9.0 - 2024-03-12

### Fix
Expand Down
5 changes: 4 additions & 1 deletion configure/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
This file is used by tools/build_pip_package.sh.
"""

import platform
import setuptools
from setuptools.command.install import install
from setuptools.dist import Distribution

_VERSION = "1.9.0"
_VERSION = "1.9.1"

with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
Expand All @@ -35,6 +36,7 @@
"wheel",
"wurlitzer",
"tf_keras~=2.16",
"ydf",
]


Expand All @@ -54,6 +56,7 @@ def has_ext_modules(self):
def is_pure(self):
return False


try:
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

Expand Down
1 change: 1 addition & 0 deletions documentation/known_issues.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ The following table shows the compatibility between

tensorflow_decision_forests | tensorflow
--------------------------- | ---------------
1.9.1 | 2.16.1
1.9.0 | 2.16.1
1.8.0 - 1.8.1 | 2.15.0
1.6.0 - 1.7.0 | 2.14.0
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_decision_forests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"""

__version__ = "1.9.0"
__version__ = "1.9.1"
__author__ = "Mathieu Guillame-Bert"

compatible_tf_versions = ["2.16.1"]
Expand Down
72 changes: 63 additions & 9 deletions tensorflow_decision_forests/keras/wrappers_pre_generated.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,18 @@ class CartModel(core.CoreModel):
expressed in seconds. Each learning algorithm is free to use this
parameter as it sees fit. Enabling maximum training duration makes the
model training non-deterministic. Default: -1.0.
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
Increasing this value increases the training time. Decreasing this value
acts as a regularization. The value should be in [2,
num_numerical_features]. If the value is above the total number of
numerical features, the value is capped automatically. The value 1 is
allowed but results in ordinary (non-oblique) splits. Default: None.
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
controlled by the "num_candidate_attributes" or
"num_candidate_attributes_ratio" parameters. If false, all the attributes
are tested. Default: None.
min_examples: Minimum number of examples in a node. Default: 5.
missing_value_policy: Method used to handle missing attribute values. -
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
Expand Down Expand Up @@ -345,9 +357,11 @@ class CartModel(core.CoreModel):
split_axis: What structure of split to consider for numerical features. -
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
Sparse oblique splits (i.e. splits one a small number of features) from
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
"AXIS_ALIGNED".
Sparse oblique splits (i.e. random splits on a small number of features)
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
et al., 2019. Default: "AXIS_ALIGNED".
uplift_min_examples_in_treatment: For uplift models only. Minimum number of
examples per treatment in a node. Default: 5.
uplift_split_score: For uplift models only. Splitter score i.e. score
Expand Down Expand Up @@ -402,6 +416,8 @@ def __init__(
max_num_nodes: Optional[int] = None,
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
maximum_training_duration_seconds: Optional[float] = -1.0,
mhld_oblique_max_num_attributes: Optional[int] = None,
mhld_oblique_sample_attributes: Optional[bool] = None,
min_examples: Optional[int] = 5,
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
num_candidate_attributes: Optional[int] = 0,
Expand Down Expand Up @@ -445,6 +461,8 @@ def __init__(
maximum_model_size_in_memory_in_bytes
),
"maximum_training_duration_seconds": maximum_training_duration_seconds,
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
"min_examples": min_examples,
"missing_value_policy": missing_value_policy,
"num_candidate_attributes": num_candidate_attributes,
Expand Down Expand Up @@ -1124,6 +1142,18 @@ class GradientBoostedTreesModel(core.CoreModel):
expressed in seconds. Each learning algorithm is free to use this
parameter as it sees fit. Enabling maximum training duration makes the
model training non-deterministic. Default: -1.0.
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
Increasing this value increases the training time. Decreasing this value
acts as a regularization. The value should be in [2,
num_numerical_features]. If the value is above the total number of
numerical features, the value is capped automatically. The value 1 is
allowed but results in ordinary (non-oblique) splits. Default: None.
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
controlled by the "num_candidate_attributes" or
"num_candidate_attributes_ratio" parameters. If false, all the attributes
are tested. Default: None.
min_examples: Minimum number of examples in a node. Default: 5.
missing_value_policy: Method used to handle missing attribute values. -
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
Expand Down Expand Up @@ -1232,9 +1262,11 @@ class GradientBoostedTreesModel(core.CoreModel):
split_axis: What structure of split to consider for numerical features. -
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
Sparse oblique splits (i.e. splits one a small number of features) from
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
"AXIS_ALIGNED".
Sparse oblique splits (i.e. random splits on a small number of features)
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
et al., 2019. Default: "AXIS_ALIGNED".
subsample: Ratio of the dataset (sampling without replacement) used to train
individual trees for the random sampling method. If \\"subsample\\" is set
and if \\"sampling_method\\" is NOT set or set to \\"NONE\\", then
Expand Down Expand Up @@ -1324,6 +1356,8 @@ def __init__(
max_num_nodes: Optional[int] = None,
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
maximum_training_duration_seconds: Optional[float] = -1.0,
mhld_oblique_max_num_attributes: Optional[int] = None,
mhld_oblique_sample_attributes: Optional[bool] = None,
min_examples: Optional[int] = 5,
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
num_candidate_attributes: Optional[int] = -1,
Expand Down Expand Up @@ -1397,6 +1431,8 @@ def __init__(
maximum_model_size_in_memory_in_bytes
),
"maximum_training_duration_seconds": maximum_training_duration_seconds,
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
"min_examples": min_examples,
"missing_value_policy": missing_value_policy,
"num_candidate_attributes": num_candidate_attributes,
Expand Down Expand Up @@ -2213,6 +2249,18 @@ class RandomForestModel(core.CoreModel):
expressed in seconds. Each learning algorithm is free to use this
parameter as it sees fit. Enabling maximum training duration makes the
model training non-deterministic. Default: -1.0.
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
Increasing this value increases the training time. Decreasing this value
acts as a regularization. The value should be in [2,
num_numerical_features]. If the value is above the total number of
numerical features, the value is capped automatically. The value 1 is
allowed but results in ordinary (non-oblique) splits. Default: None.
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
controlled by the "num_candidate_attributes" or
"num_candidate_attributes_ratio" parameters. If false, all the attributes
are tested. Default: None.
min_examples: Minimum number of examples in a node. Default: 5.
missing_value_policy: Method used to handle missing attribute values. -
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
Expand Down Expand Up @@ -2315,9 +2363,11 @@ class RandomForestModel(core.CoreModel):
split_axis: What structure of split to consider for numerical features. -
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
Sparse oblique splits (i.e. splits one a small number of features) from
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
"AXIS_ALIGNED".
Sparse oblique splits (i.e. random splits on a small number of features)
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
et al., 2019. Default: "AXIS_ALIGNED".
uplift_min_examples_in_treatment: For uplift models only. Minimum number of
examples per treatment in a node. Default: 5.
uplift_split_score: For uplift models only. Splitter score i.e. score
Expand Down Expand Up @@ -2380,6 +2430,8 @@ def __init__(
max_num_nodes: Optional[int] = None,
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
maximum_training_duration_seconds: Optional[float] = -1.0,
mhld_oblique_max_num_attributes: Optional[int] = None,
mhld_oblique_sample_attributes: Optional[bool] = None,
min_examples: Optional[int] = 5,
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
num_candidate_attributes: Optional[int] = 0,
Expand Down Expand Up @@ -2433,6 +2485,8 @@ def __init__(
maximum_model_size_in_memory_in_bytes
),
"maximum_training_duration_seconds": maximum_training_duration_seconds,
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
"min_examples": min_examples,
"missing_value_policy": missing_value_policy,
"num_candidate_attributes": num_candidate_attributes,
Expand Down
67 changes: 67 additions & 0 deletions tensorflow_decision_forests/tools/run_e2e_tfdf_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.



# Converts a non-submitted CL to a standalone Bazel project in a local
# directory, compiles the project and runs the tests.
#
# Usage example:
# third_party/tensorflow_decision_forests/tools/run_e2e_tfdf_test.sh

set -vex

LOCAL_DIR="/usr/local/google/home/${USER}/git/decision-forests"

CL=$(hg exportedcl)
echo "Current CL: ${CL}"
echo "Make sure the CL is synced!"

# Exports the current CL as a standalone Bazel project.
#
# Globals read:
#   LOCAL_DIR: Destination directory. It is removed before the export.
#   CL: Identifier of the change passed to Copybara.
function export_project() {
  COPYBARA="/google/bin/releases/copybara/public/copybara/copybara"

  # Test the Copybara configuration.
  bazel test third_party/tensorflow_decision_forests:copybara_test

  echo "Export a Bazel project locally"
  echo "=============================="

  # The expansions are quoted so that a path containing whitespace or glob
  # characters is never split into several "rm" targets or arguments.
  rm -fr "${LOCAL_DIR}"
  "${COPYBARA}" third_party/tensorflow_decision_forests/copy.bara.sky presubmit_piper_to_gerrit "${CL}" \
    --dry-run --init-history --squash --force \
    --git-destination-path "${LOCAL_DIR}" --ignore-noop

  /google/bin/releases/opensource/thirdparty/cross/cross "${LOCAL_DIR}"
}

echo "Test the project"
echo "================"

# Enters the exported project directory and starts the compile Docker with an
# interactive shell.
#
# Globals read:
#   LOCAL_DIR: Directory of the exported project.
run_all() {
  # Quoted so that the path is passed to "cd" as a single argument.
  cd "${LOCAL_DIR}"

  # Start the Docker
  sudo ./tools/start_compile_docker.sh /bin/bash

  # In the docker, you can now trigger the builder with the following line in
  # the docker:
  #   RUN_TESTS=1 PY_VERSION=3.9 TF_VERSION=2.16.1 ./tools/test_bazel.sh

  # Alternatively, you can trigger the build directly with:
  #   sudo ./tools/start_compile_docker.sh "RUN_TESTS=1 PY_VERSION=3.9 TF_VERSION=2.16.1 ./tools/test_bazel.sh && chmod -R a+xrw . && /bin/bash"
}

export_project
run_all
19 changes: 10 additions & 9 deletions tools/build_pip_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,16 @@ function assemble_files() {
# Distribution server binaries
cp ${SRCBIN}/keras/grpc_worker_main ${SRCPK}/tensorflow_decision_forests/keras/

# YDF's proto wrappers.
YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
mkdir -p ${SRCPK}/yggdrasil_decision_forests
pushd ${YDFSRCBIN}
find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
popd

# Add __init__.py to all exported Yggdrasil sub-directories.
find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
# Note: Starting with TF-DF 1.9.1, the YDF Protos are included by (P)YDF.
# TODO: Remove this block.
# # YDF's proto wrappers.
# YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
# mkdir -p ${SRCPK}/yggdrasil_decision_forests
# pushd ${YDFSRCBIN}
# find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
# popd
# # Add __init__.py to all exported Yggdrasil sub-directories.
# find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
}

# Build a pip package.
Expand Down
16 changes: 12 additions & 4 deletions tools/start_compile_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,23 @@
# ./tools/build_pip_package.sh ALL_VERSIONS_ALREADY_ASSEMBLED
#
# https://hub.docker.com/r/tensorflow/build/tags?page=1
DOCKER=tensorflow/build:2.17-python3.9

# Current directory
# Useful if Yggdrasil Decision Forests is available locally in a neighbor
# directory.
TFDF_DIRNAME=${PWD##*/}

# Download docker
docker pull ${DOCKER}
# Image used to create the compile container, and name of that container.
DOCKER_IMAGE=tensorflow/build:2.16-python3.9
DOCKER_CONTAINER=compile_tfdf

echo "Available containers:"
sudo docker container ls -a --size

set +e  # Ignore the error if the container already exists.
CREATE_DOCKER_FLAGS="-i -t -p 8889:8889 --network host -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME}"
# CREATE_DOCKER_FLAGS is intentionally left unquoted: it holds several flags
# that must be word-split into separate arguments.
sudo docker create ${CREATE_DOCKER_FLAGS} --name ${DOCKER_CONTAINER} ${DOCKER_IMAGE}
sudo docker start ${DOCKER_CONTAINER}
set -e

# Start docker
docker run -it -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME} ${DOCKER} $@
# "bash -c" takes the command as ONE string; the following words become $0,
# $1, ... of that command. "$*" joins all the script arguments into a single
# string so that a multi-word command is executed in full.
sudo docker exec -it ${DOCKER_CONTAINER} /bin/bash -c "$*"

0 comments on commit ca91a9a

Please sign in to comment.