From 4e4ec4c8ab8ea2e4340ae94f0f6c3f51653bccbe Mon Sep 17 00:00:00 2001 From: James Widman Date: Fri, 4 Oct 2024 16:45:44 -0700 Subject: [PATCH 01/12] Introduce new tool: `-t compdb-targets` Fixes #1544 Co-authored-by: Linkun Chen Co-authored-by: csmoe Co-authored-by: James Widman --- doc/manual.asciidoc | 5 ++ misc/output_test.py | 162 +++++++++++++++++++++++++++++----------- src/command_collector.h | 65 ++++++++++++++++ src/graph_test.cc | 49 ++++++++++++ src/ninja.cc | 148 +++++++++++++++++++++++++++++++----- src/util.cc | 16 ++++ src/util.h | 3 + 7 files changed, 386 insertions(+), 62 deletions(-) create mode 100644 src/command_collector.h diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index e8b66807cf..bcd83a3f2f 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -289,6 +289,11 @@ http://clang.llvm.org/docs/JSONCompilationDatabase.html[JSON format] expected by the Clang tooling interface. _Available since Ninja 1.2._ +`compdb-targets`:: like `compdb`, but takes a list of targets instead of rules, +and expects at least one target. The resulting compilation database contains +all commands required to build the indicated targets, and _only_ those +commands. + `deps`:: show all dependencies stored in the `.ninja_deps` file. When given a target, show just the target's dependencies. 
_Available since Ninja 1.4._ diff --git a/misc/output_test.py b/misc/output_test.py index 81e49067c8..b9ded383f1 100755 --- a/misc/output_test.py +++ b/misc/output_test.py @@ -12,7 +12,7 @@ import tempfile import unittest from textwrap import dedent -from typing import Dict +import typing as T default_env = dict(os.environ) default_env.pop('NINJA_STATUS', None) @@ -20,6 +20,41 @@ default_env['TERM'] = '' NINJA_PATH = os.path.abspath('./ninja') +def remove_non_visible_lines(raw_output: bytes) -> str: + # When running in a smart terminal, Ninja uses CR (\r) to + # return the cursor to the start of the current line, prints + # something, then uses `\x1b[K` to clear everything until + # the end of the line. + # + # Thus printing 'FOO', 'BAR', 'ZOO' on the same line, then + # jumping to the next one results in the following output + # on Posix: + # + # '\rFOO\x1b[K\rBAR\x1b[K\rZOO\x1b[K\r\n' + # + # The following splits the output at both \r, \n and \r\n + # boundaries, which gives: + # + # [ '\r', 'FOO\x1b[K\r', 'BAR\x1b[K\r', 'ZOO\x1b[K\r\n' ] + # + decoded_lines = raw_output.decode('utf-8').splitlines(True) + + # Remove any item that ends with a '\r' as this means its + # content will be overwritten by the next item in the list. + # For the previous example, this gives: + # + # [ 'ZOO\x1b[K\r\n' ] + # + final_lines = [ l for l in decoded_lines if not l.endswith('\r') ] + + # Return a single string that concatenates all filtered lines + # while removing any remaining \r in it. Needed to transform + # \r\n into \n. 
+ # + # "ZOO\x1b[K\n' + # + return ''.join(final_lines).replace('\r', '') + class BuildDir: def __init__(self, build_ninja: str): self.build_ninja = dedent(build_ninja) @@ -35,12 +70,18 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.d.cleanup() + @property + def path(self) -> str: + return os.path.realpath(self.d.name) + + def run( self, - flags: str = '', + flags: T.Optional[str] = None, pipe: bool = False, raw_output: bool = False, - env: Dict[str, str] = default_env, + env: T.Dict[str, str] = default_env, + print_err_output = True, ) -> str: """Run Ninja command, and get filtered output. @@ -56,13 +97,17 @@ def run( env: Optional environment dictionary to run the command in. + print_err_output: set to False if the test expects ninja to print + something to stderr. (Otherwise, an error message from Ninja + probably represents a failed test.) + Returns: A UTF-8 string corresponding to the output (stdout only) of the Ninja command. By default, partial lines that were overwritten are removed according to the rules described in the comments below. """ - ninja_cmd = '{} {}'.format(NINJA_PATH, flags) + ninja_cmd = '{} {}'.format(NINJA_PATH, flags if flags else '') try: if pipe: output = subprocess.check_output( @@ -74,57 +119,27 @@ def run( output = subprocess.check_output(['script', '-qfec', ninja_cmd, '/dev/null'], cwd=self.d.name, env=env) except subprocess.CalledProcessError as err: - sys.stdout.buffer.write(err.output) + if print_err_output: + sys.stdout.buffer.write(err.output) + err.cooked_output = remove_non_visible_lines(err.output) raise err if raw_output: return output.decode('utf-8') - - # When running in a smart terminal, Ninja uses CR (\r) to - # return the cursor to the start of the current line, prints - # something, then uses `\x1b[K` to clear everything until - # the end of the line. 
- # - # Thus printing 'FOO', 'BAR', 'ZOO' on the same line, then - # jumping to the next one results in the following output - # on Posix: - # - # '\rFOO\x1b[K\rBAR\x1b[K\rZOO\x1b[K\r\n' - # - # The following splits the output at both \r, \n and \r\n - # boundaries, which gives: - # - # [ '\r', 'FOO\x1b[K\r', 'BAR\x1b[K\r', 'ZOO\x1b[K\r\n' ] - # - decoded_lines = output.decode('utf-8').splitlines(True) - - # Remove any item that ends with a '\r' as this means its - # content will be overwritten by the next item in the list. - # For the previous example, this gives: - # - # [ 'ZOO\x1b[K\r\n' ] - # - final_lines = [ l for l in decoded_lines if not l.endswith('\r') ] - - # Return a single string that concatenates all filtered lines - # while removing any remaining \r in it. Needed to transform - # \r\n into \n. - # - # "ZOO\x1b[K\n' - # - return ''.join(final_lines).replace('\r', '') + return remove_non_visible_lines(output) def run( build_ninja: str, - flags: str = '', + flags: T.Optional[str] = None, pipe: bool = False, raw_output: bool = False, - env: Dict[str, str] = default_env, + env: T.Dict[str, str] = default_env, + print_err_output = True, ) -> str: """Run Ninja with a given build plan in a temporary directory. """ with BuildDir(build_ninja) as b: - return b.run(flags, pipe, raw_output, env) + return b.run(flags, pipe, raw_output, env, print_err_output) @unittest.skipIf(platform.system() == 'Windows', 'These test methods do not work on Windows') class Output(unittest.TestCase): @@ -137,6 +152,16 @@ class Output(unittest.TestCase): '', )) + def _test_expected_error(self, plan: str, flags: T.Optional[str], expected: str): + """Run Ninja with a given plan and flags, and verify its cooked output against an expected content. 
+ """ + actual = '' + try: + actual = run(plan, flags, print_err_output=False) + except subprocess.CalledProcessError as err: + actual = err.cooked_output + self.assertEqual(expected, actual) + def test_issue_1418(self) -> None: self.assertEqual(run( '''rule echo @@ -371,6 +396,59 @@ def test_tool_inputs(self) -> None: ) + def test_tool_compdb_targets(self) -> None: + plan = ''' +rule cat + command = cat $in $out +build out1 : cat in1 +build out2 : cat in2 out1 +build out3 : cat out2 out1 +build out4 : cat in4 +''' + + + self._test_expected_error(plan, '-t compdb-targets', +'''ninja: error: compdb-targets expects the name of at least one target +usage: ninja -t compdb [-hx] target [targets] + +options: + -h display this help message + -x expand @rspfile style response file invocations +''') + + self._test_expected_error(plan, '-t compdb-targets in1', + "ninja: fatal: 'in1' is not a target (i.e. it is not an output of any `build` statement)\n") + + self._test_expected_error(plan, '-t compdb-targets nonexistent_target', + "ninja: fatal: unknown target 'nonexistent_target'\n") + + + with BuildDir(plan) as b: + actual = b.run(flags='-t compdb-targets out3') + expected = f'''[ + {{ + "directory": "{b.path}", + "command": "cat in1 out1", + "file": "in1", + "output": "out1" + }}, + {{ + "directory": "{b.path}", + "command": "cat in2 out1 out2", + "file": "in2", + "output": "out2" + }}, + {{ + "directory": "{b.path}", + "command": "cat out2 out1 out3", + "file": "out2", + "output": "out3" + }} +] +''' + self.assertEqual(expected, actual) + + def test_explain_output(self): b = BuildDir('''\ build .FORCE: phony diff --git a/src/command_collector.h b/src/command_collector.h new file mode 100644 index 0000000000..003af9fbb7 --- /dev/null +++ b/src/command_collector.h @@ -0,0 +1,65 @@ +// Copyright 2024 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NINJA_COMMAND_COLLECTOR_H_ +#define NINJA_COMMAND_COLLECTOR_H_ + +#include +#include +#include + +#include "graph.h" + +/// Collects the transitive set of edges that lead into a given set +/// of starting nodes. Used to implement the `compdb-targets` tool. +/// +/// When collecting inputs, the outputs of phony edges are always ignored +/// from the result, but are followed by the dependency walk. +/// +/// Usage is: +/// - Create instance. +/// - Call CollectFrom() for each root node to collect edges from. +/// - Call TakeResult() to retrieve the list of edges. +/// +struct CommandCollector { + void CollectFrom(const Node* node) { + assert(node); + + if (!visited_nodes_.insert(node).second) + return; + + Edge* edge = node->in_edge(); + if (!edge || !visited_edges_.insert(edge).second) + return; + + for (Node* input_node : edge->inputs_) + CollectFrom(input_node); + + if (!edge->is_phony()) + in_edges.push_back(edge); + } + + private: + std::unordered_set visited_nodes_; + std::unordered_set visited_edges_; + + /// we use a vector to preserve order from requisites to their dependents. + /// This may help LSP server performance in languages that support modules, + /// but it also ensures that the output of `-t compdb-targets foo` is + /// consistent, which is useful in regression tests. 
+ public: + std::vector in_edges; +}; + +#endif // NINJA_COMMAND_COLLECTOR_H_ diff --git a/src/graph_test.cc b/src/graph_test.cc index 6c654eeb32..6483410e99 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -15,6 +15,7 @@ #include "graph.h" #include "build.h" +#include "command_collector.h" #include "test.h" using namespace std; @@ -310,6 +311,54 @@ TEST_F(GraphTest, InputsCollectorWithEscapes) { EXPECT_EQ("order_only", inputs[4]); } +TEST_F(GraphTest, CommandCollector) { + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, + "build out1: cat in1\n" + "build mid1: cat in1\n" + "build out2: cat mid1\n" + "build out3 out4: cat mid1\n" + "build all: phony out1 out2 out3\n")); + { + CommandCollector collector; + auto& edges = collector.in_edges; + + // Start visit from out2; this should add `build mid1` and `build out2` to + // the edge list. + collector.CollectFrom(GetNode("out2")); + ASSERT_EQ(2u, edges.size()); + EXPECT_EQ("cat in1 > mid1", edges[0]->EvaluateCommand()); + EXPECT_EQ("cat mid1 > out2", edges[1]->EvaluateCommand()); + + // Add a visit from out1, this should append `build out1` + collector.CollectFrom(GetNode("out1")); + ASSERT_EQ(3u, edges.size()); + EXPECT_EQ("cat in1 > out1", edges[2]->EvaluateCommand()); + + // Another visit from all; this should add edges for out1, out2 and out3, + // but not all (because it's phony). + collector.CollectFrom(GetNode("all")); + ASSERT_EQ(4u, edges.size()); + EXPECT_EQ("cat in1 > mid1", edges[0]->EvaluateCommand()); + EXPECT_EQ("cat mid1 > out2", edges[1]->EvaluateCommand()); + EXPECT_EQ("cat in1 > out1", edges[2]->EvaluateCommand()); + EXPECT_EQ("cat mid1 > out3 out4", edges[3]->EvaluateCommand()); + } + + { + CommandCollector collector; + auto& edges = collector.in_edges; + + // Starting directly from all, will add `build out1` before `build mid1` + // compared to the previous example above. 
+ collector.CollectFrom(GetNode("all")); + ASSERT_EQ(4u, edges.size()); + EXPECT_EQ("cat in1 > out1", edges[0]->EvaluateCommand()); + EXPECT_EQ("cat in1 > mid1", edges[1]->EvaluateCommand()); + EXPECT_EQ("cat mid1 > out2", edges[2]->EvaluateCommand()); + EXPECT_EQ("cat mid1 > out3 out4", edges[3]->EvaluateCommand()); + } +} + TEST_F(GraphTest, VarInOutPathEscaping) { ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, "build a$ b: cat no'space with$ space$$ no\"space2\n")); diff --git a/src/ninja.cc b/src/ninja.cc index 7885bb3682..93c0ca6a2a 100644 --- a/src/ninja.cc +++ b/src/ninja.cc @@ -20,6 +20,8 @@ #include #include +#include +#include #ifdef _WIN32 #include "getopt.h" @@ -38,6 +40,7 @@ #include "build_log.h" #include "deps_log.h" #include "clean.h" +#include "command_collector.h" #include "debug_flags.h" #include "depfile_parser.h" #include "disk_interface.h" @@ -130,6 +133,8 @@ struct NinjaMain : public BuildLogUser { int ToolClean(const Options* options, int argc, char* argv[]); int ToolCleanDead(const Options* options, int argc, char* argv[]); int ToolCompilationDatabase(const Options* options, int argc, char* argv[]); + int ToolCompilationDatabaseForTargets(const Options* options, int argc, + char* argv[]); int ToolRecompact(const Options* options, int argc, char* argv[]); int ToolRestat(const Options* options, int argc, char* argv[]); int ToolUrtle(const Options* options, int argc, char** argv); @@ -932,8 +937,8 @@ std::string EvaluateCommandWithRspfile(const Edge* edge, return command; } -void printCompdb(const char* const directory, const Edge* const edge, - const EvaluateCommandMode eval_mode) { +void PrintOneCompdbObject(std::string const& directory, const Edge* const edge, + const EvaluateCommandMode eval_mode) { printf("\n {\n \"directory\": \""); PrintJSONString(directory); printf("\",\n \"command\": \""); @@ -977,37 +982,25 @@ int NinjaMain::ToolCompilationDatabase(const Options* options, int argc, argc -= optind; bool first = true; - vector cwd; - 
char* success = NULL; - - do { - cwd.resize(cwd.size() + 1024); - errno = 0; - success = getcwd(&cwd[0], cwd.size()); - } while (!success && errno == ERANGE); - if (!success) { - Error("cannot determine working directory: %s", strerror(errno)); - return 1; - } + std::string directory = GetWorkingDirectory(); putchar('['); - for (vector::iterator e = state_.edges_.begin(); - e != state_.edges_.end(); ++e) { - if ((*e)->inputs_.empty()) + for (const Edge* edge : state_.edges_) { + if (edge->inputs_.empty()) continue; if (argc == 0) { if (!first) { putchar(','); } - printCompdb(&cwd[0], *e, eval_mode); + PrintOneCompdbObject(directory, edge, eval_mode); first = false; } else { for (int i = 0; i != argc; ++i) { - if ((*e)->rule_->name() == argv[i]) { + if (edge->rule_->name() == argv[i]) { if (!first) { putchar(','); } - printCompdb(&cwd[0], *e, eval_mode); + PrintOneCompdbObject(directory, edge, eval_mode); first = false; } } @@ -1087,6 +1080,118 @@ int NinjaMain::ToolRestat(const Options* options, int argc, char* argv[]) { return EXIT_SUCCESS; } +struct CompdbTargets { + enum class Action { kDisplayHelpAndExit, kEmitCommands }; + + Action action; + EvaluateCommandMode eval_mode = ECM_NORMAL; + + std::vector targets; + + static CompdbTargets CreateFromArgs(int argc, char* argv[]) { + // + // grammar: + // ninja -t compdb-targets [-hx] target [targets] + // + CompdbTargets ret; + + // getopt_long() expects argv[0] to contain the name of + // the tool, i.e. "compdb-targets". 
+ argc++; + argv--; + + // Phase 1: parse options: + optind = 1; // see `man 3 getopt` for documentation on optind + int opt; + while ((opt = getopt(argc, argv, const_cast("hx"))) != -1) { + switch (opt) { + case 'x': + ret.eval_mode = ECM_EXPAND_RSPFILE; + break; + case 'h': + default: + ret.action = CompdbTargets::Action::kDisplayHelpAndExit; + return ret; + } + } + + // Phase 2: parse operands: + int const targets_begin = optind; + int const targets_end = argc; + + if (targets_begin == targets_end) { + Error("compdb-targets expects the name of at least one target"); + ret.action = CompdbTargets::Action::kDisplayHelpAndExit; + } else { + ret.action = CompdbTargets::Action::kEmitCommands; + for (int i = targets_begin; i < targets_end; ++i) { + ret.targets.push_back(argv[i]); + } + } + + return ret; + } +}; + +void PrintCompdb(std::string const& directory, std::vector const& edges, + const EvaluateCommandMode eval_mode) { + putchar('['); + + bool first = true; + for (const Edge* edge : edges) { + if (edge->is_phony() || edge->inputs_.empty()) + continue; + if (!first) + putchar(','); + PrintOneCompdbObject(directory, edge, eval_mode); + first = false; + } + + puts("\n]"); +} + +int NinjaMain::ToolCompilationDatabaseForTargets(const Options* options, + int argc, char* argv[]) { + auto compdb = CompdbTargets::CreateFromArgs(argc, argv); + + switch (compdb.action) { + case CompdbTargets::Action::kDisplayHelpAndExit: { + printf( + "usage: ninja -t compdb [-hx] target [targets]\n" + "\n" + "options:\n" + " -h display this help message\n" + " -x expand @rspfile style response file invocations\n"); + return 1; + } + + case CompdbTargets::Action::kEmitCommands: { + CommandCollector collector; + + for (const std::string& target_arg : compdb.targets) { + std::string err; + Node* node = CollectTarget(target_arg.c_str(), &err); + if (!node) { + Fatal("%s", err.c_str()); + return 1; + } + if (!node->in_edge()) { + Fatal( + "'%s' is not a target " + "(i.e. 
it is not an output of any `build` statement)", + node->path().c_str()); + } + collector.CollectFrom(node); + } + + std::string directory = GetWorkingDirectory(); + PrintCompdb(directory, collector.in_edges, compdb.eval_mode); + } break; + } + + return 0; +} + int NinjaMain::ToolUrtle(const Options* options, int argc, char** argv) { // RLE encoded. const char* urtle = @@ -1141,6 +1246,9 @@ const Tool* ChooseTool(const string& tool_name) { Tool::RUN_AFTER_LOAD, &NinjaMain::ToolTargets }, { "compdb", "dump JSON compilation database to stdout", Tool::RUN_AFTER_LOAD, &NinjaMain::ToolCompilationDatabase }, + { "compdb-targets", + "dump JSON compilation database for a given list of targets to stdout", + Tool::RUN_AFTER_LOAD, &NinjaMain::ToolCompilationDatabaseForTargets }, { "recompact", "recompacts ninja-internal data structures", Tool::RUN_AFTER_LOAD, &NinjaMain::ToolRecompact }, { "restat", "restats all outputs in the build log", diff --git a/src/util.cc b/src/util.cc index ac1b14e55f..70421bf119 100644 --- a/src/util.cc +++ b/src/util.cc @@ -21,6 +21,7 @@ #include #include #include +#include #endif #include @@ -917,6 +918,21 @@ double GetLoadAverage() { } #endif // _WIN32 +std::string GetWorkingDirectory() { + std::string ret; + char* success = NULL; + do { + ret.resize(ret.size() + 1024); + errno = 0; + success = getcwd(&ret[0], ret.size()); + } while (!success && errno == ERANGE); + if (!success) { + Fatal("cannot determine working directory: %s", strerror(errno)); + } + ret.resize(strlen(&ret[0])); + return ret; +} + bool Truncate(const string& path, size_t size, string* err) { #ifdef _WIN32 int fh = _sopen(path.c_str(), _O_RDWR | _O_CREAT, _SH_DENYNO, diff --git a/src/util.h b/src/util.h index 211a43d348..b38578c326 100644 --- a/src/util.h +++ b/src/util.h @@ -102,6 +102,9 @@ int GetProcessorCount(); /// on error. double GetLoadAverage(); +/// a wrapper for getcwd() +std::string GetWorkingDirectory(); + /// Truncates a file to the given size. 
bool Truncate(const std::string& path, size_t size, std::string* err); From b785947b1f3e986cdc17b3cf0744ca79ad6ea88a Mon Sep 17 00:00:00 2001 From: James Widman Date: Sun, 13 Oct 2024 01:31:01 -0700 Subject: [PATCH 02/12] Fix display of labels in a `[horizontal]` list The introduction of the entry for `compdb-targets` in the `[horizontal]` labeled list in doc/manual.asciidoc revealed some display issues in the left column: First, the web browser would insert a line break in the middle of the label `compdb-targets`, so that it looked like this: compdb- targets We fix this by applying the `white-space: nowrap` attribute to the left column. After this is fixed, we see practically no space between the end of the longest label and the beginning of the text in the second column; we fix this with the `padding-right` attribute. Finally, we align all labels to the right side of the column so that there is a consistent amount of horizontal space between the end of each label and the beginning of the text in the second column. --- doc/style.css | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/style.css b/doc/style.css index 363e272b24..2be09de8b1 100644 --- a/doc/style.css +++ b/doc/style.css @@ -53,3 +53,16 @@ div.chapter { p { margin-top: 0; } + +/* The following applies to the left column of a [horizontal] labeled list: */ +table.horizontal > tbody > tr > td:nth-child(1) { + + /* prevent the insertion of a line-break in the middle of a label: */ + white-space: nowrap; + + /* insert a little horizontal padding between the two columns: */ + padding-right: 1.5em; + + /* right-justify labels: */ + text-align: end; +} From d85cfed45295c78cbd3f3a3c7bd4a186b279f31f Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 11:39:06 +0100 Subject: [PATCH 03/12] Reuse some vectors in ManifestParser. For a no-op build of Chromium (Linux, Zen 2), this reduces time spent from 5.76 to 5.48 seconds. 
--- src/manifest_parser.cc | 44 +++++++++++++++++++++++------------------- src/manifest_parser.h | 9 +++++++++ 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/manifest_parser.cc b/src/manifest_parser.cc index c4b2980164..373dc65a83 100644 --- a/src/manifest_parser.cc +++ b/src/manifest_parser.cc @@ -209,14 +209,16 @@ bool ManifestParser::ParseDefault(string* err) { } bool ManifestParser::ParseEdge(string* err) { - vector ins, outs, validations; + ins_.clear(); + outs_.clear(); + validations_.clear(); { EvalString out; if (!lexer_.ReadPath(&out, err)) return false; while (!out.empty()) { - outs.push_back(out); + outs_.push_back(std::move(out)); out.Clear(); if (!lexer_.ReadPath(&out, err)) @@ -233,12 +235,12 @@ bool ManifestParser::ParseEdge(string* err) { return false; if (out.empty()) break; - outs.push_back(out); + outs_.push_back(std::move(out)); ++implicit_outs; } } - if (outs.empty()) + if (outs_.empty()) return lexer_.Error("expected path", err); if (!ExpectToken(Lexer::COLON, err)) @@ -259,7 +261,7 @@ bool ManifestParser::ParseEdge(string* err) { return false; if (in.empty()) break; - ins.push_back(in); + ins_.push_back(std::move(in)); } // Add all implicit deps, counting how many as we go. 
@@ -271,7 +273,7 @@ bool ManifestParser::ParseEdge(string* err) { return false; if (in.empty()) break; - ins.push_back(in); + ins_.push_back(std::move(in)); ++implicit; } } @@ -285,7 +287,7 @@ bool ManifestParser::ParseEdge(string* err) { return false; if (in.empty()) break; - ins.push_back(in); + ins_.push_back(std::move(in)); ++order_only; } } @@ -298,7 +300,7 @@ bool ManifestParser::ParseEdge(string* err) { return false; if (validation.empty()) break; - validations.push_back(validation); + validations_.push_back(std::move(validation)); } } @@ -329,9 +331,9 @@ bool ManifestParser::ParseEdge(string* err) { edge->pool_ = pool; } - edge->outputs_.reserve(outs.size()); - for (size_t i = 0, e = outs.size(); i != e; ++i) { - string path = outs[i].Evaluate(env); + edge->outputs_.reserve(outs_.size()); + for (size_t i = 0, e = outs_.size(); i != e; ++i) { + string path = outs_[i].Evaluate(env); if (path.empty()) return lexer_.Error("empty path", err); uint64_t slash_bits; @@ -351,8 +353,8 @@ bool ManifestParser::ParseEdge(string* err) { } edge->implicit_outs_ = implicit_outs; - edge->inputs_.reserve(ins.size()); - for (vector::iterator i = ins.begin(); i != ins.end(); ++i) { + edge->inputs_.reserve(ins_.size()); + for (vector::iterator i = ins_.begin(); i != ins_.end(); ++i) { string path = i->Evaluate(env); if (path.empty()) return lexer_.Error("empty path", err); @@ -363,9 +365,9 @@ bool ManifestParser::ParseEdge(string* err) { edge->implicit_deps_ = implicit; edge->order_only_deps_ = order_only; - edge->validations_.reserve(validations.size()); - for (std::vector::iterator v = validations.begin(); - v != validations.end(); ++v) { + edge->validations_.reserve(validations_.size()); + for (std::vector::iterator v = validations_.begin(); + v != validations_.end(); ++v) { string path = v->Evaluate(env); if (path.empty()) return lexer_.Error("empty path", err); @@ -419,14 +421,16 @@ bool ManifestParser::ParseFileInclude(bool new_scope, string* err) { return false; string 
path = eval.Evaluate(env_); - ManifestParser subparser(state_, file_reader_, options_); + if (subparser_ == nullptr) { + subparser_.reset(new ManifestParser(state_, file_reader_, options_)); + } if (new_scope) { - subparser.env_ = new BindingEnv(env_); + subparser_->env_ = new BindingEnv(env_); } else { - subparser.env_ = env_; + subparser_->env_ = env_; } - if (!subparser.Load(path, err, &lexer_)) + if (!subparser_->Load(path, err, &lexer_)) return false; if (!ExpectToken(Lexer::NEWLINE, err)) diff --git a/src/manifest_parser.h b/src/manifest_parser.h index db6812dce4..ce37759676 100644 --- a/src/manifest_parser.h +++ b/src/manifest_parser.h @@ -17,6 +17,9 @@ #include "parser.h" +#include +#include + struct BindingEnv; struct EvalString; @@ -63,6 +66,12 @@ struct ManifestParser : public Parser { BindingEnv* env_; ManifestParserOptions options_; bool quiet_; + + // ins_/out_/validations_ are reused across invocations to ParseEdge(), + // to save on the otherwise constant memory reallocation. + // subparser_ is reused solely to get better reuse out ins_/outs_/validation_. + std::unique_ptr subparser_; + std::vector ins_, outs_, validations_; }; #endif // NINJA_MANIFEST_PARSER_H_ From 40efd00fc08c8117294c48c76646a11130f45c69 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 31 Oct 2024 15:59:10 +0100 Subject: [PATCH 04/12] Apply a short-vector optimization to EvalString. This very often holds only a single RAW token, so we do not need to allocate elements on an std::vector for it in the common case. For a no-op build of Chromium (Linux, Zen 2), this reduces time spent from 5.48 to 5.14 seconds. Note that this opens up for a potential optimization where EvalString::Evaluate() could just return a StringPiece, without making a std::string out of it (which requires allocation; this is about 5% of remaining runtime). 
However, this would also require that CanonicalizePath() somehow learned to work with StringPiece (presumably allocating a new StringPiece if and only if changes were needed). --- src/eval_env.cc | 57 +++++++++++++++++++++++++++++++--------------- src/eval_env.h | 10 ++++++-- src/string_piece.h | 4 ++++ 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/eval_env.cc b/src/eval_env.cc index 796a3264d1..cbc935acc2 100644 --- a/src/eval_env.cc +++ b/src/eval_env.cc @@ -99,6 +99,10 @@ string BindingEnv::LookupWithFallback(const string& var, } string EvalString::Evaluate(Env* env) const { + if (parsed_.empty()) { + return single_token_; + } + string result; for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) { if (i->second == RAW) @@ -110,40 +114,57 @@ string EvalString::Evaluate(Env* env) const { } void EvalString::AddText(StringPiece text) { - // Add it to the end of an existing RAW token if possible. - if (!parsed_.empty() && parsed_.back().second == RAW) { - parsed_.back().first.append(text.str_, text.len_); + if (parsed_.empty()) { + single_token_.append(text.begin(), text.end()); + } else if (!parsed_.empty() && parsed_.back().second == RAW) { + parsed_.back().first.append(text.begin(), text.end()); } else { - parsed_.push_back(make_pair(text.AsString(), RAW)); + parsed_.push_back(std::make_pair(text.AsString(), RAW)); } } + void EvalString::AddSpecial(StringPiece text) { - parsed_.push_back(make_pair(text.AsString(), SPECIAL)); + if (parsed_.empty() && !single_token_.empty()) { + // Going from one to two tokens, so we can no longer apply + // our single_token_ optimization and need to push everything + // onto the vector. 
+ parsed_.push_back(std::make_pair(std::move(single_token_), RAW)); + } + parsed_.push_back(std::make_pair(text.AsString(), SPECIAL)); } string EvalString::Serialize() const { string result; - for (TokenList::const_iterator i = parsed_.begin(); - i != parsed_.end(); ++i) { + if (parsed_.empty() && !single_token_.empty()) { result.append("["); - if (i->second == SPECIAL) - result.append("$"); - result.append(i->first); + result.append(single_token_); result.append("]"); + } else { + for (const auto& pair : parsed_) { + result.append("["); + if (pair.second == SPECIAL) + result.append("$"); + result.append(pair.first.begin(), pair.first.end()); + result.append("]"); + } } return result; } string EvalString::Unparse() const { string result; - for (TokenList::const_iterator i = parsed_.begin(); - i != parsed_.end(); ++i) { - bool special = (i->second == SPECIAL); - if (special) - result.append("${"); - result.append(i->first); - if (special) - result.append("}"); + if (parsed_.empty() && !single_token_.empty()) { + result.append(single_token_.begin(), single_token_.end()); + } else { + for (TokenList::const_iterator i = parsed_.begin(); + i != parsed_.end(); ++i) { + bool special = (i->second == SPECIAL); + if (special) + result.append("${"); + result.append(i->first.begin(), i->first.end()); + if (special) + result.append("}"); + } } return result; } diff --git a/src/eval_env.h b/src/eval_env.h index 677dc217a2..ae6d8bc898 100644 --- a/src/eval_env.h +++ b/src/eval_env.h @@ -39,8 +39,8 @@ struct EvalString { /// @return The string with variables not expanded. 
std::string Unparse() const; - void Clear() { parsed_.clear(); } - bool empty() const { return parsed_.empty(); } + void Clear() { parsed_.clear(); single_token_.clear(); } + bool empty() const { return parsed_.empty() && single_token_.empty(); } void AddText(StringPiece text); void AddSpecial(StringPiece text); @@ -53,6 +53,12 @@ struct EvalString { enum TokenType { RAW, SPECIAL }; typedef std::vector > TokenList; TokenList parsed_; + + // If we hold only a single RAW token, then we keep it here instead of + // pushing it on TokenList. This saves a bunch of allocations for + // what is a common case. If parsed_ is nonempty, then this value + // must be ignored. + std::string single_token_; }; /// An invocable build command and associated metadata (description, etc.). diff --git a/src/string_piece.h b/src/string_piece.h index 1c0bee6e1c..7e7367c20a 100644 --- a/src/string_piece.h +++ b/src/string_piece.h @@ -63,6 +63,10 @@ struct StringPiece { return len_; } + size_t empty() const { + return len_ == 0; + } + const char* str_; size_t len_; }; From f1a2f421859742f45c3b964a579221c869ef276c Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 12:32:57 +0100 Subject: [PATCH 05/12] Switch hash tables to emhash8::HashMap. This is much faster than std::unordered_map, and also slightly faster than phmap::flat_hash_map that was included in PR #2468. It is MIT-licensed, and we just include the .h file wholesale. I haven't done a detailed test of all the various unordered_maps out there, but this is the overall highest-ranking contender on https://martin.ankerl.com/2022/08/27/hashmap-bench-01/ except for ankerl::unordered_dense::map, which requires C++17. For a no-op build of Chromium (Linux, Zen 2), this reduces time spent from 5.14 to 4.62 seconds. 
--- src/hash_map.h | 5 +- src/third_party/emhash/README.ninja | 7 + src/third_party/emhash/hash_table8.hpp | 1830 ++++++++++++++++++++++++ 3 files changed, 1839 insertions(+), 3 deletions(-) create mode 100644 src/third_party/emhash/README.ninja create mode 100644 src/third_party/emhash/hash_table8.hpp diff --git a/src/hash_map.h b/src/hash_map.h index 3f465338ac..5585b1dc51 100644 --- a/src/hash_map.h +++ b/src/hash_map.h @@ -19,6 +19,7 @@ #include #include "string_piece.h" #include "util.h" +#include "third_party/emhash/hash_table8.hpp" // MurmurHash2, by Austin Appleby static inline @@ -53,8 +54,6 @@ unsigned int MurmurHash2(const void* key, size_t len) { return h; } -#include - namespace std { template<> struct hash { @@ -73,7 +72,7 @@ struct hash { /// mapping StringPiece => Foo*. template struct ExternalStringHashMap { - typedef std::unordered_map Type; + typedef emhash8::HashMap Type; }; #endif // NINJA_MAP_H_ diff --git a/src/third_party/emhash/README.ninja b/src/third_party/emhash/README.ninja new file mode 100644 index 0000000000..8a236c2d94 --- /dev/null +++ b/src/third_party/emhash/README.ninja @@ -0,0 +1,7 @@ +Description: emhash8::HashMap for C++14/17 +Version: 1.6.5 (commit bdebddbdce1b473bbc189178fd523ef4a876ea01) +URL: https://github.com/ktprime/emhash +Copyright: Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com +SPDX-License-Identifier: MIT +Local changes: + - None. diff --git a/src/third_party/emhash/hash_table8.hpp b/src/third_party/emhash/hash_table8.hpp new file mode 100644 index 0000000000..ba121cbdfc --- /dev/null +++ b/src/third_party/emhash/hash_table8.hpp @@ -0,0 +1,1830 @@ +// emhash8::HashMap for C++14/17 +// version 1.6.5 +// https://github.com/ktprime/emhash/blob/master/hash_table8.hpp +// +// Licensed under the MIT License . 
+// SPDX-License-Identifier: MIT +// Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef EMH_NEW +#undef EMH_EMPTY + +// likely/unlikely +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +# define EMH_LIKELY(condition) __builtin_expect(condition, 1) +# define EMH_UNLIKELY(condition) __builtin_expect(condition, 0) +#else +# define EMH_LIKELY(condition) condition +# define EMH_UNLIKELY(condition) condition +#endif + +#define EMH_EMPTY(n) (0 > (int)(_index[n].next)) +#define EMH_EQHASH(n, key_hash) (((size_type)(key_hash) & ~_mask) == (_index[n].slot & ~_mask)) +//#define EMH_EQHASH(n, key_hash) ((size_type)(key_hash - _index[n].slot) & ~_mask) == 0 +#define EMH_NEW(key, val, bucket, key_hash) \ + new(_pairs + _num_filled) 
value_type(key, val); \ + _etail = bucket; \ + _index[bucket] = {bucket, _num_filled++ | ((size_type)(key_hash) & ~_mask)} + +namespace emhash8 { + +struct DefaultPolicy { + static constexpr float load_factor = 0.80f; + static constexpr float min_load_factor = 0.20f; + static constexpr size_t cacheline_size = 64U; +}; + +template, + typename EqT = std::equal_to, + typename Allocator = std::allocator>, //never used + typename Policy = DefaultPolicy> //never used +class HashMap +{ +#ifndef EMH_DEFAULT_LOAD_FACTOR + constexpr static float EMH_DEFAULT_LOAD_FACTOR = 0.80f; +#endif + constexpr static float EMH_MIN_LOAD_FACTOR = 0.25f; //< 0.5 + constexpr static uint32_t EMH_CACHE_LINE_SIZE = 64; //debug only + +public: + using htype = HashMap; + using value_type = std::pair; + using key_type = KeyT; + using mapped_type = ValueT; + //using dPolicy = Policy; + +#ifdef EMH_SMALL_TYPE + using size_type = uint16_t; +#elif EMH_SIZE_TYPE == 0 + using size_type = uint32_t; +#else + using size_type = size_t; +#endif + + using hasher = HashT; + using key_equal = EqT; + + constexpr static size_type INACTIVE = 0-1u; + //constexpr uint32_t END = 0-0x1u; + constexpr static size_type EAD = 2; + + struct Index + { + size_type next; + size_type slot; + }; + + class const_iterator; + class iterator + { + public: + using iterator_category = std::bidirectional_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = typename htype::value_type; + using pointer = value_type*; + using const_pointer = const value_type* ; + using reference = value_type&; + using const_reference = const value_type&; + + iterator() : kv_(nullptr) {} + iterator(const_iterator& cit) { + kv_ = cit.kv_; + } + + iterator(const htype* hash_map, size_type bucket) { + kv_ = hash_map->_pairs + (int)bucket; + } + + iterator& operator++() + { + kv_ ++; + return *this; + } + + iterator operator++(int) + { + auto cur = *this; kv_ ++; + return cur; + } + + iterator& operator--() + { + kv_ --; + return *this; 
+ } + + iterator operator--(int) + { + auto cur = *this; kv_ --; + return cur; + } + + reference operator*() const { return *kv_; } + pointer operator->() const { return kv_; } + + bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; } + bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; } + + public: + value_type* kv_; + }; + + class const_iterator + { + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename htype::value_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + + const_iterator(const iterator& it) { + kv_ = it.kv_; + } + + const_iterator (const htype* hash_map, size_type bucket) { + kv_ = hash_map->_pairs + (int)bucket; + } + + const_iterator& operator++() + { + kv_ ++; + return *this; + } + + const_iterator operator++(int) + { + auto cur = *this; kv_ ++; + return cur; + } + + const_iterator& operator--() + { + kv_ --; + return *this; + } + + const_iterator operator--(int) + { + auto cur = *this; kv_ --; + return cur; + } + + const_reference operator*() const { return *kv_; } + const_pointer operator->() const { return kv_; } + + bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; } + bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; } + bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; } + public: + const value_type* kv_; + }; + + void init(size_type bucket, float mlf = EMH_DEFAULT_LOAD_FACTOR) + { + _pairs = nullptr; + _index = nullptr; + _mask = _num_buckets = 0; + _num_filled = 0; + _mlf = (uint32_t)((1 << 27) / 
EMH_DEFAULT_LOAD_FACTOR); + max_load_factor(mlf); + rehash(bucket); + } + + HashMap(size_type bucket = 2, float mlf = EMH_DEFAULT_LOAD_FACTOR) + { + init(bucket, mlf); + } + + HashMap(const HashMap& rhs) + { + if (rhs.load_factor() > EMH_MIN_LOAD_FACTOR) { + _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4); + _index = alloc_index(rhs._num_buckets); + clone(rhs); + } else { + init(rhs._num_filled + 2, rhs.max_load_factor()); + for (auto it = rhs.begin(); it != rhs.end(); ++it) + insert_unique(it->first, it->second); + } + } + + HashMap(HashMap&& rhs) noexcept + { + init(0); + *this = std::move(rhs); + } + + HashMap(std::initializer_list ilist) + { + init((size_type)ilist.size()); + for (auto it = ilist.begin(); it != ilist.end(); ++it) + do_insert(*it); + } + + template + HashMap(InputIt first, InputIt last, size_type bucket_count=4) + { + init(std::distance(first, last) + bucket_count); + for (; first != last; ++first) + emplace(*first); + } + + HashMap& operator=(const HashMap& rhs) + { + if (this == &rhs) + return *this; + + if (rhs.load_factor() < EMH_MIN_LOAD_FACTOR) { + clear(); free(_pairs); _pairs = nullptr; + rehash(rhs._num_filled + 2); + for (auto it = rhs.begin(); it != rhs.end(); ++it) + insert_unique(it->first, it->second); + return *this; + } + + clearkv(); + + if (_num_buckets != rhs._num_buckets) { + free(_pairs); free(_index); + _index = alloc_index(rhs._num_buckets); + _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4); + } + + clone(rhs); + return *this; + } + + HashMap& operator=(HashMap&& rhs) noexcept + { + if (this != &rhs) { + swap(rhs); + rhs.clear(); + } + return *this; + } + + template + bool operator == (const Con& rhs) const + { + if (size() != rhs.size()) + return false; + + for (auto it = begin(), last = end(); it != last; ++it) { + auto oi = rhs.find(it->first); + if (oi == rhs.end() || it->second != oi->second) + return false; + } + return true; + } + + template + bool 
operator != (const Con& rhs) const { return !(*this == rhs); } + + ~HashMap() noexcept + { + clearkv(); + free(_pairs); + free(_index); + _index = nullptr; + _pairs = nullptr; + } + + void clone(const HashMap& rhs) + { + _hasher = rhs._hasher; +// _eq = rhs._eq; + _num_buckets = rhs._num_buckets; + _num_filled = rhs._num_filled; + _mlf = rhs._mlf; + _last = rhs._last; + _mask = rhs._mask; +#if EMH_HIGH_LOAD + _ehead = rhs._ehead; +#endif + _etail = rhs._etail; + + auto opairs = rhs._pairs; + memcpy((char*)_index, (char*)rhs._index, (_num_buckets + EAD) * sizeof(Index)); + + if (is_copy_trivially()) { + memcpy((char*)_pairs, (char*)opairs, _num_filled * sizeof(value_type)); + } else { + for (size_type slot = 0; slot < _num_filled; slot++) + new(_pairs + slot) value_type(opairs[slot]); + } + } + + void swap(HashMap& rhs) + { + // std::swap(_eq, rhs._eq); + std::swap(_hasher, rhs._hasher); + std::swap(_pairs, rhs._pairs); + std::swap(_index, rhs._index); + std::swap(_num_buckets, rhs._num_buckets); + std::swap(_num_filled, rhs._num_filled); + std::swap(_mask, rhs._mask); + std::swap(_mlf, rhs._mlf); + std::swap(_last, rhs._last); +#if EMH_HIGH_LOAD + std::swap(_ehead, rhs._ehead); +#endif + std::swap(_etail, rhs._etail); + } + + // ------------------------------------------------------------- + iterator first() const { return {this, 0}; } + iterator last() const { return {this, _num_filled - 1}; } + + value_type& front() { return _pairs[0]; } + const value_type& front() const { return _pairs[0]; } + value_type& back() { return _pairs[_num_filled - 1]; } + const value_type& back() const { return _pairs[_num_filled - 1]; } + + void pop_front() { erase(begin()); } //TODO. 
only erase first without move last + void pop_back() { erase(last()); } + + iterator begin() { return first(); } + const_iterator cbegin() const { return first(); } + const_iterator begin() const { return first(); } + + iterator end() { return {this, _num_filled}; } + const_iterator cend() const { return {this, _num_filled}; } + const_iterator end() const { return cend(); } + + const value_type* values() const { return _pairs; } + const Index* index() const { return _index; } + + size_type size() const { return _num_filled; } + bool empty() const { return _num_filled == 0; } + size_type bucket_count() const { return _num_buckets; } + + /// Returns average number of elements per bucket. + float load_factor() const { return static_cast(_num_filled) / (_mask + 1); } + + HashT& hash_function() const { return _hasher; } + EqT& key_eq() const { return _eq; } + + void max_load_factor(float mlf) + { + if (mlf < 0.992 && mlf > EMH_MIN_LOAD_FACTOR) { + _mlf = (uint32_t)((1 << 27) / mlf); + if (_num_buckets > 0) rehash(_num_buckets); + } + } + + constexpr float max_load_factor() const { return (1 << 27) / (float)_mlf; } + constexpr size_type max_size() const { return (1ull << (sizeof(size_type) * 8 - 1)); } + constexpr size_type max_bucket_count() const { return max_size(); } + +#if EMH_STATIS + //Returns the bucket number where the element with key k is located. + size_type bucket(const KeyT& key) const + { + const auto bucket = hash_bucket(key); + const auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return 0; + else if (bucket == next_bucket) + return bucket + 1; + + return hash_main(bucket) + 1; + } + + //Returns the number of elements in bucket n. 
+ size_type bucket_size(const size_type bucket) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return 0; + + next_bucket = hash_main(bucket); + size_type ibucket_size = 1; + + //iterator each item in current main bucket + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) { + break; + } + ibucket_size ++; + next_bucket = nbucket; + } + return ibucket_size; + } + + size_type get_main_bucket(const size_type bucket) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return INACTIVE; + + return hash_main(bucket); + } + + size_type get_diss(size_type bucket, size_type next_bucket, const size_type slots) const + { + auto pbucket = reinterpret_cast(&_pairs[bucket]); + auto pnext = reinterpret_cast(&_pairs[next_bucket]); + if (pbucket / EMH_CACHE_LINE_SIZE == pnext / EMH_CACHE_LINE_SIZE) + return 0; + size_type diff = pbucket > pnext ? (pbucket - pnext) : (pnext - pbucket); + if (diff / EMH_CACHE_LINE_SIZE < slots - 1) + return diff / EMH_CACHE_LINE_SIZE + 1; + return slots - 1; + } + + int get_bucket_info(const size_type bucket, size_type steps[], const size_type slots) const + { + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return -1; + + const auto main_bucket = hash_main(bucket); + if (next_bucket == main_bucket) + return 1; + else if (main_bucket != bucket) + return 0; + + steps[get_diss(bucket, next_bucket, slots)] ++; + size_type ibucket_size = 2; + //find a empty and linked it to tail + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + break; + + steps[get_diss(nbucket, next_bucket, slots)] ++; + ibucket_size ++; + next_bucket = nbucket; + } + return (int)ibucket_size; + } + + void dump_statics() const + { + const size_type slots = 128; + size_type buckets[slots + 1] = {0}; + size_type steps[slots + 1] = {0}; + for (size_type bucket = 0; bucket < _num_buckets; ++bucket) { + auto bsize = 
get_bucket_info(bucket, steps, slots); + if (bsize > 0) + buckets[bsize] ++; + } + + size_type sumb = 0, collision = 0, sumc = 0, finds = 0, sumn = 0; + puts("============== buckets size ration ========="); + for (size_type i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++) { + const auto bucketsi = buckets[i]; + if (bucketsi == 0) + continue; + sumb += bucketsi; + sumn += bucketsi * i; + collision += bucketsi * (i - 1); + finds += bucketsi * i * (i + 1) / 2; + printf(" %2u %8u %2.2lf| %.2lf\n", i, bucketsi, bucketsi * 100.0 * i / _num_filled, sumn * 100.0 / _num_filled); + } + + puts("========== collision miss ration ==========="); + for (size_type i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) { + sumc += steps[i]; + if (steps[i] <= 2) + continue; + printf(" %2u %8u %.2lf %.2lf\n", i, steps[i], steps[i] * 100.0 / collision, sumc * 100.0 / collision); + } + + if (sumb == 0) return; + printf(" _num_filled/bucket_size/packed collision/cache_miss/hit_find = %u/%.2lf/%zd/ %.2lf%%/%.2lf%%/%.2lf\n", + _num_filled, _num_filled * 1.0 / sumb, sizeof(value_type), (collision * 100.0 / _num_filled), (collision - steps[0]) * 100.0 / _num_filled, finds * 1.0 / _num_filled); + assert(sumn == _num_filled); + assert(sumc == collision); + puts("============== buckets size end ============="); + } +#endif + + void pack_zero(ValueT zero) + { + _pairs[_num_filled] = {KeyT(), zero}; + } + + // ------------------------------------------------------------ + template + iterator find(const K& key) noexcept + { + return {this, find_filled_slot(key)}; + } + + template + const_iterator find(const K& key) const noexcept + { + return {this, find_filled_slot(key)}; + } + + template + ValueT& at(const K& key) + { + const auto slot = find_filled_slot(key); + //throw + return _pairs[slot].second; + } + + template + const ValueT& at(const K& key) const + { + const auto slot = find_filled_slot(key); + //throw + return _pairs[slot].second; + } + + const ValueT& index(const uint32_t index) const 
+ { + return _pairs[index].second; + } + + ValueT& index(const uint32_t index) + { + return _pairs[index].second; + } + + template + bool contains(const K& key) const noexcept + { + return find_filled_slot(key) != _num_filled; + } + + template + size_type count(const K& key) const noexcept + { + return find_filled_slot(key) == _num_filled ? 0 : 1; + //return find_sorted_bucket(key) == END ? 0 : 1; + //return find_hash_bucket(key) == END ? 0 : 1; + } + + template + std::pair equal_range(const K& key) + { + const auto found = find(key); + if (found.second == _num_filled) + return { found, found }; + else + return { found, std::next(found) }; + } + + void merge(HashMap& rhs) + { + if (empty()) { + *this = std::move(rhs); + return; + } + + for (auto rit = rhs.begin(); rit != rhs.end(); ) { + auto fit = find(rit->first); + if (fit == end()) { + insert_unique(rit->first, std::move(rit->second)); + rit = rhs.erase(rit); + } else { + ++rit; + } + } + } + + /// Returns the matching ValueT or nullptr if k isn't found. + bool try_get(const KeyT& key, ValueT& val) const noexcept + { + const auto slot = find_filled_slot(key); + const auto found = slot != _num_filled; + if (found) { + val = _pairs[slot].second; + } + return found; + } + + /// Returns the matching ValueT or nullptr if k isn't found. + ValueT* try_get(const KeyT& key) noexcept + { + const auto slot = find_filled_slot(key); + return slot != _num_filled ? &_pairs[slot].second : nullptr; + } + + /// Const version of the above + ValueT* try_get(const KeyT& key) const noexcept + { + const auto slot = find_filled_slot(key); + return slot != _num_filled ? 
&_pairs[slot].second : nullptr; + } + + /// set value if key exist + bool try_set(const KeyT& key, const ValueT& val) noexcept + { + const auto slot = find_filled_slot(key); + if (slot == _num_filled) + return false; + + _pairs[slot].second = val; + return true; + } + + /// set value if key exist + bool try_set(const KeyT& key, ValueT&& val) noexcept + { + const auto slot = find_filled_slot(key); + if (slot == _num_filled) + return false; + + _pairs[slot].second = std::move(val); + return true; + } + + /// Convenience function. + ValueT get_or_return_default(const KeyT& key) const noexcept + { + const auto slot = find_filled_slot(key); + return slot == _num_filled ? ValueT() : _pairs[slot].second; + } + + // ----------------------------------------------------- + std::pair do_insert(const value_type& value) noexcept + { + const auto key_hash = hash_key(value.first); + const auto bucket = find_or_allocate(value.first, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(value.first, value.second, bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + std::pair do_insert(value_type&& value) noexcept + { + const auto key_hash = hash_key(value.first); + const auto bucket = find_or_allocate(value.first, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::move(value.first), std::move(value.second), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + template + std::pair do_insert(K&& key, V&& val) noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + template + std::pair do_assign(K&& key, V&& val) noexcept + { + 
check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + const auto bempty = EMH_EMPTY(bucket); + if (bempty) { + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + } else { + _pairs[_index[bucket].slot & _mask].second = std::move(val); + } + + const auto slot = _index[bucket].slot & _mask; + return { {this, slot}, bempty }; + } + + std::pair insert(const value_type& p) + { + check_expand_need(); + return do_insert(p); + } + + std::pair insert(value_type && p) + { + check_expand_need(); + return do_insert(std::move(p)); + } + + void insert(std::initializer_list ilist) + { + reserve(ilist.size() + _num_filled, false); + for (auto it = ilist.begin(); it != ilist.end(); ++it) + do_insert(*it); + } + + template + void insert(Iter first, Iter last) + { + reserve(std::distance(first, last) + _num_filled, false); + for (; first != last; ++first) + do_insert(first->first, first->second); + } + +#if 0 + template + void insert_unique(Iter begin, Iter end) + { + reserve(std::distance(begin, end) + _num_filled, false); + for (; begin != end; ++begin) { + insert_unique(*begin); + } + } +#endif + + template + size_type insert_unique(K&& key, V&& val) + { + check_expand_need(); + const auto key_hash = hash_key(key); + auto bucket = find_unique_bucket(key_hash); + EMH_NEW(std::forward(key), std::forward(val), bucket, key_hash); + return bucket; + } + + size_type insert_unique(value_type&& value) + { + return insert_unique(std::move(value.first), std::move(value.second)); + } + + size_type insert_unique(const value_type& value) + { + return insert_unique(value.first, value.second); + } + + template + std::pair emplace(Args&&... args) noexcept + { + check_expand_need(); + return do_insert(std::forward(args)...); + } + + //no any optimize for position + template + iterator emplace_hint(const_iterator hint, Args&&... 
args) + { + (void)hint; + check_expand_need(); + return do_insert(std::forward(args)...).first; + } + + template + std::pair try_emplace(const KeyT& k, Args&&... args) + { + check_expand_need(); + return do_insert(k, std::forward(args)...); + } + + template + std::pair try_emplace(KeyT&& k, Args&&... args) + { + check_expand_need(); + return do_insert(std::move(k), std::forward(args)...); + } + + template + size_type emplace_unique(Args&&... args) + { + return insert_unique(std::forward(args)...); + } + + std::pair insert_or_assign(const KeyT& key, ValueT&& val) { return do_assign(key, std::forward(val)); } + std::pair insert_or_assign(KeyT&& key, ValueT&& val) { return do_assign(std::move(key), std::forward(val)); } + + /// Return the old value or ValueT() if it didn't exist. + ValueT set_get(const KeyT& key, const ValueT& val) + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + EMH_NEW(key, val, bucket, key_hash); + return ValueT(); + } else { + const auto slot = _index[bucket].slot & _mask; + ValueT old_value(val); + std::swap(_pairs[slot].second, old_value); + return old_value; + } + } + + /// Like std::map::operator[]. 
+ ValueT& operator[](const KeyT& key) noexcept + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + /* Check if inserting a value rather than overwriting an old entry */ + EMH_NEW(key, std::move(ValueT()), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return _pairs[slot].second; + } + + ValueT& operator[](KeyT&& key) noexcept + { + check_expand_need(); + const auto key_hash = hash_key(key); + const auto bucket = find_or_allocate(key, key_hash); + if (EMH_EMPTY(bucket)) { + EMH_NEW(std::move(key), std::move(ValueT()), bucket, key_hash); + } + + const auto slot = _index[bucket].slot & _mask; + return _pairs[slot].second; + } + + /// Erase an element from the hash table. + /// return 0 if element was not found + size_type erase(const KeyT& key) noexcept + { + const auto key_hash = hash_key(key); + const auto sbucket = find_filled_bucket(key, key_hash); + if (sbucket == INACTIVE) + return 0; + + const auto main_bucket = key_hash & _mask; + erase_slot(sbucket, (size_type)main_bucket); + return 1; + } + + //iterator erase(const_iterator begin_it, const_iterator end_it) + iterator erase(const const_iterator& cit) noexcept + { + const auto slot = (size_type)(cit.kv_ - _pairs); + size_type main_bucket; + const auto sbucket = find_slot_bucket(slot, main_bucket); //TODO + erase_slot(sbucket, main_bucket); + return {this, slot}; + } + + //only last >= first + iterator erase(const_iterator first, const_iterator last) noexcept + { + auto esize = long(last.kv_ - first.kv_); + auto tsize = long((_pairs + _num_filled) - last.kv_); //last to tail size + auto next = first; + while (tsize -- > 0) { + if (esize-- <= 0) + break; + next = ++erase(next); + } + + //fast erase from last + next = this->last(); + while (esize -- > 0) + next = --erase(next); + + return {this, size_type(next.kv_ - _pairs)}; + } + + template + size_type erase_if(Pred pred) + { + auto 
old_size = size(); + for (auto it = begin(); it != end();) { + if (pred(*it)) + it = erase(it); + else + ++it; + } + return old_size - size(); + } + + static constexpr bool is_triviall_destructable() + { +#if __cplusplus >= 201402L || _MSC_VER > 1600 + return !(std::is_trivially_destructible::value && std::is_trivially_destructible::value); +#else + return !(std::is_pod::value && std::is_pod::value); +#endif + } + + static constexpr bool is_copy_trivially() + { +#if __cplusplus >= 201103L || _MSC_VER > 1600 + return (std::is_trivially_copyable::value && std::is_trivially_copyable::value); +#else + return (std::is_pod::value && std::is_pod::value); +#endif + } + + void clearkv() + { + if (is_triviall_destructable()) { + while (_num_filled --) + _pairs[_num_filled].~value_type(); + } + } + + /// Remove all elements, keeping full capacity. + void clear() noexcept + { + clearkv(); + + if (_num_filled > 0) + memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets); + + _last = _num_filled = 0; + _etail = INACTIVE; + +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + } + + void shrink_to_fit(const float min_factor = EMH_DEFAULT_LOAD_FACTOR / 4) + { + if (load_factor() < min_factor && bucket_count() > 10) //safe guard + rehash(_num_filled + 1); + } + +#if EMH_HIGH_LOAD + #define EMH_PREVET(i, n) i[n].slot + void set_empty() + { + auto prev = 0; + for (int32_t bucket = 1; bucket < _num_buckets; ++bucket) { + if (EMH_EMPTY(bucket)) { + if (prev != 0) { + EMH_PREVET(_index, bucket) = prev; + _index[_prev].next = -bucket; + } + else + _ehead = bucket; + prev = bucket; + } + } + + EMH_PREVET(_index, _ehead) = prev; + _index[_prev].next = 0-_ehead; + _ehead = 0-_index[_ehead].next; + } + + void clear_empty() + { + auto prev = EMH_PREVET(_index, _ehead); + while (prev != _ehead) { + _index[_prev].next = INACTIVE; + prev = EMH_PREVET(_index, prev); + } + _index[_ehead].next = INACTIVE; + _ehead = 0; + } + + //prev-ehead->next + size_type pop_empty(const size_type bucket) + { + 
const auto prev_bucket = EMH_PREVET(_index, bucket); + const int next_bucket = 0-_index[bucket].next; + + EMH_PREVET(_index, next_bucket) = prev_bucket; + _index[prev_bucket].next = -next_bucket; + + _ehead = next_bucket; + return bucket; + } + + //ehead->bucket->next + void push_empty(const int32_t bucket) + { + const int next_bucket = 0-_index[_ehead].next; + assert(next_bucket > 0); + + EMH_PREVET(_index, bucket) = _ehead; + _index[bucket].next = -next_bucket; + + EMH_PREVET(_index, next_bucket) = bucket; + _index[_ehead].next = -bucket; + // _ehead = bucket; + } +#endif + + /// Make room for this many elements + bool reserve(uint64_t num_elems, bool force) + { + (void)force; +#if EMH_HIGH_LOAD == 0 + const auto required_buckets = num_elems * _mlf >> 27; + if (EMH_LIKELY(required_buckets < _mask)) // && !force + return false; + +#elif EMH_HIGH_LOAD + const auto required_buckets = num_elems + num_elems * 1 / 9; + if (EMH_LIKELY(required_buckets < _mask)) + return false; + + else if (_num_buckets < 16 && _num_filled < _num_buckets) + return false; + + else if (_num_buckets > EMH_HIGH_LOAD) { + if (_ehead == 0) { + set_empty(); + return false; + } else if (/*_num_filled + 100 < _num_buckets && */_index[_ehead].next != 0-_ehead) { + return false; + } + } +#endif +#if EMH_STATIS + if (_num_filled > EMH_STATIS) dump_statics(); +#endif + + //assert(required_buckets < max_size()); + rehash(required_buckets + 2); + return true; + } + + static value_type* alloc_bucket(size_type num_buckets) + { +#ifdef EMH_ALLOC + auto new_pairs = aligned_alloc(32, (uint64_t)num_buckets * sizeof(value_type)); +#else + auto new_pairs = malloc((uint64_t)num_buckets * sizeof(value_type)); +#endif + return (value_type *)(new_pairs); + } + + static Index* alloc_index(size_type num_buckets) + { + auto new_index = (char*)malloc((uint64_t)(EAD + num_buckets) * sizeof(Index)); + return (Index *)(new_index); + } + + bool reserve(size_type required_buckets) noexcept + { + if (_num_filled != 
required_buckets) + return reserve(required_buckets, true); + + _last = 0; +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + +#if EMH_SORT + std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) { + const auto hashl = (size_type)hash_key(l.first) & _mask, hashr = (size_type)hash_key(r.first) & _mask; + return hashl < hashr; + //return l.first < r.first; + }); +#endif + + memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets); + for (size_type slot = 0; slot < _num_filled; slot++) { + const auto& key = _pairs[slot].first; + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + auto& next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + _index[bucket] = {1, slot | ((size_type)(key_hash) & ~_mask)}; + else { + _index[bucket].slot |= (size_type)(key_hash) & ~_mask; + next_bucket ++; + } + } + return true; + } + + void rebuild(size_type num_buckets) noexcept + { + free(_index); + auto new_pairs = (value_type*)alloc_bucket((size_type)(num_buckets * max_load_factor()) + 4); + if (is_copy_trivially()) { + if (_pairs) + memcpy((char*)new_pairs, (char*)_pairs, _num_filled * sizeof(value_type)); + } else { + for (size_type slot = 0; slot < _num_filled; slot++) { + new(new_pairs + slot) value_type(std::move(_pairs[slot])); + if (is_triviall_destructable()) + _pairs[slot].~value_type(); + } + } + free(_pairs); + _pairs = new_pairs; + _index = (Index*)alloc_index (num_buckets); + + memset((char*)_index, INACTIVE, sizeof(_index[0]) * num_buckets); + memset((char*)(_index + num_buckets), 0, sizeof(_index[0]) * EAD); + } + + void rehash(uint64_t required_buckets) + { + if (required_buckets < _num_filled) + return; + + assert(required_buckets < max_size()); + auto num_buckets = _num_filled > (1u << 16) ? 
(1u << 16) : 4u; + while (num_buckets < required_buckets) { num_buckets *= 2; } +#if EMH_SAVE_MEM + if (sizeof(KeyT) < sizeof(size_type) && num_buckets >= (1ul << (2 * 8))) + num_buckets = 2ul << (sizeof(KeyT) * 8); +#endif + +#if EMH_REHASH_LOG + auto last = _last; + size_type collision = 0; +#endif + +#if EMH_HIGH_LOAD + _ehead = 0; +#endif + _last = 0; + + _mask = num_buckets - 1; +#if EMH_PACK_TAIL > 1 + _last = _mask; + num_buckets += num_buckets * EMH_PACK_TAIL / 100; //add more 5-10% +#endif + _num_buckets = num_buckets; + + rebuild(num_buckets); + +#ifdef EMH_SORT + std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) { + const auto hashl = hash_key(l.first), hashr = hash_key(r.first); + auto diff = int64_t((hashl & _mask) - (hashr & _mask)); + if (diff != 0) + return diff < 0; + return hashl < hashr; +// return l.first < r.first; + }); +#endif + + _etail = INACTIVE; + for (size_type slot = 0; slot < _num_filled; ++slot) { + const auto& key = _pairs[slot].first; + const auto key_hash = hash_key(key); + const auto bucket = find_unique_bucket(key_hash); + _index[bucket] = { bucket, slot | ((size_type)(key_hash) & ~_mask) }; + +#if EMH_REHASH_LOG + if (bucket != hash_main(bucket)) + collision ++; +#endif + } + +#if EMH_REHASH_LOG + if (_num_filled > EMH_REHASH_LOG) { + auto mbucket = _num_filled - collision; + char buff[255] = {0}; + sprintf(buff, " _num_filled/aver_size/K.V/pack/collision|last = %u/%.2lf/%s.%s/%zd|%.2lf%%,%.2lf%%", + _num_filled, double (_num_filled) / mbucket, typeid(KeyT).name(), typeid(ValueT).name(), sizeof(_pairs[0]), collision * 100.0 / _num_filled, last * 100.0 / _num_buckets); +#ifdef EMH_LOG + static uint32_t ihashs = 0; EMH_LOG() << "hash_nums = " << ihashs ++ << "|" <<__FUNCTION__ << "|" << buff << endl; +#else + puts(buff); +#endif + } +#endif + } + +private: + // Can we fit another element? 
+ bool check_expand_need() + { + return reserve(_num_filled, false); + } + + static void prefetch_heap_block(char* ctrl) + { + // Prefetch the heap-allocated memory region to resolve potential TLB + // misses. This is intended to overlap with execution of calculating the hash for a key. +#if __linux__ + __builtin_prefetch(static_cast(ctrl)); +#elif _WIN32 + _mm_prefetch((const char*)ctrl, _MM_HINT_T0); +#endif + } + + size_type slot_to_bucket(const size_type slot) const noexcept + { + size_type main_bucket; + return find_slot_bucket(slot, main_bucket); //TODO + } + + //very slow + void erase_slot(const size_type sbucket, const size_type main_bucket) noexcept + { + const auto slot = _index[sbucket].slot & _mask; + const auto ebucket = erase_bucket(sbucket, main_bucket); + const auto last_slot = --_num_filled; + if (EMH_LIKELY(slot != last_slot)) { + const auto last_bucket = (_etail == INACTIVE || ebucket == _etail) + ? slot_to_bucket(last_slot) : _etail; + + _pairs[slot] = std::move(_pairs[last_slot]); + _index[last_bucket].slot = slot | (_index[last_bucket].slot & ~_mask); + } + + if (is_triviall_destructable()) + _pairs[last_slot].~value_type(); + + _etail = INACTIVE; + _index[ebucket] = {INACTIVE, 0}; +#if EMH_HIGH_LOAD + if (_ehead) { + if (10 * _num_filled < 8 * _num_buckets) + clear_empty(); + else if (ebucket) + push_empty(ebucket); + } +#endif + } + + size_type erase_bucket(const size_type bucket, const size_type main_bucket) noexcept + { + const auto next_bucket = _index[bucket].next; + if (bucket == main_bucket) { + if (main_bucket != next_bucket) { + const auto nbucket = _index[next_bucket].next; + _index[main_bucket] = { + (nbucket == next_bucket) ? main_bucket : nbucket, + _index[next_bucket].slot + }; + } + return next_bucket; + } + + const auto prev_bucket = find_prev_bucket(main_bucket, bucket); + _index[prev_bucket].next = (bucket == next_bucket) ? 
prev_bucket : next_bucket; + return bucket; + } + + // Find the slot with this key, or return bucket size + size_type find_slot_bucket(const size_type slot, size_type& main_bucket) const + { + const auto key_hash = hash_key(_pairs[slot].first); + const auto bucket = main_bucket = size_type(key_hash & _mask); + if (slot == (_index[bucket].slot & _mask)) + return bucket; + + auto next_bucket = _index[bucket].next; + while (true) { + if (EMH_LIKELY(slot == (_index[next_bucket].slot & _mask))) + return next_bucket; + next_bucket = _index[next_bucket].next; + } + + return INACTIVE; + } + + // Find the slot with this key, or return bucket size + size_type find_filled_bucket(const KeyT& key, uint64_t key_hash) const noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if (EMH_UNLIKELY((int)next_bucket < 0)) + return INACTIVE; + + const auto slot = _index[bucket].slot & _mask; + //prefetch_heap_block((char*)&_pairs[slot]); + if (EMH_EQHASH(bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return bucket; + } + if (next_bucket == bucket) + return INACTIVE; + + while (true) { + if (EMH_EQHASH(next_bucket, key_hash)) { + const auto slot = _index[next_bucket].slot & _mask; + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return next_bucket; + } + + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return INACTIVE; + next_bucket = nbucket; + } + + return INACTIVE; + } + + // Find the slot with this key, or return bucket size + template + size_type find_filled_slot(const K& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return _num_filled; + + const auto slot = _index[bucket].slot & _mask; + //prefetch_heap_block((char*)&_pairs[slot]); + if (EMH_EQHASH(bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return slot; + } 
+ if (next_bucket == bucket) + return _num_filled; + + while (true) { + if (EMH_EQHASH(next_bucket, key_hash)) { + const auto slot = _index[next_bucket].slot & _mask; + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return slot; + } + + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return _num_filled; + next_bucket = nbucket; + } + + return _num_filled; + } + +#if EMH_SORT + size_type find_hash_bucket(const KeyT& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + const auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) + return END; + + auto slot = _index[bucket].slot & _mask; + if (_eq(key, _pairs[slot++].first)) + return slot; + else if (next_bucket == bucket) + return END; + + while (true) { + const auto& okey = _pairs[slot++].first; + if (_eq(key, okey)) + return slot; + + const auto hasho = hash_key(okey); + if ((hasho & _mask) != bucket) + break; + else if (hasho > key_hash) + break; + else if (EMH_UNLIKELY(slot >= _num_filled)) + break; + } + + return END; + } + + //only for find/can not insert + size_type find_sorted_bucket(const KeyT& key) const noexcept + { + const auto key_hash = hash_key(key); + const auto bucket = size_type(key_hash & _mask); + const auto slots = (int)(_index[bucket].next); //TODO + if (slots < 0 /**|| key < _pairs[slot].first*/) + return END; + + const auto slot = _index[bucket].slot & _mask; + auto ormask = _index[bucket].slot & ~_mask; + auto hmask = (size_type)(key_hash) & ~_mask; + if ((hmask | ormask) != ormask) + return END; + + if (_eq(key, _pairs[slot].first)) + return slot; + else if (slots == 1 || key < _pairs[slot].first) + return END; + +#if EMH_SORT + if (key < _pairs[slot].first || key > _pairs[slots + slot - 1].first) + return END; +#endif + + for (size_type i = 1; i < slots; ++i) { + const auto& okey = _pairs[slot + i].first; + if (_eq(key, okey)) + return slot + i; + // else if (okey > key) + // return 
END; + } + + return END; + } +#endif + + //kick out bucket and find empty to occpuy + //it will break the orgin link and relnik again. + //before: main_bucket-->prev_bucket --> bucket --> next_bucket + //atfer : main_bucket-->prev_bucket --> (removed)--> new_bucket--> next_bucket + size_type kickout_bucket(const size_type kmain, const size_type bucket) noexcept + { + const auto next_bucket = _index[bucket].next; + const auto new_bucket = find_empty_bucket(next_bucket, 2); + const auto prev_bucket = find_prev_bucket(kmain, bucket); + + const auto last = next_bucket == bucket ? new_bucket : next_bucket; + _index[new_bucket] = {last, _index[bucket].slot}; + + _index[prev_bucket].next = new_bucket; + _index[bucket].next = INACTIVE; + + return bucket; + } + + /* + ** inserts a new key into a hash table; first, check whether key's main + ** bucket/position is free. If not, check whether colliding node/bucket is in its main + ** position or not: if it is not, move colliding bucket to an empty place and + ** put new key in its main position; otherwise (colliding bucket is in its main + ** position), new key goes to an empty position. 
+ */ + template + size_type find_or_allocate(const K& key, uint64_t key_hash) noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + prefetch_heap_block((char*)&_pairs[bucket]); + if ((int)next_bucket < 0) { +#if EMH_HIGH_LOAD + if (next_bucket != INACTIVE) + pop_empty(bucket); +#endif + return bucket; + } + + const auto slot = _index[bucket].slot & _mask; + if (EMH_EQHASH(bucket, key_hash)) + if (EMH_LIKELY(_eq(key, _pairs[slot].first))) + return bucket; + + //check current bucket_key is in main bucket or not + const auto kmain = hash_bucket(_pairs[slot].first); + if (kmain != bucket) + return kickout_bucket(kmain, bucket); + else if (next_bucket == bucket) + return _index[next_bucket].next = find_empty_bucket(next_bucket, 1); + + uint32_t csize = 1; + //find next linked bucket and check key + while (true) { + const auto eslot = _index[next_bucket].slot & _mask; + if (EMH_EQHASH(next_bucket, key_hash)) { + if (EMH_LIKELY(_eq(key, _pairs[eslot].first))) + return next_bucket; + } + + csize += 1; + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + break; + next_bucket = nbucket; + } + + //find a empty and link it to tail + const auto new_bucket = find_empty_bucket(next_bucket, csize); + prefetch_heap_block((char*)&_pairs[new_bucket]); + return _index[next_bucket].next = new_bucket; + } + + size_type find_unique_bucket(uint64_t key_hash) noexcept + { + const auto bucket = size_type(key_hash & _mask); + auto next_bucket = _index[bucket].next; + if ((int)next_bucket < 0) { +#if EMH_HIGH_LOAD + if (next_bucket != INACTIVE) + pop_empty(bucket); +#endif + return bucket; + } + + //check current bucket_key is in main bucket or not + const auto kmain = hash_main(bucket); + if (EMH_UNLIKELY(kmain != bucket)) + return kickout_bucket(kmain, bucket); + else if (EMH_UNLIKELY(next_bucket != bucket)) + next_bucket = find_last_bucket(next_bucket); + + return _index[next_bucket].next = 
find_empty_bucket(next_bucket, 2); + } + + /*** + Different probing techniques usually provide a trade-off between memory locality and avoidance of clustering. + Since Robin Hood hashing is relatively resilient to clustering (both primary and secondary), linear probing is the most cache friendly alternativeis typically used. + + It's the core algorithm of this hash map with highly optimization/benchmark. + normaly linear probing is inefficient with high load factor, it use a new 3-way linear + probing strategy to search empty slot. from benchmark even the load factor > 0.9, it's more 2-3 timer fast than + one-way search strategy. + + 1. linear or quadratic probing a few cache line for less cache miss from input slot "bucket_from". + 2. the first search slot from member variant "_last", init with 0 + 3. the second search slot from calculated pos "(_num_filled + _last) & _mask", it's like a rand value + */ + // key is not in this mavalue. Find a place to put it. + size_type find_empty_bucket(const size_type bucket_from, uint32_t csize) noexcept + { + (void)csize; +#if EMH_HIGH_LOAD + if (_ehead) + return pop_empty(_ehead); +#endif + + auto bucket = bucket_from; + if (EMH_EMPTY(++bucket) || EMH_EMPTY(++bucket)) + return bucket; + +#ifdef EMH_QUADRATIC + constexpr size_type linear_probe_length = 2 * EMH_CACHE_LINE_SIZE / sizeof(Index);//16 + for (size_type offset = csize + 2, step = 4; offset <= linear_probe_length; ) { + bucket = (bucket_from + offset) & _mask; + if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket)) + return bucket; + offset += step; //7/8. 12. 
16 + } +#else + constexpr size_type quadratic_probe_length = 6u; + for (size_type offset = 4u, step = 3u; step < quadratic_probe_length; ) { + bucket = (bucket_from + offset) & _mask; + if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket)) + return bucket; + offset += step++; + } +#endif + +#if EMH_PREFETCH + __builtin_prefetch(static_cast(_index + _last + 1), 0, EMH_PREFETCH); +#endif + + for (;;) { +#if EMH_PACK_TAIL + //find empty bucket and skip next + if (EMH_EMPTY(_last++))// || EMH_EMPTY(_last++)) + return _last++ - 1; + + if (EMH_UNLIKELY(_last >= _num_buckets)) + _last = 0; + + auto medium = (_mask / 4 + _last++) & _mask; + if (EMH_EMPTY(medium)) + return medium; +#else + _last &= _mask; + if (EMH_EMPTY(++_last))// || EMH_EMPTY(++_last)) + return _last; + + auto medium = (_num_buckets / 2 + _last) & _mask; + if (EMH_EMPTY(medium))// || EMH_EMPTY(++medium)) + return medium; +#endif + } + + return 0; + } + + size_type find_last_bucket(size_type main_bucket) const + { + auto next_bucket = _index[main_bucket].next; + if (next_bucket == main_bucket) + return main_bucket; + + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == next_bucket) + return next_bucket; + next_bucket = nbucket; + } + } + + size_type find_prev_bucket(const size_type main_bucket, const size_type bucket) const + { + auto next_bucket = _index[main_bucket].next; + if (next_bucket == bucket) + return main_bucket; + + while (true) { + const auto nbucket = _index[next_bucket].next; + if (nbucket == bucket) + return next_bucket; + next_bucket = nbucket; + } + } + + size_type hash_bucket(const KeyT& key) const noexcept + { + return (size_type)hash_key(key) & _mask; + } + + size_type hash_main(const size_type bucket) const noexcept + { + const auto slot = _index[bucket].slot & _mask; + return (size_type)hash_key(_pairs[slot].first) & _mask; + } + +#if EMH_INT_HASH + static constexpr uint64_t KC = UINT64_C(11400714819323198485); + static uint64_t hash64(uint64_t key) + { +#if 
__SIZEOF_INT128__ && EMH_INT_HASH == 1 + __uint128_t r = key; r *= KC; + return (uint64_t)(r >> 64) + (uint64_t)r; +#elif EMH_INT_HASH == 2 + //MurmurHash3Mixer + uint64_t h = key; + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + h ^= h >> 33; + return h; +#elif _WIN64 && EMH_INT_HASH == 1 + uint64_t high; + return _umul128(key, KC, &high) + high; +#elif EMH_INT_HASH == 3 + auto ror = (key >> 32) | (key << 32); + auto low = key * 0xA24BAED4963EE407ull; + auto high = ror * 0x9FB21C651E98DF25ull; + auto mix = low + high; + return mix; +#elif EMH_INT_HASH == 1 + uint64_t r = key * UINT64_C(0xca4bcaa75ec3f625); + return (r >> 32) + r; +#elif EMH_WYHASH64 + return wyhash64(key, KC); +#else + uint64_t x = key; + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + x = x ^ (x >> 31); + return x; +#endif + } +#endif + +#if EMH_WYHASH_HASH + //#define WYHASH_CONDOM 1 + static uint64_t wymix(uint64_t A, uint64_t B) + { +#if defined(__SIZEOF_INT128__) + __uint128_t r = A; r *= B; +#if WYHASH_CONDOM2 + A ^= (uint64_t)r; B ^= (uint64_t)(r >> 64); +#else + A = (uint64_t)r; B = (uint64_t)(r >> 64); +#endif + +#elif defined(_MSC_VER) && defined(_M_X64) +#if WYHASH_CONDOM2 + uint64_t a, b; + a = _umul128(A, B, &b); + A ^= a; B ^= b; +#else + A = _umul128(A, B, &B); +#endif +#else + uint64_t ha = A >> 32, hb = B >> 32, la = (uint32_t)A, lb = (uint32_t)B, hi, lo; + uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl; + lo = t + (rm1 << 32); c += lo < t; hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; +#if WYHASH_CONDOM2 + A ^= lo; B ^= hi; +#else + A = lo; B = hi; +#endif +#endif + return A ^ B; + } + + //multiply and xor mix function, aka MUM + static inline uint64_t wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; } + static inline uint64_t wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; } + static inline uint64_t 
wyr3(const uint8_t *p, size_t k) { + return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1]; + } + + inline static const uint64_t secret[4] = { + 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, + 0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull}; +public: + //wyhash main function https://github.com/wangyi-fudan/wyhash + static uint64_t wyhashstr(const char *key, const size_t len) + { + uint64_t a = 0, b = 0, seed = secret[0]; + const uint8_t *p = (const uint8_t*)key; + if (EMH_LIKELY(len <= 16)) { + if (EMH_LIKELY(len >= 4)) { + const auto half = (len >> 3) << 2; + a = (wyr4(p) << 32U) | wyr4(p + half); p += len - 4; + b = (wyr4(p) << 32U) | wyr4(p - half); + } else if (len) { + a = wyr3(p, len); + } + } else { + size_t i = len; + if (EMH_UNLIKELY(i > 48)) { + uint64_t see1 = seed, see2 = seed; + do { + seed = wymix(wyr8(p + 0) ^ secret[1], wyr8(p + 8) ^ seed); + see1 = wymix(wyr8(p + 16) ^ secret[2], wyr8(p + 24) ^ see1); + see2 = wymix(wyr8(p + 32) ^ secret[3], wyr8(p + 40) ^ see2); + p += 48; i -= 48; + } while (EMH_LIKELY(i > 48)); + seed ^= see1 ^ see2; + } + while (i > 16) { + seed = wymix(wyr8(p) ^ secret[1], wyr8(p + 8) ^ seed); + i -= 16; p += 16; + } + a = wyr8(p + i - 16); + b = wyr8(p + i - 8); + } + + return wymix(secret[1] ^ len, wymix(a ^ secret[1], b ^ seed)); + } +#endif + +private: + template::value, uint32_t>::type = 0> + inline uint64_t hash_key(const UType key) const + { +#if EMH_INT_HASH + return hash64(key); +#elif EMH_IDENTITY_HASH + return key + (key >> 24); +#else + return _hasher(key); +#endif + } + + template::value, uint32_t>::type = 0> + inline uint64_t hash_key(const UType& key) const + { +#if EMH_WYHASH_HASH + return wyhashstr(key.data(), key.size()); +#else + return _hasher(key); +#endif + } + + template::value && !std::is_same::value, uint32_t>::type = 0> + inline uint64_t hash_key(const UType& key) const + { + return _hasher(key); + } + +private: + Index* _index; + value_type*_pairs; + + HashT _hasher; + EqT _eq; + 
uint32_t _mlf; + size_type _mask; + size_type _num_buckets; + size_type _num_filled; + size_type _last; +#if EMH_HIGH_LOAD + size_type _ehead; +#endif + size_type _etail; +}; +} // namespace emhash + From 22a0eba8dbffd6d46f59c4e19cb6fa816cbbbe64 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 13:32:43 +0100 Subject: [PATCH 06/12] Fix emhash8 compilation for MinGW. --- src/third_party/emhash/README.ninja | 2 +- src/third_party/emhash/hash_table8.hpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/third_party/emhash/README.ninja b/src/third_party/emhash/README.ninja index 8a236c2d94..ac2c2114a6 100644 --- a/src/third_party/emhash/README.ninja +++ b/src/third_party/emhash/README.ninja @@ -4,4 +4,4 @@ URL: https://github.com/ktprime/emhash Copyright: Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com SPDX-License-Identifier: MIT Local changes: - - None. + - Added includes for _mm_prefetch on MinGW. diff --git a/src/third_party/emhash/hash_table8.hpp b/src/third_party/emhash/hash_table8.hpp index ba121cbdfc..689185edac 100644 --- a/src/third_party/emhash/hash_table8.hpp +++ b/src/third_party/emhash/hash_table8.hpp @@ -58,6 +58,10 @@ _etail = bucket; \ _index[bucket] = {bucket, _num_filled++ | ((size_type)(key_hash) & ~_mask)} +#if _WIN32 && defined(_M_IX86) +#include +#endif + namespace emhash8 { struct DefaultPolicy { @@ -1237,7 +1241,7 @@ class HashMap // misses. This is intended to overlap with execution of calculating the hash for a key. #if __linux__ __builtin_prefetch(static_cast(ctrl)); -#elif _WIN32 +#elif _WIN32 && defined(_M_IX86) _mm_prefetch((const char*)ctrl, _MM_HINT_T0); #endif } From c3e3fb98e2987875ef0b640f168837bd301170c1 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 5 Nov 2024 14:57:15 +0100 Subject: [PATCH 07/12] Fix some spelling errors in emhash8. 
--- src/third_party/emhash/README.ninja | 1 + src/third_party/emhash/hash_table8.hpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/third_party/emhash/README.ninja b/src/third_party/emhash/README.ninja index ac2c2114a6..12ead4e545 100644 --- a/src/third_party/emhash/README.ninja +++ b/src/third_party/emhash/README.ninja @@ -5,3 +5,4 @@ Copyright: Copyright (c) 2021-2024 Huang Yuanbing & bailuzhou AT 163.com SPDX-License-Identifier: MIT Local changes: - Added includes for _mm_prefetch on MinGW. + - Fixed some spelling errors to appease the linter. diff --git a/src/third_party/emhash/hash_table8.hpp b/src/third_party/emhash/hash_table8.hpp index 689185edac..36b7218381 100644 --- a/src/third_party/emhash/hash_table8.hpp +++ b/src/third_party/emhash/hash_table8.hpp @@ -1456,7 +1456,7 @@ class HashMap #endif //kick out bucket and find empty to occpuy - //it will break the orgin link and relnik again. + //it will break the origin link and relink again. //before: main_bucket-->prev_bucket --> bucket --> next_bucket //atfer : main_bucket-->prev_bucket --> (removed)--> new_bucket--> next_bucket size_type kickout_bucket(const size_type kmain, const size_type bucket) noexcept @@ -1556,7 +1556,7 @@ class HashMap Since Robin Hood hashing is relatively resilient to clustering (both primary and secondary), linear probing is the most cache friendly alternativeis typically used. It's the core algorithm of this hash map with highly optimization/benchmark. - normaly linear probing is inefficient with high load factor, it use a new 3-way linear + normally linear probing is inefficient with high load factor, it use a new 3-way linear probing strategy to search empty slot. from benchmark even the load factor > 0.9, it's more 2-3 timer fast than one-way search strategy. 
From 23350f1cc737d9ec48b8c27b01f1d6cc93eea8c6 Mon Sep 17 00:00:00 2001 From: Jan Niklas Hasse Date: Mon, 11 Nov 2024 21:26:03 +0100 Subject: [PATCH 08/12] Move Emacs file to https://github.com/ninja-build/ninja-emacs, see #2213 --- misc/ninja-mode.el | 112 --------------------------------------------- 1 file changed, 112 deletions(-) delete mode 100644 misc/ninja-mode.el diff --git a/misc/ninja-mode.el b/misc/ninja-mode.el deleted file mode 100644 index 1656009f68..0000000000 --- a/misc/ninja-mode.el +++ /dev/null @@ -1,112 +0,0 @@ -;;; ninja-mode.el --- Major mode for editing .ninja files -*- lexical-binding: t -*- - -;; Package-Requires: ((emacs "24")) -;; Keywords: languages -;; URL: https://ninja-build.org/ - -;; Copyright 2011 Google Inc. All Rights Reserved. -;; -;; Licensed under the Apache License, Version 2.0 (the "License"); -;; you may not use this file except in compliance with the License. -;; You may obtain a copy of the License at -;; -;; http://www.apache.org/licenses/LICENSE-2.0 -;; -;; Unless required by applicable law or agreed to in writing, software -;; distributed under the License is distributed on an "AS IS" BASIS, -;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -;; See the License for the specific language governing permissions and -;; limitations under the License. - -;;; Commentary: - -;; Simple emacs mode for editing .ninja files. - -;;; Code: - -(defcustom ninja-indent-offset 2 - "*Amount of offset per level of indentation." - :type 'integer - :safe 'natnump - :group 'ninja) - -(defconst ninja-keywords-re - (concat "^" (regexp-opt '("rule" "build" "subninja" "include" "pool" "default") - 'words))) - -(defvar ninja-keywords - `((,ninja-keywords-re . font-lock-keyword-face) - ("^[[:space:]]*\\([[:alnum:]_]+\\)[[:space:]]*=" 1 font-lock-variable-name-face) - ;; Variable expansion. - ("$[[:alnum:]_]+" . font-lock-variable-name-face) - ("${[[:alnum:]._]+}" . 
font-lock-variable-name-face) - ;; Rule names - ("rule +\\([[:alnum:]_.-]+\\)" 1 font-lock-function-name-face) - ;; Build Statement - highlight the rule used, - ;; allow for escaped $,: in outputs. - ("build +\\(?:[^:$\n]\\|$[:$]\\)+ *: *\\([[:alnum:]_.-]+\\)" - 1 font-lock-function-name-face))) - -(defvar ninja-mode-syntax-table - (let ((table (make-syntax-table))) - (modify-syntax-entry ?\" "." table) - table) - "Syntax table used in `ninja-mode'.") - -(defun ninja-syntax-propertize (start end) - (save-match-data - (goto-char start) - (while (search-forward "#" end t) - (let ((match-pos (match-beginning 0))) - (when (and - ;; Is it the first non-white character on the line? - (eq match-pos (save-excursion (back-to-indentation) (point))) - (save-excursion - (end-of-line 0) ; Go to the end of the previous line. - (or - ;; If we're continuing the previous line, it's not a - ;; comment. - (not (eq ?$ (char-before))) - ;; Except if the previous line is a comment as well, as the - ;; continuation dollar is ignored then. - (nth 4 (syntax-ppss))))) - (put-text-property match-pos (1+ match-pos) 'syntax-table '(11)) - (let ((line-end (line-end-position))) - ;; Avoid putting properties past the end of the buffer. - ;; Otherwise we get an `args-out-of-range' error. - (unless (= line-end (point-max)) - (put-text-property line-end (1+ line-end) 'syntax-table '(12))))))))) - -(defun ninja-compute-indentation () - "Calculate indentation for the current line." - (save-excursion - (beginning-of-line) - (if (or (looking-at ninja-keywords-re) - (= (line-number-at-pos) 1)) - 0 - (forward-line -1) - (if (looking-at ninja-keywords-re) - ninja-indent-offset - (current-indentation))))) - -(defun ninja-indent-line () - "Indent the current line. 
Uses previous indentation level if - available or `ninja-indent-offset'" - (interactive "*") - (indent-line-to (ninja-compute-indentation))) - -;;;###autoload -(define-derived-mode ninja-mode prog-mode "ninja" - (set (make-local-variable 'comment-start) "#") - (set (make-local-variable 'parse-sexp-lookup-properties) t) - (set (make-local-variable 'syntax-propertize-function) #'ninja-syntax-propertize) - (set (make-local-variable 'indent-line-function) 'ninja-indent-line) - (setq font-lock-defaults '(ninja-keywords))) - -;; Run ninja-mode for files ending in .ninja. -;;;###autoload -(add-to-list 'auto-mode-alist '("\\.ninja$" . ninja-mode)) - -(provide 'ninja-mode) - -;;; ninja-mode.el ends here From 32e7e2385f098009b45956f74fd4ce603cd0a098 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 12:38:13 +0100 Subject: [PATCH 09/12] Switch hash to rapidhash. This is the currently fastest hash that passes SMHasher and does not require special instructions (e.g. SIMD). Like emhash8, it is liberally licensed (2-clause BSD), and we include the .h file directly. For a no-op build of Chromium (Linux, Zen 2), this reduces time spent from 4.62 to 4.22 seconds. (NOTE: This is a more difficult measurement than the previous ones, as it necessarily involves removing the entire build log and doing a clean build. However, just switching the HashMap hash takes it to 4.47 seconds or so.) 
--- src/build_log.cc | 55 +---- src/build_log_test.cc | 12 +- src/hash_map.h | 37 +-- src/third_party/rapidhash/README.ninja | 7 + src/third_party/rapidhash/rapidhash.h | 323 +++++++++++++++++++++++++ 5 files changed, 342 insertions(+), 92 deletions(-) create mode 100644 src/third_party/rapidhash/README.ninja create mode 100755 src/third_party/rapidhash/rapidhash.h diff --git a/src/build_log.cc b/src/build_log.cc index 52c7c84f85..073d2fe81e 100644 --- a/src/build_log.cc +++ b/src/build_log.cc @@ -53,63 +53,14 @@ using namespace std; namespace { const char kFileSignature[] = "# ninja log v%d\n"; -const int kOldestSupportedVersion = 6; -const int kCurrentVersion = 6; - -// 64bit MurmurHash2, by Austin Appleby -#if defined(_MSC_VER) -#define BIG_CONSTANT(x) (x) -#else // defined(_MSC_VER) -#define BIG_CONSTANT(x) (x##LLU) -#endif // !defined(_MSC_VER) -inline -uint64_t MurmurHash64A(const void* key, size_t len) { - static const uint64_t seed = 0xDECAFBADDECAFBADull; - const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); - const int r = 47; - uint64_t h = seed ^ (len * m); - const unsigned char* data = static_cast(key); - while (len >= 8) { - uint64_t k; - memcpy(&k, data, sizeof k); - k *= m; - k ^= k >> r; - k *= m; - h ^= k; - h *= m; - data += 8; - len -= 8; - } - switch (len & 7) - { - case 7: h ^= uint64_t(data[6]) << 48; - NINJA_FALLTHROUGH; - case 6: h ^= uint64_t(data[5]) << 40; - NINJA_FALLTHROUGH; - case 5: h ^= uint64_t(data[4]) << 32; - NINJA_FALLTHROUGH; - case 4: h ^= uint64_t(data[3]) << 24; - NINJA_FALLTHROUGH; - case 3: h ^= uint64_t(data[2]) << 16; - NINJA_FALLTHROUGH; - case 2: h ^= uint64_t(data[1]) << 8; - NINJA_FALLTHROUGH; - case 1: h ^= uint64_t(data[0]); - h *= m; - }; - h ^= h >> r; - h *= m; - h ^= h >> r; - return h; -} -#undef BIG_CONSTANT - +const int kOldestSupportedVersion = 7; +const int kCurrentVersion = 7; } // namespace // static uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) { - return MurmurHash64A(command.str_, 
command.len_); + return rapidhash(command.str_, command.len_); } BuildLog::LogEntry::LogEntry(const string& output) diff --git a/src/build_log_test.cc b/src/build_log_test.cc index 12c2dc742c..630b1f1a92 100644 --- a/src/build_log_test.cc +++ b/src/build_log_test.cc @@ -104,7 +104,7 @@ TEST_F(BuildLogTest, FirstWriteAddsSignature) { TEST_F(BuildLogTest, DoubleEntry) { FILE* f = fopen(kTestFilename, "wb"); - fprintf(f, "# ninja log v6\n"); + fprintf(f, "# ninja log v7\n"); fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n", BuildLog::LogEntry::HashCommand("command abc")); fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n", @@ -177,7 +177,7 @@ TEST_F(BuildLogTest, ObsoleteOldVersion) { TEST_F(BuildLogTest, SpacesInOutput) { FILE* f = fopen(kTestFilename, "wb"); - fprintf(f, "# ninja log v6\n"); + fprintf(f, "# ninja log v7\n"); fprintf(f, "123\t456\t456\tout with space\t%" PRIx64 "\n", BuildLog::LogEntry::HashCommand("command")); fclose(f); @@ -200,10 +200,10 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) { // build log on Windows. This shouldn't crash, and the second version header // should be ignored. FILE* f = fopen(kTestFilename, "wb"); - fprintf(f, "# ninja log v6\n"); + fprintf(f, "# ninja log v7\n"); fprintf(f, "123\t456\t456\tout\t%" PRIx64 "\n", BuildLog::LogEntry::HashCommand("command")); - fprintf(f, "# ninja log v6\n"); + fprintf(f, "# ninja log v7\n"); fprintf(f, "456\t789\t789\tout2\t%" PRIx64 "\n", BuildLog::LogEntry::HashCommand("command2")); fclose(f); @@ -252,7 +252,7 @@ struct TestDiskInterface : public DiskInterface { TEST_F(BuildLogTest, Restat) { FILE* f = fopen(kTestFilename, "wb"); - fprintf(f, "# ninja log v6\n" + fprintf(f, "# ninja log v7\n" "1\t2\t3\tout\tcommand\n"); fclose(f); std::string err; @@ -280,7 +280,7 @@ TEST_F(BuildLogTest, VeryLongInputLine) { // Ninja's build log buffer is currently 256kB. Lines longer than that are // silently ignored, but don't affect parsing of other lines. 
FILE* f = fopen(kTestFilename, "wb"); - fprintf(f, "# ninja log v6\n"); + fprintf(f, "# ninja log v7\n"); fprintf(f, "123\t456\t456\tout\tcommand start"); for (size_t i = 0; i < (512 << 10) / strlen(" more_command"); ++i) fputs(" more_command", f); diff --git a/src/hash_map.h b/src/hash_map.h index 5585b1dc51..4361c80a35 100644 --- a/src/hash_map.h +++ b/src/hash_map.h @@ -19,40 +19,9 @@ #include #include "string_piece.h" #include "util.h" -#include "third_party/emhash/hash_table8.hpp" -// MurmurHash2, by Austin Appleby -static inline -unsigned int MurmurHash2(const void* key, size_t len) { - static const unsigned int seed = 0xDECAFBAD; - const unsigned int m = 0x5bd1e995; - const int r = 24; - unsigned int h = seed ^ len; - const unsigned char* data = static_cast(key); - while (len >= 4) { - unsigned int k; - memcpy(&k, data, sizeof k); - k *= m; - k ^= k >> r; - k *= m; - h *= m; - h ^= k; - data += 4; - len -= 4; - } - switch (len) { - case 3: h ^= data[2] << 16; - NINJA_FALLTHROUGH; - case 2: h ^= data[1] << 8; - NINJA_FALLTHROUGH; - case 1: h ^= data[0]; - h *= m; - }; - h ^= h >> 13; - h *= m; - h ^= h >> 15; - return h; -} +#include "third_party/emhash/hash_table8.hpp" +#include "third_party/rapidhash/rapidhash.h" namespace std { template<> @@ -61,7 +30,7 @@ struct hash { typedef size_t result_type; size_t operator()(StringPiece key) const { - return MurmurHash2(key.str_, key.len_); + return rapidhash(key.str_, key.len_); } }; } diff --git a/src/third_party/rapidhash/README.ninja b/src/third_party/rapidhash/README.ninja new file mode 100644 index 0000000000..1d74b67c1f --- /dev/null +++ b/src/third_party/rapidhash/README.ninja @@ -0,0 +1,7 @@ +Description: Very fast, high quality, platform-independent hashing algorithm. 
+Version: commit 4a6b2570e868536be84800353efd92c699f37d2c +URL: https://github.com/Nicoshev/rapidhash +Copyright: Copyright (C) 2024 Nicolas De Carli, Based on 'wyhash', by Wang Yi +SPDX-License-Identifier: BSD-2-Clause +Local changes: + - Changed to UNIX line endings diff --git a/src/third_party/rapidhash/rapidhash.h b/src/third_party/rapidhash/rapidhash.h new file mode 100755 index 0000000000..463f733d85 --- /dev/null +++ b/src/third_party/rapidhash/rapidhash.h @@ -0,0 +1,323 @@ +/* + * rapidhash - Very fast, high quality, platform-independent hashing algorithm. + * Copyright (C) 2024 Nicolas De Carli + * + * Based on 'wyhash', by Wang Yi + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - rapidhash source repository: https://github.com/Nicoshev/rapidhash + */ + +/* + * Includes. + */ +#include +#include +#if defined(_MSC_VER) + #include + #if defined(_M_X64) && !defined(_M_ARM64EC) + #pragma intrinsic(_umul128) + #endif +#endif + +/* + * C++ macros. + * + * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)). + */ +#ifdef __cplusplus + #define RAPIDHASH_NOEXCEPT noexcept + #define RAPIDHASH_CONSTEXPR constexpr + #ifndef RAPIDHASH_INLINE + #define RAPIDHASH_INLINE inline + #endif +#else + #define RAPIDHASH_NOEXCEPT + #define RAPIDHASH_CONSTEXPR static const + #ifndef RAPIDHASH_INLINE + #define RAPIDHASH_INLINE static inline + #endif +#endif + +/* + * Protection macro, alters behaviour of rapid_mum multiplication function. + * + * RAPIDHASH_FAST: Normal behavior, max speed. + * RAPIDHASH_PROTECTED: Extra protection against entropy loss. + */ +#ifndef RAPIDHASH_PROTECTED + #define RAPIDHASH_FAST +#elif defined(RAPIDHASH_FAST) + #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously." +#endif + +/* + * Unrolling macros, changes code definition for main hash function. + * + * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes. + * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes. + * + * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED. 
+ * + * These macros do not alter the output hash. + */ +#ifndef RAPIDHASH_COMPACT + #define RAPIDHASH_UNROLLED +#elif defined(RAPIDHASH_UNROLLED) + #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously." +#endif + +/* + * Likely and unlikely macros. + */ +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + #define _likely_(x) __builtin_expect(x,1) + #define _unlikely_(x) __builtin_expect(x,0) +#else + #define _likely_(x) (x) + #define _unlikely_(x) (x) +#endif + +/* + * Endianness macros. + */ +#ifndef RAPIDHASH_LITTLE_ENDIAN + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define RAPIDHASH_LITTLE_ENDIAN + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define RAPIDHASH_BIG_ENDIAN + #else + #warning "could not determine endianness! Falling back to little endian." + #define RAPIDHASH_LITTLE_ENDIAN + #endif +#endif + +/* + * Default seed. + */ +#define RAPID_SEED (0xbdd89aa982704029ull) + +/* + * Default secret parameters. + */ +RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; + +/* + * 64*64 -> 128bit multiply function. + * + * @param A Address of 64-bit number. + * @param B Address of 64-bit number. + * + * Calculates 128-bit C = *A * *B. + * + * When RAPIDHASH_FAST is defined: + * Overwrites A contents with C's low 64 bits. + * Overwrites B contents with C's high 64 bits. + * + * When RAPIDHASH_PROTECTED is defined: + * Xors and overwrites A contents with C's low 64 bits. + * Xors and overwrites B contents with C's high 64 bits. 
+ */
+RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT {
+#if defined(__SIZEOF_INT128__)
+  __uint128_t r=*A; r*=*B;
+  #ifdef RAPIDHASH_PROTECTED
+  *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
+  #else
+  *A=(uint64_t)r; *B=(uint64_t)(r>>64);
+  #endif
+#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
+  #if defined(_M_X64)
+    #ifdef RAPIDHASH_PROTECTED
+    uint64_t a, b;
+    a=_umul128(*A,*B,&b);
+    *A^=a;  *B^=b;
+    #else
+    *A=_umul128(*A,*B,B);
+    #endif
+  #else
+    #ifdef RAPIDHASH_PROTECTED
+    uint64_t a, b;
+    b = __umulh(*A, *B);
+    a = *A * *B;
+    *A^=a;  *B^=b;
+    #else
+    uint64_t c = __umulh(*A, *B);
+    *A = *A * *B;
+    *B = c;
+    #endif
+  #endif
+#else
+  uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
+  uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
+  lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
+  #ifdef RAPIDHASH_PROTECTED
+  *A^=lo;  *B^=hi;
+  #else
+  *A=lo;  *B=hi;
+  #endif
+#endif
+}
+
+/*
+ *  Multiply and xor mix function.
+ *
+ *  @param A  64-bit number.
+ *  @param B  64-bit number.
+ *
+ *  Calculates 128-bit C = A * B.
+ *  Returns 64-bit xor between high and low 64 bits of C.
+ */
+RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; }
+
+/*
+ *  Read functions.
+ */ +#ifdef RAPIDHASH_LITTLE_ENDIAN +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;} +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;} +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);} +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);} +#elif defined(_MSC_VER) +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);} +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);} +#else +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { + uint64_t v; memcpy(&v, p, 8); + return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); +} +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { + uint32_t v; memcpy(&v, p, 4); + return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); +} +#endif + +/* + * Reads and combines 3 bytes of input. + * + * @param p Buffer to read from. + * @param k Length of @p, in bytes. + * + * Always reads and combines 3 bytes from memory. + * Guarantees to read each buffer position at least once. + * + * Returns a 64-bit value containing all three bytes read. 
+ */ +RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];} + +/* + * rapidhash main function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * @param secret Triplet of 64-bit secrets used to alter hash result predictably. + * + * Returns a 64-bit hash. + */ +RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT { + const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b; + if(_likely_(len<=16)){ + if(_likely_(len>=4)){ + const uint8_t * plast = p + len - 4; + a = (rapid_read32(p) << 32) | rapid_read32(plast); + const uint64_t delta = ((len&24)>>(len>>3)); + b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); } + else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;} + else a=b=0; + } + else{ + size_t i=len; + if(_unlikely_(i>48)){ + uint64_t see1=seed, see2=seed; +#ifdef RAPIDHASH_UNROLLED + while(_likely_(i>=96)){ + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); + see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); + seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed); + see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1); + see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2); + p+=96; i-=96; + } + if(_unlikely_(i>=48)){ + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); + see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); + p+=48; i-=48; + } +#else + do { + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); + 
see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); + p+=48; i-=48; + } while (_likely_(i>=48)); +#endif + seed^=see1^see2; + } + if(i>16){ + seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]); + if(i>32) + seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed); + } + a=rapid_read64(p+i-16); b=rapid_read64(p+i-8); + } + a^=secret[1]; b^=seed; rapid_mum(&a,&b); + return rapid_mix(a^secret[0]^len,b^secret[1]); +} + +/* + * rapidhash default seeded hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * @param seed 64-bit seed used to alter the hash result predictably. + * + * Calls rapidhash_internal using provided parameters and default secrets. + * + * Returns a 64-bit hash. + */ +RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT { + return rapidhash_internal(key, len, seed, rapid_secret); +} + +/* + * rapidhash default hash function. + * + * @param key Buffer to be hashed. + * @param len @key length, in bytes. + * + * Calls rapidhash_withSeed using provided parameters and the default seed. + * + * Returns a 64-bit hash. + */ +RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT { + return rapidhash_withSeed(key, len, RAPID_SEED); +} From cffec3852f407094407aa39842ff54d34e088f64 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 14:00:51 +0100 Subject: [PATCH 10/12] Stop calling ftell() in a loop. ftell() must go ask the kernel for the file offset, in case someone knew the underlying file descriptor number and seeked it. Thus, we can save a couple hundred thousand syscalls by just caching the offset and maintaining it ourselves. This cuts another ~170ms off a no-op Chromium build. 
--- src/deps_log.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/deps_log.cc b/src/deps_log.cc index 504799f4bf..d117d06907 100644 --- a/src/deps_log.cc +++ b/src/deps_log.cc @@ -186,15 +186,13 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) { return LOAD_SUCCESS; } - long offset; + long offset = ftell(f); bool read_failed = false; int unique_dep_record_count = 0; int total_dep_record_count = 0; for (;;) { - offset = ftell(f); - unsigned size; - if (fread(&size, 4, 1, f) < 1) { + if (fread(&size, sizeof(size), 1, f) < 1) { if (!feof(f)) read_failed = true; break; @@ -206,6 +204,7 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) { read_failed = true; break; } + offset += size + sizeof(size); if (is_deps) { if ((size % 4) != 0) { From c97558ac2407ad915308ee4d07bcbbad7b86145c Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 1 Nov 2024 15:58:35 +0100 Subject: [PATCH 11/12] Microoptimization in LoadDepsFromLog(). This cuts off another ~100 ms, most likely because the compiler doesn't have smart enough alias analysis to do the same (trivial) transformation. 
---
 src/graph.cc | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/graph.cc b/src/graph.cc
index f04ffb47c8..c1276daefb 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -740,12 +740,13 @@ bool ImplicitDepLoader::LoadDepsFromLog(Edge* edge, string* err) {
     return false;
   }
 
-  vector<Node*>::iterator implicit_dep =
-      PreallocateSpace(edge, deps->node_count);
-  for (int i = 0; i < deps->node_count; ++i, ++implicit_dep) {
-    Node* node = deps->nodes[i];
-    *implicit_dep = node;
-    node->AddOutEdge(edge);
+  Node** nodes = deps->nodes;
+  size_t node_count = deps->node_count;
+  edge->inputs_.insert(edge->inputs_.end() - edge->order_only_deps_,
+                       nodes, nodes + node_count);
+  edge->implicit_deps_ += node_count;
+  for (size_t i = 0; i < node_count; ++i) {
+    nodes[i]->AddOutEdge(edge);
   }
   return true;
 }

From beabef098786d54be8e40a8a103629639b94b57f Mon Sep 17 00:00:00 2001
From: Fredrik Andersson
Date: Tue, 12 Nov 2024 17:26:16 +0100
Subject: [PATCH 12/12] Add multi-inputs tool

The 'multi-inputs' option will list all <target> + <input> for the given
targets.

Run:
ninja -t multi-inputs <targets>

Ninja will then output:
<target> <input>
---
 doc/manual.asciidoc | 41 ++++++++++++++++++++++++++
 misc/output_test.py | 34 ++++++++++++++++++++++
 src/ninja.cc        | 71 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+)

diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc
index e4b05eca2f..1e9ede9891 100644
--- a/doc/manual.asciidoc
+++ b/doc/manual.asciidoc
@@ -265,6 +265,47 @@ output files are out of date.
 rebuild those targets.
 _Available since Ninja 1.11._
 
+`multi-inputs`:: print one or more sets of inputs required to build targets.
+Each line will consist of a target, a delimiter, an input and a terminator character.
+The list produced by the tool can be helpful if one would like to know which targets
+are affected by a certain input.
++
+The output will be a series of lines with the following elements:
++
+----
+<target><delimiter><input><terminator>
+----
++
+The default `<delimiter>` is a single TAB character.
+The delimiter can be modified to any string using the `--delimiter` argument.
++
+The default `<terminator>` is a line terminator (i.e. `\n` on Posix and `\r\n` on Windows).
+The terminator can be changed to `\0` by using the `--print0` argument.
++
+----
+----
++
+Example usage of the `multi-inputs` tool:
++
+----
+ninja -t multi-inputs target1 target2 target3
+----
++
+Example of produced output from the `multi-inputs` tool:
++
+----
+target1 file1.c
+target2 file1.c
+target2 file2.c
+target3 file1.c
+target3 file2.c
+target3 file3.c
+----
++
+_Note that a given input may appear for several targets if it is used by more
+than one target._
+_Available since Ninja 1.13._
+
 `clean`:: remove built files. By default, it removes all built files
 except for those created by the generator. Adding the `-g` flag also
 removes built files created by the generator (see the
 <<ref_rule,rule reference for the `generator` attribute>>).
diff --git a/misc/output_test.py b/misc/output_test.py
--- a/misc/output_test.py
+++ b/misc/output_test.py
@@ -745,6 +745,40 @@ def test_tool_inputs(self) -> None:
         self.assertEqual(expected, actual)
 
+    def test_tool_multi_inputs(self) -> None:
+        plan = '''
+rule cat
+  command = cat $in $out
+build out1 : cat in1
+build out2 : cat in1 in2
+build out3 : cat in1 in2 in3
+'''
+        self.assertEqual(run(plan, flags='-t multi-inputs out1'),
+'''out1<delimiter>in1
+'''.replace("<delimiter>", "\t"))
+
+        self.assertEqual(run(plan, flags='-t multi-inputs out1 out2 out3'),
+'''out1<delimiter>in1
+out2<delimiter>in1
+out2<delimiter>in2
+out3<delimiter>in1
+out3<delimiter>in2
+out3<delimiter>in3
+'''.replace("<delimiter>", "\t"))
+
+        self.assertEqual(run(plan, flags='-t multi-inputs -d: out1'),
+'''out1:in1
+''')
+
+        self.assertEqual(
+            run(
+                plan,
+                flags='-t multi-inputs -d, --print0 out1 out2'
+            ),
+            '''out1,in1\0out2,in1\0out2,in2\0'''
+        )
+
     def test_explain_output(self):
         b = BuildDir('''\
 build .FORCE: phony
diff --git a/src/ninja.cc b/src/ninja.cc
index 93c0ca6a2a..bf8c3f60c0 100644
--- a/src/ninja.cc
+++ b/src/ninja.cc
@@ -130,6 +130,7 @@ struct NinjaMain : public BuildLogUser {
   int ToolTargets(const Options* options, int argc, char* argv[]);
   int ToolCommands(const Options* options, int argc, char* argv[]);
   int ToolInputs(const Options* options, int argc, char* argv[]);
+  int 
ToolMultiInputs(const Options* options, int argc, char* argv[]);
   int ToolClean(const Options* options, int argc, char* argv[]);
   int ToolCleanDead(const Options* options, int argc, char* argv[]);
   int ToolCompilationDatabase(const Options* options, int argc, char* argv[]);
@@ -845,6 +846,74 @@ int NinjaMain::ToolInputs(const Options* options, int argc, char* argv[]) {
   return 0;
 }
 
+int NinjaMain::ToolMultiInputs(const Options* options, int argc, char* argv[]) {
+  // The inputs tool uses getopt, and expects argv[0] to contain the name of
+  // the tool, i.e. "inputs".
+  argc++;
+  argv--;
+
+  optind = 1;
+  int opt;
+  char terminator = '\n';
+  const char* delimiter = "\t";
+  const option kLongOptions[] = { { "help", no_argument, NULL, 'h' },
+                                  { "delimiter", required_argument, NULL,
+                                    'd' },
+                                  { "print0", no_argument, NULL, '0' },
+                                  { NULL, 0, NULL, 0 } };
+  while ((opt = getopt_long(argc, argv, "d:h0", kLongOptions, NULL)) != -1) {
+    switch (opt) {
+    case 'd':
+      delimiter = optarg;
+      break;
+    case '0':
+      terminator = '\0';
+      break;
+    case 'h':
+    default:
+      // clang-format off
+      printf(
+"Usage '-t multi-inputs [options] [targets]'\n"
+"\n"
+"Print one or more sets of inputs required to build targets, sorted in dependency order.\n"
+"The tool works like inputs tool but with addition of the target for each line.\n"
+"The output will be a series of lines with the following elements:\n"
+"  <target><delimiter><input><terminator>\n"
+"Note that a given input may appear for several targets if it is used by more than one target.\n"
+"Options:\n"
+"  -h, --help            Print this message.\n"
+"  -d, --delimiter=DELIM Use DELIM instead of TAB for field delimiter.\n"
+"  -0, --print0          Use \\0 instead of \\n as a line terminator.\n"
+      );
+      // clang-format on
+      return 1;
+    }
+  }
+  argv += optind;
+  argc -= optind;
+
+  std::vector<Node*> nodes;
+  std::string err;
+  if (!CollectTargetsFromArgs(argc, argv, &nodes, &err)) {
+    Error("%s", err.c_str());
+    return 1;
+  }
+
+  for (const Node* node : nodes) {
+    InputsCollector collector;
+
+    
collector.VisitNode(node);
+    std::vector<std::string> inputs = collector.GetInputsAsStrings();
+
+    for (const std::string& input : inputs) {
+      printf("%s%s%s", node->path().c_str(), delimiter, input.c_str());
+      fputc(terminator, stdout);
+    }
+  }
+
+  return 0;
+}
+
 int NinjaMain::ToolClean(const Options* options, int argc, char* argv[]) {
   // The clean tool uses getopt, and expects argv[0] to contain the name of
   // the tool, i.e. "clean".
@@ -1234,6 +1303,8 @@ const Tool* ChooseTool(const string& tool_name) {
     Tool::RUN_AFTER_LOAD, &NinjaMain::ToolCommands },
     { "inputs", "list all inputs required to rebuild given targets",
       Tool::RUN_AFTER_LOAD, &NinjaMain::ToolInputs},
+    { "multi-inputs", "print one or more sets of inputs required to build targets",
+      Tool::RUN_AFTER_LOAD, &NinjaMain::ToolMultiInputs},
     { "deps", "show dependencies stored in the deps log",
       Tool::RUN_AFTER_LOGS, &NinjaMain::ToolDeps },
    { "missingdeps", "check deps log dependencies on generated files",