references-by-popularity: cache computation to avoid memory bloat

On very large graphs (14k+ paths), we'd end up with a massive in memory tree of mostly duplication. We can safely cache trees and point back to them later, saving memory.
author: Graham Christensen <graham@grahamc.com> 2019-03-05 16:37:49 -0500
committer: Graham Christensen <graham@grahamc.com> 2019-03-05 16:37:52 -0500
commit: 09362bc3e88f7e78953bcd312b6bd86191ce4d65 (patch)
tree: 976a8acb6cbc079a2441bec70a50f71c77b3a55c /pkgs/build-support/references-by-popularity
parent: 54826e7471fb5c8ccfff47fd0389f7a89ec3b67d (diff)
download: nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.gz
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.bz2
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.lz
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.xz
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.zst
nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.zip
1 files changed, 30 insertions, 4 deletions
diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py
index 145ba1bc9b3..579f3b041fa 100644
--- a/pkgs/build-support/references-by-popularity/closure-graph.py
+++ b/pkgs/build-support/references-by-popularity/closure-graph.py
@@ -338,11 +338,23 @@ class TestMakeLookup(unittest.TestCase):
 #                   /nix/store/tux: {}
 #   }
 # }
+subgraphs_cache = {}
 def make_graph_segment_from_root(root, lookup):
+    global subgraphs_cache
     children = {}
     for ref in lookup[root]:
-        debug("Making graph segments on {}".format(ref))
-        children[ref] = make_graph_segment_from_root(ref, lookup)
+        # make_graph_segment_from_root is a pure function, and will
+        # always return the same result based on a given input. Thus,
+        # cache computation.
+        #
+        # Python's assignment will use a pointer, preventing memory
+        # bloat for large graphs.
+        if ref not in subgraphs_cache:
+            debug("Subgraph Cache miss on {}".format(ref))
+            subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)
+        else:
+            debug("Subgraph Cache hit on {}".format(ref))
+        children[ref] = subgraphs_cache[ref]
     return children
 
 class TestMakeGraphSegmentFromRoot(unittest.TestCase):
@@ -393,13 +405,27 @@ class TestMakeGraphSegmentFromRoot(unittest.TestCase):
 #   /nix/store/baz: 4
 #   /nix/store/tux: 6
 # ]
+popularity_cache = {}
 def graph_popularity_contest(full_graph):
+    global popularity_cache
     popularity = defaultdict(int)
     for path, subgraph in full_graph.items():
-        debug("Calculating popularity under {}".format(path))
         popularity[path] += 1
-        subcontest = graph_popularity_contest(subgraph)
+        # graph_popularity_contest is a pure function, and will
+        # always return the same result based on a given input. Thus,
+        # cache computation.
+        #
+        # Python's assignment will use a pointer, preventing memory
+        # bloat for large graphs.
+        if path not in popularity_cache:
+            debug("Popularity Cache miss on {}", path)
+            popularity_cache[path] = graph_popularity_contest(subgraph)
+        else:
+            debug("Popularity Cache hit on {}", path)
+
+        subcontest = popularity_cache[path]
         for subpath, subpopularity in subcontest.items():
+            debug("Calculating popularity for {}", subpath)
             popularity[subpath] += subpopularity + 1
 
     return popularity
author	Graham Christensen <graham@grahamc.com>	2019-03-05 16:37:49 -0500
committer	Graham Christensen <graham@grahamc.com>	2019-03-05 16:37:52 -0500
commit	09362bc3e88f7e78953bcd312b6bd86191ce4d65 (patch)
tree	976a8acb6cbc079a2441bec70a50f71c77b3a55c /pkgs/build-support/references-by-popularity
parent	54826e7471fb5c8ccfff47fd0389f7a89ec3b67d (diff)
download	nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.gz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.bz2 nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.lz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.xz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.zst nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.zip