diff options
author | Graham Christensen <graham@grahamc.com> | 2019-03-05 16:37:49 -0500 |
---|---|---|
committer | Graham Christensen <graham@grahamc.com> | 2019-03-05 16:37:52 -0500 |
commit | 09362bc3e88f7e78953bcd312b6bd86191ce4d65 (patch) | |
tree | 976a8acb6cbc079a2441bec70a50f71c77b3a55c /pkgs/build-support/references-by-popularity | |
parent | 54826e7471fb5c8ccfff47fd0389f7a89ec3b67d (diff) | |
download | nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.gz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.bz2 nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.lz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.xz nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.tar.zst nixpkgs-09362bc3e88f7e78953bcd312b6bd86191ce4d65.zip |
references-by-popularity: cache computation to avoid memory bloat
On very large graphs (14k+ paths), we'd end up with a massive in memory tree of mostly duplication. We can safely cache trees and point back to them later, saving memory.
Diffstat (limited to 'pkgs/build-support/references-by-popularity')
-rw-r--r-- | pkgs/build-support/references-by-popularity/closure-graph.py | 34 |
1 files changed, 30 insertions, 4 deletions
diff --git a/pkgs/build-support/references-by-popularity/closure-graph.py b/pkgs/build-support/references-by-popularity/closure-graph.py index 145ba1bc9b3..579f3b041fa 100644 --- a/pkgs/build-support/references-by-popularity/closure-graph.py +++ b/pkgs/build-support/references-by-popularity/closure-graph.py @@ -338,11 +338,23 @@ class TestMakeLookup(unittest.TestCase): # /nix/store/tux: {} # } # } +subgraphs_cache = {} def make_graph_segment_from_root(root, lookup): + global subgraphs_cache children = {} for ref in lookup[root]: - debug("Making graph segments on {}".format(ref)) - children[ref] = make_graph_segment_from_root(ref, lookup) + # make_graph_segment_from_root is a pure function, and will + # always return the same result based on a given input. Thus, + # cache computation. + # + # Python's assignment will use a pointer, preventing memory + # bloat for large graphs. + if ref not in subgraphs_cache: + debug("Subgraph Cache miss on {}".format(ref)) + subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup) + else: + debug("Subgraph Cache hit on {}".format(ref)) + children[ref] = subgraphs_cache[ref] return children class TestMakeGraphSegmentFromRoot(unittest.TestCase): @@ -393,13 +405,27 @@ class TestMakeGraphSegmentFromRoot(unittest.TestCase): # /nix/store/baz: 4 # /nix/store/tux: 6 # ] +popularity_cache = {} def graph_popularity_contest(full_graph): + global popularity_cache popularity = defaultdict(int) for path, subgraph in full_graph.items(): - debug("Calculating popularity under {}".format(path)) popularity[path] += 1 - subcontest = graph_popularity_contest(subgraph) + # graph_popularity_contest is a pure function, and will + # always return the same result based on a given input. Thus, + # cache computation. + # + # Python's assignment will use a pointer, preventing memory + # bloat for large graphs. + if path not in popularity_cache: + debug("Popularity Cache miss on {}", path) + popularity_cache[path] = graph_popularity_contest(subgraph) + else: + debug("Popularity Cache hit on {}", path) + + subcontest = popularity_cache[path] for subpath, subpopularity in subcontest.items(): + debug("Calculating popularity for {}", subpath) popularity[subpath] += subpopularity + 1 return popularity |