Source code for mbank.placement

"""
mbank.placement
===============

A lot of placement methods, both for the tiling and the normalizing flow. Most of the methods are deprecated and are not guaranteed to work properly. The functions :func:`place_random_flow` are :func:`place_stochastically_flow` are guaranteed to work.
"""

import numpy as np
import scipy
from scipy.spatial import ConvexHull, Rectangle
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import itertools
import warnings
import sys

from .utils import dummy_iterator, avg_dist

#############DEBUG LINE PROFILING
try:
	from line_profiler import LineProfiler

	def do_profile(follow=[]):
		def inner(func):
			def profiled_func(*args, **kwargs):
				try:
					profiler = LineProfiler()
					profiler.add_function(func)
					for f in follow:
						profiler.add_function(f)
					profiler.enable_by_count()
					return func(*args, **kwargs)
				finally:
					profiler.print_stats()
			return profiled_func
		return inner
except:
	pass

[docs]def place_stochastically_flow(minimum_match, flow, metric_obj, boundaries_checker = None, empty_iterations = 200, seed_bank = None, dry_run = False, verbose = True): """ Place templates with a stochastic placing algorithm. It iteratively proposes a new template to add to the bank. The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``. It can start from a given set of templates. The match of a proposal is computed against all the templats that have been added. Parameters ---------- minimum_match: float Minimum match between templates. flow: flow.GW_flow Normalizing flow to sample template from metric_obj: handlers.cbc_metric A metric object to evaluate the metric at the livepoints boundaries_checker: callable A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied. A common usage would be: .. code-block:: python from mbank.parser import boundary_keeper bk = boundary_keeper(args) def boundaries_checker(theta): return bk(theta, args.variable_format) empty_iterations: int Number of consecutive templates that are not accepted before the placing algorithm is terminated seed_bank: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates that provides a first guess for the bank dry_run: bool Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement verbose: bool Whether to print the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates generated by the stochastic placing algorithm """ MM = minimum_match if boundaries_checker is None: boundaries_checker = lambda x: np.full(True, len(x)) new_templates = flow.sample_within_boundaries(1, boundaries_checker)[0].numpy() if seed_bank is None else np.asarray(seed_bank) nothing_new, i, max_nothing_new = 0, 0, 0 proposal_list, log_pdf_theta_list = [], [] it = tqdm(dummy_iterator(), disable = not verbose) try: for _ in it: if verbose and i%100==0: it.set_description("Templates added {} ({}/{} empty iterations)".format(new_templates.shape[0], int(max_nothing_new), int(empty_iterations))) if nothing_new >= empty_iterations: break if len(proposal_list)==0: with torch.no_grad(): proposal_list, _ = flow.sample_within_boundaries(1000, boundaries_checker) proposal_list = list(proposal_list.numpy()) proposal = proposal_list.pop(0)[None,:] #FIXME: This part here is too expensive... # Maybe you can get an average metric (just with the relative magnitude of the eigenvalues) and # assign the determinant with the flow log_pdf. That would be crazy fast... metric = metric_obj.get_metric(proposal, metric_type = 'symphony') diff = new_templates - proposal #(N_templates, D) max_match = np.max(1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1)) #max_match = np.exp(-(1-max_match)) #do we need this? if (max_match < MM): new_templates = np.concatenate([new_templates, proposal], axis =0) nothing_new = 0 else: nothing_new += 1 max_nothing_new = max(max_nothing_new, nothing_new) i+=1 except KeyboardInterrupt: pass del proposal_list if dry_run: return len(new_templates) return new_templates
[docs]def place_geometric_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker = None, covering_fraction = 0.9, qmc = True, dry_run = False, verbose = True): """ It computes the number of templates using the random algorithm :func:`place_random_flow`. It then places templates on a grid in the gaussian space or with quasi-Monte Carlo sampling and transforms it back to the normal space Parameters ---------- minimum_match: float Minimum match between templates. flow: flow.GW_flow Normalizing flow to sample template from metric_obj: handlers.cbc_metric A metric object to evaluate the metric at the livepoints n_livepoints: int Number of livepoints to compute the covering fraction with boundaries_checker: callable A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied. A common usage would be: .. code-block:: python from mbank.parser import boundary_keeper bk = boundary_keeper(args) def boundaries_checker(theta): return bk(theta, args.variable_format) covering_fraction: float Fraction of livepoints to be killed before terminating the loop qmc: bool Whether to sample the templates in the latent space with a quasi-MC algorithm. If False, templates will lie in a grid dry_run: bool Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement verbose: bool Whether to display the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray`, int shape: (N,D) - A set of templates generated by the placing algorithm. If ``dry_run`` is set, only the number of templates will be returned """ #Computing the number of templates to place n_templates = place_random_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker, covering_fraction, dry_run = True, verbose = verbose) #TODO: estimate the fraction of the flow support covered by the region of interest. This will help you to decide how many templates to place in the grid D = flow.D if qmc: qmc_sampler = scipy.stats.qmc.MultivariateNormalQMC([0]*D) #print(2**int(np.round(np.log2(n_templates))), 2**int(np.log2(n_templates)), n_templates) n_templates = 2**int(np.log2(n_templates)+0.4) grid = torch.tensor(qmc_sampler.random(n_templates).astype(np.float32)) else: N_points = [int(n_templates**(1/D))+1] for d in range(D-1, 0, -1): #N_points.append(max(int((n_templates/np.prod(N_points))**(1/d)),1)) # This will always underestimate the number of templates N_points.append(int((n_templates/np.prod(N_points))**(1/d))+1) # This will always overestimate the number of templates print(N_points, np.prod(N_points), n_templates) grid_1d = [scipy.special.erfinv(2*np.linspace(0,1, n, endpoint = True)[1:-1] - 1) for n in N_points] grid = [] for p in itertools.product(*grid_1d): grid.append(p) grid = torch.tensor(np.array(grid).astype(np.float32)) with torch.no_grad(): new_templates, _ = flow._transform.inverse(grid) new_templates = new_templates.numpy() new_templates = new_templates[boundaries_checker(new_templates)] if dry_run: return len(new_templates) return new_templates
#@do_profile(follow=[])
[docs]def place_random_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker = None, covering_fraction = 0.9, dry_run = False, importance_sampling = False, metric_type = 'symphony', verbose = True): """ Draw templates from the flow. For each proposal, all the livepoints in the ellipse of constant ``minimum_match`` are killed. The iteration goes on until a fraction of ``covering_fraction`` of the space is covered. It follows `2202.09380 <https://arxiv.org/abs/2202.09380>`_ Parameters ---------- minimum_match: float Minimum match between templates. flow: flow.GW_flow Normalizing flow to sample template from metric_obj: handlers.cbc_metric A metric object to evaluate the metric at the livepoints n_livepoints: int Number of livepoints to compute the covering fraction with boundaries_checker: callable A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied. A common usage would be: .. code-block:: python from mbank.parser import boundary_keeper bk = boundary_keeper(args) def boundaries_checker(theta): return bk(theta, args.variable_format) covering_fraction: float Fraction of livepoints to be killed before terminating the loop dry_run: bool Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement importance_sampling: bool Whether to compute the covering fraction using importance sampling. Importance sampling gives a more accurate estimation of the covering fraction but at the cost of a higher variance in the number of templates. metric_type: str The method to compute the metric. Default 'symphony' verbose: bool Whether to display the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray`, int shape: (N,D) - A set of templates generated by the placing algorithm. If ``dry_run`` is set, only the number of templates will be returned """ assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]" dtype = np.float32 if boundaries_checker is None: boundaries_checker = lambda x: np.full(True, len(x)) elif isinstance(boundaries_checker, np.ndarray): assert boundaries_checker.shape[0] == 2, "Wrong shape for the given boundary array!" boundaries = np.array(boundaries_checker) def boundaries_checker(theta): theta = np.atleast_2d(theta) return np.logical_and(np.all(theta > boundaries[0,:], axis =1), np.all(theta < boundaries[1,:], axis = 1)) #Sampling livepoints livepoints, log_pdf = flow.sample_within_boundaries(n_livepoints, boundaries_checker) livepoints, log_pdf = livepoints.numpy(), log_pdf.numpy() assert sum(boundaries_checker(livepoints))==len(livepoints), "Something messed up with sample_within_boundaries" #Computing the metric for all the livepoints metric, p = [], [] ids_to_remove = [] #Keeps the ids of the livepoints for which the metric computation fails for i, (l, l_pdf) in enumerate(zip(tqdm(livepoints, desc = 'Computing the metric for each livepoint', disable = not verbose), log_pdf)): metric_ = metric_obj.get_metric(l, metric_type = metric_type) if np.linalg.det(metric_)<0: ids_to_remove.append(i) continue l_pdf_true = np.log(np.abs(np.linalg.det(metric_)))*0.5 metric.append(metric_) if importance_sampling: p_ = l_pdf_true-l_pdf-flow.constant.item() if p_>np.log(10): warnings.warn("One (or more) importance sampling weight exceeded a safe threshold and were reularized. Please, check whether the normalizing flow is accurate enough") #if verbose: print('\tRegularized livepoint: theta | weight ', l, p_) p_ = min(p_, np.log(10)) #FIXME: regularization: how to tune this number? p.append(np.exp(p_)) if ids_to_remove: livepoints = np.delete(livepoints, ids_to_remove, axis = 0) metric_cholesky = np.linalg.cholesky(metric).astype(dtype) #(N, D,D) if importance_sampling: if verbose: print('Importance sampling weights (not normalized): max | min ', np.max(p), np.min(p)) print('\tPercentiles: [99, 90, 50, 10, 1]', np.percentile(p, [99, 90, 50, 10, 1])) p = np.array(p)/np.sum(p) if verbose: print('Importance sampling weights: max | min | equal w ', np.max(p), np.min(p), 1/len(livepoints)) print('\tPercentiles: [99, 90, 50, 10, 1]', np.percentile(p, [99, 90, 50, 10, 1])) new_templates = [] N_tmplts = 0 actual_covering_fraction = 0 #if dry_run: # from .utils import plot_tiles_templates # plot_tiles_templates(livepoints[:,:4], 'logMq_s1xz', injections = livepoints[:,:4], inj_cmap = p, show = True) bar_str = '{} templates placed ({} % space covered)' it = tqdm(dummy_iterator(), desc = bar_str.format(0, 0), disable = not verbose, leave = True) for _ in it: if actual_covering_fraction>=covering_fraction: break #Generating proposals with torch.no_grad(): proposals = flow.sample(3000).numpy() proposals = proposals[boundaries_checker(proposals)] if len(proposals) ==0: continue ids_kill = [] for i, (l, L_t) in enumerate(zip(livepoints, metric_cholesky)): diff = proposals - l #(N,D) diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t) dist_sq = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be if np.any(dist_sq < 1- minimum_match): ids_kill.append(i) if importance_sampling: actual_covering_fraction += np.sum(p[ids_kill]) #This computes covering fraction w importance sampling p = np.delete(p, ids_kill, axis = 0) else: actual_covering_fraction += len(ids_kill)/n_livepoints #This computes covering fraction w/o importance sampling livepoints = np.delete(livepoints, ids_kill, axis = 0) metric_cholesky = np.delete(metric_cholesky, ids_kill, axis = 0) if not dry_run: new_templates.append(np.array(proposals, dtype = np.float64)) N_tmplts += len(proposals) if verbose: it.set_description(bar_str.format(N_tmplts, round(100*actual_covering_fraction, 1))) #TODO: return the livepoints still alive: they give interesting info about where the bank sucks... if dry_run: #from .utils import plot_tiles_templates #plot_tiles_templates(livepoints[:,:4], 'logMq_s1xz', injections = livepoints[:,:4], inj_cmap = p, show = True) return N_tmplts new_templates = np.concatenate(new_templates, axis = 0) return new_templates
[docs]def place_stochastically_in_tile(minimum_match, tile): """ Place templates with a stochastic placing algorithm withing a given tile, by iteratively proposing a new template to add to the bank inside the given tile. The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``. Parameters ---------- minimum_match: float Minimum match between templates. tile: tuple An element of the ``tiling_handler`` object. It consists of a tuple ``(scipy.spatial.Rectangle, np.ndarray)`` Returns ------- new_templates: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates generated by the stochastic placing algorithm within the given tile """ dist_sq = 1-minimum_match #initial template new_templates = np.random.uniform(tile.rectangle.mins, tile.rectangle.maxes, (1, tile.D)) #(1,D) nothing_new = 0 while nothing_new < 300: proposal = np.random.uniform(tile.rectangle.mins, tile.rectangle.maxes, tile.D) #(D,) diff = new_templates - proposal min_dist = np.min(np.sum(np.multiply(diff, np.matmul(diff, tile.metric)), axis = -1)) if min_dist > dist_sq: new_templates = np.concatenate([new_templates, proposal[None,:]], axis = 0) nothing_new = 0 else: nothing_new += 1 return new_templates
#@do_profile(follow=[])
[docs]def place_stochastically(minimum_match, tiling, empty_iterations = 200, seed_bank = None, verbose = True): """ Place templates with a stochastic placing algorithm. It iteratively proposes a new template to add to the bank. The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``. It can start from a given set of templates. The match of a proposal is computed against all the templats that have been added. Parameters ---------- minimum_match: float Minimum match between templates. tiling: tiling_handler A tiling object to compute the match with empty_iterations: int Number of consecutive templates that are not accepted before the placing algorithm is terminated seed_bank: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates that provides a first guess for the bank verbose: bool Whether to print the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates generated by the stochastic placing algorithm """ #User communication stuff t_ = tqdm(dummy_iterator()) if verbose else dummy_iterator() MM = minimum_match if seed_bank is None: ran_id_ = np.random.choice(len(tiling)) new_templates = np.random.uniform(tiling[ran_id_][0].mins, tiling[ran_id_][0].maxes, (1, len(tiling[ran_id_][0].maxes))) else: new_templates = np.asarray(seed_bank) nothing_new, i, max_nothing_new = 0, 0, 0 #optimized version of the above... (not really helpful) if tiling.flow: with torch.no_grad(): log_pdf_centers = tiling.flow.log_prob(tiling.get_centers().astype(np.float32)).numpy() proposal_list, log_pdf_theta_list = [], [] try: for _ in t_: if verbose and i%100==0:t_.set_description("Templates added {} ({}/{} empty iterations)".format(new_templates.shape[0], int(max_nothing_new), int(empty_iterations))) if nothing_new >= empty_iterations: break if tiling.flow: #Unoptimized version - you need to make things in batches! #proposal = tiling.sample_from_flow(1) #metric = tiling.get_metric(proposal, flow = True) #using the flow if it is trained #optimized version of the above... (not really helpful) with torch.no_grad(): if len(proposal_list)==0: proposal_list, log_pdf_theta_list = tiling.flow.sample_and_log_prob(1000) proposal_list, log_pdf_theta_list = list(proposal_list.numpy()), list(log_pdf_theta_list.numpy()) proposal, log_pdf_theta = proposal_list.pop(0)[None,:], log_pdf_theta_list.pop(0) #checking if the proposal is inside the tiling if not tiling.is_inside(proposal)[0]: continue #proposal, log_pdf_theta = tiling.flow.sample_and_log_prob(1) #proposal = proposal.numpy() #FIXME: this kdtree may mess things up id_ = tiling.get_tile(proposal, kdtree = True)[0] metric = tiling[id_].metric factor = (2/metric.shape[0])*(log_pdf_theta-log_pdf_centers[id_]) factor = np.exp(factor) metric = (metric.T*factor).T else: #FIXME: this thing is fucking slooooow! Maybe you should do a fancy buffer to parallelize this? proposal, tile_id = tiling.sample_from_tiling(1, tile_id = True) metric = tiling[tile_id[0]].metric diff = new_templates - proposal #(N_templates, D) max_match = np.max(1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1)) #max_match = np.exp(-(1-max_match)) #do we need this? if (max_match < MM): new_templates = np.concatenate([new_templates, proposal], axis =0) nothing_new = 0 else: nothing_new += 1 max_nothing_new = max(max_nothing_new, nothing_new) i+=1 except KeyboardInterrupt: pass if tiling.flow: del proposal_list, log_pdf_theta_list return new_templates
[docs]def place_iterative(match, t): """ Given a tile, it returns the templates within the tile obtained by iterative splitting. Parameters ---------- match: float Match defining the template volume t: tile The tile to cover with templates Returns ------- new_templates: :class:`~numpy:numpy.ndarray` Array with the generated templates """ dist = avg_dist(match, t.D) is_ok = lambda tile_: tile_.N_templates(dist)<=1 template_list = [(t, is_ok(t))] while True: if np.all([b for _, b in template_list]): break for t_ in template_list: if t_[1]: continue t_left, t_right = t_[0].split(None,2) extended_list = [(t_left, is_ok(t_left)), (t_right, is_ok(t_right))] template_list.remove(t_) template_list.extend(extended_list) new_templates = np.array([t_.center for t_, _ in template_list]) return new_templates
#@do_profile(follow = [])
[docs]def place_random_tiling(minimum_match, tiling, N_livepoints, covering_fraction = 0.9, verbose = True): """ Draw templates from the uniform distribution on the manifold. For each proposal, all the livepoints in the ellipse of constant ``minimum_match`` are killed. The iteration goes on until a fraction of ``covering_fraction`` of livepoints are alive. It follows `2202.09380 <https://arxiv.org/abs/2202.09380>`_ Parameters ---------- minimum_match: float Minimum match between templates. tiling: tiling_handler Tiling handler that tiles the parameter space N_livepoints: int Number of livepoints to cover the space with covering_fraction: float Fraction of livepoints to be killed before terminating the loop verbose: bool Whether to display the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates generated by the placing algorithm """ assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]" MM = minimum_match dtype = np.float32 livepoints = tiling.sample_from_tiling(N_livepoints, dtype = dtype, tile_id = False, p_equal = False) new_templates = [] if tiling.flow: with torch.no_grad(): log_pdf_centers = tiling.flow.log_prob(tiling.get_centers().astype(np.float32)).numpy() proposal_list, proposal_ids_, log_pdf_theta_list = [], [], [] bar_str = '{} templates placed ({} % livepoints alive)' if verbose: it = tqdm(dummy_iterator(), desc = bar_str.format(len(new_templates), 100), leave = True) else: it = dummy_iterator() for _ in it: if len(livepoints)<N_livepoints*(1-covering_fraction): break #Generating proposals if tiling.flow: with torch.no_grad(): if len(proposal_list)==0: proposal_list, log_pdf_theta_list = tiling.flow.sample_and_log_prob(1000) proposal_list, log_pdf_theta_list = list(proposal_list.numpy()), list(log_pdf_theta_list.numpy()) #FIXME: this thing is shit!! You should code all these operations inside the tiling!! proposal, log_pdf_theta = proposal_list.pop(0), log_pdf_theta_list.pop(0) #checking if the proposal is inside the tiling if not tiling.is_inside(proposal)[0]: continue id_ = tiling.get_tile(proposal, kdtree = True)[0] metric = tiling[id_].metric factor = (2/metric.shape[0])*(log_pdf_theta-log_pdf_centers[id_]) factor = np.exp(factor) metric = (metric.T*factor).T else: if len(proposal_list)==0: proposal_list, proposal_ids_ = tiling.sample_from_tiling(1000, dtype = dtype, tile_id = True, p_equal = False) proposal_list = list(proposal_list) proposal_ids_ = list(proposal_ids_) proposal, id_ = proposal_list.pop(0), proposal_ids_.pop(0) metric = tiling[id_].metric diff = livepoints - proposal #(N,D) L_t = np.linalg.cholesky(metric).astype(dtype) #(D,D) diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t) dist = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be ids_kill = np.where(dist < 1- MM)[0] livepoints = np.delete(livepoints, ids_kill, axis = 0) new_templates.append(np.array(proposal, dtype = np.float64)) if len(new_templates) %100 ==0 and verbose: it.set_description(bar_str.format(len(new_templates), round(100*len(livepoints)/N_livepoints, 1))) new_templates = np.array(new_templates) return new_templates
#@do_profile(follow=[])
[docs]def place_pruning(minimum_match, tiling, N_points, covering_fraction = 0.9, verbose = True): """ Given a tiling object, it covers the volume with points and covers them with templates. It uses a pruning method, where proposal are chosen from a large set of random points, called livepoints. The bank is created by selecting a proposal from the set of livepoints and removing (killing) the livepoints too close from the proposal. This methods effectively prunes the original set of livepoints, to remove the random points that are too close from each other. Parameters ---------- minimum_match: float Minimum match between templates. tiling: tiling_handler Tiling handler that tiles the parameter space N_points: int Number of livepoints to cover the space with covering_fraction: float Fraction of livepoints to be killed before terminating the loop verbose: bool Whether to display the progress bar Returns ------- new_templates: :class:`~numpy:numpy.ndarray` shape: (N,D) - A set of templates generated by the placing algorithm """ #TODO: maybe here you can use the tiling KDTree for saving some useless computations? #i.e. you should add a tiling label to all the generated livepoints and use it somehow... assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]" assert isinstance(N_points, int) and N_points>0, "N_points should be a positive integer" MM = minimum_match dtype = np.float32 #better to downcast to single precision! There is a mild speedup there #FIXME: add here the sampling from flow option! livepoints, id_tile_livepoints = tiling.sample_from_tiling(N_points, dtype = dtype, tile_id = True, p_equal = False) #(N_points, D) if False: #sorting livepoint by their metric determinant... _, vols = tiling.compute_volume() v_list_livepoints = [np.linalg.det(tiling[i].metric) for i in id_tile_livepoints] id_sort = np.argsort(v_list_livepoints)[::-1] else: #random shuffling id_sort = np.random.permutation(N_points) livepoints = livepoints[id_sort, :] id_tile_livepoints = id_tile_livepoints[id_sort] det_list = [t_.det for t_ in tiling] det_livepoints = np.array([det_list[id_] for id_ in id_tile_livepoints]) del det_list #ordering the tile by volume in ascending order... _, vols = tiling.compute_volume() new_templates = [] bar_str = 'Loops on tiles ({}/{} livepoints killed | {} templates placed)' if verbose: it = tqdm(dummy_iterator(), desc = bar_str.format(N_points -len(livepoints), N_points, len(new_templates)), leave = True) else: it = dummy_iterator() for _ in it: #choosing livepoints in whatever order they are set id_point = 0 #id_point = np.random.randint(len(livepoints)) point = livepoints[id_point,:] id_ = id_tile_livepoints[id_point] if tiling.flow: metric = tiling.get_metric(point, flow = True) #using the flow if it is trained else: metric = tiling[id_].metric diff = livepoints - point #(N,D) #TODO: you may insert here a distance cutoff (like 4 or 10 in coordinate distance...): this should account for the very very large unphysical tails of the metric!! #measuring metric match between livepoints and proposal #Doing things with cholesky is faster but requires non degenerate matrix L_t = np.linalg.cholesky(metric).astype(dtype) #(D,D) #BLAS seems to be faster for larger matrices but slower for smaller ones... #Maybe put a threshold on the number of livepoints? diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t) dist = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be #match = 1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1) #(N,) ids_kill = np.where(dist < 1- MM)[0] #This variant kind of works although the way to go (probably) is to use normalizing flows to interpolate the metric #scaled_dist = dist * np.power(det_livepoints/np.linalg.det(metric), 1/tiling[0].D) #ids_kill = np.where(np.logical_and(dist < 1- MM, scaled_dist < 1- MM) )[0] #This operation is very slow! But maybe there is nothing else to do... livepoints = np.delete(livepoints, ids_kill, axis = 0) id_tile_livepoints = np.delete(id_tile_livepoints, ids_kill, axis = 0) det_livepoints = np.delete(det_livepoints, ids_kill, axis = 0) #this is very very subtle: if you don't allocate new memory with np.array, you won't decrease the reference to livepoints, which won't be deallocated. This is real real bad!! new_templates.append(np.array(point, dtype = np.float64)) del point #communication and exit condition if len(livepoints)<=(1-covering_fraction)*N_points: break if len(new_templates) %100 ==0 and verbose: it.set_description(bar_str.format(N_points -len(livepoints), N_points, len(new_templates)) ) new_templates = np.column_stack([new_templates]) #if len(livepoints)>0: new_templates = np.concatenate([new_templates, livepoints], axis =0) #adding the remaining livepoints return new_templates
[docs]def get_cube_corners(boundaries): """ Given the boundaries of an hyper-rectangle, it computes all the corners of it Parameters ---------- boundaries: :class:`~numpy:numpy.ndarray` shape: (2,D) - An array with the boundaries for the model. Lower limit is boundaries[0,:] while upper limits is boundaries[1,:] Returns ------- corners: :class:`~numpy:numpy.ndarray` shape: (N,D) - An array with the corners. Each row is a different corner """ corners = np.meshgrid(*boundaries.T) corners = [c.flatten() for c in corners] corners = np.column_stack(corners) return corners
[docs]def create_mesh(dist, tile, coarse_boundaries = None): """ Creates a mesh of points on an hypercube, given a metric. The points are approximately equally spaced with a distance ``dist``. Parameters ---------- dist: float Distance between templates tile: tuple An element of the ``tiling_handler`` object. It consists of a tuple ``(scipy.spatial.Rectangle, np.ndarray)`` coarse_boundaries: :class:`~numpy:numpy.ndarray` shape: (2,D) - An array with the coarse boundaries of the tiling. If given, each tile is checked to belong to the border of the tiling. If it's the case, some templates are added to cover the boundaries Returns ------- mesh: :class:`~numpy:numpy.ndarray` shape: (N,D) - A mesh of N templates that cover the tile """ #dist: float #metric: (D,D) #boundaries (2,D) D = tile[0].maxes.shape[0] #bound_list keeps the dimension over which the tile is a boundary in the larger space if D < 2: coarse_boundaries = None if coarse_boundaries is not None: up_bound_list = np.where( np.isclose(tile[0].maxes, coarse_boundaries[1,:], 1e-4, 0) )[0].tolist() #axis where there is an up bound low_bound_list = np.where( np.isclose(tile[0].mins, coarse_boundaries[0,:], 1e-4, 0) )[0].tolist() bound_list = [ (1, up_) for up_ in up_bound_list] bound_list.extend( [ (0, low_) for low_ in low_bound_list]) else: bound_list = [] #Computing cholesky decomposition of the metric metric = tile[1] L = np.linalg.cholesky(metric).T L_inv = np.linalg.inv(L) #computing boundaries and boundaries_prime boundaries = np.stack([tile[0].mins, tile[0].maxes], axis = 0) corners = get_cube_corners(boundaries)#[0,:], boundaries[1,:]) corners_prime = np.matmul(corners, L.T) center = (tile[0].mins+tile[0].maxes)/2. #(D,) #center center_prime = np.matmul(L, center) #(D,) #center_prime #computing the extrema of the boundaries (rectangle) boundaries_prime = np.array([np.amin(corners_prime, axis =0), np.amax(corners_prime, axis =0)]) #creating a mesh in the primed coordinates (centered around center_prime) mesh_prime = [] where_random = [] #list to keep track of the dimensions where templates should be drawn at random! for d in range(D): min_d, max_d = boundaries_prime[:,d] N = max(int((max_d-min_d)/dist), 1) #this tends to overcover... #grid_d = [np.linspace(min_d, max_d, N+1, endpoint = False)[1:]] #this imposes a constant distance but may undercover grid_d = [np.arange(center_prime[d], min_d, -dist)[1:][::-1], np.arange(center_prime[d], max_d, dist)] grid_d = np.concatenate(grid_d) if len(grid_d) <=1 and d >1: where_random.append(d) mesh_prime.append(grid_d) #creating the mesh in the primed space and inverting mesh_prime = np.meshgrid(*mesh_prime) mesh_prime = [g.flatten() for g in mesh_prime] mesh_prime = np.column_stack(mesh_prime) #(N,D) mesh = np.matmul(mesh_prime, L_inv.T) #we don't check the boundaries for the axis that will be drawn at random axis_ok = [i for i in range(D) if i not in where_random] ids_ok = np.logical_and(np.all(mesh[:,axis_ok] >= boundaries[0,axis_ok], axis =1), np.all(mesh[:,axis_ok] <= boundaries[1,axis_ok], axis = 1)) #(N,) mesh = mesh[ids_ok,:] #Whenever there is a single point in the grid, the templates along that dimension will be placed at random for id_random in where_random: mesh[:,id_random] =np.random.uniform(boundaries[0,id_random], boundaries[1,id_random], (mesh.shape[0], )) # center[id_random] # #warnings.warn('Random extraction for "non-important" dimensions disabled!') return mesh #### #adding the boundaries #### #Boundaries are added by creating a mesh in the D-1 plane of the tile boundary boundary_mesh = [] #up_down keeps track whether we are at the min (0) or max (1) value along the d-th dimension for up_down, d in bound_list: ids_not_d = [d_ for d_ in range(D) if d_ is not d] new_dist = dist*np.sqrt(D/(D-1)) #this the distance between templates that must be achieved in the low dimensional manifold #creating the input for the boundary tiling rect_proj = Rectangle( tile[0].mins[ids_not_d], tile[0].maxes[ids_not_d]) #projected rectangle metric_proj = metric - np.outer(metric[:,d], metric[:,d]) /metric[d,d] metric_proj = metric_proj[tuple(np.meshgrid(ids_not_d,ids_not_d))].T #projected metric on the rectangle new_coarse_boundaries = np.stack([rect_proj.mins, rect_proj.maxes], axis =0) #(2,D) #new_coarse_boundaries = None new_mesh_ = create_mesh(new_dist, (rect_proj, metric_proj), new_coarse_boundaries) #(N,D-1) #mesh on the D-1 plane boundary_mesh_ = np.zeros((new_mesh_.shape[0], D)) boundary_mesh_[:,ids_not_d] = new_mesh_ boundary_mesh_[:,d] = boundaries[up_down,d] boundary_mesh.extend(boundary_mesh_) if len(boundary_mesh)>0: boundary_mesh = np.array(boundary_mesh) mesh = np.concatenate([mesh,boundary_mesh], axis =0) return mesh
########################################################################################### #All the garbage here should be removed!! def points_in_hull(points, hull, tolerance=1e-12): #"Check if points (N,D) are in the hull" if points.ndim == 1: points = points[None,:] value_list = [np.einsum('ij,j->i', points, eq[:-1])+eq[-1] for eq in hull.equations] value_list = np.array(value_list).T #(N, N_eqs) return np.prod(value_list<= tolerance, axis = 1).astype(bool) #(N,) def all_line_hull_intersection(v, c, hull): #"Compute all the intersection between N_lines and a single hull" if c.ndim == 1: c = np.repeat(c[None,:], v.shape[0], axis =0) eq=hull.equations.T n,b=eq[:-1],eq[-1] #(N_faces, D), (N_faces,) den = np.matmul(v,n)+1e-18 #(N_lines, N_faces) num = np.matmul(c,n) #(N_lines, N_faces) alpha= -(b +num )/den #(N_lines, N_faces) #v (N_lines, D) res = c[:,None,:] + np.einsum('ij,ik->ijk', alpha,v) #(N_lines, N_faces, D) return res.reshape((res.shape[0]*res.shape[1], res.shape[2])) def sample_from_hull(hull, N_points): #"Sample N_points from a convex hull" dims = hull.points.shape[-1] del_obj = scipy.spatial.Delaunay(hull.points) #Delaunay triangulation obj deln = hull.points[del_obj.simplices] #(N_triangles, 3, dims) vols = np.abs(np.linalg.det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims) sample = np.random.choice(len(vols), size = N_points, p = vols / vols.sum()) #Decide where to sample (Gibbs sampling) samples = np.einsum('ijk, ij -> ik', deln[sample], scipy.stats.dirichlet.rvs([1]*(dims + 1), size = N_points)) if False: plt.figure() plt.triplot(hull.points[:,0], hull.points[:,1], del_obj.simplices) #plot delaneuy triangulation plt.scatter(*samples.T, s = 2) plt.show() return samples #@do_profile(follow=[]) def sample_from_hull_boundaries(hull, N_points, boundaries = None, max_iter = 1000): #"SamplesN_points from a convex hull. If boundaries are given, it will enforce them" dims = hull.points.shape[1] del_obj = scipy.spatial.Delaunay(hull.points) deln = hull.points[del_obj.simplices] vols = np.abs(np.linalg.det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims) samples_list = [] N_samples = 0 #100 is the max number of iterations, after them we break for i in range(max_iter): sample = np.random.choice(len(vols), size = N_points, p = vols / vols.sum()) print(sample, sample.shape, deln[sample].shape) samples = np.einsum('ijk, ij -> ik', deln[sample], scipy.stats.dirichlet.rvs([1]*(dims + 1), size = N_points)) if boundaries is not None: ids_ok = np.logical_and(np.all(samples > boundaries[0,:], axis =1), np.all(samples < boundaries[1,:], axis = 1)) #(N,) samples = samples[ids_ok,:] #enforcing boundaries on masses and spins samples = samples[np.where(samples[:,0]>=samples[:,1])[0],:] #enforcing mass cut if samples.shape[0]> 0: samples_list.append(samples) N_samples += samples.shape[0] if N_samples >= N_points: break if len(samples_list)>0: samples = np.concatenate(samples_list, axis =0) else: samples = None return samples def plot_hull(hull, points = None): #"Plot the hull and a bunch of additional points" plt.figure() for simplex in hull.simplices: plt.plot(hull.points[simplex, 0], hull.points[simplex, 1], 'g-') plt.scatter(*hull.points[:,:2].T, alpha = 0.1, c = 'y', marker = 'o') if points is not None: plt.scatter(*points[:,:2].T, alpha = 0.3, c = 'r', marker = 's') plt.plot([10,100],[10,100]) plt.show() return def get_pad_points_2d(N_grid, boundaries): #"Get N points padding the boundary box" m1, M1 = boundaries[:,0] m2, M2 = boundaries[:,1] m, M = min(m1,m2), max(M1, M2) #creating points in 2D new_points = [*np.column_stack([M1*1.5*np.ones((N_grid,)), np.linspace(m2, M2,N_grid)])] new_points = new_points + [*np.column_stack([ np.linspace(m1, M1, N_grid), m2*0.2*np.ones((N_grid,))])] new_points = new_points + [*np.column_stack([ np.linspace(m, M, N_grid), np.linspace(m,M, N_grid)*1.5])] new_points = new_points + [np.array([.1,.1])] #new_points keeps a 2D grid over the mass boundaries new_points_2D = np.array(new_points) return new_points_2D def get_pad_points(N_grid, boundaries): #"Get N points padding the boundary box" #FIXME: this function is shit m1, M1 = boundaries[:,0] m2, M2 = boundaries[:,1] m, M = min(m1,m2), max(M1, M2) #creating points in 2D new_points = [*np.column_stack([M1*1.52*np.ones((N_grid,)), np.linspace(m2, M2,N_grid)])] new_points = new_points + [*np.column_stack([ np.linspace(m1, M1, N_grid), m2*0.18*np.ones((N_grid,))])] new_points = new_points + [*np.column_stack([ np.linspace(m, M, N_grid), np.linspace(m,M, N_grid)*1.52])] new_points = new_points + [np.array([1,1])] #new_points keeps a 2D grid over the mass boundaries new_points_2D = np.array(new_points) #This is to add the rest: it's super super super super super ugly if boundaries.shape[1] > 2: new_points_list = [] #creating grids s1_grid = np.linspace(boundaries[0,2]*0.8, boundaries[1,2]*1.2, N_grid) if boundaries.shape[1] >3: s2_grid = np.linspace(boundaries[0,3]*0.8, boundaries[1,3]*1.2, N_grid) else: s2_grid = [0] if boundaries.shape[1] >4: s3_grid = np.linspace(boundaries[0,4]*0.8, boundaries[1,4]*1.2, N_grid) else: s3_grid = [0] if boundaries.shape[1] >5: s4_grid = np.linspace(boundaries[0,5]*0.8, boundaries[1,5]*1.2, N_grid) else: s4_grid = [0] if boundaries.shape[1] >6: s5_grid = np.linspace(boundaries[0,6]*0.8, boundaries[1,6]*1.2, N_grid) else: s5_grid = [0] #the super ugly way: there is a better way for s1 in s1_grid: for s2 in s2_grid: for s3 in s3_grid: for s4 in s4_grid: for s5 in s5_grid: temp = np.concatenate([new_points_2D, np.repeat([[s1,s2,s3,s4,s5]], new_points_2D.shape[0], axis =0) ], axis = 1) new_points_list.append(temp) new_points = np.concatenate(new_points_list, axis = 0) #(N,D) new_points = new_points[:,:boundaries.shape[1]] else: new_points = new_points_2D return new_points