Source code for mbank.placement

"""
mbank.placement
===============

A lot of placement methods, both for the tiling and the normalizing flow. Most of the methods are deprecated and are not guaranteed to work properly. The functions :func:`place_random_flow` are :func:`place_stochastically_flow` are guaranteed to work.
"""

import numpy as np
import scipy
from scipy.spatial import ConvexHull, Rectangle
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import itertools
import warnings
import sys

from .utils import dummy_iterator, avg_dist

#############DEBUG LINE PROFILING
try:
	from line_profiler import LineProfiler

	def do_profile(follow=[]):
		def inner(func):
			def profiled_func(*args, **kwargs):
				try:
					profiler = LineProfiler()
					profiler.add_function(func)
					for f in follow:
						profiler.add_function(f)
					profiler.enable_by_count()
					return func(*args, **kwargs)
				finally:
					profiler.print_stats()
			return profiled_func
		return inner
except:
	pass

[docs]def place_stochastically_flow(minimum_match, flow, metric_obj, boundaries_checker = None, empty_iterations = 200, seed_bank = None, dry_run = False, verbose = True):
	"""
	Place templates with a stochastic placing algorithm.
	It iteratively proposes a new template to add to the bank. The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``.
	It can start from a given set of templates.

	The match of a proposal is computed against all the templats that have been added.
	
	Parameters
	----------
		minimum_match: float
			Minimum match between templates.

		flow: flow.GW_flow
			Normalizing flow to sample template from

		metric_obj: handlers.cbc_metric
			A metric object to evaluate the metric at the livepoints
		
		boundaries_checker: callable
			A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied.
			A common usage would be:
			
			.. code-block:: python
			
				from mbank.parser import boundary_keeper
				bk = boundary_keeper(args)
				def boundaries_checker(theta):
					return bk(theta, args.variable_format)
		
		empty_iterations: int
			Number of consecutive templates that are not accepted before the placing algorithm is terminated
			
		seed_bank: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates that provides a first guess for the bank
		
		dry_run: bool
			Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement
		
		verbose: bool
			Whether to print the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates generated by the stochastic placing algorithm
	"""
	MM = minimum_match

	if boundaries_checker is None:
		boundaries_checker = lambda x: np.full(True, len(x))
	
	new_templates = flow.sample_within_boundaries(1, boundaries_checker)[0].numpy() if seed_bank is None else np.asarray(seed_bank)

	nothing_new, i, max_nothing_new = 0, 0, 0
	
	proposal_list, log_pdf_theta_list = [], []
	
	it = tqdm(dummy_iterator(), disable = not verbose)
	try:
		for _ in it:

			if verbose and i%100==0:
				it.set_description("Templates added {} ({}/{} empty iterations)".format(new_templates.shape[0], int(max_nothing_new), int(empty_iterations)))
			if nothing_new >= empty_iterations: break
			
			if len(proposal_list)==0:
				with torch.no_grad():
					proposal_list, _ = flow.sample_within_boundaries(1000, boundaries_checker)
					proposal_list = list(proposal_list.numpy())

			proposal = proposal_list.pop(0)[None,:]
				#FIXME: This part here is too expensive...
				# Maybe you can get an average metric (just with the relative magnitude of the eigenvalues) and
				# assign the determinant with the flow log_pdf. That would be crazy fast...
			metric = metric_obj.get_metric(proposal, metric_type = 'symphony') 
					
			diff = new_templates - proposal #(N_templates, D)
			
			max_match = np.max(1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1))
			#max_match = np.exp(-(1-max_match)) #do we need this?

			if (max_match < MM):
				new_templates = np.concatenate([new_templates, proposal], axis =0)
				nothing_new = 0
			else:
				nothing_new += 1
				max_nothing_new = max(max_nothing_new, nothing_new)
			i+=1
	except KeyboardInterrupt:
		pass
	
	del proposal_list
	
	if dry_run: return len(new_templates)
	
	return new_templates

[docs]def place_geometric_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker = None, covering_fraction = 0.9, qmc = True, dry_run = False, verbose = True):
	"""
	It computes the number of templates using the random algorithm :func:`place_random_flow`.
	It then places templates on a grid in the gaussian space or with quasi-Monte Carlo sampling and transforms it back to the normal space
	
	Parameters
	----------
	
		minimum_match: float
			Minimum match between templates.
		
		flow: flow.GW_flow
			Normalizing flow to sample template from

		metric_obj: handlers.cbc_metric
			A metric object to evaluate the metric at the livepoints
		
		n_livepoints: int
			Number of livepoints to compute the covering fraction with

		boundaries_checker: callable
			A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied.
			A common usage would be:
			
			.. code-block:: python
			
				from mbank.parser import boundary_keeper
				bk = boundary_keeper(args)
				def boundaries_checker(theta):
					return bk(theta, args.variable_format)
			
		covering_fraction: float
			Fraction of livepoints to be killed before terminating the loop

		qmc: bool
			Whether to sample the templates in the latent space with a quasi-MC algorithm. If False, templates will lie in a grid

		dry_run: bool
			Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement
		
		verbose: bool
			Whether to display the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`, int
			shape: (N,D) -
			A set of templates generated by the placing algorithm. If ``dry_run`` is set, only the number of templates will be returned
	
	"""
		#Computing the number of templates to place
	n_templates = place_random_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker, covering_fraction, dry_run = True, verbose = verbose)
	
	#TODO: estimate the fraction of the flow support covered by the region of interest. This will help you to decide how many templates to place in the grid
	
	D = flow.D
	
	if qmc:
		qmc_sampler = scipy.stats.qmc.MultivariateNormalQMC([0]*D)
		#print(2**int(np.round(np.log2(n_templates))), 2**int(np.log2(n_templates)), n_templates)
		n_templates = 2**int(np.log2(n_templates)+0.4)
		grid = torch.tensor(qmc_sampler.random(n_templates).astype(np.float32))
	else:
		N_points = [int(n_templates**(1/D))+1]
		
		for d in range(D-1, 0, -1):
			#N_points.append(max(int((n_templates/np.prod(N_points))**(1/d)),1)) # This will always underestimate the number of templates
			N_points.append(int((n_templates/np.prod(N_points))**(1/d))+1) # This will always overestimate the number of templates
		
		print(N_points, np.prod(N_points), n_templates)

		grid_1d = [scipy.special.erfinv(2*np.linspace(0,1, n, endpoint = True)[1:-1] - 1) for n in N_points]
		grid = []
		for p in itertools.product(*grid_1d):
			grid.append(p)
		grid = torch.tensor(np.array(grid).astype(np.float32))

	with torch.no_grad():
		new_templates, _ = flow._transform.inverse(grid)
		new_templates = new_templates.numpy()
		new_templates = new_templates[boundaries_checker(new_templates)]

	if dry_run:
		return len(new_templates)

	return new_templates


#@do_profile(follow=[])
[docs]def place_random_flow(minimum_match, flow, metric_obj, n_livepoints, boundaries_checker = None, covering_fraction = 0.9, dry_run = False, importance_sampling = False, metric_type = 'symphony', verbose = True):
	"""
	Draw templates from the flow. For each proposal, all the livepoints in the ellipse of constant ``minimum_match`` are killed. The iteration goes on until a fraction of ``covering_fraction`` of the space is covered.
	It follows `2202.09380 <https://arxiv.org/abs/2202.09380>`_
	
	Parameters
	----------
	
		minimum_match: float
			Minimum match between templates.
		
		flow: flow.GW_flow
			Normalizing flow to sample template from

		metric_obj: handlers.cbc_metric
			A metric object to evaluate the metric at the livepoints
		
		n_livepoints: int
			Number of livepoints to compute the covering fraction with

		boundaries_checker: callable
			A boolean function to check whether each input point is inside the boundaries. If `None`, no boundaries control will be applied.
			A common usage would be:
			
			.. code-block:: python
			
				from mbank.parser import boundary_keeper
				bk = boundary_keeper(args)
				def boundaries_checker(theta):
					return bk(theta, args.variable_format)
			
		covering_fraction: float
			Fraction of livepoints to be killed before terminating the loop

		dry_run: bool
			Whether to run the placement without actually storing the templates. It is useful for the purpose of bank size measurement
		
		importance_sampling: bool
			Whether to compute the covering fraction using importance sampling. Importance sampling gives a more accurate estimation of the covering fraction but at the cost of a higher variance in the number of templates.

		metric_type: str
			The method to compute the metric. Default 'symphony'

		verbose: bool
			Whether to display the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`, int
			shape: (N,D) -
			A set of templates generated by the placing algorithm. If ``dry_run`` is set, only the number of templates will be returned
	
	"""
	assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]"
	dtype = np.float32
	
	if boundaries_checker is None:
		boundaries_checker = lambda x: np.full(True, len(x))
	elif isinstance(boundaries_checker, np.ndarray):
		assert boundaries_checker.shape[0] == 2, "Wrong shape for the given boundary array!"
		boundaries = np.array(boundaries_checker)
		
		def boundaries_checker(theta):
			theta = np.atleast_2d(theta)
			return np.logical_and(np.all(theta > boundaries[0,:], axis =1), np.all(theta < boundaries[1,:], axis = 1))
	
		#Sampling livepoints
	livepoints, log_pdf = flow.sample_within_boundaries(n_livepoints, boundaries_checker)
	livepoints, log_pdf = livepoints.numpy(), log_pdf.numpy()
	assert sum(boundaries_checker(livepoints))==len(livepoints), "Something messed up with sample_within_boundaries"

		#Computing the metric for all the livepoints
	metric, p = [], []
	ids_to_remove = [] #Keeps the ids of the livepoints for which the metric computation fails
	
	for i, (l, l_pdf) in enumerate(zip(tqdm(livepoints, desc = 'Computing the metric for each livepoint', disable = not verbose), log_pdf)):
		metric_ = metric_obj.get_metric(l, metric_type = metric_type)
		if np.linalg.det(metric_)<0:
			ids_to_remove.append(i)
			continue
		l_pdf_true = np.log(np.abs(np.linalg.det(metric_)))*0.5
		metric.append(metric_)
			
		if importance_sampling:
			p_ = l_pdf_true-l_pdf-flow.constant.item()
			if p_>np.log(10):
				warnings.warn("One (or more) importance sampling weight exceeded a safe threshold and were reularized. Please, check whether the normalizing flow is accurate enough")
				#if verbose: print('\tRegularized livepoint: theta | weight ', l, p_)
			p_ = min(p_, np.log(10)) #FIXME: regularization: how to tune this number?
			p.append(np.exp(p_))

	if ids_to_remove:
		livepoints = np.delete(livepoints, ids_to_remove, axis = 0)

	metric_cholesky = np.linalg.cholesky(metric).astype(dtype) #(N, D,D)
	if importance_sampling:
		if verbose:
			print('Importance sampling weights (not normalized): max | min ', np.max(p), np.min(p))
			print('\tPercentiles: [99, 90, 50, 10, 1]', np.percentile(p, [99, 90, 50, 10, 1]))
		p = np.array(p)/np.sum(p)
		if verbose:
			print('Importance sampling weights: max | min | equal w ', np.max(p), np.min(p), 1/len(livepoints))
			print('\tPercentiles: [99, 90, 50, 10, 1]', np.percentile(p, [99, 90, 50, 10, 1]))
	
	new_templates = []
	N_tmplts = 0
	actual_covering_fraction = 0
	
	#if dry_run:
	#	from .utils import plot_tiles_templates
	#	plot_tiles_templates(livepoints[:,:4], 'logMq_s1xz', injections = livepoints[:,:4], inj_cmap = p, show = True)
	
	bar_str = '{} templates placed ({} % space covered)'
	it = tqdm(dummy_iterator(), desc = bar_str.format(0, 0), disable = not verbose, leave = True)
	
	for _ in it: 
		if actual_covering_fraction>=covering_fraction: break
		
			#Generating proposals
		with torch.no_grad():
			proposals = flow.sample(3000).numpy()
		proposals = proposals[boundaries_checker(proposals)]
		if len(proposals) ==0: continue

		ids_kill = []
		for i, (l, L_t) in enumerate(zip(livepoints, metric_cholesky)):
			diff = proposals - l #(N,D)
			diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t)
			dist_sq = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be
	
			if np.any(dist_sq < 1- minimum_match):
				ids_kill.append(i)

		if importance_sampling:
			actual_covering_fraction += np.sum(p[ids_kill]) #This computes covering fraction w importance sampling 
			p = np.delete(p, ids_kill, axis = 0)
		else:
			actual_covering_fraction += len(ids_kill)/n_livepoints #This computes covering fraction w/o importance sampling
		
		livepoints = np.delete(livepoints, ids_kill, axis = 0)
		metric_cholesky = np.delete(metric_cholesky, ids_kill, axis = 0)
		
		if not dry_run: new_templates.append(np.array(proposals, dtype = np.float64))
		N_tmplts += len(proposals)

		if verbose:
			it.set_description(bar_str.format(N_tmplts, round(100*actual_covering_fraction, 1)))


	#TODO: return the livepoints still alive: they give interesting info about where the bank sucks...
	
	if dry_run:
		#from .utils import plot_tiles_templates
		#plot_tiles_templates(livepoints[:,:4], 'logMq_s1xz', injections = livepoints[:,:4], inj_cmap = p, show = True)
		return N_tmplts
	
	new_templates = np.concatenate(new_templates, axis = 0)

	return new_templates



[docs]def place_stochastically_in_tile(minimum_match, tile):
	"""
	Place templates with a stochastic placing algorithm withing a given tile, by iteratively proposing a new template to add to the bank inside the given tile.
	The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``.
	
	
	Parameters
	----------
		minimum_match: float
			Minimum match between templates.
		
		tile: tuple
			An element of the ``tiling_handler`` object.
			It consists of a tuple ``(scipy.spatial.Rectangle, np.ndarray)``
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates generated by the stochastic placing algorithm within the given tile
	"""
	dist_sq = 1-minimum_match

		#initial template
	new_templates = np.random.uniform(tile.rectangle.mins, tile.rectangle.maxes, (1, tile.D)) #(1,D)
	
	nothing_new = 0
	while nothing_new < 300:
		proposal = np.random.uniform(tile.rectangle.mins, tile.rectangle.maxes, tile.D) #(D,)
		diff = new_templates - proposal

		min_dist = np.min(np.sum(np.multiply(diff, np.matmul(diff, tile.metric)), axis = -1))

		if min_dist > dist_sq:
			new_templates = np.concatenate([new_templates, proposal[None,:]], axis = 0)
			nothing_new = 0
		else:
			nothing_new += 1
		
	return new_templates

#@do_profile(follow=[])
[docs]def place_stochastically(minimum_match, tiling, empty_iterations = 200, seed_bank = None, verbose = True):
	"""
	Place templates with a stochastic placing algorithm.
	It iteratively proposes a new template to add to the bank. The proposal is accepted if the match of the proposal with the previously placed templates is smaller than ``minimum_match``. The iteration goes on until no template is found to have a distance smaller than the given threshold ``minimum_match``.
	It can start from a given set of templates.

	The match of a proposal is computed against all the templats that have been added.
	
	Parameters
	----------
		minimum_match: float
			Minimum match between templates.
		
		tiling: tiling_handler
			A tiling object to compute the match with
		
		empty_iterations: int
			Number of consecutive templates that are not accepted before the placing algorithm is terminated
			
		seed_bank: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates that provides a first guess for the bank
		
		verbose: bool
			Whether to print the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates generated by the stochastic placing algorithm
	"""
		#User communication stuff
	t_ = tqdm(dummy_iterator()) if verbose else dummy_iterator()

	MM = minimum_match

	if seed_bank is None:
		ran_id_ = np.random.choice(len(tiling))
		new_templates = np.random.uniform(tiling[ran_id_][0].mins, tiling[ran_id_][0].maxes, (1, len(tiling[ran_id_][0].maxes)))
	else:
		new_templates = np.asarray(seed_bank)

	nothing_new, i, max_nothing_new = 0, 0, 0
	
		#optimized version of the above... (not really helpful)
	if tiling.flow:
		with torch.no_grad():
			log_pdf_centers = tiling.flow.log_prob(tiling.get_centers().astype(np.float32)).numpy()
		proposal_list, log_pdf_theta_list = [], []
	
	try:
		for _ in t_:

			if verbose and i%100==0:t_.set_description("Templates added {} ({}/{} empty iterations)".format(new_templates.shape[0], int(max_nothing_new), int(empty_iterations)))
			if nothing_new >= empty_iterations: break
			
			if tiling.flow:
					#Unoptimized version - you need to make things in batches!
				#proposal = tiling.sample_from_flow(1)
				#metric = tiling.get_metric(proposal, flow = True) #using the flow if it is trained
				
					#optimized version of the above... (not really helpful)
				with torch.no_grad():
					if len(proposal_list)==0:
						proposal_list, log_pdf_theta_list = tiling.flow.sample_and_log_prob(1000)
						proposal_list, log_pdf_theta_list = list(proposal_list.numpy()), list(log_pdf_theta_list.numpy())

					proposal, log_pdf_theta = proposal_list.pop(0)[None,:], log_pdf_theta_list.pop(0)
						#checking if the proposal is inside the tiling
					if not tiling.is_inside(proposal)[0]: continue

					#proposal, log_pdf_theta = tiling.flow.sample_and_log_prob(1)
					#proposal = proposal.numpy()
					
						#FIXME: this kdtree may mess things up
					id_ = tiling.get_tile(proposal, kdtree = True)[0]
					metric = tiling[id_].metric
					
					factor = (2/metric.shape[0])*(log_pdf_theta-log_pdf_centers[id_])
					factor = np.exp(factor)
			
					metric = (metric.T*factor).T
			else:
				#FIXME: this thing is fucking slooooow! Maybe you should do a fancy buffer to parallelize this?
				proposal, tile_id = tiling.sample_from_tiling(1, tile_id = True)
				metric = tiling[tile_id[0]].metric

			diff = new_templates - proposal #(N_templates, D)
			
			
			max_match = np.max(1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1))
			#max_match = np.exp(-(1-max_match)) #do we need this?

			if (max_match < MM):
				new_templates = np.concatenate([new_templates, proposal], axis =0)
				nothing_new = 0
			else:
				nothing_new += 1
				max_nothing_new = max(max_nothing_new, nothing_new)
	
			i+=1
	except KeyboardInterrupt:
		pass
	
	if tiling.flow: del proposal_list, log_pdf_theta_list
	
	return new_templates


[docs]def place_iterative(match, t):
	"""
	Given a tile, it returns the templates within the tile obtained by iterative splitting.
	
	Parameters
	----------
	
		match: float
			Match defining the template volume
		
		t: tile
			The tile to cover with templates
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			Array with the generated templates 
	"""
	dist = avg_dist(match, t.D)
	is_ok = lambda tile_: tile_.N_templates(dist)<=1
	
	template_list = [(t, is_ok(t))]
	
	while True:
		if np.all([b for _, b in template_list]): break
		for t_ in template_list:
			if t_[1]: continue
			t_left, t_right = t_[0].split(None,2)
			extended_list = [(t_left, is_ok(t_left)), (t_right, is_ok(t_right))]
			template_list.remove(t_)
			template_list.extend(extended_list)
	new_templates = np.array([t_.center for t_, _ in template_list])
	
	return new_templates

#@do_profile(follow = [])
[docs]def place_random_tiling(minimum_match, tiling, N_livepoints, covering_fraction = 0.9, verbose = True):	
	"""
	Draw templates from the uniform distribution on the manifold. For each proposal, all the livepoints in the ellipse of constant ``minimum_match`` are killed. The iteration goes on until a fraction of ``covering_fraction`` of livepoints are alive.
	It follows `2202.09380 <https://arxiv.org/abs/2202.09380>`_
	
	Parameters
	----------
	
		minimum_match: float
			Minimum match between templates.
		
		tiling: tiling_handler
			Tiling handler that tiles the parameter space
		
		N_livepoints: int
			Number of livepoints to cover the space with
		
		covering_fraction: float
			Fraction of livepoints to be killed before terminating the loop
		
		verbose: bool
			Whether to display the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates generated by the placing algorithm
	
	"""
	assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]"
	MM = minimum_match
	dtype = np.float32
	
	livepoints = tiling.sample_from_tiling(N_livepoints,
				dtype = dtype, tile_id = False, p_equal = False)
	new_templates = []
	if tiling.flow:
		with torch.no_grad():
			log_pdf_centers = tiling.flow.log_prob(tiling.get_centers().astype(np.float32)).numpy()
	proposal_list, proposal_ids_, log_pdf_theta_list = [], [], []
	
	bar_str = '{} templates placed ({} % livepoints alive)'
	if verbose: it = tqdm(dummy_iterator(), desc = bar_str.format(len(new_templates), 100), leave = True)
	else: it = dummy_iterator()
	
	for _ in it: 
		if len(livepoints)<N_livepoints*(1-covering_fraction): break
		
			#Generating proposals
		if tiling.flow:
			with torch.no_grad():
				if len(proposal_list)==0:
					proposal_list, log_pdf_theta_list = tiling.flow.sample_and_log_prob(1000)
					proposal_list, log_pdf_theta_list = list(proposal_list.numpy()), list(log_pdf_theta_list.numpy())

				#FIXME: this thing is shit!! You should code all these operations inside the tiling!!
				proposal, log_pdf_theta = proposal_list.pop(0), log_pdf_theta_list.pop(0)
					#checking if the proposal is inside the tiling
				if not tiling.is_inside(proposal)[0]: continue

				id_ = tiling.get_tile(proposal, kdtree = True)[0]
				metric = tiling[id_].metric
					
				factor = (2/metric.shape[0])*(log_pdf_theta-log_pdf_centers[id_])
				factor = np.exp(factor)
			
				metric = (metric.T*factor).T
		else:
			if len(proposal_list)==0:
				proposal_list, proposal_ids_ = tiling.sample_from_tiling(1000,
						dtype = dtype, tile_id = True, p_equal = False)
				proposal_list = list(proposal_list)
				proposal_ids_ = list(proposal_ids_)
			proposal, id_ = proposal_list.pop(0), proposal_ids_.pop(0)
			metric = tiling[id_].metric
		
		diff = livepoints - proposal #(N,D)
		L_t = np.linalg.cholesky(metric).astype(dtype) #(D,D)
		diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t)
		dist = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be
	
		ids_kill = np.where(dist < 1- MM)[0]

		livepoints = np.delete(livepoints, ids_kill, axis = 0)

		new_templates.append(np.array(proposal, dtype = np.float64))
		if len(new_templates) %100 ==0 and verbose:
			it.set_description(bar_str.format(len(new_templates), round(100*len(livepoints)/N_livepoints, 1)))


	new_templates = np.array(new_templates)
	
	return new_templates

#@do_profile(follow=[])
[docs]def place_pruning(minimum_match, tiling, N_points, covering_fraction = 0.9, verbose = True):
	"""
	Given a tiling object, it covers the volume with points and covers them with templates.
	It uses a pruning method, where proposal are chosen from a large set of random points, called livepoints. The bank is created by selecting a proposal from the set of livepoints and removing (killing) the livepoints too close from the proposal. This methods effectively prunes the original set of livepoints, to remove the random points that are too close from each other.
	
	Parameters
	----------
	
		minimum_match: float
			Minimum match between templates.
		
		tiling: tiling_handler
			Tiling handler that tiles the parameter space
		
		N_points: int
			Number of livepoints to cover the space with
		
		covering_fraction: float
			Fraction of livepoints to be killed before terminating the loop
		
		verbose: bool
			Whether to display the progress bar
	
	Returns
	-------
		new_templates: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			A set of templates generated by the placing algorithm
	"""
	#TODO: maybe here you can use the tiling KDTree for saving some useless computations?
	#i.e. you should add a tiling label to all the generated livepoints and use it somehow...
	
	assert 0<covering_fraction <=1., "The covering_fraction should be a fraction in (0,1]"
	assert isinstance(N_points, int) and N_points>0, "N_points should be a positive integer"
	
	MM = minimum_match
	dtype = np.float32 #better to downcast to single precision! There is a mild speedup there
	
		#FIXME: add here the sampling from flow option!
	livepoints, id_tile_livepoints = tiling.sample_from_tiling(N_points,
				dtype = dtype, tile_id = True, p_equal = False) #(N_points, D)
	
	if False:
		#sorting livepoint by their metric determinant...
		_, vols = tiling.compute_volume()
		v_list_livepoints = [np.linalg.det(tiling[i].metric) for i in id_tile_livepoints]
		id_sort = np.argsort(v_list_livepoints)[::-1]
	else:
		#random shuffling
		id_sort = np.random.permutation(N_points)

	livepoints = livepoints[id_sort, :]
	id_tile_livepoints = id_tile_livepoints[id_sort]
	det_list = [t_.det for t_ in tiling]
	det_livepoints = np.array([det_list[id_] for id_ in id_tile_livepoints])
	del det_list
	
		#ordering the tile by volume in ascending order...
	_, vols = tiling.compute_volume()

	new_templates = []
	
	bar_str = 'Loops on tiles ({}/{} livepoints killed | {} templates placed)'
	if verbose: it = tqdm(dummy_iterator(), desc = bar_str.format(N_points -len(livepoints), N_points, len(new_templates)), leave = True)
	else: it = dummy_iterator()
	
	for _ in it:
			#choosing livepoints in whatever order they are set
		id_point = 0
		#id_point = np.random.randint(len(livepoints))
		point = livepoints[id_point,:]
		id_ = id_tile_livepoints[id_point]
		
		if tiling.flow: metric = tiling.get_metric(point, flow = True) #using the flow if it is trained
		else: metric = tiling[id_].metric
		
		diff = livepoints - point #(N,D)
		#TODO: you may insert here a distance cutoff (like 4 or 10 in coordinate distance...): this should account for the very very large unphysical tails of the metric!!
		
				#measuring metric match between livepoints and proposal
				#Doing things with cholesky is faster but requires non degenerate matrix
		L_t = np.linalg.cholesky(metric).astype(dtype) #(D,D)
			#BLAS seems to be faster for larger matrices but slower for smaller ones...
			#Maybe put a threshold on the number of livepoints?
		diff_prime = scipy.linalg.blas.sgemm(1, diff, L_t)
		dist = np.sum(np.square(diff_prime), axis =1) #(N,) #This is the bottleneck of the computation, as it should be
		
		#match = 1 - np.sum(np.multiply(diff, np.matmul(diff, metric)), axis = -1) #(N,)
	
		ids_kill = np.where(dist < 1- MM)[0]
			#This variant kind of works although the way to go (probably) is to use normalizing flows to interpolate the metric
		#scaled_dist = dist * np.power(det_livepoints/np.linalg.det(metric), 1/tiling[0].D)		
		#ids_kill = np.where(np.logical_and(dist < 1- MM, scaled_dist < 1- MM) )[0]

			#This operation is very slow! But maybe there is nothing else to do...
		livepoints = np.delete(livepoints, ids_kill, axis = 0)
		id_tile_livepoints = np.delete(id_tile_livepoints, ids_kill, axis = 0)
		det_livepoints = np.delete(det_livepoints, ids_kill, axis = 0)
		
				#this is very very subtle: if you don't allocate new memory with np.array, you won't decrease the reference to livepoints, which won't be deallocated. This is real real bad!!
		new_templates.append(np.array(point, dtype = np.float64))
		del point
			
			#communication and exit condition
		if len(livepoints)<=(1-covering_fraction)*N_points: break
		if len(new_templates) %100 ==0 and verbose: it.set_description(bar_str.format(N_points -len(livepoints), N_points, len(new_templates)) )
	
	new_templates = np.column_stack([new_templates])
	#if len(livepoints)>0: new_templates = np.concatenate([new_templates, livepoints], axis =0) #adding the remaining livepoints
	
	return new_templates

[docs]def get_cube_corners(boundaries):
	"""
	Given the boundaries of an hyper-rectangle, it computes all the corners of it
	
	Parameters
	----------
		boundaries: :class:`~numpy:numpy.ndarray`
			shape: (2,D) -
			An array with the boundaries for the model. Lower limit is boundaries[0,:] while upper limits is boundaries[1,:]
	
	Returns
	-------
		corners: :class:`~numpy:numpy.ndarray`
			shape: (N,D) -
			An array with the corners. Each row is a different corner
	
	"""
	corners = np.meshgrid(*boundaries.T)
	corners = [c.flatten() for c in corners]
	corners = np.column_stack(corners)
	return corners

[docs]def create_mesh(dist, tile, coarse_boundaries = None):
	"""
	Creates a mesh of points on an hypercube, given a metric.
	The points are approximately equally spaced with a distance ``dist``.
	
	Parameters
	----------
		dist: float
			Distance between templates
		
		tile: tuple
			An element of the ``tiling_handler`` object.
			It consists of a tuple ``(scipy.spatial.Rectangle, np.ndarray)``
	
		coarse_boundaries: :class:`~numpy:numpy.ndarray`
			shape: (2,D) -
			An array with the coarse boundaries of the tiling.
			If given, each tile is checked to belong to the border of the tiling. If it's the case, some templates are added to cover the boundaries

	Returns
	-------
		mesh: :class:`~numpy:numpy.ndarray`
			shape: (N,D) - 
			A mesh of N templates that cover the tile
	"""
	#dist: float
	#metric: (D,D)
	#boundaries (2,D)
	D = tile[0].maxes.shape[0]
	
		#bound_list keeps the dimension over which the tile is a boundary in the larger space
	if D < 2: coarse_boundaries = None
	if coarse_boundaries is not None:
		up_bound_list = np.where( np.isclose(tile[0].maxes, coarse_boundaries[1,:], 1e-4, 0) )[0].tolist() #axis where there is an up bound
		low_bound_list = np.where( np.isclose(tile[0].mins, coarse_boundaries[0,:], 1e-4, 0) )[0].tolist()
		bound_list = [ (1, up_) for up_ in up_bound_list]
		bound_list.extend( [ (0, low_) for low_ in low_bound_list])
	else: bound_list = []
	
		#Computing cholesky decomposition of the metric	
	metric = tile[1]
	L = np.linalg.cholesky(metric).T
	L_inv = np.linalg.inv(L)
	
		#computing boundaries and boundaries_prime
	boundaries = np.stack([tile[0].mins, tile[0].maxes], axis = 0)	
	corners = get_cube_corners(boundaries)#[0,:], boundaries[1,:])
	corners_prime = np.matmul(corners, L.T)
	center = (tile[0].mins+tile[0].maxes)/2. #(D,) #center
	center_prime = np.matmul(L, center) #(D,) #center_prime
	
		#computing the extrema of the boundaries (rectangle)
	boundaries_prime = np.array([np.amin(corners_prime, axis =0), np.amax(corners_prime, axis =0)])
	
		#creating a mesh in the primed coordinates (centered around center_prime)
	mesh_prime = []
	where_random = [] #list to keep track of the dimensions where templates should be drawn at random!
	
	for d in range(D):
		min_d, max_d = boundaries_prime[:,d]
		
		N = max(int((max_d-min_d)/dist), 1)
			#this tends to overcover...
		#grid_d = [np.linspace(min_d, max_d, N+1, endpoint = False)[1:]] 
			 #this imposes a constant distance but may undercover
		grid_d = [np.arange(center_prime[d], min_d, -dist)[1:][::-1], np.arange(center_prime[d], max_d, dist)]

		grid_d = np.concatenate(grid_d)
		
		if len(grid_d) <=1 and d >1: where_random.append(d)
		
		mesh_prime.append(grid_d)
		
		#creating the mesh in the primed space and inverting
	mesh_prime = np.meshgrid(*mesh_prime)
	mesh_prime = [g.flatten() for g in mesh_prime]
	mesh_prime = np.column_stack(mesh_prime) #(N,D)
	
	mesh = np.matmul(mesh_prime, L_inv.T)

		#we don't check the boundaries for the axis that will be drawn at random
	axis_ok = [i for i in range(D) if i not in where_random]
	ids_ok = np.logical_and(np.all(mesh[:,axis_ok] >= boundaries[0,axis_ok], axis =1), np.all(mesh[:,axis_ok] <= boundaries[1,axis_ok], axis = 1)) #(N,)
	mesh = mesh[ids_ok,:]

	
		#Whenever there is a single point in the grid, the templates along that dimension will be placed at random
	for id_random in where_random:
		mesh[:,id_random] =np.random.uniform(boundaries[0,id_random], boundaries[1,id_random], (mesh.shape[0], )) # center[id_random] #
	#warnings.warn('Random extraction for "non-important" dimensions disabled!')
	return mesh
		####
		#adding the boundaries
		####
		
		#Boundaries are added by creating a mesh in the D-1 plane of the tile boundary
	boundary_mesh = []
		#up_down keeps track whether we are at the min (0) or max (1) value along the d-th dimension
	for up_down, d in bound_list:
		ids_not_d = [d_ for d_ in range(D) if d_ is not d]
		new_dist = dist*np.sqrt(D/(D-1)) #this the distance between templates that must be achieved in the low dimensional manifold
		
			#creating the input for the boundary tiling
		rect_proj = Rectangle( tile[0].mins[ids_not_d], tile[0].maxes[ids_not_d]) #projected rectangle
		metric_proj = metric - np.outer(metric[:,d], metric[:,d]) /metric[d,d]
		metric_proj = metric_proj[tuple(np.meshgrid(ids_not_d,ids_not_d))].T #projected metric on the rectangle
		
		new_coarse_boundaries = np.stack([rect_proj.mins, rect_proj.maxes], axis =0) #(2,D)
		#new_coarse_boundaries = None
		new_mesh_ = create_mesh(new_dist, (rect_proj, metric_proj), new_coarse_boundaries) #(N,D-1) #mesh on the D-1 plane
		boundary_mesh_ = np.zeros((new_mesh_.shape[0], D))
		boundary_mesh_[:,ids_not_d] = new_mesh_
		boundary_mesh_[:,d] = boundaries[up_down,d]
		
		boundary_mesh.extend(boundary_mesh_)
		
	if len(boundary_mesh)>0:
		boundary_mesh = np.array(boundary_mesh)
		mesh = np.concatenate([mesh,boundary_mesh], axis =0)
	
	return mesh

###########################################################################################

#All the garbage here should be removed!!

def points_in_hull(points, hull, tolerance=1e-12):
	#"Check if points (N,D) are in the hull"
	if points.ndim == 1:
		points = points[None,:]
	
	value_list = [np.einsum('ij,j->i', points, eq[:-1])+eq[-1] for eq in hull.equations]
	value_list = np.array(value_list).T #(N, N_eqs)
	
	return np.prod(value_list<= tolerance, axis = 1).astype(bool) #(N,)

def all_line_hull_intersection(v, c, hull):
	#"Compute all the intersection between N_lines and a single hull"
	if c.ndim == 1:
		c = np.repeat(c[None,:], v.shape[0], axis =0)

	eq=hull.equations.T
	n,b=eq[:-1],eq[-1] #(N_faces, D), (N_faces,)
	
	den = np.matmul(v,n)+1e-18 #(N_lines, N_faces)
	num = np.matmul(c,n) #(N_lines, N_faces)
	
	alpha= -(b +num )/den #(N_lines, N_faces)

		#v (N_lines, D)
	res = c[:,None,:] + np.einsum('ij,ik->ijk', alpha,v) #(N_lines, N_faces, D)

	return res.reshape((res.shape[0]*res.shape[1], res.shape[2]))

def sample_from_hull(hull, N_points):
	#"Sample N_points from a convex hull"
	dims = hull.points.shape[-1]
	del_obj = scipy.spatial.Delaunay(hull.points) #Delaunay triangulation obj
	deln = hull.points[del_obj.simplices] #(N_triangles, 3, dims)
	vols = np.abs(np.linalg.det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)	
	sample = np.random.choice(len(vols), size = N_points, p = vols / vols.sum()) #Decide where to sample (Gibbs sampling)
	samples = np.einsum('ijk, ij -> ik', deln[sample], scipy.stats.dirichlet.rvs([1]*(dims + 1), size = N_points))

	if False:
		plt.figure()
		plt.triplot(hull.points[:,0], hull.points[:,1], del_obj.simplices) #plot delaneuy triangulation
		plt.scatter(*samples.T, s = 2)
		plt.show()
	
	return samples

#@do_profile(follow=[])
def sample_from_hull_boundaries(hull, N_points, boundaries = None, max_iter = 1000):
	#"SamplesN_points from a convex hull. If boundaries are given, it will enforce them"
	dims = hull.points.shape[1]
	del_obj = scipy.spatial.Delaunay(hull.points)
	deln = hull.points[del_obj.simplices]
	vols = np.abs(np.linalg.det(deln[:, :dims, :] - deln[:, dims:, :])) / np.math.factorial(dims)
	
	samples_list = []
	N_samples = 0
	
		#100 is the max number of iterations, after them we break
	for i in range(max_iter):
		sample = np.random.choice(len(vols), size = N_points, p = vols / vols.sum())
		print(sample, sample.shape,  deln[sample].shape)
		samples = np.einsum('ijk, ij -> ik', deln[sample], scipy.stats.dirichlet.rvs([1]*(dims + 1), size = N_points))
	
		if boundaries is not None:
			ids_ok = np.logical_and(np.all(samples > boundaries[0,:], axis =1), np.all(samples < boundaries[1,:], axis = 1)) #(N,)
			samples = samples[ids_ok,:]	#enforcing boundaries on masses and spins
			samples = samples[np.where(samples[:,0]>=samples[:,1])[0],:] #enforcing mass cut

		if samples.shape[0]> 0:
			samples_list.append(samples)
			N_samples += samples.shape[0]

		if N_samples >= N_points: break
	
	if len(samples_list)>0:
		samples = np.concatenate(samples_list, axis =0)
	else:
		samples = None

	return samples

def plot_hull(hull, points = None):
	#"Plot the hull and a bunch of additional points"
	plt.figure()
	for simplex in hull.simplices:
		plt.plot(hull.points[simplex, 0], hull.points[simplex, 1], 'g-')
	plt.scatter(*hull.points[:,:2].T, alpha = 0.1, c = 'y', marker = 'o')
	if points is not None:
		plt.scatter(*points[:,:2].T, alpha = 0.3, c = 'r', marker = 's')
	plt.plot([10,100],[10,100])
	plt.show()
	return

def get_pad_points_2d(N_grid, boundaries):
	#"Get N points padding the boundary box"
	m1, M1 = boundaries[:,0]
	m2, M2 = boundaries[:,1]
	m, M = min(m1,m2), max(M1, M2)

		#creating points in 2D
	new_points = [*np.column_stack([M1*1.5*np.ones((N_grid,)), np.linspace(m2, M2,N_grid)])]
	new_points = new_points + [*np.column_stack([ np.linspace(m1, M1, N_grid), m2*0.2*np.ones((N_grid,))])]
	new_points = new_points + [*np.column_stack([ np.linspace(m, M, N_grid), np.linspace(m,M, N_grid)*1.5])]
	new_points = new_points + [np.array([.1,.1])]
		#new_points keeps a 2D grid over the mass boundaries
	new_points_2D = np.array(new_points)

	return new_points_2D

def get_pad_points(N_grid, boundaries):
	#"Get N points padding the boundary box"
	#FIXME: this function is shit
	m1, M1 = boundaries[:,0]
	m2, M2 = boundaries[:,1]
	m, M = min(m1,m2), max(M1, M2)

		#creating points in 2D
	new_points = [*np.column_stack([M1*1.52*np.ones((N_grid,)), np.linspace(m2, M2,N_grid)])]
	new_points = new_points + [*np.column_stack([ np.linspace(m1, M1, N_grid), m2*0.18*np.ones((N_grid,))])]
	new_points = new_points + [*np.column_stack([ np.linspace(m, M, N_grid), np.linspace(m,M, N_grid)*1.52])]
	new_points = new_points + [np.array([1,1])]
		#new_points keeps a 2D grid over the mass boundaries
	new_points_2D = np.array(new_points)
	
		#This is to add the rest: it's super super super super super ugly
	if boundaries.shape[1] > 2:
		new_points_list = []

			#creating grids
		s1_grid = 							 np.linspace(boundaries[0,2]*0.8, boundaries[1,2]*1.2, N_grid)
		if boundaries.shape[1] >3: s2_grid = np.linspace(boundaries[0,3]*0.8, boundaries[1,3]*1.2, N_grid)
		else: s2_grid = [0]
		if boundaries.shape[1] >4: s3_grid = np.linspace(boundaries[0,4]*0.8, boundaries[1,4]*1.2, N_grid)
		else: s3_grid = [0]
		if boundaries.shape[1] >5: s4_grid = np.linspace(boundaries[0,5]*0.8, boundaries[1,5]*1.2, N_grid)
		else: s4_grid = [0]
		if boundaries.shape[1] >6: s5_grid = np.linspace(boundaries[0,6]*0.8, boundaries[1,6]*1.2, N_grid)
		else: s5_grid = [0]
	
			#the super ugly way: there is a better way
		for s1 in s1_grid:
			for s2 in s2_grid:
				for s3 in s3_grid:
					for s4 in s4_grid:
						for s5 in s5_grid:
							temp = np.concatenate([new_points_2D, np.repeat([[s1,s2,s3,s4,s5]], new_points_2D.shape[0], axis =0) ], axis = 1)
							new_points_list.append(temp)
		new_points = np.concatenate(new_points_list, axis = 0) #(N,D)
		new_points = new_points[:,:boundaries.shape[1]]
	else:
		new_points = new_points_2D
				

	return new_points