Source code for mbank.parser

"""
mbank.parser
============

Gathers group of options useful for the many executables that make mbank handy
"""

import configparser
import numpy as np
import types
import os
from .flow.flowmodel import STD_GW_Flow
from .bank import cbc_bank
from .handlers import variable_handler
from .utils import get_boundaries_from_ranges
import warnings
import json
import sys

####################################################################################################################
#Parser stuff

def int_tuple_type(strings): #type for the grid-size parser argument
	strings = strings.replace("(", "").replace(")", "")
	mapped_int = map(int, strings.split(","))
	return tuple(mapped_int)

[docs]def updates_args_from_ini(ini_file, args, parser):
	"""	
	Updates the arguments of Namespace args according to the given `ini_file`.

	Parameters
	----------
	
	ini_file: str
		Filename of the ini file to load. It must readable by :class:`configparser.ConfigParser`
			
	args: argparse.Namespace
		A parser namespace object to be updated
			
	parser: argparse.ArgumentParser
		A parser object (compatibile with the given namespace)
	
	Returns
	-------
		args: argparse.Namespace
			Updated parser namespace object
	"""
	if not os.path.exists(ini_file):
		raise FileNotFoundError("The given ini file '{}' doesn't exist".format(ini_file))
		
		#reading the ini file
	config = configparser.ConfigParser()
	config.read(ini_file)
	assert len(config.sections()) ==1, "The ini file must have only one section"
	
		#casting to a dict and adding name entry
		#in principle this is not required, but makes things more handy
	ini_info = dict(config[config.sections()[0]])
	ini_info['run-name'] = config.sections()[0]

		#formatting the ini-file args
	args_to_read = []
	for k, v in ini_info.items():
		if v.lower() != 'false': #if it's a bool var, it is always store action
				#transforming 'pi' into an actual number and evaluating the expression
			if (k.find('range')>=0 or k.find('fixed-sky-loc-polarization')>=0) and v.find('pi')>=0:
				v = v.replace('pi', 'np.pi')
				v = ' '.join([str(eval(v_)) for v_ in v.split(' ')])
			args_to_read.extend('--{} {}'.format(k,v).split(' '))

	#args, _ = parser.parse_known_args(args_to_read, namespace = args) #this will update the existing namespace with the new values...
	
		#adding the new args to the namespace (if the values are not the default)
	new_data, _ = parser.parse_known_args(args_to_read, namespace = None)
	for k, v in vars(new_data).items():
		# set arguments in the args if they haven’t been set yet (i.e. they are not their default value)
		if getattr(args, k, None) == parser.get_default(k):
			setattr(args, k, v)
	return args

[docs]def add_metric_options(parser):
	"""
	Adds to the parser a set of arguments defining the metric
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--variable-format", required = False,
		help="Choose which variables to include in the bank. Valid formats are those of `mbank.handlers.variable_format`")
	parser.add_argument(
		"--psd",  required = False,
		help="The input file for the PSD: it can be either a txt either a ligolw xml file")
	parser.add_argument(
		"--asd",  default = False, action='store_true',
		help="Whether the input file has an ASD (sqrt of the PSD)")
	parser.add_argument(
		"--ifo", default = 'L1', type=str, choices = ['L1', 'H1', 'V1'],
		help="Interferometer name: it can be L1, H1, V1. This is a field for the xml files for the PSD and the bank")
	parser.add_argument(
		"--f-min",  default = 10., type=float,
		help="Minium frequency for the scalar product")
	parser.add_argument(
		"--f-max",  default = 1024., type=float,
		help="Maximum frequency for the scalar product")
	parser.add_argument(
		"--df",  default = None, type=float,
		help="Spacing of the frequency grid where the PSD is evaluated")
	parser.add_argument(
		"--approximant", default = 'IMRPhenomPv2',
		help="LAL approximant for the bank generation")
	parser.add_argument(
		"--metric-type", default = 'symphony', type = str, choices = ['hessian', 'parabolic_fit_hessian', 'symphony'],
		help="Method to use to compute the metric.")


[docs]def add_flow_options(parser):
	"""
	Adds to the parser a set of arguments relevant to the training of a normalizing flow
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--flow-file", default = 'flow.zip', type = str,
		help="File where the normalizing flow is saved/loaded (in zip format).")
	parser.add_argument(
		"--loss-function", default = 'll_mse', type = str, choices = STD_GW_Flow(1, 1, 1).available_losses,
		help="Method to use to compute the metric.")
	parser.add_argument(
		"--n-layers", default = 2, type = int,
		help="Number of layers for the flow model to train")
	parser.add_argument(
		"--hidden-features", default = 20, nargs = '+', type = int,
		help="Number of hidden features for the masked autoregressive flow to train.")
	parser.add_argument(
		"--n-epochs", default = 10000, type = int,
		help="Number of training epochs for the flow")
	parser.add_argument(
		"--learning-rate", default = 1e-3, type = float,
		help="Learning rate for the flow")
	parser.add_argument(
		"--train-fraction", default = 0.85, type = float,
		help="Fraction of the dataset to use for training")
	parser.add_argument(
		"--batch-size", default = 10000, type = int,
		help="Batch size for the training")
	parser.add_argument(
		"--min-delta", default = 0., type = float,
		help="Parameter for early stopping. If the validation loss doesn't decrease of more than min-delta for --patience times, the training will stop")
	parser.add_argument(
		"--patience", default = 5, type = int,
		help="Patience for early stopping. If the validation loss does not decrease enough for patience times, the training will stop")
	parser.add_argument(
		"--load-flow", default = False, action = 'store_true',
		help="Whether to load the flow from file (useful for plotting purposes)")

[docs]def add_general_options(parser):
	"""
	Adds to the parser some general options
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--plot", action='store_true',
		help="Whether to make some plots. They will be store in run-dir")
	parser.add_argument(
		"--show", action='store_true',
		help="Whether to show the plots.")
	parser.add_argument(
		"--verbose", default = False, action = 'store_true',
		help="Whether to be verbose")
	parser.add_argument(
		"--run-dir", default = None,
		help="Output directory in which the outputs will be saved. If default is used, the run name will be appended.")
	parser.add_argument(
		"--run-name", default = 'flow_training',
		help="Name for the current run: it will set the name of some outputs")

[docs]def add_range_options(parser):
	"""
	Adds to the parser a set of arguments describing ranges on the physical quantities of a BBH
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
			#ranges for physical parameters
	parser.add_argument(
		"--m1-range", default = None, type=float, nargs = 2,
		help="Range values for the mass 1 (in solar masses)")
	parser.add_argument(
		"--m2-range", default = None, type=float, nargs = 2,
		help="Range values for the mass 2 (in solar masses)")
	parser.add_argument(
		"--mtot-range", default = None, type=float, nargs = 2,
		help="Range values for the total masses (in solar masses).")
	parser.add_argument(
		"--q-range", default = None, type=float, nargs = 2,
		help="Range values for the mass ratio.")
	parser.add_argument(
		"--mc-range", default = None, type=float, nargs = 2,
		help="Range values for the total masses (in solar masses).")
	parser.add_argument(
		"--eta-range", default = None, type=float, nargs = 2,
		help="Range values for the mass ratio.")
	parser.add_argument(
		"--s1-range", default = [-0.99,0.99], type=float, nargs = 2,
		help="Range values for magnitude of spin 1 (if applicable)")
	parser.add_argument(
		"--s2-range", default = [-0.99,0.99], type=float, nargs = 2,
		help="Range values for magnitude of spin 1 (if applicable)")
	parser.add_argument(
		"--chi-range", default = [-0.99,0.99], type=float, nargs = 2,
		help="Range values for effective spin parameter (if applicable)")
	parser.add_argument(
		"--theta-range", default = [-np.pi, np.pi], type=float, nargs = 2,
		help="Range values for theta angles of spins (if applicable)")
	parser.add_argument(
		"--phi-range", default = [-np.pi/2, np.pi/2], type=float, nargs = 2,
		help="Range values for phi angles of spins (if applicable)")
	parser.add_argument(
		"--e-range", default = [0., 0.5], type=float, nargs = 2,
		help="Range values for the eccentricity (if applicable)")
	parser.add_argument(
		"--meanano-range", default = [0., 1], type=float, nargs = 2,
		help="Range values for the mean anomaly (if applicable). TODO: find a nice default...")
	parser.add_argument(
		"--iota-range", default = [0., np.pi], type=float, nargs = 2,
		help="Range values for iota (if applicable)")
	parser.add_argument(
		"--ref-phase-range", default = [-np.pi, np.pi], type=float, nargs = 2,
		help="Range values for reference phase (if applicable)")

[docs]def add_tiling_generation_options(parser):
	"""
	Adds to the parser a set of arguments relevant to the generation of a tiling
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--grid-size", default = None, type=int_tuple_type,
		help="Number of grid points for each dimension. The number of grid must match the number extra dimensions. If None, the grid size will be a set of ones")
	parser.add_argument(
		"--tile-tolerance", default = 0.1, type = float,
		help="Maximum tolerated variation of the relative difference of the metric determinant between parent and child in the iterative splitting procedure")
	parser.add_argument(
		"--max-depth", default = 6, type = int,
		help="Maximum number of iterative splitting before terminating.")

[docs]def add_template_placement_options(parser):
	"""
	Adds to the parser a set of arguments relevant to template placing
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--mm", required = False, type = float, default = None,
		help="Minimum match for the bank (a.k.a. average distance between templates)")
	parser.add_argument(
		"--placing-method", default = 'random', type = str, choices = cbc_bank('Mq_nonspinning').placing_methods,
		help="Which placing method to use for each tile")
	parser.add_argument(
		"--n-livepoints", default = 2000, type = int,
		help="Parameter to control the number of livepoints to use in the `random` and `pruning` placing method. For `random` (or related), it represents the number of livepoints to use for the estimation of the coverage fraction. For `pruning`, it amounts to the the ratio between the number of livepoints and the number of templates placed by ``uniform`` placing method.")
	parser.add_argument(
		"--covering-fraction", default = 0.9, type = float,
		help="Parameter to control the fraction of livepoints dead before terminating the bank generation with the `random` and `pruning` placing method. The higher the threshold, the higher the nuber of templates in the final bank.")
	parser.add_argument(
		"--empty-iterations", default = 100, type = float,
		help="Number of consecutive rejected proposal after which the `stochastic` placing method stops.")

[docs]def add_injections_options(parser):
	"""
	Adds to the parser a set of arguments relevant to injection recovery
	
	Parameters
	----------
		
	parser: argparse.ArgumentParser
		The parser object to be updated
	"""
	parser.add_argument(
		"--n-injs", type = int, default = None,
		help="Number of injections. If inj-file is specified, they will be read from it; otherwise they will be randomly drawn from the tiling and saved to file. If None, all the injections will be read from inj-file and it will throw an error if such file is not provided.")
	parser.add_argument(
		"--fixed-sky-loc-polarization", type = float, nargs = 3, default = None,
		help="Sky localization and polarization angles for the signal injections. They must be a tuple of float in the format (longitude,latitude,polarization). If None, the angles will be loaded from the injection file, if given, or uniformly drawn from the sky otherwise.")
	parser.add_argument(
		"--inj-file", type = str, default = None,
		help="An xml injection file to load the injections from. If not provided, the injections will be performed at random in each tile (injs-per-tile). If no path to the file is provided, it is understood it is located in run-dir.")
	parser.add_argument(
		"--stat-dict", type = str, default = None,
		help="The name of the file in which the results of the injection study will be saved (either json or pkl). If None, a suitable default will be provided.")
	parser.add_argument(
		"--mchirp-window", type = float, default = 0.1,
		help="The window in relative chirp mass inside which the templates are considered for full match (if --full-match is specified)")
	parser.add_argument(
		"--full-match", action='store_true', default = False,
		help="Whether to perform the full standard match computation. If False, a metric approximation to the match will be used")
	parser.add_argument(
		"--full-symphony-match", action='store_true', default = False,
		help="Whether to perform the full symphony match computation. If False, a metric approximation to the match will be used")
	parser.add_argument(
		"--seed", type = int, default = None,
		help="Random seed for extracting the random injections (if it applies)")
	parser.add_argument(
		"--use-ray", action='store_true', default = False,
		help="Whether to use ray package to parallelize the match computation")

[docs]def make_sub_file(run_dir, run_name, memory_requirement = 4, disk_requirement = 4, cpu_requirement = 1):
	"""
	Creates an condor submit file to run any mbank executable. The commmand to run and its arguments are taken from the command line arguments
	
	Parameters
	----------
		
	run_dir: str
		The run directory where the submit file is executed
	
	run_name: str
		A name for the run, to identify the executable
	
	memory_requirement: int
		Memory requirements in GB for the condor job

	disk_requirement: int
		Disk requirements in GB for the condor job
	
	cpu_requirement: int
		Number of CPUs requested for the condor job
	"""
	
	sub_file_str = """Universe = vanilla
batch_name = {3}
Executable = {0}
arguments = "{1}"
getenv = true
Log = {2}_{3}.log
Error = {2}_{3}.err
Output = {2}_{3}.out
request_memory = {4}GB
request_disk = {5}GB
request_cpus = {6}

queue
"""

	if not run_dir.endswith('/'): run_dir = run_dir+'/'
	
	if '--make-sub' in sys.argv:
		sys.argv.remove('--make-sub')
	else:
		warnings.warn('The option --make-sub was given in the inifile. Make sure you remove it from your ini before launching the condor job!')

	exec_name = sys.argv[0].split('/')[-1]
	batch_name = '_'.join([exec_name, run_name])
	
	sub_file_str = sub_file_str.format(sys.argv[0], ' '.join(sys.argv[1:]),
		run_dir, batch_name,
		memory_requirement, disk_requirement, cpu_requirement)
	
	sub_file = '{}{}_{}.sub'.format(run_dir, exec_name, run_name)
	with open(sub_file, 'w') as f:
		f.write(sub_file_str)
	f.close()		
	print('#####')
	print("Submit file generated @ {0}\nSubmit a job it with condor_submit {0}\n".format(sub_file))
	print("Monitor it with: tail -f {}{}_{}.err".format(run_dir, exec_name, run_name))
	print('#####\n')
	print(sub_file_str)
	return


[docs]def save_args(args, filename):
	"""
	Given a set of arguments, it stores them in **json** format to a file `filename`
	
	Parameters
	----------
		
	args: argparse.Namespace
		Arguments encoded in a parser Namespace object
	
	filename: str
		Name of the file to save the arguments to
	"""
	json_str = json.dumps(args.__dict__, indent = 2)
	json_str = json_str.replace(",\n    ", ", ")
	json_str = json_str.replace("[\n    ", "[ ")
	json_str = json_str.replace("\n  ]", " ]")

	with open(filename, 'w', encoding='utf-8') as f:
		f.write(json_str)

	return

[docs]def get_boundary_box_from_args(args):
	"""
	Given the arguments stored in a parser, it returns a rectangular boundary box for the parameter space defined by the arguments
	
	Parameters
	----------
		
	args: argparse.Namespace
		Arguments encoded in a parser Namespace object
	"""
	var_handler = variable_handler()
	format_info = var_handler.format_info[args.variable_format]
	if format_info['mass_format'] in ['m1m2', 'logm1logm2']:
		assert args.m1_range and args.m2_range, "If mass format is m1m2 or logm1logm2, --m1-range and --m2-range must be given"
		var1_min, var1_max = args.m1_range
		var2_min, var2_max = args.m2_range
	elif format_info['mass_format'] in ['Mq', 'logMq']:
		assert args.mtot_range and args.q_range, "If mass format is Mq or logMq, --mtot-range and --q-range must be given"
		var1_min, var1_max = args.mtot_range
		var2_min, var2_max = args.q_range
	elif format_info['mass_format'] == 'mceta':
		assert args.mc_range and args.eta_range, "If mass format is mceta, --mc-range and --eta-range must be given"
		var1_min, var1_max = args.mc_range
		var2_min, var2_max = args.eta_range

		#Ranges for quantities other than masses
	s1_min, s1_max = getattr(args, 's1_range', (np.nan, np.nan))
	s2_min, s2_max = getattr(args, 's2_range', (np.nan, np.nan))
	chi_min, chi_max = getattr(args, 'chi_range', (np.nan, np.nan))
	theta_min, theta_max = getattr(args, 'theta_range', (np.nan, np.nan))
	phi_min, phi_max = getattr(args, 'phi_range', (np.nan, np.nan))
	e_min, e_max = getattr(args, 'e_range', (np.nan, np.nan))
	meanano_min, meanano_max = getattr(args, 'meanano_range', (np.nan, np.nan))
	iota_min, iota_max = getattr(args, 'iota_range', (np.nan, np.nan))
	ref_phase_min, ref_phase_max = getattr(args, 'ref_phase_range', (np.nan, np.nan))

	return get_boundaries_from_ranges(args.variable_format,
				(var1_min, var1_max), (var2_min, var2_max),
				chi_range = (chi_min, chi_max), s1_range = (s1_min, s1_max), s2_range = (s2_min, s2_max),
				theta_range = (theta_min, theta_max), phi_range = (phi_min, phi_max),
				iota_range = (iota_min, iota_max), ref_phase_range = (ref_phase_min, ref_phase_max),
				e_range = (e_min, e_max), meanano_range = (meanano_min, meanano_max))

[docs]class boundary_keeper:
	"""
	Class to keep a set of boundaries on the masses defined by the argument parsers.
	The object is callable to check whether a given point, with a given variable format is inside the mass boundaries.
	An extra boundary box can be specified at each call to enforce other type of boundaries
	"""
	def __init__(self, args):
			#Making a copy of the interesting args inside a SimpleNamespace object
		
		defaults = {
			'mtot_range': (0, np.inf),
			'mc_range': (0, np.inf),
			'm1_range': (0, np.inf),
			'm2_range': (0, np.inf),
			'q_range': (1, np.inf),
			'eta_range': (0., 0.25),
		}
		
		self.b_args = types.SimpleNamespace()
		for k in dir(args):
			v = getattr(args, k)
			if k in defaults.keys() and not v:
				v = defaults[k]
			if k.endswith('_range'): setattr(self.b_args, k, v)
		
			#cache for the boundaries for a given variable format
		self.b_cache_format = ''
		self.b_cache = None
		self.var_handler = variable_handler() 
	
	def set_variable_format(self, variable_format):
		if self.b_cache_format != variable_format:
			self.b_cache_format = variable_format
			setattr(self.b_args, 'variable_format', variable_format)
			self.b_cache = get_boundary_box_from_args(self.b_args)
		return
	
[docs]	def sample(self, n_samples, variable_format):
		"Samples from the uniform distribution in the coordinates"
		self.set_variable_format(variable_format)
		
		samples = []
		while len(samples)<=n_samples:
		
			new_samples = np.random.uniform(*self.b_cache, (n_samples, self.b_cache.shape[1]) )
			new_samples = new_samples[self(new_samples, variable_format)]
			samples = np.concatenate([samples, new_samples], axis = 0) if len(samples) else new_samples
		return samples[:n_samples]
	
	def volume_box(self, variable_format):
		self.set_variable_format(variable_format)
		return np.prod(np.abs(self.b_cache[1]-self.b_cache[0]))
		
	def volume(self, n_samples, variable_format, n_vars = 50):
	
		self.set_variable_format(variable_format)
		vol = np.prod(np.abs(self.b_cache[1]-self.b_cache[0]))

		vols = []
		for i in range(n_vars):
			samples = np.random.uniform(*self.b_cache, (n_samples, self.b_cache.shape[1]) )
			n_inside = sum(self(samples, variable_format))
		
			vols.append(vol*(n_inside/n_samples))
		
		vol = np.mean(vols)
		std_error_mean = np.std(vols, ddof=1)/np.sqrt(n_vars)
		

		return vol, std_error_mean
		
	
	def __call__(self, theta, variable_format):
		theta = np.atleast_2d(theta)
		self.set_variable_format(variable_format)
		#ids_inside = np.full((theta.shape[0],), True)
		ids_inside = np.logical_and(np.all(theta > self.b_cache[0,:], axis =1), np.all(theta < self.b_cache[1,:], axis = 1)) #(N,)
		
			#Checking for masses
		m1, m2 = np.full(theta[:,0].shape, 0.), np.full(theta[:,0].shape, 0.)
		m1[ids_inside], m2[ids_inside] = self.var_handler.get_BBH_components(theta[ids_inside], variable_format)[:,:2].T

		with warnings.catch_warnings():
			warnings.simplefilter("ignore", category = RuntimeWarning)
			if getattr(self.b_args, 'm1_range', None):
				ids_inside = np.logical_and(ids_inside, np.logical_and(m1>self.b_args.m1_range[0], m1<self.b_args.m1_range[1]))
			if getattr(self.b_args, 'm2_range', None):
				ids_inside = np.logical_and(ids_inside, np.logical_and(m2>self.b_args.m2_range[0], m2<self.b_args.m2_range[1]))
			if getattr(self.b_args, 'mtot_range', None):
				M = m1 + m2
				ids_inside = np.logical_and(ids_inside, np.logical_and(M>self.b_args.mtot_range[0], M<self.b_args.mtot_range[1]))
			if getattr(self.b_args, 'q_range', None):
				q = m1/m2
				ids_inside = np.logical_and(ids_inside, np.logical_and(q>self.b_args.q_range[0], q<self.b_args.q_range[1]))
			if getattr(self.b_args, 'mc_range', None):
				mc = (m1*m2)**(3/5)/(m1+m2)**(1/5)
				ids_inside = np.logical_and(ids_inside, np.logical_and(mc>self.b_args.mc_range[0], mc<self.b_args.mc_range[1]))
			if getattr(self.b_args, 'eta_range', None):
				eta = (m1*m2)/np.square(m1+m2)
				ids_inside = np.logical_and(ids_inside, np.logical_and(eta>self.b_args.eta_range[0], eta<self.b_args.eta_range[1]))
		return ids_inside