In [1]:
import sys
sys.path.insert(0,'/work/yifan.wang/machinelearning/bns/BnsLib/src')

# Import Libraries

In [2]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except Exception:
    print('noGPU')
    pass

import h5py  # noqa: E402
import numpy as np  # noqa: E402
import os  # noqa: E402
from tensorflow import keras  # noqa: E402
from argparse import ArgumentParser  # noqa: E402

from BnsLib.network import H5pyHandler, FileHandler, MultiFileHandler,\
                           PrefetchedFileGeneratorMP  # noqa: E402
from BnsLib.data import number_segments  # noqa: E402
from BnsLib.types import MultiArrayIndexer  # noqa: E402
from BnsLib.utils import inverse_string_format  # noqa: E402

2022-04-12 09:44:09.081589: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /work/yifan.wang/eccsearch/C:/work/yifan.wang/1-ecc-waveform-PE/IMRPhenomDecc:/work/yifan.wang/lscsoft/opt/accomlal/lib/:/work/yifan.wang/lscsoft/MultiNest/lib:
2022-04-12 09:44:09.081621: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


noGPU


2022-04-12 09:44:23.946216: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /work/yifan.wang/eccsearch/C:/work/yifan.wang/1-ecc-waveform-PE/IMRPhenomDecc:/work/yifan.wang/lscsoft/opt/accomlal/lib/:/work/yifan.wang/lscsoft/MultiNest/lib:
2022-04-12 09:44:23.946256: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-04-12 09:44:23.946282: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (condor1): /proc/driver/nvidia/version does not exist


# File handler

In [51]:
class NoiseHandler(H5pyHandler):
    def __init__(self, *args, window_size=2048, **kwargs):
        super().__init__(*args, **kwargs)
        self.window_size = window_size
        with h5py.File(self.file_path, 'r') as fp:
            self.calculate_lengths(fp)
    
    def __len__(self):
        return len(self.indexer)
    
    def calculate_lengths(self, fp):
        self.indexer = MultiArrayIndexer()
        for key in sorted(fp['H1'].keys(), key=lambda inp: int(inp)):
            ds = fp[f'H1/{key}']
            nsegs = number_segments(len(ds),
                                    self.window_size,
                                    self.window_size)
            self.indexer.add_length(nsegs, name=key)
    
    def _getitem_open(self, index, fp):
        key, idx = list(self.indexer[int(index)].items())[0]
        sidx = idx * self.window_size
        eidx = sidx + self.window_size
        data = fp[f'H1/{key}'][sidx:eidx]
        return np.expand_dims(data, axis=-1)
    
    def serialize(self):
        dic = super().serialize()
        dic.update({'window_size': self.window_size})
        return dic


class SignalHandler(H5pyHandler):
    def __init__(self, *args, nsamples=2048, **kwargs):
        super().__init__(*args, **kwargs)
        self.nsamples = nsamples
    
    def __len__(self):
        if self.file is None:
            with h5py.File(self.file_path, 'r') as fp:
                length = len(fp['data/0'])
        else:
            length = len(self.file['data/0'])
        return length
    
    def _getitem_open(self, index, fp):
        snr = np.random.randint(5, 15)
        data = fp['data/0'][index]
        nsamples = min(len(data), self.nsamples)
        label = np.array([1, 0])
        return snr * np.expand_dims(data[-nsamples:], axis=-1), label
    
    def serialize(self):
        dic = super().serialize()
        dic.update({'nsamples': self.nsamples})
        return dic


class NoSignalHandler(FileHandler):
    def __init__(self, shape=(2048, 1)):
        super().__init__(None)
        self.shape = shape
    
    def __contains__(self, index):
        return index == -1
    
    def __len__(self):
        return 1
    
    def open(self, mode='r'):
        return
    
    def close(self):
        return
    
    def __enter__(self):
        return
    
    def __exit__(self, exc_type, exc_code, exc_traceback):
        return
    
    def __getitem__(self, index):
        return np.zeros(self.shape), np.array([0, 1])
    
    def serialize(self):
        dic = {'shape': self.shape}
        return dic
    
    @classmethod
    def from_serialized(cls, dic):
        return cls(shape=dic['shape'])


class MultiHandler(MultiFileHandler):
    @classmethod
    def from_serialized(cls, dic):
        handlers = [NoiseHandler, SignalHandler, NoSignalHandler]
        return super().from_serialized(dic, handlers)
    
    def split_index_to_groups(self, index):
        nidx, sidx = index
        return {'noise': nidx, 'signal': sidx}
    
    def format_return(self, inp):
        signal, label = inp['signal']
        noise = inp['noise']
        return signal + noise, label


def get_generator(signal_files, noise_file, batch_size=16, shuffle=True,
                  nsig=None, noirange=None, seed=None, noise_per_signal=None,
                  ratio=1, prefetch=10, workers=10, input_shape=None,
                  output_shape=None):
    if input_shape is None:
        input_shape = (4 * 2048, 1)
    if output_shape is None:
        output_shape = (1, 1)
    
    # Setting up file-handlers
    mh = MultiHandler()
    mh.input_shape = input_shape
    mh.output_shape = output_shape
    nosig = NoSignalHandler(shape=mh.input_shape)
    mh.add_file_handler(nosig, group="signal")
    
    # Setting up signal handlers
    if not isinstance(signal_files, list):
        signal_files = [signal_files]
    total_sigs = 0
    for sigfile in signal_files:
        fh = SignalHandler(sigfile, base_index=total_sigs,
                           nsamples=mh.input_shape[0])
        length = len(fh)
        total_sigs += length
        mh.add_file_handler(fh, group="signal")
    
    # Setting up noise handlers
    nh = NoiseHandler(noise_file, window_size=mh.input_shape[0])
    mh.add_file_handler(nh, group="noise")
    
    # Generate index list
    if nsig is None:
        nsig = total_sigs
    sigidxs = np.array(list(range(nsig)), dtype=int)
    print('sigidxs: ',sigidxs)
    
    if noirange is None:
        noirange = (0, len(nh))
    elif isinstance(noirange, int):
        noirange = (0, noirange)
    noiidxs = np.array(list(range(*noirange)), dtype=int)
    print('noiidxs: ',noiidxs)
    
    rs = np.random.RandomState(seed)
    index_list = []
    if noise_per_signal is None:
        noise_per_signal = 1
    for i in range(noise_per_signal):
        nidxs = rs.randint(0, len(noiidxs), size=len(sigidxs))
        index_list.extend(list(np.stack([noiidxs[nidxs], sigidxs]).T))
    print('index_list: ', index_list)
    num_pure_noise = int(len(index_list) / ratio)
    pure_noise_indices = np.random.choice(np.arange(len(nh)),
                                          size=num_pure_noise)
    index_list.extend([[pni, -1] for pni in pure_noise_indices])
    print('index_list: ', index_list)
    print('num_pure_noise: ',num_pure_noise)
    # Instantiate generator
    generator = PrefetchedFileGeneratorMP(mh, index_list,
                                          batch_size=batch_size,
                                          shuffle=shuffle,
                                          prefetch=prefetch,
                                          workers=workers)
    
    return generator

# Machine Learning Model

In [5]:
def get_model():
    # from tensorflow.keras import layers

    def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
        # Normalization and Attention
        x = keras.layers.LayerNormalization(epsilon=1e-6)(inputs)
        x = keras.layers.MultiHeadAttention(
            key_dim=head_size, num_heads=num_heads, dropout=dropout
        )(x, x)
        x = keras.layers.Dropout(dropout)(x)
        res = x + inputs

        # Feed Forward Part
        x = keras.layers.LayerNormalization(epsilon=1e-6)(res)
        x = keras.layers.Conv1D(filters=ff_dim, kernel_size=1,
                                activation="relu")(x)
        x = keras.layers.Dropout(dropout)(x)
        x = keras.layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
        return x + res

    def build_model(
        input_shape,
        head_size,
        num_heads,
        ff_dim,
        num_transformer_blocks,
        mlp_units,
        dropout=0,
        mlp_dropout=0,
    ):
        inputs = keras.Input(shape=input_shape)
        x = inputs
        for _ in range(num_transformer_blocks):
            x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

        x = keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)  # noqa: E501
        
        for dim in mlp_units:
            x = keras.layers.Dense(dim, activation="relu")(x)
            x = keras.layers.Dropout(mlp_dropout)(x)
        outputs = keras.layers.Dense(2, activation="softmax")(x)
        return keras.Model(inputs, outputs)
    
    input_shape = (2048, 1)
    
    model = build_model(input_shape,
                        head_size=256,
                        num_heads=4,
                        ff_dim=4,
                        num_transformer_blocks=4,
                        mlp_units=[128],
                        mlp_dropout=0.4,
                        dropout=0.25)
    return model


def compile_model(model):
    binacc = keras.metrics.BinaryAccuracy()
    opti = keras.optimizers.Adam(learning_rate=0.002, beta_1=0.9,
                                 beta_2=0.999, epsilon=1e-8)
    model.compile(loss='binary_crossentropy', metrics=[binacc],
                  optimizer=opti)
    return model

In [6]:
model = get_model()
model = compile_model(model)

2022-04-12 09:45:29.876702: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
model.input_names

['input_1']

In [10]:
model.input_shape

(None, 2048, 1)

In [8]:
model.input_shape[1:]

(2048, 1)

In [9]:
model.output_shape[1:]

(2,)

# The filter() function extracts elements from an iterable (list, tuple etc.) for which a function returns True.

In [11]:
form = 'signals-{index}.hdf'

sigfiles = list(filter(lambda fn: inverse_string_format(fn, form) is not None,  # noqa: E501
                           os.listdir('./transformer/t1-interactive/whitened/')))
sigfiles = [os.path.join('./transformer/t1-interactive/whitened/', fn) for fn in sigfiles]

In [12]:
sigfiles

['./transformer/t1-interactive/whitened/signals-32.hdf',
 './transformer/t1-interactive/whitened/signals-35.hdf',
 './transformer/t1-interactive/whitened/signals-16.hdf',
 './transformer/t1-interactive/whitened/signals-11.hdf',
 './transformer/t1-interactive/whitened/signals-18.hdf',
 './transformer/t1-interactive/whitened/signals-1.hdf',
 './transformer/t1-interactive/whitened/signals-6.hdf',
 './transformer/t1-interactive/whitened/signals-8.hdf',
 './transformer/t1-interactive/whitened/signals-24.hdf',
 './transformer/t1-interactive/whitened/signals-23.hdf',
 './transformer/t1-interactive/whitened/signals-34.hdf',
 './transformer/t1-interactive/whitened/signals-33.hdf',
 './transformer/t1-interactive/whitened/signals-22.hdf',
 './transformer/t1-interactive/whitened/signals-9.hdf',
 './transformer/t1-interactive/whitened/signals-25.hdf',
 './transformer/t1-interactive/whitened/signals-7.hdf',
 './transformer/t1-interactive/whitened/signals-0.hdf',
 './transformer/t1-interactive/whiten

In [13]:
valsplit = 0.8
valsplitidx = int(len(sigfiles) * valsplit)

In [14]:
len(sigfiles)

40

In [15]:
valsplitidx

32

In [16]:
trsigfiles = sigfiles[:valsplitidx]
valsigfiles = sigfiles[valsplitidx:]

In [17]:
trsigfiles

['./transformer/t1-interactive/whitened/signals-32.hdf',
 './transformer/t1-interactive/whitened/signals-35.hdf',
 './transformer/t1-interactive/whitened/signals-16.hdf',
 './transformer/t1-interactive/whitened/signals-11.hdf',
 './transformer/t1-interactive/whitened/signals-18.hdf',
 './transformer/t1-interactive/whitened/signals-1.hdf',
 './transformer/t1-interactive/whitened/signals-6.hdf',
 './transformer/t1-interactive/whitened/signals-8.hdf',
 './transformer/t1-interactive/whitened/signals-24.hdf',
 './transformer/t1-interactive/whitened/signals-23.hdf',
 './transformer/t1-interactive/whitened/signals-34.hdf',
 './transformer/t1-interactive/whitened/signals-33.hdf',
 './transformer/t1-interactive/whitened/signals-22.hdf',
 './transformer/t1-interactive/whitened/signals-9.hdf',
 './transformer/t1-interactive/whitened/signals-25.hdf',
 './transformer/t1-interactive/whitened/signals-7.hdf',
 './transformer/t1-interactive/whitened/signals-0.hdf',
 './transformer/t1-interactive/whiten

In [18]:
valsigfiles

['./transformer/t1-interactive/whitened/signals-5.hdf',
 './transformer/t1-interactive/whitened/signals-29.hdf',
 './transformer/t1-interactive/whitened/signals-2.hdf',
 './transformer/t1-interactive/whitened/signals-20.hdf',
 './transformer/t1-interactive/whitened/signals-27.hdf',
 './transformer/t1-interactive/whitened/signals-36.hdf',
 './transformer/t1-interactive/whitened/signals-31.hdf',
 './transformer/t1-interactive/whitened/signals-38.hdf']

In [19]:
noifile = os.path.join('./transformer/t1-interactive/whitened/', 'noise_mp.hdf')

In [20]:
noise = h5py.File(noifile,'r')

In [21]:
noise.keys()

<KeysViewHDF5 ['H1', 'L1']>

In [22]:
noise['H1']['1238205073']

<HDF5 dataset "1238205073": shape (20596736,), type "<f8">

In [23]:
valsplitidx * 20_000

640000

In [24]:
valsplitidx

32

In [53]:
gen = get_generator(trsigfiles, noifile,
                        nsig=100,
                        noirange=(0, 100),
                        input_shape=model.input_shape[1:],
                        output_shape=model.output_shape[1:])

sigidxs:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
noiidxs:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
index_list:  [array([42,  0]), array([36,  1]), array([47,  2]), array([50,  3]), array([10,  4]), array([67,  5]), array([93,  6]), array([5, 7]), array([96,  8]), array([86,  9]), array([26, 10]), array([67, 11]), array([65, 12]), array([ 6, 13]), array([41, 14]), array([12, 15]), array([66, 16]), array([71, 17]), array([91, 18]), array([26, 19]), array([49, 20]),

In [54]:
valgen = get_generator(valsigfiles, noifile,
                           nsig=100,
                           noirange=(20_000,
                                     20_100),
                           input_shape=model.input_shape[1:],
                           output_shape=model.output_shape[1:])

sigidxs:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
noiidxs:  [20000 20001 20002 20003 20004 20005 20006 20007 20008 20009 20010 20011
 20012 20013 20014 20015 20016 20017 20018 20019 20020 20021 20022 20023
 20024 20025 20026 20027 20028 20029 20030 20031 20032 20033 20034 20035
 20036 20037 20038 20039 20040 20041 20042 20043 20044 20045 20046 20047
 20048 20049 20050 20051 20052 20053 20054 20055 20056 20057 20058 20059
 20060 20061 20062 20063 20064 20065 20066 20067 20068 20069 20070 20071
 20072 20073 20074 20075 20076 20077 20078 20079 20080 20081 20082 20083
 20084 20085 20086 20087 20088 20089 20090 20091 20092 20093 20094 20095
 20096 20097 20098 20099]
index_list:  [array([20067,     0]), array([20056,     1]), arra

# Play around with the signal handeler

In [25]:
fh = SignalHandler(sigfiles[0], base_index=0,
                           nsamples=2048)

In [26]:
fh

<__main__.SignalHandler at 0x7f64143d6dd8>

In [28]:
#length is 10000
len(fh)

10000

In [30]:
f = h5py.File(sigfiles[0],'r')

In [31]:
f.keys()

<KeysViewHDF5 ['data', 'labels', 'params']>

In [36]:
f['data']['0']

<HDF5 dataset "0": shape (10000, 32768), type "<f8">

In [37]:
32768/16

2048.0

# csv logger

In [43]:
csvpath = os.path.join('./', 'history.csv')
csvlogger = keras.callbacks.CSVLogger(csvpath)

In [44]:
csvlogger

<keras.callbacks.CSVLogger at 0x7f63f064f908>

In [45]:
ckptpath = os.path.join('./', 'model_{epoch}')
ckpt = keras.callbacks.ModelCheckpoint(ckptpath, save_best_only=False,
                                           save_weights_only=False,
                                           save_freq='epoch')

In [None]:
with gen, valgen:
    model.fit(gen, validation_data=valgen, shuffle=False,
                  workers=0, use_multiprocessing=False,
                  callbacks=[csvlogger, ckpt], epochs=100)

Epoch 1/100

2022-04-12 11:02:32.215352: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ./model_1/assets


INFO:tensorflow:Assets written to: ./model_1/assets


Epoch 2/100



INFO:tensorflow:Assets written to: ./model_2/assets


INFO:tensorflow:Assets written to: ./model_2/assets


Epoch 3/100



INFO:tensorflow:Assets written to: ./model_3/assets


INFO:tensorflow:Assets written to: ./model_3/assets


Epoch 4/100



INFO:tensorflow:Assets written to: ./model_4/assets


INFO:tensorflow:Assets written to: ./model_4/assets


Epoch 5/100



INFO:tensorflow:Assets written to: ./model_5/assets


INFO:tensorflow:Assets written to: ./model_5/assets


Epoch 6/100



INFO:tensorflow:Assets written to: ./model_6/assets


INFO:tensorflow:Assets written to: ./model_6/assets


Epoch 7/100