#!/usr/bin/env python # coding: utf-8 # # Convert DAS H5 file into Miniseed # This will convert DAS h5 file into miniseed including decimation. # By Taka'aki Taira, Peter Hubbard, Yuancong Gou # import h5py import numpy as np import matplotlib.pyplot as plt import glob import datetime import pytz import obspy import io from obspy import read, Trace, Stream, UTCDateTime #from tqdm.notebook import tqdm,trange from tqdm import tqdm,trange # ## Data (h5 file) location data_path = './MBARI_GL20m_OCP5m_FS1kHz_2022-09-11' print("# data_path = ", data_path) # ## Channel range start_channel = 0 channel_num = 10245 end_channel = start_channel + channel_num print("# start_channel = ", start_channel) print("# end_channel = ", end_channel) chan_range = [start_channel, end_channel] print("# chan_range = ", chan_range) # low pass filter fl = 50 # 1sps case. teleseismic event. Alaska etc #fl = 1 print("# fl = ", fl, "Hz") # ## Raeding h5 file # original code from Peter. added start_unix_time from the first h5 file. this is used to define the startime time of miniseed file. for h5name in sorted(glob.glob(data_path + '/*.h5')): print("# h5name = ", h5name) f = h5py.File(h5name, 'r') list(f.keys()) RawData = f['Acquisition']['Raw[0]']['RawData'] RawDataTime = f['Acquisition']['Raw[0]']['RawDataTime'] RawData[100][0:10] # Original code from Peter Hubbard row1 = 0 count = 0 for h5name in tqdm(sorted(glob.glob(data_path + '/*.h5'))): print("# h5name = ", h5name) count += 1 f = h5py.File(h5name, 'r') RawData = f['Acquisition']['Raw[0]']['RawData'] RawDataTime = f['Acquisition']['Raw[0]']['RawDataTime'] if row1 == 0: all_RawData = np.transpose(RawData[:,chan_range[0]:chan_range[1]]) all_RawDataTime = np.array(RawDataTime) row1 = all_RawData.shape[0] start_unix_time = RawDataTime[0] else: all_RawData = np.append(all_RawData,np.transpose(RawData[:,chan_range[0]:chan_range[1]]),axis=1) all_RawDataTime = np.append(all_RawDataTime,RawDataTime,axis=0) row1 = all_RawData.shape[0] f.close() # This depends on the shape of raw data array, which may differ from different set-up. #all_RawData = np.transpose(all_RawData) # if the data is in HDF5 PRODML format, # to obtain phase shift values in “rad” divide each sample value by 10430.378850470453 #all_RawData = all_RawData*np.pi/2**15 all_RawData = all_RawData / 10430.378850470453 channels = all_RawData.shape[0] total_time = (all_RawDataTime[-1] - all_RawDataTime[0])/10**6 print(str(count)+' files imported. There are '+ str(total_time) + ' seconds recorded on '+ str(channels) + ' channels...') print ("# start_unix_time = ", start_unix_time) time_utc = datetime.datetime.fromtimestamp(start_unix_time/10**6, pytz.timezone('UTC')) print(time_utc) # ## Sampling rate # Original code from Peter Hubbard # In[15]: fs = 1/(all_RawDataTime[1]-all_RawDataTime[0])*10**6 print("# h5 file raw data sampling rate fs =", fs, "Hz") t = (all_RawDataTime - all_RawDataTime[0])/10**6 print(t) # ## Reading data # 1. read each trace, 2. decimate into 100sps data (or 1sps) assuming that the original is 1000sps data. station code=channel, location code = BK, channel/component = EPX, location code = 00 # # for i in trange(all_RawData.shape[0]): data = all_RawData[i,:] tr = Trace(data=data) tr.stats.sampling_rate = fs tr.stats.starttime = time_utc tr.stats.network = "MARS" tr.stats.station = str(start_channel+i).zfill(5) tr.stats.channel = "EPX" # 1sps case #tr.stats.channel = "LPX" tr.stats.location = "00" # remove linear trend #tr.detrend(type='linear') # remove mean #tr.detrend(type='demean') # taper #tr.taper(0.05) # lowpass filter #tr_fil = tr.copy().filter('lowpass', freq=fl, corners=6, zerophase=True) # assuming original 1000 Hz data -> decimated into 100Hz #tr_deci = tr_fil.copy() #.decimate(factor=10,,no_filter=True) # 1sps case #tr_deci = tr_fil.copy().decimate(factor=5).decimate(factor=5).decimate(factor=2).decimate(factor=5).decimate(factor=2).decimate(factor=5) #print(tr_deci.stats) if i == 0: st = Stream(traces=tr) else: st += Stream(traces=tr) print(st) """ Original code from Peter Hubbard The conversion of optical phase shift is in details described in this SEAFOM report : https://seafom.com/mdocs-posts/seafom_msp_02-august-2018-pdf , as shown in equations 9 and 10 on page 26. """ def optic_phase_to_starin(obspy_st,gauge_length, refractive_index=1.468, optical_wavelength=1550, pe_scaling_factor = 0.78): #-------------------- # This function is in-place operation in order to save memory, make a copy before using # # obspy_st : Obspy Stream object # gauge_length : gauge length in m # refractive_index : refractive index which depends on type of fiber # optical_wavelength : the operational optical (vacuum) wavelength of the DAS system # pe_scaling_factor : the photo-elastic scaling factor for longitudinal strain in isotropic material #-------------------- channels = len(obspy_st) record_length = len(obspy_st[0].data) strain = np.zeros((channels,record_length)) for i in trange(channels): data = obspy_st[i].data sig_phi = data #radians sig_ep2 = optical_wavelength*(10**-9)*sig_phi/(4*np.pi*refractive_index*gauge_length*pe_scaling_factor) # nm epsilon obspy_st[i].data = sig_ep2 return obspy_st # In[20]: st = optic_phase_to_starin(st,gauge_length=20) # ## output path msOUT = data_path+'/'+'strain_'+str(st[0].stats.starttime)+'_'+str(st[0].stats.endtime)+'.mseed' print("# msOUT = ", msOUT) # ## Writing data into miniseed format st.write(msOUT, format="MSEED")