Prepare Data for MCMTpy

Format of MCMTpy input data:

  • ASDF file format, and the sampling rate dt is consistent with that of GFs database.

  • Must be ENZ three-component data and the name of the channel must be ‘E’/’N’/’Z’ order.

Note

Pyasdf files are automatically sorted by name. So you must make sure that the order of the Trace is still ENZ after the automatic sorting, generally the name of the channel ‘BHE’/’BHN’/’BHZ’ will be fine.

  • Trace.Stats.sac.t1/Trace.Stats.sac.t2 must be included in the trace header file to represent the P/S phase time.

  • The trace header file must include the station and network name.

  • The trace header file must include b/e/o/stla/stlo/stdp/evlo/evla/evdp/mag/dist/az/baz, time information, event longitude and latitude information, azimuth information.

  • The trace header file must include starttime/endtime.

  • All traces need to retain a uniform number of sampling points before T1, such as p_n0 = 50.

Example

  • We have provided some useful scripts for data preprocessing, and the example_path need to de changed to your path, and run this notebook.

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3
  4import os
  5import glob
  6import obspy
  7import shutil
  8from obspy import Stream
  9from obspy.taup import TauPyModel
 10from MCMTpy.utils.asdf_function import Add_waveforms,Add_stationxml,get_QuakeML,Add_quakeml
 11from MCMTpy.utils.asdf_function import get_StationXML
 12
 13#----------------------------------------------------#
 14# 1.read raw data
 15def read_data(allfiles_path):
 16
 17    allfiles = sorted( glob.glob(allfiles_path) )
 18    data_raw = Stream()
 19    for i in range(0,len(allfiles),1):
 20        try:
 21            tr = obspy.read(allfiles[i])
 22            data_raw += tr
 23        except Exception:
 24            print(allfiles[i],': no such file or obspy read error');continue
 25    data = data_raw.copy()
 26    
 27    return data
 28
 29#----------------------------------------------------#
 30# 2.get_taup_tp_ts
 31def get_taup_tp_ts(model,depth,distance,degree=None):
 32    if degree==False:
 33        distance = distance/111.19
 34
 35    time_p = model.get_travel_times(source_depth_in_km=depth,
 36                                    distance_in_degree=distance,
 37                                    phase_list=["p", "P"])
 38
 39    time_s = model.get_travel_times(source_depth_in_km=depth,
 40                                    distance_in_degree=distance,
 41                                    phase_list=["s", "S"])
 42
 43    ray_p = time_p[0].ray_param
 44    tp = time_p[0].time
 45    angle_p = time_p[0].incident_angle
 46
 47    ray_s = time_s[0].ray_param
 48    ts = time_s[0].time
 49    angle_s = time_s[0].incident_angle
 50
 51    return ray_p,tp,angle_p,ray_s,ts,angle_s
 52
 53#----------------------------------------------------#
 54# 3.preprocess
 55def preprocess(data,freqmin,freqmax,samp_freq,p_n0,npts):
 56
 57    data.detrend(type='demean')
 58    data.detrend(type='simple')
 59    data.taper(max_percentage=0.05)
 60
 61    data.filter('bandpass', freqmin=freqmin, freqmax=freqmax, corners=4, zerophase=True)  # bandpass
 62    data.resample(samp_freq,window='hanning',no_filter=True, strict_length=False)         # resample
 63
 64    for i in range(0,len(data),1):
 65        b = data[i].stats.sac['b']
 66        t1 = data[i].stats.sac['t1']
 67        t_start = data[i].stats.starttime + (t1-b) - p_n0/samp_freq
 68        t_end_npts = t_start+npts/samp_freq
 69        t_endtime = data[i].stats.endtime
 70        if t_end_npts > t_endtime:
 71            t_end = t_endtime
 72        else:
 73            t_end = t_end_npts
 74
 75        data[i].trim(t_start, t_end)
 76
 77    for i in range(0,round(len(data)/3)):
 78        t_start_1 = data[3*i].stats.starttime
 79        t_start_2 = data[3*i+1].stats.starttime
 80        t_start_3 = data[3*i+2].stats.starttime
 81        t_end_1 = data[3*i].stats.endtime
 82        t_end_2 = data[3*i+1].stats.endtime
 83        t_end_3 = data[3*i+2].stats.endtime
 84        t_start = max(t_start_1,t_start_2,t_start_3)
 85        t_end = min(t_end_1,t_end_2,t_end_3)
 86        data[3*i:3*i+3].trim(t_start, t_end)
 87
 88    # velocity(nm/s) to velocity(cm/s)
 89    for i in range(0,len(data),1):
 90        data[i].data = 1e-6*data[i].data
 91
 92#----------------------------------------------------#
 93# 4.write_ASDF
 94def write_ASDF(data_enz,Output_path,source_name,ASDF_filename):
 95    source_lat   = data_enz[0].stats.sac.evla
 96    source_lon   = data_enz[0].stats.sac.evlo
 97    source_depth = data_enz[0].stats.sac.evdp
 98    source_time  = data_enz[0].stats.starttime + data_enz[0].stats.sac.b
 99
100    Output_file_path = os.path.join(Output_path, ASDF_filename+'.h5')
101    catalog_create   = get_QuakeML(source_name, source_lat, source_lon, source_depth,source_time)
102    Add_quakeml(Output_file_path,catalog_create)
103
104    Station_num = round(len(data_enz)/3)
105    for i in range(0,Station_num,1): 
106        net_sta_name     = data_enz[3*i].stats.network+'_'+data[3*i].stats.station
107        station_network  = data_enz[3*i].stats.network
108        station_name     = data_enz[3*i].stats.station
109        station_lat      = data_enz[3*i].stats.sac.stla
110        station_lon      = data_enz[3*i].stats.sac.stlo
111        station_depth    = 0
112        station_distance = data_enz[3*i].stats.sac.dist
113        station_az       = data_enz[3*i].stats.sac.az
114        station_baz      = data[3*i].stats.sac.baz
115        tp               = data_enz[3*i].stats.sac.t1
116        ts               = data_enz[3*i].stats.sac.t2
117        stream           = data_enz[3*i:3*i+3].copy()
118        starttime        = data_enz[3*i].stats.starttime
119        time_before_first_arrival = p_n0/samp_freq
120
121        inv=get_StationXML(station_network, 
122                           station_name,
123                           station_lat, 
124                           station_lon, 
125                           station_depth, 
126                           'enz')
127
128        Add_waveforms([stream], 
129                      catalog_create,
130                      Output_file_path,
131                      source_name,
132                      tp,
133                      ts,
134                      station_distance,
135                      station_az,
136                      station_baz) 
137
138        Add_stationxml(inv,
139                       Output_file_path) 
140
141        for j in range(0,3,1): 
142            CHN = stream[j].stats.channel
143            sac_filename = os.path.join(Output_path,source_name+'.'+station_network+'.'+station_name+'.'+CHN+'.sac')
144            stream[j].write(sac_filename, format='SAC')
145#------------------------------------------------------------#
146
147
148
149
150#------------------------------------------------------------#
151# 0. main
152# The root directory of the project
153example_path = '/Users/yf/3.Project/8.MCMTpy/MCMTpy-master/data/example_yunnan'
154freqmin       = 0.005                               # pre filtering frequency bandwidth (hz)
155freqmax       = 2                                   # note this cannot exceed Nquist frequency (hz)
156samp_freq     = 5                                   # targeted sampling rate (hz)
157p_n0          = 50                                  # number of sampling points before P wave arrives
158npts          = 2048                                # data length
159Output_path   = os.path.join(example_path,"YN.202105212148_Inv/YN.202105212148_MCMTpy")
160source_name   = "source_enz"                        # asdf‘s source_tag
161ASDF_filename = "YN.202105212148"                   # asdf filename
162
163
164
165
166
167#------------------------------------------------------------#
168# we expect no parameters need to be changed below
169#------------------------------------------------------------#
170# 1. read raw data
171allfiles_path = os.path.join(example_path,'YN.202105212148_Inv/YN.202105212148_raw/*.SAC')  
172data = read_data(allfiles_path)
173
174# 2. taup ray tracing
175model_path = os.path.join(example_path,"v_model/v_model.npz")
176model = TauPyModel(model=model_path)
177for i in range(0,len(data),1):
178    depth = data[i].stats.sac['evdp']
179    distance = data[i].stats.sac['dist']
180    ray_p,tp,angle_p,ray_s,ts,angle_s = get_taup_tp_ts(model,depth,distance,degree=False)
181    data[i].stats.sac["t1"]=tp
182    data[i].stats.sac["t2"]=ts
183
184# 3. preprocess
185preprocess(data,freqmin,freqmax,samp_freq,p_n0,npts)
186
187# 4. output ASDF and SAC data
188shutil.rmtree(Output_path)
189os.mkdir(Output_path)
190write_ASDF(data,Output_path,source_name,ASDF_filename)

Note

Please follow the above parameter instructions and set all parameters correctly before running the program. Otherwise, it is easy to report an error!