Prepare Data for MCMTpy¶
Format of MCMTpy input data:¶
ASDF file format, and the sampling rate
dtis consistent with that of GFs database.Must be ENZ three-component data and the name of the channel must be ‘E’/’N’/’Z’ order.
Note
Pyasdf files are automatically sorted by name. So you must make sure that the order of the Trace is still ENZ after the automatic sorting, generally the name of the channel ‘BHE’/’BHN’/’BHZ’ will be fine.
Trace.Stats.sac.t1/Trace.Stats.sac.t2 must be included in the trace header file to represent the P/S phase time.
The trace header file must include the station and network name.
The trace header file must include b/e/o/stla/stlo/stdp/evlo/evla/evdp/mag/dist/az/baz, time information, event longitude and latitude information, azimuth information.
The trace header file must include starttime/endtime.
All traces need to retain a uniform number of sampling points before T1, such as p_n0 = 50.
Example¶
We have provided some useful scripts for data preprocessing, and the example_path need to de changed to your path, and run this notebook.
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4import os
5import glob
6import obspy
7import shutil
8from obspy import Stream
9from obspy.taup import TauPyModel
10from MCMTpy.utils.asdf_function import Add_waveforms,Add_stationxml,get_QuakeML,Add_quakeml
11from MCMTpy.utils.asdf_function import get_StationXML
12
13#----------------------------------------------------#
14# 1.read raw data
15def read_data(allfiles_path):
16
17 allfiles = sorted( glob.glob(allfiles_path) )
18 data_raw = Stream()
19 for i in range(0,len(allfiles),1):
20 try:
21 tr = obspy.read(allfiles[i])
22 data_raw += tr
23 except Exception:
24 print(allfiles[i],': no such file or obspy read error');continue
25 data = data_raw.copy()
26
27 return data
28
29#----------------------------------------------------#
30# 2.get_taup_tp_ts
31def get_taup_tp_ts(model,depth,distance,degree=None):
32 if degree==False:
33 distance = distance/111.19
34
35 time_p = model.get_travel_times(source_depth_in_km=depth,
36 distance_in_degree=distance,
37 phase_list=["p", "P"])
38
39 time_s = model.get_travel_times(source_depth_in_km=depth,
40 distance_in_degree=distance,
41 phase_list=["s", "S"])
42
43 ray_p = time_p[0].ray_param
44 tp = time_p[0].time
45 angle_p = time_p[0].incident_angle
46
47 ray_s = time_s[0].ray_param
48 ts = time_s[0].time
49 angle_s = time_s[0].incident_angle
50
51 return ray_p,tp,angle_p,ray_s,ts,angle_s
52
53#----------------------------------------------------#
54# 3.preprocess
55def preprocess(data,freqmin,freqmax,samp_freq,p_n0,npts):
56
57 data.detrend(type='demean')
58 data.detrend(type='simple')
59 data.taper(max_percentage=0.05)
60
61 data.filter('bandpass', freqmin=freqmin, freqmax=freqmax, corners=4, zerophase=True) # bandpass
62 data.resample(samp_freq,window='hanning',no_filter=True, strict_length=False) # resample
63
64 for i in range(0,len(data),1):
65 b = data[i].stats.sac['b']
66 t1 = data[i].stats.sac['t1']
67 t_start = data[i].stats.starttime + (t1-b) - p_n0/samp_freq
68 t_end_npts = t_start+npts/samp_freq
69 t_endtime = data[i].stats.endtime
70 if t_end_npts > t_endtime:
71 t_end = t_endtime
72 else:
73 t_end = t_end_npts
74
75 data[i].trim(t_start, t_end)
76
77 for i in range(0,round(len(data)/3)):
78 t_start_1 = data[3*i].stats.starttime
79 t_start_2 = data[3*i+1].stats.starttime
80 t_start_3 = data[3*i+2].stats.starttime
81 t_end_1 = data[3*i].stats.endtime
82 t_end_2 = data[3*i+1].stats.endtime
83 t_end_3 = data[3*i+2].stats.endtime
84 t_start = max(t_start_1,t_start_2,t_start_3)
85 t_end = min(t_end_1,t_end_2,t_end_3)
86 data[3*i:3*i+3].trim(t_start, t_end)
87
88 # velocity(nm/s) to velocity(cm/s)
89 for i in range(0,len(data),1):
90 data[i].data = 1e-6*data[i].data
91
92#----------------------------------------------------#
93# 4.write_ASDF
94def write_ASDF(data_enz,Output_path,source_name,ASDF_filename):
95 source_lat = data_enz[0].stats.sac.evla
96 source_lon = data_enz[0].stats.sac.evlo
97 source_depth = data_enz[0].stats.sac.evdp
98 source_time = data_enz[0].stats.starttime + data_enz[0].stats.sac.b
99
100 Output_file_path = os.path.join(Output_path, ASDF_filename+'.h5')
101 catalog_create = get_QuakeML(source_name, source_lat, source_lon, source_depth,source_time)
102 Add_quakeml(Output_file_path,catalog_create)
103
104 Station_num = round(len(data_enz)/3)
105 for i in range(0,Station_num,1):
106 net_sta_name = data_enz[3*i].stats.network+'_'+data[3*i].stats.station
107 station_network = data_enz[3*i].stats.network
108 station_name = data_enz[3*i].stats.station
109 station_lat = data_enz[3*i].stats.sac.stla
110 station_lon = data_enz[3*i].stats.sac.stlo
111 station_depth = 0
112 station_distance = data_enz[3*i].stats.sac.dist
113 station_az = data_enz[3*i].stats.sac.az
114 station_baz = data[3*i].stats.sac.baz
115 tp = data_enz[3*i].stats.sac.t1
116 ts = data_enz[3*i].stats.sac.t2
117 stream = data_enz[3*i:3*i+3].copy()
118 starttime = data_enz[3*i].stats.starttime
119 time_before_first_arrival = p_n0/samp_freq
120
121 inv=get_StationXML(station_network,
122 station_name,
123 station_lat,
124 station_lon,
125 station_depth,
126 'enz')
127
128 Add_waveforms([stream],
129 catalog_create,
130 Output_file_path,
131 source_name,
132 tp,
133 ts,
134 station_distance,
135 station_az,
136 station_baz)
137
138 Add_stationxml(inv,
139 Output_file_path)
140
141 for j in range(0,3,1):
142 CHN = stream[j].stats.channel
143 sac_filename = os.path.join(Output_path,source_name+'.'+station_network+'.'+station_name+'.'+CHN+'.sac')
144 stream[j].write(sac_filename, format='SAC')
145#------------------------------------------------------------#
146
147
148
149
150#------------------------------------------------------------#
151# 0. main
152# The root directory of the project
153example_path = '/Users/yf/3.Project/8.MCMTpy/MCMTpy-master/data/example_yunnan'
154freqmin = 0.005 # pre filtering frequency bandwidth (hz)
155freqmax = 2 # note this cannot exceed Nquist frequency (hz)
156samp_freq = 5 # targeted sampling rate (hz)
157p_n0 = 50 # number of sampling points before P wave arrives
158npts = 2048 # data length
159Output_path = os.path.join(example_path,"YN.202105212148_Inv/YN.202105212148_MCMTpy")
160source_name = "source_enz" # asdf‘s source_tag
161ASDF_filename = "YN.202105212148" # asdf filename
162
163
164
165
166
167#------------------------------------------------------------#
168# we expect no parameters need to be changed below
169#------------------------------------------------------------#
170# 1. read raw data
171allfiles_path = os.path.join(example_path,'YN.202105212148_Inv/YN.202105212148_raw/*.SAC')
172data = read_data(allfiles_path)
173
174# 2. taup ray tracing
175model_path = os.path.join(example_path,"v_model/v_model.npz")
176model = TauPyModel(model=model_path)
177for i in range(0,len(data),1):
178 depth = data[i].stats.sac['evdp']
179 distance = data[i].stats.sac['dist']
180 ray_p,tp,angle_p,ray_s,ts,angle_s = get_taup_tp_ts(model,depth,distance,degree=False)
181 data[i].stats.sac["t1"]=tp
182 data[i].stats.sac["t2"]=ts
183
184# 3. preprocess
185preprocess(data,freqmin,freqmax,samp_freq,p_n0,npts)
186
187# 4. output ASDF and SAC data
188shutil.rmtree(Output_path)
189os.mkdir(Output_path)
190write_ASDF(data,Output_path,source_name,ASDF_filename)
Note
Please follow the above parameter instructions and set all parameters correctly before running the program. Otherwise, it is easy to report an error!