diff --git a/uvplot/tests.py b/uvplot/tests.py index f4e7990..9607fa5 100644 --- a/uvplot/tests.py +++ b/uvplot/tests.py @@ -84,3 +84,31 @@ def test_uvcut(): assert_allclose(uvt.weights, uv.weights[uvcut]) assert uv.header == uvt.header + + +def test_uvbin(): + uvtable_filename= "/tmp/uvtable.txt" + uvtable, wle = create_sample_uvtable(uvtable_filename) + + uv = UVTable(filename=uvtable_filename, wle=wle, columns=COLUMNS_V0) + + uvbin_size = 5e4 + uvmin = 3e6 + uvmax = uvmin + uvbin_size + + uv.uvbin(uvbin_size) + + # Check contents of each uvbin agrees with bin_quantity + bin_re, bin_re_err = uv.bin_quantity(uv.re) + + assert_allclose(bin_re, uv.bin_re) + assert_allclose(bin_re_err, uv.bin_re_err) + + # Check the contents of one bin is correct + idx = (uv.uvdist >= uvmin) & (uv.uvdist < uvmax) + + if np.sum(idx > 0): + assert np.all(uv.bin_index[idx] == uv.bin_index[idx][0]) + + id_bin = (uv.bin_uvdist >= uvmin) & (uv.bin_uvdist < uvmax) + assert np.sum(idx) == np.sum(uv.bin_count[id_bin]) diff --git a/uvplot/uvtable.py b/uvplot/uvtable.py index 9699c7f..3ccd957 100644 --- a/uvplot/uvtable.py +++ b/uvplot/uvtable.py @@ -403,7 +403,7 @@ def uvdist(self): return self._uvdist - def uvbin(self, uvbin_size, **kwargs): + def uvbin(self, uvbin_size, use_std=False): """ Compute the intervals (bins) of the uv-distances given the size of the bin (bin_size_wle). @@ -417,29 +417,74 @@ def uvbin(self, uvbin_size, **kwargs): To compute the weights, we do not need to divide by the weight_corr factor since it cancels out when we compute """ - self.nbins = np.ceil(self.uvdist.max() / uvbin_size).astype('int') + uvdist = self.uvdist # Cache as its a property + self.nbins = np.ceil(uvdist.max() / uvbin_size).astype('int') self.bin_uvdist = np.zeros(self.nbins) self.bin_weights = np.zeros(self.nbins) self.bin_count = np.zeros(self.nbins, dtype='int') - self.uv_intervals = [] self.uv_bin_edges = np.arange(self.nbins + 1, dtype='float64') * uvbin_size - for i in range(self.nbins): - uv_interval = np.where((self.uvdist >= self.uv_bin_edges[i]) & - (self.uvdist < self.uv_bin_edges[i + 1])) - self.bin_count[i] = len(uv_interval[0]) - if self.bin_count[i] != 0: - self.bin_uvdist[i] = self.uvdist[uv_interval].sum() / self.bin_count[i] - self.bin_weights[i] = np.sum(self.weights[uv_interval]) - else: - self.bin_uvdist[i] = self.uv_bin_edges[i] + 0.5 * uvbin_size + # Pre-compute the bins each data point falls into + norm = 1 / uvbin_size + self.bin_index = np.floor(uvdist * norm).astype('int32') + # Increment to make sure that only the last bin includes the upper edge + increment = ((uvdist == self.uv_bin_edges[self.bin_index+1]) & + (self.bin_index+1 != self.nbins)) + self.bin_index[increment] += 1 + + + # Setup the data arrays + self.bin_re = np.zeros(self.nbins, dtype='float64') + self.bin_re_err = np.zeros(self.nbins, dtype='float64') + + self.bin_im = np.zeros(self.nbins, dtype='float64') + self.bin_im_err = np.zeros(self.nbins, dtype='float64') + + + # Use blocking since its faster and needs less memory + BLOCK = 65536 + for i in range(0, len(uvdist), BLOCK): + tmp_uv = uvdist[i:i+BLOCK] + tmp_re = self.re[i:i+BLOCK] + tmp_im = self.im[i:i+BLOCK] + tmp_w = self.weights[i:i+BLOCK] + + idx = self.bin_index[i:i+BLOCK] + + self.bin_count += np.bincount(idx, minlength=self.nbins) + + self.bin_uvdist += np.bincount(idx, weights=tmp_w*tmp_uv, + minlength=self.nbins) - self.uv_intervals.append(uv_interval) + self.bin_re += np.bincount(idx, weights=tmp_w*tmp_re, + minlength=self.nbins) + self.bin_im += np.bincount(idx, weights=tmp_w*tmp_im, + minlength=self.nbins) - self.bin_re, self.bin_re_err = self.bin_quantity(self.re, **kwargs) - self.bin_im, self.bin_im_err = self.bin_quantity(self.im, **kwargs) + self.bin_weights += np.bincount(idx, weights=tmp_w, + minlength=self.nbins) + + if use_std: + self.bin_re_err += np.bincount(idx, weights=tmp_w*tmp_re**2, + minlength=self.nbins) + self.bin_im_err += np.bincount(idx, weights=tmp_w*tmp_im**2, + minlength=self.nbins) + + idx = self.bin_count > 0 + w = self.bin_weights[idx] + self.bin_re[idx] /= w + self.bin_im[idx] /= w + self.bin_uvdist[idx] /= w + if use_std: + self.bin_re_err[idx] /= w + self.bin_im_err[idx] /= w + self.bin_re_err = np.sqrt(bin_re_err - bin_re**2) + self.bin_im_err = np.sqrt(bin_im_err - bin_im**2) + else: + self.bin_re_err[idx] = 1 / np.sqrt(w) + self.bin_im_err[idx] = 1 / np.sqrt(w) def bin_quantity(self, x, use_std=False): """ @@ -460,17 +505,30 @@ def bin_quantity(self, x, use_std=False): """ bin_x, bin_x_err = np.zeros(self.nbins), np.zeros(self.nbins) - - for i in range(self.nbins): - - if self.bin_count[i] != 0: - bin_x[i] = np.sum(x[self.uv_intervals[i]] * self.weights[self.uv_intervals[i]]) / \ - self.bin_weights[i] - - if use_std is True: - bin_x_err[i] = np.std(x[self.uv_intervals[i]]) - else: - bin_x_err[i] = 1. / np.sqrt(self.bin_weights[i]) + + # Use blocking since its faster and needs less memory + BLOCK = 65536 + for i in range(0, len(self.u), BLOCK): + tmp_x = x[i:i+BLOCK] + tmp_w = self.weights[i:i+BLOCK] + + idx = self.bin_index[i:i+BLOCK] + + bin_x += np.bincount(idx, weights=tmp_w*tmp_x, + minlength=self.nbins) + + if use_std: + bin_x_err += np.bincount(idx, weights=tmp_w*tmp_x**2, + minlength=self.nbins) + + idx = self.bin_count > 0 + w = self.bin_weights[idx] + bin_x[idx] /= w + if use_std: + bin_x_err[idx] /= w + bin_x_err = np.sqrt(bin_x_err - bin_x**2) + else: + bin_x_err[idx] = 1 / np.sqrt(w) return bin_x, bin_x_err