Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ea73526
created skeleton for refine_acceleration() and its helper method
JSMTHWCK Mar 11, 2026
62f5132
created new function align_datasets in utils, reworking refine_accele…
JSMTHWCK Mar 20, 2026
496336a
align datasets created, refine_acceleration working implementation
JSMTHWCK Mar 23, 2026
bba3cc9
added comments to explain logic
JSMTHWCK Mar 24, 2026
cdd1903
moved method from filters to info, created test methods
JSMTHWCK Mar 24, 2026
4a9dae4
working impl, few hacks to remove before PR
JSMTHWCK Mar 25, 2026
c01d180
updated signature of resample and adjusted docs for PR
JSMTHWCK Mar 26, 2026
6e92979
updated some sphinx docs, removed unnecessary import
JSMTHWCK Mar 26, 2026
3bcb8e8
extra unneeded imports removed
JSMTHWCK Mar 26, 2026
5d4e6dc
fixed unit tests issues
JSMTHWCK Mar 26, 2026
50c98e4
Merge branch 'development' of github.com:MideTechnology/endaq-python …
JSMTHWCK Mar 26, 2026
d98c01d
Disabled fail-on-warning pytest arguments
StokesMIDE Mar 26, 2026
2a2972a
Removed warning-as-error arguments
StokesMIDE Mar 26, 2026
ee63c99
added future_stack = False to accommodate pandas 3.0.0
JSMTHWCK Mar 26, 2026
bc1208f
Merge branch 'feature/ES-876_combined_acceleration' of github.com:Mid…
JSMTHWCK Mar 26, 2026
2f34f59
added way of getting sensor range, added pin to pandas dependency
JSMTHWCK Mar 26, 2026
9dcc735
modifying version specifications for requirements.txt
JSMTHWCK Mar 31, 2026
8abc503
Merge branch 'development' of github.com:MideTechnology/endaq-python …
JSMTHWCK Apr 1, 2026
c4fe01b
moved methods into from ide/info.py to ide/util.py, added test methods
JSMTHWCK Apr 1, 2026
8d5e916
updated Exceptions to ValueErrors, updated some typing errors
JSMTHWCK Apr 3, 2026
1933d85
Merge branch 'development' of github.com:MideTechnology/endaq-python …
JSMTHWCK Apr 3, 2026
571a5d3
added test methods for get_accelerometer_bounds
JSMTHWCK Apr 3, 2026
1f3afa5
added accelerometer bound tests for PE and PR sensors
JSMTHWCK Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion endaq/batch/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _make_peak_windows(ch_data_cache: analyzer.CalcCache, margin_len):
if sys.version_info < (3, 9):
return aligned_peak_data.stack().stack().reorder_levels(levels)

return aligned_peak_data.stack(future_stack=True).stack().reorder_levels(levels)
return aligned_peak_data.stack(future_stack=True).stack(future_stack = False).reorder_levels(levels)


def _make_vc_curves(ch_data_cache: analyzer.CalcCache):
Expand Down
1 change: 0 additions & 1 deletion endaq/calc/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ def ellip(

return df


def _fftnoise(f):
"""
Generate time series noise for a given range of frequencies with random phase using ifft.
Expand Down
112 changes: 99 additions & 13 deletions endaq/calc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from __future__ import annotations

import bisect
import typing
from typing import Optional, Union, Literal
from typing import List, Optional, Union, Literal
import warnings

import numpy as np
Expand Down Expand Up @@ -129,31 +130,45 @@ def to_dB(
}


def resample(df: pd.DataFrame, sample_rate: Optional[float] = None) -> pd.DataFrame:
def resample(
df: pd.DataFrame,
sample_rate: Optional[float] = None,
num_samples: Optional[int] = None
) -> pd.DataFrame:
"""
Resample a dataframe to a desired sample rate (in Hz)

Resample a dataframe to a desired sample rate (in Hz) or a desired number of points.
Note that ``sample_rate`` and ``num_samples`` are mutually exclusive. If
neither sample_rate nor num_samples is supplied, it will use the same sample_rate
as it currently does, but makes the time stamps uniformly spaced.

:param df: The DataFrame to resample, indexed by time
:param sample_rate: The desired sample rate to resample the given data to.
If one is not supplied, then it will use the same as it currently does, but
make the time stamps uniformly spaced
:param num_samples: The desired number of samples to resample the given data to.

:return: The resampled data in a DataFrame
"""
if sample_rate is None:
num_samples_after_resampling = len(df)
else:
if sample_rate is not None and num_samples is not None:
raise ValueError("Only one of `sample_rate` and `num_samples` can be set.")

if sample_rate is not None:
dt = sample_spacing(df)
num_samples_after_resampling = int(dt * len(df) * sample_rate)
elif num_samples is not None:
num_samples_after_resampling = num_samples
else:
num_samples_after_resampling = len(df)

resampled_data, resampled_time = scipy.signal.resample(
df,
num_samples_after_resampling,
t=df.index.values.astype(np.float64),
)
resampled_time = pd.date_range(
df.iloc[0].name, df.iloc[-1].name,
periods=num_samples_after_resampling,
)

if resampled_time[0] != df.index[0] or resampled_time[-1] != df.index[-1]:
resampled_time = pd.date_range(
df.index[0], df.index[-1],
periods=num_samples_after_resampling,
)

# Check for datetimes, if so localize
if 'datetime' in str(df.index.dtype):
Expand Down Expand Up @@ -462,3 +477,74 @@ def to_altitude(df: pd.DataFrame,

# Return DataFrame with New Altitude Column
return alt_df

def align_dataframes(dfs: List[pd.DataFrame]) -> List[pd.DataFrame]:
    """
    Resample the given dataframes to all be equal-sized with uniform timestamps
    spanning exactly the time range shared by every input. Samples outside the
    shared range are dropped, and the shared-range endpoints are filled in by
    linear interpolation when a dataframe has no sample exactly on them.

    :param dfs: a List of dataframes with DatetimeIndex to align.

    :return: a list of dataframes in the same order that they were inputted in.

    :raises ValueError: if the dataframes share no overlapping time range, or
        if resampling produced timestamps inconsistent with the shared range.
    """
    # The shared range runs from the latest start to the earliest end
    aligned_start = max([df.index[0] for df in dfs])
    aligned_end = min([df.index[-1] for df in dfs])

    if aligned_start >= aligned_end:
        raise ValueError("No range of time shared between dataframes")

    # left_idx: last sample at/before the shared start;
    # right_idx: first sample at/after the shared end
    left_idx = [bisect.bisect_right(df.index, aligned_start) - 1 for df in dfs]
    right_idx = [bisect.bisect_left(df.index, aligned_end) for df in dfs]

    # Keep only the samples strictly inside the shared range; the endpoints are
    # reconstructed below. (Slicing to right_idx[i] keeps the last interior
    # sample, which the previous `right_idx[i] - 1` slice incorrectly dropped.)
    trimmed_dfs = [dfs[i].iloc[left_idx[i] + 1: right_idx[i]] for i in range(len(dfs))]

    for i, (df, l_idx) in enumerate(zip(dfs, left_idx)):
        if df.index[l_idx] != aligned_start:
            # No sample exactly on the shared start: linearly interpolate one on
            # the segment that brackets it, [l_idx, l_idx + 1]. (The previous
            # code extrapolated from the earlier segment [l_idx - 1, l_idx].)
            dt = (df.index[l_idx + 1] - df.index[l_idx]).total_seconds()
            slope = (df.iloc[l_idx + 1] - df.iloc[l_idx]) / dt
            new_dt = (aligned_start - df.index[l_idx]).total_seconds()
            new_point = df.iloc[l_idx] + new_dt * slope
            # prepend the interpolated start point
            trimmed_dfs[i] = pd.concat([
                pd.DataFrame([new_point], index=[aligned_start]),
                trimmed_dfs[i]
            ])
        else:
            # The sample already sits exactly on the shared start; put it back
            trimmed_dfs[i] = pd.concat([df.loc[[aligned_start]], trimmed_dfs[i]])

    # Same endpoint reconstruction for the shared end (here the bracketing
    # segment is [r_idx - 1, r_idx])
    for i, (df, r_idx) in enumerate(zip(dfs, right_idx)):
        if df.index[r_idx] != aligned_end:
            dt = (df.index[r_idx] - df.index[r_idx - 1]).total_seconds()
            slope = (df.iloc[r_idx] - df.iloc[r_idx - 1]) / dt
            new_dt = (aligned_end - df.index[r_idx - 1]).total_seconds()
            new_point = df.iloc[r_idx - 1] + new_dt * slope
            trimmed_dfs[i] = pd.concat([
                trimmed_dfs[i],
                pd.DataFrame([new_point], index=[aligned_end])
            ])
        else:
            trimmed_dfs[i] = pd.concat([trimmed_dfs[i], df.loc[[aligned_end]]])

    # Resample every dataframe up to the densest input's sample count
    total_samples = max(tdf.shape[0] for tdf in trimmed_dfs)
    resampled_dfs = [resample(df, num_samples=total_samples) for df in trimmed_dfs]

    # scipy's resample can round the generated datetimes inconsistently, so find
    # one index that meets spec (correct start and end points) and share it
    # across all outputs.
    datepoints = None
    for df in resampled_dfs:
        if df.index[0] == aligned_start and df.index[-1] == aligned_end:
            datepoints = df.index
            break
    if datepoints is None:
        raise ValueError("resampling error, timestamps inconsistent with inputs")
    for df in resampled_dfs:
        df.index = datepoints
    return resampled_dfs
140 changes: 136 additions & 4 deletions endaq/ide/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from __future__ import annotations
import typing

from collections import defaultdict
from collections import defaultdict, namedtuple
import datetime
import dateutil.tz
import warnings
Expand All @@ -14,15 +14,16 @@
import pandas.io.formats.style
import idelib.dataset

from .measurement import MeasurementType, ANY, get_channels
from .measurement import MeasurementType, ANY, get_channels, ACCELERATION
from .files import get_doc
from .util import parse_time

from .util import parse_time, get_accelerometer_info, get_accelerometer_bounds
from endaq.calc import utils, filters

__all__ = [
"get_channel_table",
"to_pandas",
"get_primary_sensor_data",
"get_unified_acceleration"
]


Expand Down Expand Up @@ -433,3 +434,134 @@ def get_primary_sensor_data(

#Return only the subchannels with right units
return data[channels.name]

# ============================================================================
#
# ============================================================================
def get_unified_acceleration(doc: idelib.dataset.Dataset) -> pd.DataFrame:
    """
    Compute a more accurate acceleration signal by filtering out the frequencies
    each accelerometer cannot measure reliably and combining (noise-weighted
    averaging) the frequency bands where multiple accelerometers are accurate.

    Note that this method uses :py:func:`calc.utils.resample()`, which can cause artifacts
    at the start or end of the data due to an assumption about signal periodicity. If important
    data is recorded within those bounds, it is not recommended to use this method.

    :param doc: An open `Dataset` object, see :py:func:`~endaq.ide.get_doc()`
        for more.

    :return: a pandas Dataframe with the flattened acceleration data. Output channel names
        are [X, Y, Z].
    """

    # every acceleration channel in the recording
    acceleration_channels = get_channels(doc, ACCELERATION, False)

    dfs = [to_pandas(ch) for ch in acceleration_channels]

    cleaned_dfs = []
    for idx in range(len(dfs)):
        df = dfs[idx].copy()
        # drop non-axes channels (keep only columns whose name starts with X/Y/Z)
        df = df[[col for col in df.columns if col[0] in 'XYZ']]
        # zero-mean every axis so DC offsets don't leak through the band filters
        for col in df.columns:
            df[col] = df[col] - np.mean(df[col])
        cleaned_dfs.append(df)

    # resample all channels onto one shared, uniformly spaced time base
    aligned_dfs = utils.align_dataframes(cleaned_dfs)
    aligned_sr = 1 / (aligned_dfs[0].index[1] - aligned_dfs[0].index[0]).total_seconds()
    sensor_info = [get_accelerometer_info(ch) for ch in acceleration_channels]
    # per-sensor noise figure, used as the averaging weight below
    noises = np.array([si['noise'] for si in sensor_info])
    # usable band per sensor; the high cutoff is capped just below the Nyquist
    # frequency of the aligned sample rate so the bandpass filter stays valid
    bounds = [(si['low_cutoff'], min(si['high_cutoff'], int(aligned_sr / 2) - 1)) for si in sensor_info]
    #filters out the frequencies that the sensors can not accurately detect
    dfs = [filters.butterworth(df, low_cutoff= l_bound, high_cutoff= r_bound)
           for (df, (l_bound, r_bound)) in zip(aligned_dfs, bounds)]
    # normalize column names so the band-limited frames can be averaged/summed
    for df in dfs: df.columns = ['X', 'Y', 'Z']

    # maps each distinct frequency sub-band -> indices of the sensors accurate in it
    hz_overlaps = _find_all_overlaps(bounds = bounds)
    averaged_dfs = []

    #for each range, the good frequencies are isolated and averaged with the other datasets
    #who share the same frequency range
    for k, v in hz_overlaps.items():
        cur_overlap_dfs = [dfs[v_i] for v_i in v]
        cur_noises = noises[v]
        hz_overlap_dfs = [filters.butterworth(df, low_cutoff=k[0], high_cutoff=k[1])
                          for df in cur_overlap_dfs]
        bound_avgd_df = _weighted_avg(hz_overlap_dfs, cur_noises)
        averaged_dfs.append(bound_avgd_df)

    # the sub-bands partition the covered spectrum, so summing the band-limited
    # averages reconstructs the full-band signal
    # NOTE(review): if no bands exist (no overlapping channels), sum([]) returns
    # 0 (an int), not a DataFrame — confirm callers never hit that case
    return sum(averaged_dfs)

def _find_all_overlaps(
bounds: typing.List[typing.Tuple[int, int]]
) -> typing.Dict[typing.Tuple[int, int], typing.List[int]]:
"""
finds **all** possible overlaps of a list of start and end bounds. if an end bound and a start
bound share the same value, it is not considered overlapping.

:param bounds: a list of each start and end bounds, where the end bound is strictly greater
than the starting bounds.

:return: a dictionary where the keys are the bounds for every overlap, and the values are
the indices that belong in each bound, respective to ``bounds``
"""

if len(bounds) == 0:
return {}
#labeling the components
lblHz = namedtuple("labeledTuple", ("idx", "Hz"))
labels = range(len(bounds))

lower_bounds = list(map(lblHz, labels, [b[0] for b in bounds]))
upper_bounds = list(map(lblHz, labels, [b[1] for b in bounds]))
bounds_sorted = sorted(lower_bounds + upper_bounds, key= lambda b: b.Hz)

open_intervals: typing.List[lblHz.idx] = np.array([bounds_sorted[0].idx])
closed_intervals: dict[tuple[int, int], typing.List[int]] = {}

left_bound = bounds_sorted[0].Hz
bounds_sorted = bounds_sorted[1:]

for point in bounds_sorted:
if left_bound != point.Hz:
if open_intervals.size != 0:
closed_intervals[left_bound, point.Hz] = open_intervals
left_bound = point.Hz
if point.idx in open_intervals:
open_intervals = np.setdiff1d(open_intervals, [point.idx])
else:
open_intervals = np.append(open_intervals, point.idx)
return closed_intervals


def _weighted_avg(dfs: typing.List[pd.DataFrame], noise: typing.List[float]):
"""
Combines multiple dataframe into one by applying a weighted average base on noise in a linear-inverse fashion.
For sensors A and B, if the noise ratio is 2:1, the weighing ratio will be 1:2. Note that a value of 0 is
considered in the weighting.

:param dfs: the dataframes to normalize. It is assumed that all dataframes have
the same DateTimeIndex values.
:param noise: The noise value to the index-respective dataframe. Noise will be normalized.

:return: a single dataframe with the weighted values
"""
if len(dfs) != len(noise) or len(dfs) == 0:
raise ValueError("dataframes and noise need to have equal, non zero number of elements")
#normalize the weightings. If there is a weighting with 0 noise, return that instead
if 0 in noise:
return dfs[noise.index(0)]
noise_normalized = np.array(noise) / sum(noise)

#formula for the inverse is for array [a0, a1, ..., an], inverse is
# [a1a2...an, a0a2...an, a1a2...an], and then normalized
new_noise = 1 / noise_normalized
for n in noise_normalized:
new_noise *= n

new_noise = new_noise / sum(new_noise)
#in the case that len(dfs) == 1, the noise is 1 and the original is returned
return pd.DataFrame(sum([n * d for n, d in zip(new_noise, dfs)]),
columns = dfs[0].columns, index = dfs[0].index)

Loading
Loading