Thanks for the suggestion on how to resolve the Movesense timestamp issue. Before I was pointed to this article, I had attempted to interpolate from the announcement timestamps.
There are fundamentally two approaches I have attempted here:
You can get reference_time from the JSON file name in the Movesense Showcase app. It is straightforward to get the sample data size.
This approach does not need you to remember what sample frequency you set at the time of recording.
However, you may come across another issue: the time delta is not always 20. You may get 19. This is the only way to prevent the timestamps from being out of step after interpolation. Root cause: The announcement timestamps captured in the json file are not evenly incremented to begin with.
Any suggestions on how we should address this?
def _get_timestamp_interval(sample_frequency: int = 104, output_time_unit: Literal['second', 'millisecond', 'nanosecond'] = 'millisecond') -> int:
    """
    Calculate the time interval between consecutive samples.

    The interval is the number of requested time units per second divided by
    the sample frequency, truncated toward zero to a whole number. Because of
    that truncation the coarser units lose precision: at 104 Hz the result is
    9 for 'millisecond' (true value ~9.615) and 0 for 'second'. Request
    'nanosecond' when the fractional part matters.

    :param sample_frequency: The frequency of sampling in Hertz (Hz). Must be
        greater than zero. Default is 104 Hz.
    :param output_time_unit: The desired output time unit ('second',
        'millisecond', 'nanosecond'). Default is 'millisecond'.
    :return: Time interval in the specified unit, truncated to an integer.
    :raises ValueError: If the time unit is not one of the supported names, or
        if sample_frequency is zero or negative.
    """
    units_per_second = {
        'second': 1,
        'millisecond': 1_000,
        'nanosecond': 1_000_000_000,
    }

    # The isinstance guard keeps unhashable inputs on the ValueError path
    # instead of failing inside the dict lookup.
    scale = units_per_second.get(output_time_unit) if isinstance(output_time_unit, str) else None
    if scale is None:
        raise ValueError("Invalid time unit. Choose from 'second', 'millisecond', or 'nanosecond'.")

    # A zero frequency would divide by zero and a negative one would produce a
    # negative interval; neither describes a real sampling rate.
    if sample_frequency <= 0:
        raise ValueError(f"sample_frequency must be greater than zero, got {sample_frequency!r}.")

    # One division straight into the target unit, rather than converting to
    # milliseconds first and rescaling, so float error is not compounded.
    return int(scale / sample_frequency)
def calculate_timestamps(reference_time: pd.Timestamp, time_interval: float, num_samples: int) -> List[pd.Timestamp]:
    """
    Generate evenly spaced timestamps starting at a reference time.

    Sample ``i`` is placed at ``reference_time + i * time_interval``
    milliseconds, so the first timestamp equals ``reference_time`` itself.

    :param reference_time: The starting datetime for the timestamps.
    :param time_interval: The time interval in milliseconds between each
        timestamp. Fractional values are accepted.
    :param num_samples: The number of timestamps to generate.
    :return: A list of ``num_samples`` pandas Timestamps (empty when
        ``num_samples`` is 0).
    """
    # Offset of every sample from the reference, as a TimedeltaIndex. Going
    # through a pandas index (rather than a raw NumPy timedelta64 array) keeps
    # the sum a DatetimeIndex, whose tolist() yields pd.Timestamp objects.
    offsets = pd.to_timedelta(np.arange(num_samples) * time_interval, unit='ms')
    return (offsets + reference_time).tolist()
def verify_timestep_increment_distribution(self, df: pd.DataFrame) -> None:
    """
    Print a report on the spacing between consecutive 'timestep' values.

    The 'timestep' values are sorted, the difference between each value and
    its predecessor is taken, and the number of occurrences of every distinct
    difference is tabulated. The report also echoes the file name, device id,
    start time and raw data type held on the instance, followed by the
    minimum, maximum and median difference. The DataFrame passed in is not
    modified.

    Args:
        df (pd.DataFrame): A DataFrame with a 'timestep' column.

    Returns:
        None: The report is written to standard output.
    """
    rule = "-----------------------------------------------------"

    # Differences between neighbouring timesteps once they are in order. The
    # first entry has no predecessor and is therefore NaN.
    steps = df['timestep'].sort_values().diff()

    # value_counts() skips NaN by default, so the undefined first difference
    # is not tallied.
    tally = steps.value_counts().to_dict()

    print()
    print(f"Data File: {self.file_name}")
    print(f"Sensor ID: {self.device_id}")
    print(f"Reference Time: {self.start_time}")
    print(f"Raw Data Type: {self.raw_data_type.upper()}")
    print("Timestep Increment Distribution Results:")
    print(rule)
    print("Increment | Count")
    print(rule)
    for step in sorted(tally):
        print(f"{step:9.0f} | {tally[step]}")
    print(rule)
    print(f"Total timesteps: {len(df)}")
    print(f"Unique increments: {len(tally)}")

    # Summary statistics over the same differences
    print("\nAdditional Statistics:")
    print(f"Min increment: {steps.min()}")
    print(f"Max increment: {steps.max()}")
    print(f"Median increment: {steps.median()}")
    print()