-
Notifications
You must be signed in to change notification settings - Fork 48
feat: Floor datetimes on ingestion #2146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 11 commits
8edbf8a
3b2770f
34a2dc5
8d2d2d3
86ad774
30fb2ad
7f0caa1
6c08284
f590e58
0566ceb
4afd828
a857806
f30429c
3a56e0c
dd2fcc4
aa93fe1
e0f88a8
aaebfaa
43fe0b5
9624f09
860c700
2228410
0d92e52
b7d3cf0
531d2db
067bd92
fa31e0e
cd4cfd2
d4e56bc
c66b334
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -345,6 +345,24 @@ def check_schema_unit_against_type(self, data, **kwargs): | |
| f"The unit required for this message type should be convertible to an energy price unit, got incompatible unit: {posted_unit}" | ||
| ) | ||
|
|
||
| @validates_schema | ||
| def check_single_value_zero_duration_for_non_instantaneous_sensor( | ||
| self, data, **kwargs | ||
| ): | ||
| """Reject inputs where a non-instantaneous sensor cannot infer any resolution.""" | ||
|
|
||
| required_resolution = data["sensor"].event_resolution | ||
| inferred_resolution = data["duration"] / len(data["values"]) | ||
|
|
||
| if ( | ||
| required_resolution != timedelta(hours=0) | ||
| and len(data["values"]) == 1 | ||
| and inferred_resolution == timedelta(hours=0) | ||
| ): | ||
| raise ValidationError( | ||
| f"Cannot infer a non-zero resolution from one value over zero duration. This sensor requires a resolution of {required_resolution}." | ||
| ) | ||
|
|
||
| @validates_schema | ||
| def check_resolution_compatibility_of_sensor_data(self, data, **kwargs): | ||
| """Ensure event frequency is compatible with the sensor's event resolution. | ||
|
|
@@ -362,10 +380,6 @@ def check_resolution_compatibility_of_sensor_data(self, data, **kwargs): | |
| # The event frequency is inferred by assuming sequential, equidistant values within a time interval. | ||
| # The event resolution is assumed to be equal to the event frequency. | ||
| inferred_resolution = data["duration"] / len(data["values"]) | ||
| if len(data["values"]) == 1 and inferred_resolution == timedelta(hours=0): | ||
| raise ValidationError( | ||
| f"Cannot infer a non-zero resolution from one value over zero duration. This sensor requires a resolution of {required_resolution}." | ||
| ) | ||
| if inferred_resolution % required_resolution != timedelta(hours=0): | ||
| raise ValidationError( | ||
| f"Resolution of {inferred_resolution} is incompatible with the sensor's required resolution of {required_resolution}." | ||
|
|
@@ -385,7 +399,7 @@ def check_multiple_instantaneous_values(self, data, **kwargs): | |
| ) | ||
|
|
||
| @post_load() | ||
| def post_load_sequence(self, data: dict, **kwargs) -> BeliefsDataFrame: | ||
| def post_load_sequence(self, data: dict, **kwargs) -> dict[str, BeliefsDataFrame]: | ||
| """ | ||
| If needed, upsample and convert units, then deserialize to a BeliefsDataFrame. | ||
| Returns a dict with the BDF in it, as that is expected by webargs when used with as_kwargs=True. | ||
|
|
@@ -454,7 +468,11 @@ def load_bdf(sensor_data: dict) -> BeliefsDataFrame: | |
| start = sensor_data["start"] | ||
| sensor = sensor_data["sensor"] | ||
|
|
||
| if frequency := sensor.get_attribute("frequency"): | ||
| if sensor.event_resolution != timedelta(0) and sensor.get_attribute( | ||
| "round_datetimes_on_ingestion", True | ||
| ): | ||
| start = pd.Timestamp(start).floor(sensor.event_resolution) | ||
| elif frequency := sensor.get_attribute("frequency"): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the historical purpose of the `frequency` attribute? How does that relate to the new functionality?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I looked into this a bit more. The historical purpose of `frequency` seems to be to align incoming measurements to a configured Pandas frequency by rounding. That is useful when a sensor stores instantaneous data, where `event_resolution` is zero and therefore defines no interval grid to align to. This PR adds a different behavior for non-instantaneous sensors. For those, the sensor already has an interval grid through `event_resolution`, so the start time is floored to that grid instead. I renamed the new flag to [flag name lost in page extraction; the diff above reads it as `round_datetimes_on_ingestion`]. |
||
| start = pd.Timestamp(start).round(frequency) | ||
|
|
||
| if event_resolution == timedelta(hours=0): | ||
|
|
@@ -482,6 +500,7 @@ def load_bdf(sensor_data: dict) -> BeliefsDataFrame: | |
| s, | ||
| source=source, | ||
| sensor=sensor_data["sensor"], | ||
| event_resolution=event_resolution, | ||
| **belief_timing, | ||
| ) | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.