-
Notifications
You must be signed in to change notification settings - Fork 198
perf: skip unnecessary metadata fetch calls for tags when not configured #1387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
17754e0
9eca1c4
815ff55
7839417
91e0dea
a84e0f0
51a495e
0ed403b
f4399cd
e6adab7
ba26eb1
f985dc8
616239f
12e008a
adee055
c727e76
8b57e75
637d361
f29ddeb
202249b
4399e92
c352241
637e84a
aef43ea
63542cc
c2d4999
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,6 +81,9 @@ | |
| StreamingTableConfig, | ||
| ) | ||
| from dbt.adapters.databricks.relation_configs.table_format import TableFormat | ||
| from dbt.adapters.databricks.relation_configs.tags import ( | ||
| TagsProcessor, | ||
| ) | ||
| from dbt.adapters.databricks.relation_configs.tblproperties import TblPropertiesConfig | ||
| from dbt.adapters.databricks.relation_configs.view import ViewConfig | ||
| from dbt.adapters.databricks.utils import ( | ||
|
|
@@ -967,17 +970,21 @@ def parse_columns_and_constraints( | |
| return enriched_columns, parsed_constraints | ||
|
|
||
| @available.parse(lambda *a, **k: {}) | ||
| def get_relation_config(self, relation: DatabricksRelation) -> DatabricksRelationConfigBase: | ||
| def get_relation_config( | ||
| self, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfigBase: | ||
| if relation.type == DatabricksRelationType.MaterializedView: | ||
| return MaterializedViewAPI.get_from_relation(self, relation) | ||
| return MaterializedViewAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.StreamingTable: | ||
| return StreamingTableAPI.get_from_relation(self, relation) | ||
| return StreamingTableAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.Table: | ||
| return IncrementalTableAPI.get_from_relation(self, relation) | ||
| return IncrementalTableAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.View: | ||
| return ViewAPI.get_from_relation(self, relation) | ||
| return ViewAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.MetricView: | ||
| return MetricViewAPI.get_from_relation(self, relation) | ||
| return MetricViewAPI.get_from_relation(self, relation, model_config) | ||
| else: | ||
| raise NotImplementedError(f"Relation type {relation.type} is not supported.") | ||
|
|
||
|
|
@@ -1060,12 +1067,15 @@ def config_type(cls) -> type[DatabricksRelationConfig]: | |
|
|
||
| @classmethod | ||
| def get_from_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfig: | ||
| """Get the relation config from the relation.""" | ||
|
|
||
| assert relation.type == cls.relation_type | ||
| results = cls._describe_relation(adapter, relation) | ||
| results = cls._describe_relation(adapter, relation, model_config) | ||
| return cls.config_type().from_results(results) | ||
|
|
||
| @classmethod | ||
|
|
@@ -1077,7 +1087,10 @@ def get_from_relation_config(cls, relation_config: RelationConfig) -> Databricks | |
| @classmethod | ||
| @abstractmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| """Describe the relation and return the results.""" | ||
|
|
||
|
|
@@ -1087,11 +1100,14 @@ def _describe_relation( | |
| class DeltaLiveTableAPIBase(RelationAPIBase[DatabricksRelationConfig]): | ||
| @classmethod | ||
| def get_from_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfig: | ||
| """Get the relation config from the relation.""" | ||
|
|
||
| relation_config = super().get_from_relation(adapter, relation) | ||
|
sd-db marked this conversation as resolved.
|
||
| relation_config = super().get_from_relation(adapter, relation, model_config) | ||
|
|
||
| # Ensure any current refreshes are completed before returning the relation config | ||
| tblproperties = cast(TblPropertiesConfig, relation_config.config["tblproperties"]) | ||
|
|
@@ -1111,7 +1127,10 @@ def config_type(cls) -> type[MaterializedViewConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| kwargs = {"table_name": relation} | ||
| results: RelationResults = dict() | ||
|
|
@@ -1123,9 +1142,18 @@ def _describe_relation( | |
| results["information_schema.views"] = get_first_row( | ||
| adapter.execute_macro("get_view_description", kwargs=kwargs) | ||
| ) | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs) | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Config only comes from user and its obvious here. |
||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
|
sd-db marked this conversation as resolved.
|
||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| results["row_filters"] = adapter.execute_macro("fetch_row_filters", kwargs=kwargs) | ||
|
|
||
| return results | ||
|
|
||
|
|
||
|
|
@@ -1138,7 +1166,10 @@ def config_type(cls) -> type[StreamingTableConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| kwargs = {"table_name": relation} | ||
| results: RelationResults = dict() | ||
|
|
@@ -1162,16 +1193,37 @@ def config_type(cls) -> type[IncrementalTableConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| results = {} | ||
| kwargs = {"relation": relation} | ||
|
|
||
| if not relation.is_hive_metastore(): | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| results["information_schema.column_tags"] = adapter.execute_macro( | ||
| "fetch_column_tags", kwargs=kwargs | ||
| # To be backward compatible model_config can be None. In that case, tags should be | ||
| # fetched to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
|
sd-db marked this conversation as resolved.
|
||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
|
sd-db marked this conversation as resolved.
|
||
| results["information_schema.tags"] = adapter.execute_macro( | ||
| "fetch_tags", kwargs=kwargs | ||
| ) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be | ||
| # fetched to maintain backward compatibility. | ||
| column_tag_config = ( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comments on naming + checks on
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suffix
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Variable naming should still be intentional and descriptive, we should update it to |
||
| model_config.config.get(ColumnTagsProcessor.name) if model_config else None | ||
| ) | ||
| if column_tag_config is None or column_tag_config.requires_server_metadata_for_diff(): | ||
| results["information_schema.column_tags"] = adapter.execute_macro( | ||
| "fetch_column_tags", kwargs=kwargs | ||
| ) | ||
| else: | ||
| results["information_schema.column_tags"] = None | ||
|
|
||
| results["non_null_constraint_columns"] = adapter.execute_macro( | ||
| "fetch_non_null_constraint_columns", kwargs=kwargs | ||
| ) | ||
|
|
@@ -1201,15 +1253,26 @@ def config_type(cls) -> type[ViewConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
|
sd-db marked this conversation as resolved.
|
||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| results = {} | ||
| kwargs = {"relation": relation} | ||
|
|
||
| results["information_schema.views"] = get_first_row( | ||
| adapter.execute_macro("get_view_description", kwargs=kwargs) | ||
| ) | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs) | ||
|
|
||
| kwargs = {"table_name": relation} | ||
|
|
@@ -1228,14 +1291,23 @@ def config_type(cls) -> type[MetricViewConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| results = {} | ||
| kwargs = {"relation": relation} | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs) | ||
| kwargs = {"table_name": relation} | ||
| results["describe_extended"] = adapter.execute_macro( | ||
| DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs=kwargs | ||
| ) | ||
|
|
||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| return results | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| {%- macro get_configuration_changes(existing_relation) -%} | ||
| {%- set existing_config = adapter.get_relation_config(existing_relation) -%} | ||
| {%- set model_config = adapter.get_config_from_model(config.model) -%} | ||
| {%- set existing_config = adapter.get_relation_config(existing_relation, model_config) -%} | ||
| {%- set configuration_changes = model_config.get_changeset(existing_config) -%} | ||
| {% do return(configuration_changes) %} | ||
| {%- endmacro -%} |
Uh oh!
There was an error while loading. Please reload this page.