From ceb6be77dd37845da1e95929418ae3c827dad1b3 Mon Sep 17 00:00:00 2001 From: chencan <1780167010@qq.com> Date: Mon, 11 May 2026 17:20:36 +0800 Subject: [PATCH 1/2] fix: resolve lineage extraction bug for MySQL 5.7.3 Modified queries.py to handle MySQL 5.7.3 compatibility issues in lineage extraction. mysql.general_log.argument is MEDIUMTEXT on older MySQL and MEDIUMBLOB on 5.7+, and the previous SQL queries read and filtered the column as text, causing extraction to fail. Changes: - Adjusted the mysql.general_log queries to work with the MySQL 5.7.3 schema - Wrapped argument in CONVERT(... USING utf8mb4) in the SELECT list and the NOT LIKE filters, so no version detection is needed - Tested with MySQL 5.7.3, 5.7.23, and 8.0 to ensure no regression Signed-off-by: Your Name --- .../metadata/ingestion/source/database/mysql/queries.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/mysql/queries.py b/ingestion/src/metadata/ingestion/source/database/mysql/queries.py index f50779ce51a8..9e5a026213f7 100644 --- a/ingestion/src/metadata/ingestion/source/database/mysql/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/mysql/queries.py @@ -14,11 +14,12 @@ import textwrap +# general_log.argument is MEDIUMTEXT on older MySQL and MEDIUMBLOB on 5.7+; CONVERT unifies behavior for SELECT/WHERE. 
MYSQL_SQL_STATEMENT = textwrap.dedent( """ SELECT NULL `database_name`, - argument `query_text`, + CONVERT(argument USING utf8mb4) `query_text`, event_time `start_time`, NULL `end_time`, NULL `duration`, @@ -29,8 +30,8 @@ FROM mysql.general_log WHERE command_type = 'Query' AND event_time between '{start_time}' and '{end_time}' - AND argument NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' - AND argument NOT LIKE '/* {{"app": "dbt", %%}} */%%' + AND CONVERT(argument USING utf8mb4) NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' + AND CONVERT(argument USING utf8mb4) NOT LIKE '/* {{"app": "dbt", %%}} */%%' {filters} ORDER BY event_time desc LIMIT {result_limit}; @@ -62,7 +63,7 @@ MYSQL_TEST_GET_QUERIES = textwrap.dedent( """ -SELECT `argument` from mysql.general_log limit 1; +SELECT CONVERT(argument USING utf8mb4) AS query_text FROM mysql.general_log LIMIT 1; """ ) From f1f1c51320a09b3bec81460971bf4861b1cca7bb Mon Sep 17 00:00:00 2001 From: chencan <1780167010@qq.com> Date: Wed, 13 May 2026 09:53:31 +0800 Subject: [PATCH 2/2] fix: also convert slow_log.sql_text --- .../ingestion/source/database/mysql/queries.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/mysql/queries.py b/ingestion/src/metadata/ingestion/source/database/mysql/queries.py index 9e5a026213f7..5a6e461007d6 100644 --- a/ingestion/src/metadata/ingestion/source/database/mysql/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/mysql/queries.py @@ -14,7 +14,8 @@ import textwrap -# general_log.argument is MEDIUMTEXT on older MySQL and MEDIUMBLOB on 5.7+; CONVERT unifies behavior for SELECT/WHERE. +# general_log.argument and slow_log.sql_text are MEDIUMTEXT on older MySQL and MEDIUMBLOB on 5.7+; +# CONVERT(... USING utf8mb4) unifies behavior for SELECT/WHERE. 
MYSQL_SQL_STATEMENT = textwrap.dedent( """ SELECT @@ -43,7 +44,7 @@ """ SELECT NULL `database_name`, - sql_text `query_text`, + CONVERT(sql_text USING utf8mb4) `query_text`, start_time `start_time`, NULL `end_time`, NULL `duration`, @@ -53,8 +54,8 @@ NULL `aborted` FROM mysql.slow_log WHERE start_time between '{start_time}' and '{end_time}' - AND sql_text NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' - AND sql_text NOT LIKE '/* {{"app": "dbt", %%}} */%%' + AND CONVERT(sql_text USING utf8mb4) NOT LIKE '/* {{"app": "OpenMetadata", %%}} */%%' + AND CONVERT(sql_text USING utf8mb4) NOT LIKE '/* {{"app": "dbt", %%}} */%%' {filters} ORDER BY start_time desc LIMIT {result_limit}; @@ -69,7 +70,7 @@ MYSQL_TEST_GET_QUERIES_SLOW_LOGS = textwrap.dedent( """ -SELECT `sql_text` from mysql.slow_log limit 1; +SELECT CONVERT(sql_text USING utf8mb4) AS query_text FROM mysql.slow_log LIMIT 1; """ )