diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc index da22857..977ff42 100644 Binary files a/__pycache__/config.cpython-313.pyc and b/__pycache__/config.cpython-313.pyc differ diff --git a/__pycache__/database.cpython-313.pyc b/__pycache__/database.cpython-313.pyc index e358c71..adee875 100644 Binary files a/__pycache__/database.cpython-313.pyc and b/__pycache__/database.cpython-313.pyc differ diff --git a/services/__pycache__/db_connector.cpython-313.pyc b/services/__pycache__/db_connector.cpython-313.pyc index 7d7e475..bb0fb77 100644 Binary files a/services/__pycache__/db_connector.cpython-313.pyc and b/services/__pycache__/db_connector.cpython-313.pyc differ diff --git a/services/__pycache__/ml_mapper.cpython-313.pyc b/services/__pycache__/ml_mapper.cpython-313.pyc index c2295fb..0198e5f 100644 Binary files a/services/__pycache__/ml_mapper.cpython-313.pyc and b/services/__pycache__/ml_mapper.cpython-313.pyc differ diff --git a/services/__pycache__/transformers.cpython-313.pyc b/services/__pycache__/transformers.cpython-313.pyc index 9548421..386a636 100644 Binary files a/services/__pycache__/transformers.cpython-313.pyc and b/services/__pycache__/transformers.cpython-313.pyc differ diff --git a/services/db_connector.py b/services/db_connector.py index 8715dbf..cfecb09 100644 --- a/services/db_connector.py +++ b/services/db_connector.py @@ -54,7 +54,16 @@ def create_sqlalchemy_engine(db_type, host, port, db_name, user, password, chars elif db_type == "Microsoft SQL Server": # Requires: pip install pymssql + # For Thai data: use 'utf8' or 'cp874' (Thai Windows codepage) + # If source contains legacy TIS-620, try 'cp874' charset mssql_charset = charset if charset else "utf8" + + query_params = {"charset": mssql_charset} + + # For legacy Thai databases, add TDS version for better compatibility + if charset in ['tis620', 'cp874', 'latin1']: + query_params["tds_version"] = "7.0" # Compatible with older SQL Server + connection_url = URL.create( "mssql+pymssql", username=user, @@ -62,7 +71,7 @@ def create_sqlalchemy_engine(db_type, host, port, db_name, user, password, chars host=host, port=port_int or 1433, database=db_name, - query={"charset": mssql_charset} + query=query_params ) else: diff --git a/views/__pycache__/migration_engine.cpython-313.pyc b/views/__pycache__/migration_engine.cpython-313.pyc index f63f3fb..8865a58 100644 Binary files a/views/__pycache__/migration_engine.cpython-313.pyc and b/views/__pycache__/migration_engine.cpython-313.pyc differ diff --git a/views/migration_engine.py b/views/migration_engine.py index 549b0be..c6bc7a5 100644 --- a/views/migration_engine.py +++ b/views/migration_engine.py @@ -54,6 +54,7 @@ def generate_select_query(config_data, source_table, db_type='MySQL'): """ Generate a SELECT query based on configuration. Applies TRIM at source for MSSQL CHAR columns to prevent padding. + For SQL Server: Also cleans non-breaking spaces and control characters at source. """ try: if not config_data or 'mappings' not in config_data: @@ -65,8 +66,23 @@ def generate_select_query(config_data, source_table, db_type='MySQL'): continue source_col = mapping['source'] - # Apply TRIM at source for MSSQL to handle CHAR padding - if db_type == 'Microsoft SQL Server' and 'TRIM' in mapping.get('transformers', []): + + # Special handling for SQL Server text columns + if db_type == 'Microsoft SQL Server': + col_expr = f'"{source_col}"' + + # Apply TRIM if specified in transformers + if 'TRIM' in mapping.get('transformers', []): + col_expr = f'TRIM({col_expr})' + + # Clean non-breaking spaces and problematic characters for VARCHAR/NVARCHAR/TEXT columns + # REPLACE(col, CHAR(160), ' ') -> replace nbsp with regular space + # REPLACE(col, CHAR(0), '') -> remove null bytes + col_expr = f'REPLACE(REPLACE({col_expr}, CHAR(160), \' \'), CHAR(0), \'\')' + + selected_cols.append(f'{col_expr} AS "{source_col}"') + elif 'TRIM' in mapping.get('transformers', []): + # Other databases: just apply TRIM if needed selected_cols.append(f'TRIM("{source_col}") AS "{source_col}"') else: selected_cols.append(f'"{source_col}"') @@ -293,19 +309,56 @@ def render_migration_engine_page(): src_sel = st.selectbox("Source Profile", ds_options, key="src_sel") st.session_state.migration_src_profile = src_sel - charset_options = ["utf8mb4 (Default)", "tis620 (Thai Legacy)", "latin1 (Raw Bytes)"] + # Get source DB type to show appropriate charset options + src_db_type = None + if src_sel != "Select Profile...": + row = datasources[datasources['name'] == src_sel].iloc[0] + ds_detail = db.get_datasource_by_id(int(row['id'])) + src_db_type = ds_detail['db_type'] + + # Show charset options based on DB type + if src_db_type == 'Microsoft SQL Server': + charset_options = [ + "utf8 (Default - Modern)", + "cp874 (Thai Windows Codepage - แนะนำสำหรับข้อมูลไทยเก่า)", + "latin1 (Raw Bytes)" + ] + help_text = "SQL Server: ใช้ cp874 สำหรับข้อมูลไทยแบบเก่า" + elif src_db_type == 'MySQL': + charset_options = [ + "utf8mb4 (Default)", + "tis620 (Thai Legacy)", + "latin1 (Raw Bytes)" + ] + help_text = "MySQL: ใช้ tis620 ถ้าภาษาไทยเพี้ยน" + else: + charset_options = [ + "utf8 (Default)", + "latin1 (Raw Bytes)" + ] + help_text = "เลือก charset ตามฐานข้อมูลต้นทาง" + src_charset_sel = st.selectbox( - "Source Charset (ถ้าภาษาไทยเพี้ยนให้ลอง tis620)", - charset_options, - key="src_charset_sel" + "Source Charset", + charset_options, + key="src_charset_sel", + help=help_text ) + + # Map selection to actual charset value charset_map = { "utf8mb4 (Default)": None, + "utf8 (Default - Modern)": None, + "utf8 (Default)": None, "tis620 (Thai Legacy)": "tis620", + "cp874 (Thai Windows Codepage - แนะนำสำหรับข้อมูลไทยเก่า)": "cp874", "latin1 (Raw Bytes)": "latin1" } st.session_state.src_charset = charset_map.get(src_charset_sel) + if src_charset_sel.startswith("cp874"): + st.info("💡 **cp874** จะแก้ปัญหา non-breaking space และตัวอักษรไทยเก่าใน SQL Server") + if src_sel != "Select Profile...": if st.button("🔍 Test Source"): with st.spinner("Connecting..."):