+
+
+💡
+
+
Generate Synthetic Data
+
+
Define the scope, constraints, and target context for your synthetic data generation pipeline.
+
+""", unsafe_allow_html=True)
+
+ with st.container():
+
+ col1, col2 = st.columns(2)
+ with col1:
+ st.markdown("""
+
+
+
+🕸️
+
+
Refine Taxonomy
+
+
Review the generated taxonomy and associated relationships. Edit branches of the taxonomy or proceed to grounded data generation.
+
+🌍 {regions}
+📌 {concept_name}
+
+
+""", unsafe_allow_html=True)
+
+
+ tab_graph, tab_structure = st.tabs(["Taxonomy Flow", "Taxonomy Structure"])
+
+ with tab_graph:
+ if not st.session_state.demo_data.empty:
+ df = st.session_state.demo_data.copy()
+
+ # Styled header with legend
+ st.markdown("""
+', unsafe_allow_html=True)
+ col_tree, col_meta = st.columns([1, 1], gap="large")
+
+ with col_tree:
+ st.markdown("### L1, L2, L3 Tree")
+ l1_groups = tree_df.groupby('level1')
+ for l1, l1_df in l1_groups:
+ with st.expander(f"📁 **L1** {l1}", expanded=True):
+ l2_groups = l1_df.groupby('level2')
+ for l2, l2_df in l2_groups:
+ with st.expander(f"📂 **L2** {l2}", expanded=True):
+ l3_items = sorted(l2_df['level3'].unique())
+ for l3 in l3_items:
+ if st.button(f"👉 **L3** {l3}", key=f"btn_{l1}_{l2}_{l3}", use_container_width=True):
+ st.session_state.selected_l3 = l3
+
+ # CSS specifically for the L3 buttons to make them look clickable
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
+ with col_meta:
+ st.markdown("### Metadata Details")
+ st.markdown("
", unsafe_allow_html=True)
+
+ if 'selected_l3' not in st.session_state or not st.session_state.selected_l3:
+ st.info("ℹ️ Metadata is available for L3 Leaf nodes. Click any L3 node in the tree to view its details.")
+ else:
+ # Find data for the selected L3
+ # We search in the exploded demo_data to find matching row
+ df_search = df.copy()
+ df_search['level3_list'] = df_search['level3'].apply(safe_eval_list)
+ df_exploded = df_search.explode('level3_list')
+ match = df_exploded[df_exploded['level3_list'] == st.session_state.selected_l3]
+
+ if not match.empty:
+ node_data = match.iloc[0]
+
+ st.markdown(f"#### {st.session_state.selected_l3}")
+
+ # Geographic Context
+ st.markdown("🌐 **GEOGRAPHIC CONTEXT**")
+ # Prefer extracted_Country, fallback to cleaned_Country, fallback to default
+ country_val = node_data.get('extracted_Country', node_data.get('cleaned_Country', 'Global'))
+ # Format as blue pill if it's string, handle lists
+ if isinstance(country_val, str) and country_val.startswith('['):
+ try: country_val = eval(country_val)
+ except: pass
+ if isinstance(country_val, list):
+ pills = "".join([f"
{c}" for c in country_val])
+ st.markdown(pills, unsafe_allow_html=True)
+ else:
+ st.markdown(f"
{country_val}", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Demographics
+ st.markdown("👥 **DEMOGRAPHICS**")
+ # Use extracted_Demographics or user_group
+ demo_val = node_data.get('extracted_Demographics', node_data.get('user_group', 'N/A'))
+ if isinstance(demo_val, str) and demo_val.startswith('['):
+ try: demo_val = eval(demo_val)
+ except: pass
+ if isinstance(demo_val, list):
+ pills = "".join([f"
{d}" for d in demo_val])
+ st.markdown(pills, unsafe_allow_html=True)
+ else:
+ st.markdown(f"
{demo_val}", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Use Cases
+ st.markdown("📋 **USE CASES**")
+ use_case = node_data.get('user_case', 'N/A')
+ st.markdown(f"
{use_case}
", unsafe_allow_html=True)
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Research Citations
+ st.markdown("📖 **RESEARCH CITATIONS**")
+
+ title = "Research Paper"
+ import re
+ paper = str(node_data.get('paper_content', ''))
+ title_match = re.search(r'\*\*Title:\*\*\s*(?:\"(.*?)\"|(.*?)\n)', paper)
+ if title_match:
+ title = (title_match.group(1) or title_match.group(2) or "Citation 1").strip()
+
+ url_val = node_data.get('url', '')
+ if isinstance(url_val, str) and url_val.startswith('['):
+ try: url_val = eval(url_val)
+ except: pass
+
+ if isinstance(url_val, list):
+ if url_val:
+ st.markdown(f"- [{title}]({url_val[0]})")
+ elif isinstance(url_val, str) and url_val:
+ st.markdown(f"- [{title}]({url_val})")
+ else:
+ # fallback paper content
+ paper = node_data.get('paper_content', '')
+ if paper:
+ st.info("Citations available in internal knowledge source.")
+ else:
+ st.write("No citations available.")
+ else:
+ st.warning("Data not found for this node.")
+ st.markdown('
', unsafe_allow_html=True)
+
+ st.write("")
+ if st.button("Next: Generate Data", type="primary"):
+ with st.spinner("Synthesizing Synthetic Data Data Points (Simulated)..."):
+ time.sleep(1)
+ st.session_state.highest_step = max(st.session_state.highest_step, 3)
+ st.session_state.step = "Data"
+ st.rerun()
+
+elif st.session_state.step == "Data":
+ concept_name = st.session_state.get('saved_concept', 'Medical Advice')
+ regions = ', '.join(st.session_state.get('saved_countries', ['Global']))
+ st.markdown(f"""
+
+
+
+🗄️
+
+
Review Synthetic Data
+
+
Assess the quality and diversity of the grounded synthetic data you created.
+
+🌍 {regions}
+📌 {concept_name}
+
+
+""", unsafe_allow_html=True)
+
+ df = st.session_state.demo_data
+ if not df.empty:
+ # --- Prepare working dataframe ---
# Restrict to the columns the review page works with and keep one row per
# distinct combination; .copy() detaches the frame from `df`.
review_columns = ['Domain', 'level1', 'level2', 'level3', 'user_group', 'extracted_Country', 'prompts']
df_work = df[review_columns].drop_duplicates().copy()

# Reduce the country cell to its first entry: strip the outer list/quote
# characters, cut at the first "'," separator, then trim leftover quotes.
country_text = df_work['extracted_Country'].astype(str).str.strip("[]'\"")
first_country = country_text.str.split("',").str[0]
df_work['extracted_Country'] = first_country.str.strip(" '\"")

# Taxonomy levels are handled as plain strings from here on.
for level_col in ('level2', 'level3', 'level1'):
    df_work[level_col] = df_work[level_col].astype(str)
+ # Multi-signal complexity score
def compute_complexity(text):
    """Score a prompt's linguistic complexity on a scale of roughly 0-10.

    The input is coerced with ``str()`` and split on whitespace. Four capped
    signals are added together:

    * length: word count / 20, at most 3.0
    * vocabulary: mean word length / 2, at most 2.5
    * diversity: share of distinct case-insensitive words * 2.5
    * sentences: mean words per sentence / 10, at most 2.0

    Sentences are approximated by counting '.', '?' and '!' characters, with
    a floor of one sentence. Text containing no words scores 0.0. The total
    is capped at 10.0 and rounded to one decimal place.
    """
    raw = str(text)
    tokens = raw.split()
    n_tokens = len(tokens)
    if not n_tokens:
        return 0.0

    mean_token_len = np.mean([len(tok) for tok in tokens])
    distinct_share = len({tok.lower() for tok in tokens}) / n_tokens
    terminators = sum(raw.count(mark) for mark in ".?!")
    words_per_sentence = n_tokens / max(terminators, 1)

    length_part = min(n_tokens / 20, 3.0)
    vocab_part = min(mean_token_len / 2.0, 2.5)
    diversity_part = distinct_share * 2.5
    sentence_part = min(words_per_sentence / 10, 2.0)

    # Summed left to right in this fixed order so the floating-point result
    # does not depend on how the terms are grouped.
    total = length_part + vocab_part + diversity_part + sentence_part
    return round(min(total, 10.0), 1)
+
# Score every prompt once; the same Series feeds both the new column and
# the average shown in the KPI cards.
complexity_scores = df_work['prompts'].apply(compute_complexity)
df_work['complexity'] = complexity_scores

# --- KPI Cards ---
n_prompts = df_work.shape[0]
n_countries = df_work['extracted_Country'].nunique()
n_topics = df_work['level2'].nunique()
avg_complexity = round(complexity_scores.mean(), 1)
+
+ kpi_html = f"""
+
+
Evaluate Your Target Model
+
Generate responses from the model you want to evaluate.
+
+""", unsafe_allow_html=True)
+
+ # Load evaluation data
@st.cache_data
def load_eval_data():
    """Load ``evaluation_data_2.csv`` as a DataFrame.

    The path is relative, so it resolves against the current working
    directory. A missing file yields an empty DataFrame instead of an
    error, so callers can branch on ``.empty``.
    """
    try:
        frame = pd.read_csv("evaluation_data_2.csv")
    except FileNotFoundError:
        frame = pd.DataFrame()
    return frame
+
+ eval_df = load_eval_data()
+
+ if not eval_df.empty:
+ # --- Filters ---
+
+
+ with st.form("eval_scope_form", border=False):
+ col1, col2 = st.columns(2)
+ with col1:
+ st.markdown('
+
+
+📝
+
+
Define Autorator
+
+
Configure the evaluation rubric and provide structured feedback on model annotations.
+
+""", unsafe_allow_html=True)
+
+ col1, col2 = st.columns([1, 2], gap="large")
+
+ with col1:
+ st.markdown("""
+
+
+
+📊
+
+
Error Analysis Dashboard
+
+
Disclosure compliance analysis across models and data sources. Identify failure patterns and missing medical disclaimers.
+
+""", unsafe_allow_html=True)
+
+ # ── Data Loading ─────────────────────────────────────────────────────────
@st.cache_data
def load_analyse_data():
    """Load ``analyse.csv`` as a DataFrame.

    The path is relative, so it resolves against the current working
    directory. A missing file yields an empty DataFrame instead of an
    error, so callers can branch on ``.empty``.
    """
    try:
        frame = pd.read_csv("analyse.csv")
    except FileNotFoundError:
        frame = pd.DataFrame()
    return frame
+
+ df_med_plot_cleaned = load_analyse_data()
+
+ # ── Shared Styling Constants ─────────────────────────────────────────────
# Base font settings shared by the charts.
FONT_STYLE = {
    "family": "'Inter', sans-serif",
    "size": 14,
    "color": "#334155",
}

# Colour scale as [stop, colour] pairs, with stops running from 0.0 to 1.0.
BRAND_COLORSCALE = [
    [stop, colour]
    for stop, colour in (
        (0.0, "#eef2ff"),
        (0.15, "#c7d2fe"),
        (0.3, "#a5b4fc"),
        (0.45, "#818cf8"),
        (0.6, "#6366f1"),
        (0.75, "#a855f7"),
        (0.85, "#d946ef"),
        (0.95, "#ec4899"),
        (1.0, "#f43f5e"),
    )
]

# Per-model colours: "fill" is the "line" colour expressed as rgba at 0.08 alpha.
MODEL_COLORS = {
    model: {"line": line, "fill": fill}
    for model, line, fill in (
        ("Claude 4.5 Haiku", "#8b5cf6", "rgba(139,92,246,0.08)"),
        ("Gemini 2.5 flash", "#6366f1", "rgba(99,102,241,0.08)"),
        ("Llama 4 Scout", "#ec4899", "rgba(236,72,153,0.08)"),
        ("GPT o4-mini", "#f59e0b", "rgba(245,158,11,0.08)"),
    )
}
+
+
def u_shaped_sort_by_length(items_list):
    """Order labels so the longest sit at both ends and the shortest in the middle.

    Each item is converted with ``str()``; items that are empty or only
    whitespace are dropped and duplicates are removed. The remaining labels
    are ranked longest first and dealt alternately to the front and the back
    of the result, so the longest label comes first, the second longest
    last, and so on inwards.

    Labels of equal length are ranked alphabetically. This makes the output
    identical on every run; ranking by length alone after a ``set()``
    de-duplication would leave ties in hash-dependent order.

    Args:
        items_list: Iterable of values to arrange.

    Returns:
        A new list of unique, non-blank strings in U-shaped length order.
    """
    # dict.fromkeys de-duplicates without going through an unordered set.
    labels = dict.fromkeys(
        text for text in map(str, items_list) if text.strip()
    )
    by_length = sorted(labels, key=lambda label: (-len(label), label))
    # Even ranks fill the front in order; odd ranks fill the back in reverse.
    return by_length[::2] + by_length[1::2][::-1]
+
+ def make_brand_heatmap(z, x, y, show_colorbar=True):
+ """Create a consistently styled heatmap trace with adaptive text colors."""
+ # Build text color array: dark text on light cells, white on dark cells
+ text_colors = []
+ for row in z:
+ row_colors = []
+ for val in row:
+ if val < 40:
+ row_colors.append("#334155")
+ elif val < 60:
+ row_colors.append("#1e293b")
+ else:
+ row_colors.append("white")
+ text_colors.append(row_colors)
+
+ return go.Heatmap(
+ z=z, x=x, y=y,
+ colorscale=BRAND_COLORSCALE,
+ zmin=0, zmax=100,
+ text=z,
+ texttemplate="%{text:.1f}%",
+ textfont=dict(size=13, family="'Inter', sans-serif"),
+ showscale=show_colorbar,
+ colorbar=dict(
+ title=dict(text="Rate (%)No bi-grams shared across all models.
',
+ unsafe_allow_html=True)
+
+ unique_cols = st.columns(len(models))
+ for idx, m in enumerate(models):
+ with unique_cols[idx]:
+ m_color = MODEL_COLORS.get(m, default_color)["line"]
+ unique_list = sorted(unique_phrases.get(m, set()))
+ st.markdown(f"""
+