From 0d705dff3600df0ccf22289656aac3e909c6649a Mon Sep 17 00:00:00 2001 From: Devanshu-gif <23f2004399@ds.study.iitm.ac.in> Date: Wed, 30 Jul 2025 02:22:24 +0530 Subject: [PATCH 1/3] updated results --- app.py | 195 ++++++++++++++++++++++++++++------------- requirements.txt | 4 +- templates/index.html | 55 +++++++++++- templates/results.html | 72 +++++++++++---- 4 files changed, 245 insertions(+), 81 deletions(-) diff --git a/app.py b/app.py index d33013e..4b719c6 100644 --- a/app.py +++ b/app.py @@ -3,21 +3,30 @@ import pandas as pd import xml.etree.ElementTree as ET import re +import requests +import time from flask import Flask, render_template, request, send_from_directory, flash, redirect, url_for from werkzeug.utils import secure_filename +import yaml # --- Configuration --- -# Define the paths for file uploads and processed files. -# It's good practice to use absolute paths. +# IMPORTANT: You must get your own API key from Google AI Studio. +# https://aistudio.google.com/app/apikey +with open("key.yaml", "r") as f: + config = yaml.safe_load(f) +GEMINI_API_KEY = config.get("GEMINI_API_KEY") +GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}" + UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'uploads') PROCESSED_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed') -ALLOWED_EXTENSIONS = {'txt', 'log', 'json', 'xml'} +# --- UPDATED: Added 'csv' to allowed extensions --- +ALLOWED_EXTENSIONS = {'txt', 'log', 'json', 'xml', 'csv'} # --- App Initialization --- app = Flask(__name__) app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config['PROCESSED_FOLDER'] = PROCESSED_FOLDER -app.config['SECRET_KEY'] = 'supersecretkey' # Change this in a real application +app.config['SECRET_KEY'] = 'supersecretkey' # --- Helper Functions --- @@ -26,54 +35,60 @@ def allowed_file(filename): return '.' 
in filename and \ filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS -def parse_log_file(file_path): - """ - A generic log parser. This is a basic example. - Real-world log files can be very complex. This function tries to extract - common patterns like IP addresses, timestamps, and request methods. - You will likely need to customize this regex for your specific log formats. - """ - # Example Regex: Captures IP, timestamp, request method/path, status, and size. - # This is a common pattern for web server logs (e.g., Apache, Nginx). - log_pattern = re.compile(r'(?P\S+) \S+ \S+ \[(?P.*?)\] "(?P.*?)" (?P\d{3}) (?P\S+)') - data = [] - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: - for line in f: - match = log_pattern.match(line) - if match: - data.append(match.groupdict()) - if not data: - # Fallback for unstructured logs: treat each line as a single message. - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: - data = [{'message': line.strip()} for line in f] - return pd.DataFrame(data) - -def parse_json_file(file_path): +def get_regex_from_gemini(log_sample): """ - Parses a JSON file. Handles both standard JSON and line-delimited JSON (JSONL). + Sends a sample of the log file to the Gemini API to determine a parsing regex. + This version asks for the raw regex string directly to avoid JSON formatting errors. """ - try: - # Try parsing as a standard JSON array of objects - df = pd.read_json(file_path) - except ValueError: - # If that fails, try parsing as line-delimited JSON - with open(file_path, 'r') as f: - data = [json.loads(line) for line in f] - df = pd.DataFrame(data) - return df - -def parse_xml_file(file_path): + if GEMINI_API_KEY == "YOUR_API_KEY_HERE": + raise ValueError("Gemini API key is not configured. Please set it in your environment or in the script.") + + # --- UPDATED PROMPT --- + # Asks for the raw regex string, not a JSON object. This is more robust. 
+ prompt = f""" + Analyze the following log data sample. Your task is to generate a single Python-compatible regular expression (regex) that can capture the distinct columns. + + Data Sample: + --- + {log_sample} + --- + + Instructions: + 1. Create a Python regex string with named capture groups (e.g., `?P...`). The group names should be concise, descriptive, and in snake_case. These names will become the column headers. + 2. **CRITICAL RULE:** If a timestamp (containing date, time, or both) is present, you MUST capture the entire timestamp in a single group named `timestamp`. Do NOT split it into multiple columns. + 3. The regex MUST account for variable content, like messages that can contain spaces. Use non-greedy matching (`.*?`) for such fields. + 4. The regex should match the entire line from start (`^`) to end (`$`). + 5. **Return ONLY the raw regex string and absolutely nothing else.** Do not wrap it in quotes, markdown, or JSON. + + Example output for a log like "2025-07-27 19:56:34 [INFO] Quality check passed Machine-007 Operator-07": + ^(?P\\d{{4}}-\\d{{2}}-\\d{{2}} \\d{{2}}:\\d{{2}}:\\d{{2}}) \\[(?P\\w+)\\] (?P.*?) (?PMachine-\\d{{3}}) (?POperator-\\d{{2}})$ """ - Parses an XML file. Assumes a structure where the root has many - child elements, and each child is a record. 
- """ - tree = ET.parse(file_path) - root = tree.getroot() - data = [] - for elem in root: - record = {child.tag: child.text for child in elem} - data.append(record) - return pd.DataFrame(data) + + payload = { + "contents": [{"parts": [{"text": prompt}]}] + } + + headers = {'Content-Type': 'application/json'} + + # --- Retry Logic --- + retries = 3 + backoff_factor = 1 + for i in range(retries): + try: + response = requests.post(GEMINI_API_URL, headers=headers, json=payload, timeout=60) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + if e.response is not None and e.response.status_code == 503 and i < retries - 1: + print(f"Service unavailable, retrying in {backoff_factor} seconds...") + time.sleep(backoff_factor) + backoff_factor *= 2 + continue + else: + raise e + + raise requests.exceptions.RequestException("Failed to get a response from the API after several retries.") + # --- Flask Routes --- @@ -85,15 +100,16 @@ def index(): @app.route('/upload', methods=['POST']) def upload_file(): """Handles the file upload and processing logic.""" + # --- FIXED: Redirect to 'index' on error --- if 'file' not in request.files: flash('No file part') - return redirect(request.url) + return redirect(url_for('index')) file = request.files['file'] if file.filename == '': flash('No selected file') - return redirect(request.url) + return redirect(url_for('index')) if file and allowed_file(file.filename): filename = secure_filename(file.filename) @@ -101,46 +117,99 @@ def upload_file(): file.save(upload_path) try: - # Determine file type and parse accordingly ext = filename.rsplit('.', 1)[1].lower() df = None + if ext in ['log', 'txt']: - df = parse_log_file(upload_path) + with open(upload_path, 'r', encoding='utf-8', errors='ignore') as f: + log_sample = "".join(f.readlines(10)) + + if not log_sample: + flash("File is empty.") + return redirect(url_for('index')) + + # --- NEW ROBUST PARSING --- + gemini_response = 
get_regex_from_gemini(log_sample) + + # Safely extract the text from the API response + regex_pattern = gemini_response.get('candidates', [{}])[0].get('content', {}).get('parts', [{}])[0].get('text') + + if not regex_pattern: + raise ValueError("LLM did not return a regex pattern.") + + # Clean up potential markdown formatting from the response + if regex_pattern.strip().startswith("```"): + regex_pattern = re.sub(r'```(python)?\n', '', regex_pattern) + regex_pattern = regex_pattern.strip().replace('```', '') + + print("\n--- Generated Regex Pattern ---") + print(regex_pattern.strip()) + print("-----------------------------\n") + + pattern = re.compile(regex_pattern.strip()) + parsed_data = [] + with open(upload_path, 'r', encoding='utf-8', errors='ignore') as f: + for line in f: + match = pattern.match(line.strip()) + if match: + parsed_data.append(match.groupdict()) + + df = pd.DataFrame(parsed_data) + elif ext == 'json': - df = parse_json_file(upload_path) + try: + with open(upload_path, 'r', encoding='utf-8') as f: + json_data = json.load(f) + df = pd.json_normalize(json_data) + except (json.JSONDecodeError, TypeError): + df = pd.read_json(upload_path, lines=True, encoding='utf-8') + elif ext == 'xml': - df = parse_xml_file(upload_path) + tree = ET.parse(upload_path) + root = tree.getroot() + records = [{child.tag: child.text for child in elem} for elem in root] + df = pd.DataFrame(records) + + # --- NEW: Added direct CSV handling --- + elif ext == 'csv': + df = pd.read_csv(upload_path) + if df is None or df.empty: flash('Could not parse the file. 
The format might be unsupported or the file is empty.') return redirect(url_for('index')) - # Create the output CSV base_filename = filename.rsplit('.', 1)[0] - csv_filename = f"{base_filename}.csv" + csv_filename = f"{base_filename}_processed.csv" # Suffix to avoid name collision csv_path = os.path.join(app.config['PROCESSED_FOLDER'], csv_filename) df.to_csv(csv_path, index=False) - # Redirect to the results page - return render_template('results.html', csv_filename=csv_filename) + preview_df = pd.read_csv(csv_path) + preview_limit = 100 + preview_headers = preview_df.columns.values.tolist() + rows = preview_df.head(preview_limit).values.tolist() + + return render_template('results.html', + csv_filename=csv_filename, + headers=preview_headers, + rows=rows) except Exception as e: - flash(f'An error occurred while processing the file: {e}') + flash(f'An error occurred: {e}') return redirect(url_for('index')) else: + # --- FIXED: Redirect to 'index' on error --- flash('File type not allowed.') - return redirect(request.url) + return redirect(url_for('index')) @app.route('/download/') def download_file(filename): """Serves the processed CSV file for download.""" return send_from_directory(app.config['PROCESSED_FOLDER'], filename, as_attachment=True) - # --- Main Execution --- if __name__ == '__main__': - # Create upload and processed directories if they don't exist os.makedirs(UPLOAD_FOLDER, exist_ok=True) os.makedirs(PROCESSED_FOLDER, exist_ok=True) app.run(debug=True) diff --git a/requirements.txt b/requirements.txt index e5ca5cc..7048bde 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ flask pandas -lxmal \ No newline at end of file +lxml +requests +PyYAML \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 0ca0dbc..765375c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,4 +1,3 @@ - @@ -38,7 +37,9 

Log Data Processor

-
+ + +

Log Data Processor

.log, .txt, .json, .xml, .csv up to 50MB

+ + + +
+ + \ No newline at end of file diff --git a/templates/results.html b/templates/results.html index 8c8c3a2..37ef5e4 100644 --- a/templates/results.html +++ b/templates/results.html @@ -11,27 +11,69 @@ - + -
-
- - - +
+
+
+ + + +
+

Success!

+

Your file has been processed. Preview the first 100 rows below.

-

Success!

-

Your file has been processed and converted to CSV.

- - - Download CSV File - -
- Process another file + + + + +
+
+ + + + {% for header in headers %} + + + {% endfor %} + + + + {% for row in rows %} + + {% for cell in row %} + + + {% endfor %} + + {% endfor %} + +
+ {{ header }} +
+ {{ cell }} +
+
+
- + \ No newline at end of file From 217933bcbe6b85f3501c8b3d88b4a3d50b86177a Mon Sep 17 00:00:00 2001 From: Devanshu-gif <23f2004399@ds.study.iitm.ac.in> Date: Wed, 6 Aug 2025 05:06:54 +0530 Subject: [PATCH 2/3] Generalised log data struture --- app.py | 175 ++++++++++++++++++++++------------------- templates/results.html | 34 ++++++-- 2 files changed, 121 insertions(+), 88 deletions(-) diff --git a/app.py b/app.py index 4b719c6..3d64af1 100644 --- a/app.py +++ b/app.py @@ -5,82 +5,97 @@ import re import requests import time -from flask import Flask, render_template, request, send_from_directory, flash, redirect, url_for -from werkzeug.utils import secure_filename +import io import yaml +from flask import Flask, render_template, request, flash, redirect, url_for +from werkzeug.utils import secure_filename # --- Configuration --- -# IMPORTANT: You must get your own API key from Google AI Studio. -# https://aistudio.google.com/app/apikey +# Reading the key from your local key.yaml file with open("key.yaml", "r") as f: config = yaml.safe_load(f) GEMINI_API_KEY = config.get("GEMINI_API_KEY") + +# Using the correct, powerful model for this task GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}" +# Using local folders for testing UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'uploads') PROCESSED_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed') -# --- UPDATED: Added 'csv' to allowed extensions --- ALLOWED_EXTENSIONS = {'txt', 'log', 'json', 'xml', 'csv'} +# The canonical, final schema for all logs +GENERIC_SCHEMA = [ + 'timestamp', 'log_level', 'message', 'service_name', + 'host_name', 'trace_id', 'error_details', 'metadata' +] + # --- App Initialization --- app = Flask(__name__) app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config['PROCESSED_FOLDER'] = PROCESSED_FOLDER -app.config['SECRET_KEY'] = 'supersecretkey' 
+app.config['SECRET_KEY'] = 'a-very-secret-key-for-local-testing' # --- Helper Functions --- def allowed_file(filename): """Checks if the file's extension is in the ALLOWED_EXTENSIONS set.""" - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS def get_regex_from_gemini(log_sample): """ Sends a sample of the log file to the Gemini API to determine a parsing regex. - This version asks for the raw regex string directly to avoid JSON formatting errors. """ - if GEMINI_API_KEY == "YOUR_API_KEY_HERE": - raise ValueError("Gemini API key is not configured. Please set it in your environment or in the script.") + if not GEMINI_API_KEY: + raise ValueError("GEMINI_API_KEY not found in key.yaml.") - # --- UPDATED PROMPT --- - # Asks for the raw regex string, not a JSON object. This is more robust. + # --- UPDATED PROMPT with stricter message definition --- prompt = f""" - Analyze the following log data sample. Your task is to generate a single Python-compatible regular expression (regex) that can capture the distinct columns. - - Data Sample: + You are an expert log data transformation engine. Your task is to analyze a raw log sample and generate a Python-compatible regular expression (regex) that extracts key information and maps it to a predefined generic schema. + + **Generic Log Schema:** + - `timestamp`: The full timestamp of the event. + - `log_level`: The severity of the event (e.g., INFO, WARN, ERROR). + - `message`: The primary, human-readable message. + - `service_name`: The application or service that generated the log. + - `host_name`: The hostname of the machine. + - `trace_id`: A unique identifier for correlating logs. + - `error_details`: Stack trace or detailed error messages. + - `metadata`: A catch-all for any other structured data. + + **Log Sample to Analyze:** --- {log_sample} --- - Instructions: - 1. 
Create a Python regex string with named capture groups (e.g., `?P...`). The group names should be concise, descriptive, and in snake_case. These names will become the column headers. - 2. **CRITICAL RULE:** If a timestamp (containing date, time, or both) is present, you MUST capture the entire timestamp in a single group named `timestamp`. Do NOT split it into multiple columns. - 3. The regex MUST account for variable content, like messages that can contain spaces. Use non-greedy matching (`.*?`) for such fields. - 4. The regex should match the entire line from start (`^`) to end (`$`). - 5. **Return ONLY the raw regex string and absolutely nothing else.** Do not wrap it in quotes, markdown, or JSON. - - Example output for a log like "2025-07-27 19:56:34 [INFO] Quality check passed Machine-007 Operator-07": - ^(?P\\d{{4}}-\\d{{2}}-\\d{{2}} \\d{{2}}:\\d{{2}}:\\d{{2}}) \\[(?P\\w+)\\] (?P.*?) (?PMachine-\\d{{3}}) (?POperator-\\d{{2}})$ - """ - - payload = { - "contents": [{"parts": [{"text": prompt}]}] - } - + **Your Instructions:** + 1. Create a single Python regex with named capture groups (e.g., `?P...`). + 2. The name of each capture group **MUST** match a key from the **Generic Log Schema** if possible. Use the following mapping rules: + - For a unique identifier (like a UUID, `correlation_id`, `request_id`), name the capture group `trace_id`. + - For an exception or stack trace (like `exception_details`, `stack_trace`), name the capture group `error_details`. + - For a service or application name, use `service_name`. + - For a server or machine name, use `host_name`. + 3. If a field from the log does not logically map to any standard schema key, create a capture group with a descriptive, snake_case name (e.g., `machine_id`, `user_id`). The Python code will handle putting these into the `metadata` field. + 4. **CRITICAL RULE:** The `timestamp` group must capture the entire date and time component as one field. + 5. 
**CRITICAL RULE for `message`:** The `message` is the human-readable text that typically follows the timestamp and log level but comes BEFORE any other structured key-value pairs or identifiers. + 6. The regex should match the entire line from start (`^`) to end (`$`). + 7. **Return ONLY the raw regex string and absolutely nothing else.** + + **Example Output for a log like "2025-08-05 17:15:21 | WARNING | Profile updated | email-dispatcher | host-02 | a870-d0a6 | TimeoutException | ...":** + ^(?P.*?)\\s*\\|\\s*(?P\\w+)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P{{.*}})$ + """ + payload = {"contents": [{"parts": [{"text": prompt}]}]} headers = {'Content-Type': 'application/json'} - # --- Retry Logic --- retries = 3 - backoff_factor = 1 + backoff_factor = 2 for i in range(retries): try: - response = requests.post(GEMINI_API_URL, headers=headers, json=payload, timeout=60) + response = requests.post(GEMINI_API_URL, headers=headers, json=payload, timeout=90) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: - if e.response is not None and e.response.status_code == 503 and i < retries - 1: - print(f"Service unavailable, retrying in {backoff_factor} seconds...") + if e.response is not None and e.response.status_code in [503, 429] and i < retries - 1: time.sleep(backoff_factor) backoff_factor *= 2 continue @@ -89,7 +104,6 @@ def get_regex_from_gemini(log_sample): raise requests.exceptions.RequestException("Failed to get a response from the API after several retries.") - # --- Flask Routes --- @app.route('/', methods=['GET']) @@ -100,7 +114,6 @@ def index(): @app.route('/upload', methods=['POST']) def upload_file(): """Handles the file upload and processing logic.""" - # --- FIXED: Redirect to 'index' on error --- if 'file' not in request.files: flash('No file part') return redirect(url_for('index')) @@ -112,32 +125,34 @@ def upload_file(): return 
redirect(url_for('index')) if file and allowed_file(file.filename): - filename = secure_filename(file.filename) - upload_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) - file.save(upload_path) - try: + filename = secure_filename(file.filename) + upload_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) + file.save(upload_path) + + with open(upload_path, 'r', encoding='utf-8', errors='ignore') as f: + file_content = f.read() + + if not file_content: + flash("The uploaded file appears to be empty.") + return redirect(url_for('index')) + ext = filename.rsplit('.', 1)[1].lower() df = None if ext in ['log', 'txt']: - with open(upload_path, 'r', encoding='utf-8', errors='ignore') as f: - log_sample = "".join(f.readlines(10)) + log_sample = "\n".join(file_content.splitlines()[:10]) if not log_sample: flash("File is empty.") return redirect(url_for('index')) - # --- NEW ROBUST PARSING --- gemini_response = get_regex_from_gemini(log_sample) - - # Safely extract the text from the API response regex_pattern = gemini_response.get('candidates', [{}])[0].get('content', {}).get('parts', [{}])[0].get('text') if not regex_pattern: raise ValueError("LLM did not return a regex pattern.") - - # Clean up potential markdown formatting from the response + if regex_pattern.strip().startswith("```"): regex_pattern = re.sub(r'```(python)?\n', '', regex_pattern) regex_pattern = regex_pattern.strip().replace('```', '') @@ -147,68 +162,62 @@ def upload_file(): print("-----------------------------\n") pattern = re.compile(regex_pattern.strip()) - parsed_data = [] - with open(upload_path, 'r', encoding='utf-8', errors='ignore') as f: - for line in f: - match = pattern.match(line.strip()) - if match: - parsed_data.append(match.groupdict()) - + parsed_data = [match.groupdict() for line in file_content.splitlines() if (match := pattern.match(line.strip()))] df = pd.DataFrame(parsed_data) + if df.empty: + print("AI regex parsing failed. 
Using fallback method.") + flash("AI parsing could not find a pattern. Displaying raw lines instead.") + parsed_data = [{'message': line} for line in file_content.splitlines()] + df = pd.DataFrame(parsed_data) + elif ext == 'json': - try: - with open(upload_path, 'r', encoding='utf-8') as f: - json_data = json.load(f) - df = pd.json_normalize(json_data) - except (json.JSONDecodeError, TypeError): - df = pd.read_json(upload_path, lines=True, encoding='utf-8') + df = pd.read_json(upload_path) elif ext == 'xml': tree = ET.parse(upload_path) - root = tree.getroot() - records = [{child.tag: child.text for child in elem} for elem in root] + records = [{child.tag: child.text for child in elem} for elem in tree.getroot()] df = pd.DataFrame(records) - # --- NEW: Added direct CSV handling --- elif ext == 'csv': df = pd.read_csv(upload_path) - if df is None or df.empty: - flash('Could not parse the file. The format might be unsupported or the file is empty.') + flash('Parsing failed. The application could not structure the data.') return redirect(url_for('index')) - base_filename = filename.rsplit('.', 1)[0] - csv_filename = f"{base_filename}_processed.csv" # Suffix to avoid name collision - csv_path = os.path.join(app.config['PROCESSED_FOLDER'], csv_filename) - df.to_csv(csv_path, index=False) + metadata_cols = [col for col in df.columns if col not in GENERIC_SCHEMA] + + if metadata_cols: + df['metadata'] = df[metadata_cols].apply( + lambda row: row.to_json(), axis=1 + ) + df = df.drop(columns=metadata_cols) + + df = df.reindex(columns=GENERIC_SCHEMA) + + csv_string = df.to_csv(index=False) + + csv_filename = f"{filename.rsplit('.', 1)[0]}_processed.csv" - preview_df = pd.read_csv(csv_path) - preview_limit = 100 - preview_headers = preview_df.columns.values.tolist() - rows = preview_df.head(preview_limit).values.tolist() + preview_headers = df.columns.values.tolist() + rows = df.head(100).fillna('').values.tolist() return render_template('results.html', 
csv_filename=csv_filename, headers=preview_headers, - rows=rows) + rows=rows, + full_csv_data=csv_string) except Exception as e: + print(f"An unexpected error occurred: {e}") flash(f'An error occurred: {e}') return redirect(url_for('index')) else: - # --- FIXED: Redirect to 'index' on error --- flash('File type not allowed.') return redirect(url_for('index')) -@app.route('/download/') -def download_file(filename): - """Serves the processed CSV file for download.""" - return send_from_directory(app.config['PROCESSED_FOLDER'], filename, as_attachment=True) - -# --- Main Execution --- if __name__ == '__main__': os.makedirs(UPLOAD_FOLDER, exist_ok=True) os.makedirs(PROCESSED_FOLDER, exist_ok=True) diff --git a/templates/results.html b/templates/results.html index 37ef5e4..784b741 100644 --- a/templates/results.html +++ b/templates/results.html @@ -35,9 +35,10 @@

Success!

- + + Process Another File @@ -61,9 +62,13 @@

Success!

{% for row in rows %} {% for cell in row %} - - {{ cell }} + + {% if cell == '' or cell is none %} + null + {% else %} + {{ cell }} + {% endif %} {% endfor %} @@ -72,8 +77,27 @@

Success!

-
+ + + \ No newline at end of file From 232a50eca40f100954935d918aed9fef23d5fb84 Mon Sep 17 00:00:00 2001 From: Devanshu-gif <23f2004399@ds.study.iitm.ac.in> Date: Wed, 6 Aug 2025 06:03:31 +0530 Subject: [PATCH 3/3] some issues resolved --- app.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 3d64af1..cafd1ad 100644 --- a/app.py +++ b/app.py @@ -71,7 +71,7 @@ def get_regex_from_gemini(log_sample): **Your Instructions:** 1. Create a single Python regex with named capture groups (e.g., `?P...`). 2. The name of each capture group **MUST** match a key from the **Generic Log Schema** if possible. Use the following mapping rules: - - For a unique identifier (like a UUID, `correlation_id`, `request_id`), name the capture group `trace_id`. + - For a unique identifier (like a UUID, `correlation_id`, `request_id`, `tid`, `euid`), name the capture group `trace_id`. - For an exception or stack trace (like `exception_details`, `stack_trace`), name the capture group `error_details`. - For a service or application name, use `service_name`. - For a server or machine name, use `host_name`. @@ -81,8 +81,12 @@ def get_regex_from_gemini(log_sample): 6. The regex should match the entire line from start (`^`) to end (`$`). 7. 
**Return ONLY the raw regex string and absolutely nothing else.** - **Example Output for a log like "2025-08-05 17:15:21 | WARNING | Profile updated | email-dispatcher | host-02 | a870-d0a6 | TimeoutException | ...":** + **Example 1: For a complex, pipe-delimited log like "2025-08-05 17:15:21 | WARNING | Profile updated | email-dispatcher | host-02 | a870-d0a6 | TimeoutException | ...":** ^(?P.*?)\\s*\\|\\s*(?P\\w+)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P.*?)\\s*\\|\\s*(?P{{.*}})$ + + **Example 2: For a standard log like "[Sun Dec 04 04:51:18 2005] [error] mod_jk child workerEnv in error state 6":** + Your output should be a regex like: + ^\\[(?P.*?)\\] \\[(?P\\w+)\\] (?P.*)$ """ payload = {"contents": [{"parts": [{"text": prompt}]}]} headers = {'Content-Type': 'application/json'}