From 6392111202cacec09ea5cf98dc473c8d738a413f Mon Sep 17 00:00:00 2001 From: Shichao Song <60967965+Ki-Seki@users.noreply.github.com> Date: Sat, 11 Apr 2026 02:07:55 +0800 Subject: [PATCH 1/2] Add GLiNER2 eval results --- .../eval.sh | 6 + ...pt-AI_GIMBench-cv-parse_260411-010729.json | 4323 +++++++++++++++++ 2 files changed, 4329 insertions(+) create mode 100644 results/260411-kdd-rebuttal-cv-gliner2-model/eval.sh create mode 100644 results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json diff --git a/results/260411-kdd-rebuttal-cv-gliner2-model/eval.sh b/results/260411-kdd-rebuttal-cv-gliner2-model/eval.sh new file mode 100644 index 0000000..37a8e50 --- /dev/null +++ b/results/260411-kdd-rebuttal-cv-gliner2-model/eval.sh @@ -0,0 +1,6 @@ +python -m gimbench.cv.cv_parse \ + --use_gliner2 \ + --model_name "fastino/gliner2-large-v1" \ + --model_type "openai" \ + --judge_model_name "google/gemini-2.5-flash" \ + --api_key $API_KEY --base_url $API_BASE diff --git a/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json b/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json new file mode 100644 index 0000000..8d657c0 --- /dev/null +++ b/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json @@ -0,0 +1,4323 @@ +{ + "eval_env": { + "exec_command": "/root/autodl-tmp/GIMBench/.venv/bin/python /root/autodl-tmp/GIMBench/src/gimbench/cv/cv_parse.py --use_gliner2 --model_name fastino/gliner2-large-v1 --model_type openai --judge_model_name google/gemini-2.5-flash --api_key **** --base_url https://openrouter.ai/api/v1", + "gimbench_version": "0.4.0", + "gimbench_file": "/root/autodl-tmp/GIMBench/src/gimbench/__init__.py", + "gimkit_version": "0.1.1", + "gimkit_file": "/root/autodl-tmp/GIMBench/.venv/lib/python3.13/site-packages/gimkit/__init__.py", + "git_repo": "/root/autodl-tmp/GIMBench", + "git_branch": "feat/eval-results", + "git_commit_id": "63f067650c0e8c863445c7c4b89d1ed6b3f8aa75" + }, + "evaluator_type": "cv", + "args": { + "use_gim_prompt": false, + "output_type": null, + "model_type": "openai", + "model_name": "fastino/gliner2-large-v1", + "api_key": "****", + "base_url": "https://openrouter.ai/api/v1", + "max_model_len": 8192, + "temperature": 0.0, + "top_p": 1.0, + "presence_penalty": 1.0, + "max_tokens": 8192, + "seed": 16, + "first_n": -1, + "num_proc": 1, + "output_dir": "results", + "counter_tokenizer": "Qwen/Qwen3-4B-Instruct-2507", + "record_timing": false, + "ref_model_name": "google/gemma-3-270m", + "ref_model_device": "cpu", + "norm_ppl_alpha": 0.2, + "ppl_window_k": 16, + "golden_truth_only": false, + "no_gimkit": false, + "reason_budget": 0, + "auto_budget": false, + "auto_budget_prompt": "I'll show you a couple of questions. Decide how many reasoning steps are needed to answer each accurately.\n\nConsider a plausible reasoning workflow first (you may use reasoning, reflection, trial and error, and parallel thinking by applying different approaches, plus a quick verification if needed). Then output a step budget (where each step is an atomic reasoning action taking 3–5 sentences) that allows for granular, step-by-step derivation without skipping logic, ensuring a robust and high-confidence conclusion;leave extra headroom for cross-checking and possible revision on multi-hop or tricky questions.\n\n## Question: {question}\n\nDo not be anchored by the examples above. Scale your step budget linearly with the difficulty. For complex problems, you are encouraged to assign a high budget (20, or more) to ensure there is enough room for step-by-step derivation and verification.\n\n", + "reason_step_desc": "A distinct, verified reasoning step building on the previous one. Write 2–3 substantial sentences (60–80 words each) to ensure depth.", + "use_outlines": false, + "use_gliner2": true, + "judge_model_name": "google/gemini-2.5-flash", + "num_samples": 1, + "pass_k": [ + 1 + ], + "exec_timeout": 10, + "scierc_split": "test", + "scierc_max_relations": 30, + "dataset": { + "path": "Sculpt-AI/GIMBench-cv-parse", + "name": null, + "split": "test" + } + }, + "start_time": "2026-04-11T01:07:29.376578", + "end_time": "2026-04-11T02:01:23.365790", + "elapsed_minutes": 53.8998202, + "evaled_items": [ + { + "filename": "Nan_Zhang.pdf", + "extraction_details": { + "name": { + "prediction": "Nan Zhang", + "expected": "Nan Zhang", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "American", + "expected": "Germany", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "3 January 1984", + "expected": "1984-01-03", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "phone_number": { + "prediction": "", + "expected": "+49 (0) 621 181 2098", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "B n.zhang@uni-mannheim.de", + "expected": "n.zhang@uni-mannheim.de", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Mannheim Center for European Social Research", + "expected": "Stanford University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Political Science", + "expected": "Political Science", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2021", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2021", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "http://nanzhangresearch.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "http://nanzhangresearch.github.io", + "expected": "https://github.com/nanzhangresearch", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 10, + "num_correct": 4, + "error_msg": "" + }, + { + "filename": "Aurelie_de_Gendre.pdf", + "extraction_details": { + "name": { + "prediction": "", + "expected": "Aurelie de Gendre", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "country": { + "prediction": "France", + "expected": "France", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1990-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "", + "expected": "a.degendre@unimelb.edu.au", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "CREST", + "expected": "Maastricht University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "", + "expected": "Research Centre for Education and the Labor Market", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Economics", + "expected": "Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2026", + "expected": "2015-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "", + "expected": "2021-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 10, + "num_correct": 3, + "error_msg": "" + }, + { + "filename": "Aditya_Chaudhry.pdf", + "extraction_details": { + "name": { + "prediction": "Aditya Chaudhry", + "expected": "Aditya Chaudhry", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "United States of America", + "expected": "United States of America", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1996-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": "703 628 9071", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "", + "expected": "chaudhry.127@osu.ed", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "The Ohio State University", + "expected": "University of Chicago", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Finance", + "expected": "Booth School of Business", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Finance", + "expected": "Finance", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2023", + "expected": "2018-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2023", + "expected": "2023-01-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": "https://chaudhryaditya.github.io/", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 5, + "error_msg": "" + }, + { + "filename": "Alexander_J_Fertig.pdf", + "extraction_details": { + "name": { + "prediction": "Alexander J. Fertig", + "expected": "Alexander J. Fertig", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "", + "expected": "United States", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1988-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "(716) 587-1812", + "expected": "(716) 587-1812", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "email": { + "prediction": "afertig@umich.edu", + "expected": "afertig@umich.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of Michigan", + "expected": "University of Michigan", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "Department of Economics", + "expected": "Department of Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "major": { + "prediction": "Economics", + "expected": "Economics & Public Policy", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "", + "expected": "2019-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2026", + "expected": "2026-01-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "alexanderfertig.com", + "expected": "http://alexanderfertig.com", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 8, + "error_msg": "" + }, + { + "filename": "Maksim_Zhdanov.pdf", + "extraction_details": { + "name": { + "prediction": "MAKSIM ZHDANOV", + "expected": "Maksim Zhdanov", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "country": { + "prediction": "", + "expected": "Netherlands", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1997-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "email", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "Master", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "university": { + "prediction": "University of Amsterdam", + "expected": "University of Amsterdam", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "", + "expected": "AMLab", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "hierarchical models", + "expected": "Machine Learning", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "2023", + "expected": "2023-01-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "2027", + "expected": "2027-01-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "github", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 9, + "num_correct": 4, + "error_msg": "" + }, + { + "filename": "Theo_Serlin.pdf", + "extraction_details": { + "name": { + "prediction": "Theo Serlin", + "expected": "Theo Serlin", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "UK", + "expected": "United Kingdom", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1997-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "WC2B 4BG", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "theo.serlin@kcl.ac.uk", + "expected": "theo.serlin@kcl.ac.uk", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "King’s College London", + "expected": "Stanford University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Department of Political Economy", + "expected": "Department of Political Science", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Political Science", + "expected": "Political Science", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2025", + "expected": "2019-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2025", + "expected": "2024-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://theo-serlin.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "theo-serlin.github.io", + "expected": "https://github.com/theo-serlin", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 5, + "error_msg": "" + }, + { + "filename": "August_Bruno.pdf", + "extraction_details": { + "name": { + "prediction": "August Bruno", + "expected": "August Bruno", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "United States", + "expected": "United States", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1995-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": "+1 (716) 238-5546", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "abruno@skidmore.edu", + "expected": "abruno@skidmore.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Skidmore College", + "expected": "University of North Carolina at Chapel Hill", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Department of Economics", + "expected": "Department of Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "major": { + "prediction": "Economics", + "expected": "Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "May 2025", + "expected": "2025-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "", + "expected": "2025-05-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://augustbruno.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "https://augustbruno.github.io", + "expected": "https://github.com/augustbruno", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 13, + "num_correct": 6, + "error_msg": "" + }, + { + "filename": "Unknown.pdf", + "extraction_details": { + "name": { + "prediction": "", + "expected": "Amarnath Murugan", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "country": { + "prediction": "", + "expected": "USA", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1997-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "", + "expected": "amarnathmurugan0@gmail.com", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "Master", + "expected": "Master", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of U.", + "expected": "University of Utah", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "department": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Computing", + "expected": "Graphics & Visualization", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "", + "expected": "2022-08-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "", + "expected": "2024-05-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 9, + "num_correct": 2, + "error_msg": "" + }, + { + "filename": "Guillermo_A_Perez.pdf", + "extraction_details": { + "name": { + "prediction": "Guillermo A. Perez", + "expected": "Guillermo A. Perez", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "Antwerpen – Belgium", + "expected": "Belgium", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1987-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "+3232653904", + "expected": "+3232653904", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "email": { + "prediction": "guillermo.perez@uantwerpen.be", + "expected": "guillermo.perez@uantwerpen.be", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of Antwerp", + "expected": "Université libre de Bruxelles", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Department of Computer Science", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Computer Science", + "expected": "Computer Science", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2020", + "expected": "2012-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2020", + "expected": "2016-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "www.uantwerpen.be/en/staff/guillermoalberto-perez/", + "expected": "https://www.uantwerpen.be/en/staff/guillermoalberto-perez/", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 11, + "num_correct": 7, + "error_msg": "" + }, + { + "filename": "Nicholas_Vreugdenhil.pdf", + "extraction_details": { + "name": { + "prediction": "Nicholas Vreugdenhil", + "expected": "Nicholas Vreugdenhil", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "", + "expected": "Australia", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1990-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "nvreugde@asu.edu", + "expected": "nvreugde@asu.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Arizona State University", + "expected": "Northwestern University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Economics", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Economics", + "expected": "Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "July 2020", + "expected": "2020-07-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "June 2020", + "expected": "2026-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://nvreug.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "https://nvreug.github.ioFieldsIndustrial", + "expected": "https://github.com/nvreug", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 11, + "num_correct": 5, + "error_msg": "" + }, + { + "filename": "Florian_P_Ederer.pdf", + "extraction_details": { + "name": { + "prediction": "Florian P. Ederer", + "expected": "Florian P. Ederer", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "Boston, MA", + "expected": "USA", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1980-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "02215", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "florian.ederer@gmail.com", + "expected": "florian.ederer@gmail.com", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Boston University", + "expected": "Massachusetts Institute of Technology", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "", + "expected": "Department of Economics", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Economics", + "expected": "Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "2023", + "expected": "2004-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2023", + "expected": "2009-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://florianederer.github.io/", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "https://florianederer.github.io/", + "expected": "https://github.com/florianederer", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 4, + "error_msg": "" + }, + { + "filename": "Ivan_Kartáč.pdf", + "extraction_details": { + "name": { + "prediction": "Ivan Kart´aˇc", + "expected": "Ivan Kartáč", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "country": { + "prediction": "Czechia", + "expected": "Czechia", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1996-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "# kartac@ufal.mff.cuni.cz", + "expected": "kartac@ufal.mff.cuni.cz", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "Master", + "expected": "Master", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Charles University", + "expected": "Charles University", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "Faculty of Mathematics and Physics", + "expected": "Faculty of Mathematics and Physics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "major": { + "prediction": "NLP", + "expected": "Language Technologies and Computational Linguistics", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "start_date": { + "prediction": "10/2021", + "expected": "2021-10-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "02/2025", + "expected": "2025-02-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "", + "expected": "https://github.com/ivan-kartac", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 11, + "num_correct": 8, + "error_msg": "" + }, + { + "filename": "Diego_Martínez.pdf", + "extraction_details": { + "name": { + "prediction": "Diego Mart´ınez", + "expected": "Diego Martínez", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "country": { + "prediction": "", + "expected": "Spain", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1992-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "diego.martinez@kuleuven.be", + "expected": "diego.martinez@kuleuven.be", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "", + "expected": "PhD", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "university": { + "prediction": "KU Leuven", + "expected": "Universidad Carlos III de Madrid", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Department of Mathematics", + "expected": "Department of Mathematics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "major": { + "prediction": "Mathematics", + "expected": "Mathematics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "October 2025", + "expected": "2025-10-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "now", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://diego-mmg.github.io/", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "https://diego-mmg.github.io/", + "expected": "https://github.com/diego-mmg", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 11, + "num_correct": 5, + "error_msg": "" + }, + { + "filename": "Yuki_Takahashi.pdf", + "extraction_details": { + "name": { + "prediction": "Yuki Takahashi", + "expected": "Yuki Takahashi", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "The Netherlands", + "expected": "The Netherlands", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1987-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "+31 6-4145-8415", + "expected": "+31 6-4145-8415", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "email": { + "prediction": "d.schindler@tilburguniversity.edu", + "expected": "y.takahashi@tilburguniversity.edu", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Tilburg University", + "expected": "University of Bologna", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Dept. of Economics", + "expected": "Dept. of Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "major": { + "prediction": "Economics", + "expected": "Economics", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "Sept 2023", + "expected": "2017-11-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "Aug 2023", + "expected": "2022-07-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "https://yukitakahashi1.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "https://yukitakahashi1.github.io", + "expected": "https://github.com/yukitakahashi1", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 13, + "num_correct": 6, + "error_msg": "" + }, + { + "filename": "Richard_Cornelius_Suwandi.pdf", + "extraction_details": { + "name": { + "prediction": "Richard Cornelius Suwandi", + "expected": "Richard Cornelius Suwandi", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "", + "expected": "China", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "2001-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "richardsuwandi@link.cuhk.edu.cn", + "expected": "richardsuwandi@link.cuhk.edu.cn", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "The Chinese University of Hong Kong, Shenzhen", + "expected": "The Chinese University of Hong Kong, Shenzhen", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "", + "expected": "School of Science and Engineering", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Computer and Information Engineering", + "expected": "Computer and Information Engineering", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "Sep 2023", + "expected": "2023-09-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "May 2027", + "expected": "2027-05-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": "https://richardcsuwandi.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "richardcsuwandi.github.io", + "expected": "https://github.com/richardcsuwandi", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 7, + "error_msg": "" + }, + { + "filename": "Wojciech_Anyszka.pdf", + "extraction_details": { + "name": { + "prediction": "Wojciech Anyszka", + "expected": "Wojciech Anyszka", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "Princeton, NJ, USA", + "expected": "USA", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "2001-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "531 333 131", + "expected": "531 333 131", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "email": { + "prediction": "wa8157@princeton.edu", + "expected": "wa8157@princeton.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "Princeton University", + "expected": "Princeton University", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "Operations Research and Financial Engineering (ORFE)", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "", + "expected": "Operations Research and Financial Engineering", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "Sept 2025", + "expected": "2025-09-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "present", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": "https://wanyszka.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "wanyszka.github.io", + "expected": "https://github.com/wanyszka", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 11, + "num_correct": 8, + "error_msg": "" + }, + { + "filename": "A_Combs.pdf", + "extraction_details": { + "name": { + "prediction": "Youkyoung Jeong", + "expected": "A. Combs", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "country": { + "prediction": "", + "expected": "United States", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1987-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of Georgia", + "expected": "University of Kentucky", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Department of Public Administration and Policy", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "", + "expected": "Public Policy & Administration", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "", + "expected": "2014-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "", + "expected": "2018-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 8, + "num_correct": 1, + "error_msg": "" + }, + { + "filename": "Elena_Pagnin.pdf", + "extraction_details": { + "name": { + "prediction": "ELENA PAGNIN", + "expected": "Elena Pagnin", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "country": { + "prediction": "Swedish", + "expected": "Italy", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1989-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "0000-0002-7804-6696", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "elenap@chalmers.se", + "expected": "elenap@chalmers.se", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "", + "expected": "PhD", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "university": { + "prediction": "Chalmers University of Technology", + "expected": "Chalmers University of Technology", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "", + "expected": "Department of Computer Science and Engineering", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Cryptography", + "expected": "Computer Science", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "2022 Oct-now", + "expected": "2014-05-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2022 Apr-Aug", + "expected": "2019-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "epagnin", + "expected": "https://epagnin.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "Google Scholar", + "expected": "https://github.com/epagnin", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 3, + "error_msg": "" + }, + { + "filename": "Gabriel_Okasa.pdf", + "extraction_details": { + "name": { + "prediction": "Gabriel Okasa", + "expected": "Gabriel Okasa", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "Switzerland", + "expected": "Switzerland", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "birthday": { + "prediction": "", + "expected": "1992-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "+41 78 700 07 63", + "expected": "+41 78 700 07 63", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "email": { + "prediction": "gabriel.okasa@epfl.ch", + "expected": "gabriel.okasa@epfl.ch", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of St.Gallen", + "expected": "University of St.Gallen", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "department": { + "prediction": "causal inference and econometrics", + "expected": "School of Economics and Political Science", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Economics and Finance", + "expected": "Economics and Finance", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "start_date": { + "prediction": "February 2017", + "expected": "2017-02-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "end_date": { + "prediction": "February 2022", + "expected": "2022-02-01", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "homepage_url": { + "prediction": "", + "expected": "https://okasag.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "okasag", + "expected": "https://github.com/okasag", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + } + }, + "num_fields": 13, + "num_correct": 10, + "error_msg": "" + }, + { + "filename": "Martin_Souchier.pdf", + "extraction_details": { + "name": { + "prediction": "", + "expected": "Martin Souchier", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "country": { + "prediction": "", + "expected": "USA", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1993-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "souchier@wharton.upenn.edu", + "expected": "souchier@wharton.upenn.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "", + "expected": "Stanford University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "Finance", + "expected": "Economics", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "2024", + "expected": "2017-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "2024", + "expected": "2023-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "https://www.martinsouchier.com/PRIMARY", + "expected": "http://sites.google.com/site/zarekcb", + "verbatim_correct": false, + "judge_model_correct": true, + "correct": true + }, + "github_url": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 10, + "num_correct": 4, + "error_msg": "" + }, + { + "filename": "Zubin_Jelveh.pdf", + "extraction_details": { + "name": { + "prediction": "Zubin Jelveh", + "expected": "Zubin Jelveh", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "country": { + "prediction": "MD", + "expected": "USA", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "birthday": { + "prediction": "", + "expected": "1977-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "phone_number": { + "prediction": "", + "expected": null, + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "email": { + "prediction": "zjelveh@umd.edu", + "expected": "zjelveh@umd.edu", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "highest_level_degree": { + "prediction": "PhD", + "expected": "PhD", + "verbatim_correct": true, + "judge_model_correct": false, + "correct": true + }, + "university": { + "prediction": "University of Maryland", + "expected": "New York University", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "department": { + "prediction": "Dept. of Criminology and Criminal Justice", + "expected": "Tandon School of Engineering", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "major": { + "prediction": "", + "expected": "Computer Science", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "start_date": { + "prediction": "2021", + "expected": "2011-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "end_date": { + "prediction": "", + "expected": "2017-01-01", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "homepage_url": { + "prediction": "", + "expected": "http://zjelveh.github.io", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + }, + "github_url": { + "prediction": "http://zjelveh.github.io", + "expected": "https://github.com/zjelveh", + "verbatim_correct": false, + "judge_model_correct": false, + "correct": false + } + }, + "num_fields": 12, + "num_correct": 3, + "error_msg": "" + } + ], + "total_fields": 472, + "total_correct": 210, + "errors": 0, + "calibrated_accuracy": 0.4449152542372881 +} \ No newline at end of file From d267019b8bb3da6f3e263b29bf1172f0f2825dee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 18:09:53 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json b/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json index 8d657c0..b772fb8 100644 --- a/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json +++ b/results/260411-kdd-rebuttal-cv-gliner2-model/fastino_gliner2-large-v1_Sculpt-AI_GIMBench-cv-parse_260411-010729.json @@ -4320,4 +4320,4 @@ "total_correct": 210, "errors": 0, "calibrated_accuracy": 0.4449152542372881 -} \ No newline at end of file +}