From 47a0d90fbd05f829c097319696370dfa24793dfe Mon Sep 17 00:00:00 2001 From: David Date: Sat, 18 Jan 2025 18:48:31 -0800 Subject: [PATCH 1/2] assemble support for pandas --- Makefile | 2 +- examples/kafka/s2_producer.py | 1 + examples/kafka/s4_producer_v2.py | 1 + examples/tasks/assemble/assemble.py | 34 +++++++++++++++++- llmint/assemble/pandas/__init__.py | 1 + llmint/assemble/pandas/function.py | 36 +++++++++++++++++++ llmint/assemble/pandas/transform/__init__.py | 12 +++++++ .../pandas/transform/field/__init__.py | 0 llmint/assemble/pandas/transform/field/add.py | 10 ++++++ .../assemble/pandas/transform/field/cast.py | 7 ++++ .../assemble/pandas/transform/field/copy.py | 8 +++++ .../pandas/transform/field/default.py | 10 ++++++ .../assemble/pandas/transform/field/delete.py | 7 ++++ .../pandas/transform/field/missing.py | 7 ++++ .../assemble/pandas/transform/field/rename.py | 7 ++++ .../pandas/transform/value/__init__.py | 0 .../assemble/pandas/transform/value/apply.py | 16 +++++++++ .../assemble/pandas/transform/value/link.py | 7 ++++ .../assemble/pandas/transform/value/scale.py | 13 +++++++ .../assemble/pandas/transform/value/shift.py | 13 +++++++ llmint/core/eval.py | 21 +++++------ llmint/map/function.py | 21 +++++++++-- llmint/map/transform/field/add.py | 9 +++-- llmint/map/transform/field/cast.py | 9 +++-- llmint/map/transform/field/copy.py | 9 +++-- llmint/map/transform/field/default.py | 10 ++++-- llmint/map/transform/field/delete.py | 9 +++-- llmint/map/transform/field/missing.py | 9 +++-- llmint/map/transform/field/rename.py | 10 ++++-- llmint/map/transform/value/apply.py | 22 ++++++------ llmint/map/transform/value/link.py | 11 +++--- llmint/map/transform/value/scale.py | 10 ++++-- llmint/map/transform/value/shift.py | 9 +++-- 33 files changed, 302 insertions(+), 49 deletions(-) create mode 100644 llmint/assemble/pandas/__init__.py create mode 100644 llmint/assemble/pandas/function.py create mode 100644 llmint/assemble/pandas/transform/__init__.py create mode 100644 llmint/assemble/pandas/transform/field/__init__.py create mode 100644 llmint/assemble/pandas/transform/field/add.py create mode 100644 llmint/assemble/pandas/transform/field/cast.py create mode 100644 llmint/assemble/pandas/transform/field/copy.py create mode 100644 llmint/assemble/pandas/transform/field/default.py create mode 100644 llmint/assemble/pandas/transform/field/delete.py create mode 100644 llmint/assemble/pandas/transform/field/missing.py create mode 100644 llmint/assemble/pandas/transform/field/rename.py create mode 100644 llmint/assemble/pandas/transform/value/__init__.py create mode 100644 llmint/assemble/pandas/transform/value/apply.py create mode 100644 llmint/assemble/pandas/transform/value/link.py create mode 100644 llmint/assemble/pandas/transform/value/scale.py create mode 100644 llmint/assemble/pandas/transform/value/shift.py diff --git a/Makefile b/Makefile index 7da49bc..9ec0344 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ install: .PHONY: run map map: - python examples/map.py + python examples/tasks/map/map.py run: map .PHONY: benchmark_match, benchmark_mapper, test diff --git a/examples/kafka/s2_producer.py b/examples/kafka/s2_producer.py index db10b62..4276296 100644 --- a/examples/kafka/s2_producer.py +++ b/examples/kafka/s2_producer.py @@ -23,6 +23,7 @@ # User data to be sent user_data = {"name": "John Doe", "age": 28} +print("User:", user_data) # Produce message producer.produce(topic='user-info', key=str(user_data['name']), value=user_data) diff --git a/examples/kafka/s4_producer_v2.py b/examples/kafka/s4_producer_v2.py index c97a355..3a6d6b8 100644 --- a/examples/kafka/s4_producer_v2.py +++ b/examples/kafka/s4_producer_v2.py @@ -18,6 +18,7 @@ # Updated user data to be sent with the new schema user_data = {"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"} +print("User:", user_data) # Produce message producer.produce(topic='user-info', key=str(user_data['name']), value=user_data) diff --git a/examples/tasks/assemble/assemble.py b/examples/tasks/assemble/assemble.py index 2b5256e..ba84c21 100644 --- a/examples/tasks/assemble/assemble.py +++ b/examples/tasks/assemble/assemble.py @@ -1,8 +1,40 @@ +import pandas as pd + import llmint +from llmint.assemble.pandas import assemble def main(): - llmint.assemble() + source_schema = ''' + { + "fields": [ + {"name": "Fname", "type": "string"}, + {"name": "Lname", "type": "string"}, + {"name": "Age", "type": "int"}, + {"name": "Email", "type": ["null", "string"], "default": null} + ] + } + ''' + target_schema = ''' + { + "fields": [ + {"name": "name", "type": "string"}, + {"name": "age", "type": "int"}, + {"name": "email", "type": ["null", "string"], "default": null} + ] + } + ''' + + + source_df = pd.DataFrame([{"Fname": "Josh", "Lname": "Doe", "Age": 31, "Email": "joshdoe@example.com"}]) + dest_df = pd.DataFrame([{"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"}]) + print("Source:", source_df, sep="\n") + print("Dest:", dest_df, sep="\n") + + mappings = llmint.map(source_df, dest_df) + output = assemble(source_df, mappings) + combined_df = pd.concat([dest_df, output], axis=0) + print("Combined:", combined_df, sep="\n") if __name__ == "__main__": diff --git a/llmint/assemble/pandas/__init__.py b/llmint/assemble/pandas/__init__.py new file mode 100644 index 0000000..e3600db --- /dev/null +++ b/llmint/assemble/pandas/__init__.py @@ -0,0 +1 @@ +from llmint.assemble.pandas.function import assemble diff --git a/llmint/assemble/pandas/function.py b/llmint/assemble/pandas/function.py new file mode 100644 index 0000000..fddbc2f --- /dev/null +++ b/llmint/assemble/pandas/function.py @@ -0,0 +1,36 @@ +import pandas as pd + +from llmint.assemble.pandas.transform import ( + add, cast, copy, default, delete, missing, rename, apply, link, scale, shift +) +from llmint.map.function import Map + +def assemble(df: pd.DataFrame, mappings: list[Map]): + df_outputs = [] + + for mapping in mappings: + match mapping.transformation.split(' ')[0]: + case 'ADD': + df_outputs.append(add(df, mapping)) + case 'CAST': + df_outputs.append(cast(df, mapping)) + case 'COPY': + df_outputs.append(copy(df, mapping)) + case 'DEFAULT': + df_outputs.append(default(df, mapping)) + case 'DELETE': + df_outputs.append(delete(df, mapping)) + case 'MISSING': + df_outputs.append(missing(df, mapping)) + case 'RENAME': + df_outputs.append(rename(df, mapping)) + case 'APPLY': + df_outputs.append(apply(df, mapping)) + case 'LINK': + df_outputs.append(link(df, mapping)) + case 'SCALE': + df_outputs.append(scale(df, mapping)) + case 'SHIFT': + df_outputs.append(shift(df, mapping)) + + return pd.concat(df_outputs, axis=1) diff --git a/llmint/assemble/pandas/transform/__init__.py b/llmint/assemble/pandas/transform/__init__.py new file mode 100644 index 0000000..f668db7 --- /dev/null +++ b/llmint/assemble/pandas/transform/__init__.py @@ -0,0 +1,12 @@ +from llmint.assemble.pandas.transform.field.add import func as add +from llmint.assemble.pandas.transform.field.cast import func as cast +from llmint.assemble.pandas.transform.field.copy import func as copy +from llmint.assemble.pandas.transform.field.default import func as default +from llmint.assemble.pandas.transform.field.delete import func as delete +from llmint.assemble.pandas.transform.field.missing import func as missing +from llmint.assemble.pandas.transform.field.rename import func as rename + +from llmint.assemble.pandas.transform.value.apply import func as apply +from llmint.assemble.pandas.transform.value.link import func as link +from llmint.assemble.pandas.transform.value.scale import func as scale +from llmint.assemble.pandas.transform.value.shift import func as shift diff --git a/llmint/assemble/pandas/transform/field/__init__.py b/llmint/assemble/pandas/transform/field/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/llmint/assemble/pandas/transform/field/add.py b/llmint/assemble/pandas/transform/field/add.py new file mode 100644 index 0000000..a7843b2 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/add.py @@ -0,0 +1,10 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + col_type = re.search(r'TYPE (\w+)', mapping.transformation).group(1) + + return Series([], name=mapping.target_field, dtype=col_type) diff --git a/llmint/assemble/pandas/transform/field/cast.py b/llmint/assemble/pandas/transform/field/cast.py new file mode 100644 index 0000000..2217de4 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/cast.py @@ -0,0 +1,7 @@ +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + pass diff --git a/llmint/assemble/pandas/transform/field/copy.py b/llmint/assemble/pandas/transform/field/copy.py new file mode 100644 index 0000000..a78009c --- /dev/null +++ b/llmint/assemble/pandas/transform/field/copy.py @@ -0,0 +1,8 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + return Series(df[mapping.source_field], name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/field/default.py b/llmint/assemble/pandas/transform/field/default.py new file mode 100644 index 0000000..ed0d2e0 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/default.py @@ -0,0 +1,10 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + default_val = re.search(r'DEFAULT TO (.*)', mapping.transformation).group(1) + + return Series([default_val] * len(df), name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/field/delete.py b/llmint/assemble/pandas/transform/field/delete.py new file mode 100644 index 0000000..2217de4 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/delete.py @@ -0,0 +1,7 @@ +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + pass diff --git a/llmint/assemble/pandas/transform/field/missing.py b/llmint/assemble/pandas/transform/field/missing.py new file mode 100644 index 0000000..2217de4 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/missing.py @@ -0,0 +1,7 @@ +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + pass diff --git a/llmint/assemble/pandas/transform/field/rename.py b/llmint/assemble/pandas/transform/field/rename.py new file mode 100644 index 0000000..2217de4 --- /dev/null +++ b/llmint/assemble/pandas/transform/field/rename.py @@ -0,0 +1,7 @@ +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + pass diff --git a/llmint/assemble/pandas/transform/value/__init__.py b/llmint/assemble/pandas/transform/value/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/llmint/assemble/pandas/transform/value/apply.py b/llmint/assemble/pandas/transform/value/apply.py new file mode 100644 index 0000000..f831fab --- /dev/null +++ b/llmint/assemble/pandas/transform/value/apply.py @@ -0,0 +1,16 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + apply_func = re.search(r'APPLY (.*)', mapping.transformation).group(1) + + # assign all columns to their own variables + for col in df.columns: + exec(f'{col.replace(" ", "_")} = df[col]', locals(), globals()) + + exec(f'_output = {apply_func}', locals(), globals()) + + return Series(_output, name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/value/link.py b/llmint/assemble/pandas/transform/value/link.py new file mode 100644 index 0000000..2217de4 --- /dev/null +++ b/llmint/assemble/pandas/transform/value/link.py @@ -0,0 +1,7 @@ +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + pass diff --git a/llmint/assemble/pandas/transform/value/scale.py b/llmint/assemble/pandas/transform/value/scale.py new file mode 100644 index 0000000..72cfa3a --- /dev/null +++ b/llmint/assemble/pandas/transform/value/scale.py @@ -0,0 +1,13 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + try: + scale = float(re.search(r'SCALE BY (\d*.\d*)', mapping.transformation).group(1)) + except ValueError: + return df[mapping.source_field].copy() + + return df[mapping.source_field] * scale diff --git a/llmint/assemble/pandas/transform/value/shift.py b/llmint/assemble/pandas/transform/value/shift.py new file mode 100644 index 0000000..abd1a3f --- /dev/null +++ b/llmint/assemble/pandas/transform/value/shift.py @@ -0,0 +1,13 @@ +import re +from pandas import Series, DataFrame + +from llmint.map.function import Map + + +def func(df: DataFrame, mapping: Map): + try: + shift = float(re.search(r'SHIFT BY (\d*.\d*)', mapping.transformation).group(1)) + except ValueError: + return df[mapping.source_field].copy() + + return df[mapping.source_field] + shift diff --git a/llmint/core/eval.py b/llmint/core/eval.py index 835a966..5a1ba4d 100644 --- a/llmint/core/eval.py +++ b/llmint/core/eval.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + class pcolors: RIGHT = '\033[92m' WRONG = '\033[91m' @@ -43,13 +46,11 @@ def accuracy(output: list, test_example: list): f1 = 0 return precision, recall, f1 -def print_mappings(mappings: dict, include_reasoning=True): - for name, response in mappings.items(): - mapping, reasoning = response - if include_reasoning: - - print(pcolors.RIGHT + mapping + pcolors.ENDC + '\n', - reasoning, flush=True) - else: - print(pcolors.RIGHT + mapping + pcolors.ENDC, - flush=True) +def print_mappings(mappings: list[Map], include_reasoning=True): + for mapping in mappings: + if include_reasoning: + print(pcolors.RIGHT + mapping.__dict__ + pcolors.ENDC + '\n', + mapping.reasoning, flush=True) + else: + print(pcolors.RIGHT + mapping.__dict__ + pcolors.ENDC, + flush=True) diff --git a/llmint/map/function.py b/llmint/map/function.py index 77456d5..1b0d686 100644 --- a/llmint/map/function.py +++ b/llmint/map/function.py @@ -1,8 +1,18 @@ +from pydantic import BaseModel + from llmint.core import model from llmint.map import prompt, parameter + +class Map(BaseModel): + source_field: str | None + target_field: str + transformation: str + reasoning: str | None + + def map(source_schema, target_schema): - mappings = model.call( + output = model.call( prompt=[ {"role": "system", "content": prompt.system}, {"role": "user", "content": prompt.user.format( @@ -15,5 +25,12 @@ def map(source_schema, target_schema): temperature=parameter.temperature, seed=parameter.seed, max_model_call=1, # only one model call - )["tool_outputs"][0] # take the first tool output + )["tool_outputs"] + + # process the mappings + mappings = [] + for mapping in output: + for _, mapping in mapping.items(): + mappings.append(mapping) + return mappings diff --git a/llmint/map/transform/field/add.py b/llmint/map/transform/field/add.py index 6eeb60a..3dced81 100644 --- a/llmint/map/transform/field/add.py +++ b/llmint/map/transform/field/add.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "ADD" schema = { "type": "function", @@ -27,5 +30,7 @@ def func(target_field, field_type, reasoning): - return (f'{{from: None, to: {target_field}, ' - f'transformation: ADD {target_field} TYPE {field_type}}}', reasoning) + return Map(source_field=None, + target_field=target_field, + transformation=f'ADD {target_field} TYPE {field_type}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/cast.py b/llmint/map/transform/field/cast.py index 2ecbf03..4512815 100644 --- a/llmint/map/transform/field/cast.py +++ b/llmint/map/transform/field/cast.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "CAST" schema = { "type": "function", @@ -35,5 +38,7 @@ def func(source_field, target_field, source_type, target_type, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: CAST {source_field} FROM {source_type} TO {target_type}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'CAST FROM {source_type} TO {target_type}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/copy.py b/llmint/map/transform/field/copy.py index 0f84d8a..7ba4764 100644 --- a/llmint/map/transform/field/copy.py +++ b/llmint/map/transform/field/copy.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "COPY" schema = { "type": "function", @@ -28,5 +31,7 @@ def func(source_field, target_field, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: COPY}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'COPY', + reasoning=reasoning) diff --git a/llmint/map/transform/field/default.py b/llmint/map/transform/field/default.py index 8d32726..65035ce 100644 --- a/llmint/map/transform/field/default.py +++ b/llmint/map/transform/field/default.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "DEFAULT" schema = { "type": "function", @@ -31,6 +34,7 @@ def func(source_field, target_field, default_value, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: DEFAULT {target_field} TO {default_value}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'DEFAULT TO {default_value}', + reasoning=reasoning) diff --git a/llmint/map/transform/field/delete.py b/llmint/map/transform/field/delete.py index 15c3eef..45146a4 100644 --- a/llmint/map/transform/field/delete.py +++ b/llmint/map/transform/field/delete.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "DELETE" schema = { "type": "function", @@ -23,5 +26,7 @@ def func(source_field, reasoning): - return (f'{{from: {source_field}, to: None, ' - f'transformation: DELETE {source_field}}}', reasoning) + return Map(source_field=source_field, + target_field=None, + transformation=f'DELETE', + reasoning=reasoning) diff --git a/llmint/map/transform/field/missing.py b/llmint/map/transform/field/missing.py index e0d30bc..ab43f17 100644 --- a/llmint/map/transform/field/missing.py +++ b/llmint/map/transform/field/missing.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "MISSING" schema = { "type": "function", @@ -23,5 +26,7 @@ def func(target_field, reasoning): - return (f'{{from: None, to: {target_field}, ' - f'transformation: MISSING {target_field}}}', reasoning) + return Map(source_field=None, + target_field=target_field, + transformation=f'MISSING', + reasoning=reasoning) diff --git a/llmint/map/transform/field/rename.py b/llmint/map/transform/field/rename.py index 939d3fc..18ee733 100644 --- a/llmint/map/transform/field/rename.py +++ b/llmint/map/transform/field/rename.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "RENAME" schema = { "type": "function", @@ -27,6 +30,7 @@ def func(source_field, target_field, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: RENAME {source_field} TO {target_field}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'RENAME TO {target_field}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/apply.py b/llmint/map/transform/value/apply.py index 5589aa4..e8ffd14 100644 --- a/llmint/map/transform/value/apply.py +++ b/llmint/map/transform/value/apply.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "APPLY" schema = { "type": "function", @@ -7,29 +10,28 @@ "parameters": { "type": "object", "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, "target_field": { "type": "string", "description": "Field from the target schema", }, - "function_name": { + "function": { "type": "string", - "description": "Function to apply", + "description": "An expression involving source schema field(s) to apply, " + "replace any spaces in the schema fields with underscores", }, "reasoning": { "type": "string", "description": "In-depth reasoning as to why you chose this function", }, }, - "required": ["source_field", "target_field", "function_name", "reasoning"], + "required": ["target_field", "function", "reasoning"], }, } } -def func(source_field, target_field, function_name, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: APPLY {source_field} {function_name}}}', reasoning) +def func(target_field, function, reasoning): + return Map(source_field=None, + target_field=target_field, + transformation=f'APPLY {function}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/link.py b/llmint/map/transform/value/link.py index 33c19b8..b00f0e7 100644 --- a/llmint/map/transform/value/link.py +++ b/llmint/map/transform/value/link.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "LINK" schema = { "type": "function", @@ -34,7 +37,7 @@ } def func(source_field, target_field, old_value, new_value, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: LINK {source_field} "{old_value}" TO "{new_value}"}}', - reasoning) \ No newline at end of file + return Map(source_field=source_field, + target_field=target_field, + transformation=f'LINK {source_field} "{old_value}" TO "{new_value}"', + reasoning=reasoning) \ No newline at end of file diff --git a/llmint/map/transform/value/scale.py b/llmint/map/transform/value/scale.py index 97b57ce..f90f546 100644 --- a/llmint/map/transform/value/scale.py +++ b/llmint/map/transform/value/scale.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "SCALE" schema = { "type": "function", @@ -31,6 +34,7 @@ def func(source_field, target_field, factor, reasoning): - return ( - f'{{from: {source_field}, to: {target_field}, ' - f'transformation: SCALE {source_field} BY {factor}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'SCALE BY {factor}', + reasoning=reasoning) diff --git a/llmint/map/transform/value/shift.py b/llmint/map/transform/value/shift.py index 9e8da38..da62dd0 100644 --- a/llmint/map/transform/value/shift.py +++ b/llmint/map/transform/value/shift.py @@ -1,3 +1,6 @@ +from llmint.map.function import Map + + name = "SHIFT" schema = { "type": "function", @@ -31,5 +34,7 @@ def func(source_field, target_field, value, reasoning): - return (f'{{from: {source_field}, to: {target_field}, ' - f'transformation: SHIFT {source_field} BY {value}}}', reasoning) + return Map(source_field=source_field, + target_field=target_field, + transformation=f'SHIFT BY {value}', + reasoning=reasoning) From 4357bb66893c1d8f2c502ad000472c24c7c1621e Mon Sep 17 00:00:00 2001 From: David Date: Mon, 27 Jan 2025 14:05:37 -0800 Subject: [PATCH 2/2] add parameter to control reasoning when map and refactor pandas assemble --- examples/tasks/assemble/assemble.py | 11 +++-- llmint/assemble/pandas/__init__.py | 2 +- llmint/assemble/pandas/function.py | 41 ++++++++-------- llmint/assemble/pandas/transform/__init__.py | 4 -- llmint/assemble/pandas/transform/field/add.py | 6 +-- .../assemble/pandas/transform/field/cast.py | 7 --- .../assemble/pandas/transform/field/copy.py | 7 ++- .../pandas/transform/field/default.py | 6 +-- .../assemble/pandas/transform/field/delete.py | 7 --- .../pandas/transform/field/missing.py | 6 +-- .../assemble/pandas/transform/field/rename.py | 7 --- .../assemble/pandas/transform/value/apply.py | 15 +++--- .../assemble/pandas/transform/value/link.py | 7 --- .../assemble/pandas/transform/value/scale.py | 7 ++- .../assemble/pandas/transform/value/shift.py | 7 ++- llmint/map/parameter.py | 10 ++-- llmint/map/prompt.py | 3 ++ llmint/map/transform/field/add.py | 37 ++++++++------- llmint/map/transform/field/copy.py | 39 +++++++-------- llmint/map/transform/field/default.py | 42 ++++++++--------- llmint/map/transform/field/delete.py | 32 +++++++------ llmint/map/transform/field/missing.py | 32 +++++++------ llmint/map/transform/field/rename.py | 37 ++++++++------- llmint/map/transform/value/apply.py | 39 +++++++-------- llmint/map/transform/value/gen.py | 44 ++++++++--------- llmint/map/transform/value/link.py | 47 +++++++++---------- llmint/map/transform/value/scale.py | 42 ++++++++--------- llmint/map/transform/value/shift.py | 42 ++++++++--------- 28 files changed, 282 insertions(+), 304 deletions(-) delete mode 100644 llmint/assemble/pandas/transform/field/cast.py delete mode 100644 llmint/assemble/pandas/transform/field/delete.py delete mode 100644 llmint/assemble/pandas/transform/field/rename.py delete mode 100644 llmint/assemble/pandas/transform/value/link.py diff --git a/examples/tasks/assemble/assemble.py b/examples/tasks/assemble/assemble.py index ba84c21..edbd156 100644 --- a/examples/tasks/assemble/assemble.py +++ b/examples/tasks/assemble/assemble.py @@ -1,7 +1,7 @@ import pandas as pd import llmint -from llmint.assemble.pandas import assemble +from llmint.assemble.pandas import assemble, construct def main(): @@ -28,13 +28,16 @@ def main(): source_df = pd.DataFrame([{"Fname": "Josh", "Lname": "Doe", "Age": 31, "Email": "joshdoe@example.com"}]) dest_df = pd.DataFrame([{"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"}]) + print("Concat the source dataframe to the dest dataframe:") print("Source:", source_df, sep="\n") print("Dest:", dest_df, sep="\n") - mappings = llmint.map(source_df, dest_df) - output = assemble(source_df, mappings) + mappings = llmint.map(source_schema, target_schema) + assembly = assemble(mappings) + output = construct(source_df, assembly) + combined_df = pd.concat([dest_df, output], axis=0) - print("Combined:", combined_df, sep="\n") + print("\nCombined:", combined_df, sep="\n") if __name__ == "__main__": diff --git a/llmint/assemble/pandas/__init__.py b/llmint/assemble/pandas/__init__.py index e3600db..9cfa181 100644 --- a/llmint/assemble/pandas/__init__.py +++ b/llmint/assemble/pandas/__init__.py @@ -1 +1 @@ -from llmint.assemble.pandas.function import assemble +from llmint.assemble.pandas.function import assemble, construct diff --git a/llmint/assemble/pandas/function.py b/llmint/assemble/pandas/function.py index fddbc2f..b9f2be2 100644 --- a/llmint/assemble/pandas/function.py +++ b/llmint/assemble/pandas/function.py @@ -1,36 +1,39 @@ import pandas as pd +from typing import List, Callable from llmint.assemble.pandas.transform import ( - add, cast, copy, default, delete, missing, rename, apply, link, scale, shift + add, copy, default, missing, apply, scale, shift ) from llmint.map.function import Map -def assemble(df: pd.DataFrame, mappings: list[Map]): - df_outputs = [] + +def assemble(mappings: list[Map]): + output = [] for mapping in mappings: match mapping.transformation.split(' ')[0]: case 'ADD': - df_outputs.append(add(df, mapping)) - case 'CAST': - df_outputs.append(cast(df, mapping)) + output.append(add(mapping)) case 'COPY': - df_outputs.append(copy(df, mapping)) + output.append(copy(mapping)) case 'DEFAULT': - df_outputs.append(default(df, mapping)) - case 'DELETE': - df_outputs.append(delete(df, mapping)) + output.append(default(mapping)) case 'MISSING': - df_outputs.append(missing(df, mapping)) - case 'RENAME': - df_outputs.append(rename(df, mapping)) + output.append(missing(mapping)) case 'APPLY': - df_outputs.append(apply(df, mapping)) - case 'LINK': - df_outputs.append(link(df, mapping)) + output.append(apply(mapping)) case 'SCALE': - df_outputs.append(scale(df, mapping)) + output.append(scale(mapping)) case 'SHIFT': - df_outputs.append(shift(df, mapping)) + output.append(shift(mapping)) - return pd.concat(df_outputs, axis=1) + return output + + +def construct(df: pd.DataFrame, assembly: List[Callable[[pd.DataFrame], pd.Series]]): + df_output = [] + + for func in assembly: + df_output.append(func(df)) + + return pd.concat(df_output, axis=1) diff --git a/llmint/assemble/pandas/transform/__init__.py b/llmint/assemble/pandas/transform/__init__.py index f668db7..0c10cf9 100644 --- a/llmint/assemble/pandas/transform/__init__.py +++ b/llmint/assemble/pandas/transform/__init__.py @@ -1,12 +1,8 @@ from llmint.assemble.pandas.transform.field.add import func as add -from llmint.assemble.pandas.transform.field.cast import func as cast from llmint.assemble.pandas.transform.field.copy import func as copy from llmint.assemble.pandas.transform.field.default import func as default -from llmint.assemble.pandas.transform.field.delete import func as delete from llmint.assemble.pandas.transform.field.missing import func as missing -from llmint.assemble.pandas.transform.field.rename import func as rename from llmint.assemble.pandas.transform.value.apply import func as apply -from llmint.assemble.pandas.transform.value.link import func as link from llmint.assemble.pandas.transform.value.scale import func as scale from llmint.assemble.pandas.transform.value.shift import func as shift diff --git a/llmint/assemble/pandas/transform/field/add.py b/llmint/assemble/pandas/transform/field/add.py index a7843b2..398f0a9 100644 --- a/llmint/assemble/pandas/transform/field/add.py +++ b/llmint/assemble/pandas/transform/field/add.py @@ -1,10 +1,10 @@ import re -from pandas import Series, DataFrame +from pandas import Series from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): +def func(mapping: Map): col_type = re.search(r'TYPE (\w+)', mapping.transformation).group(1) - return Series([], name=mapping.target_field, dtype=col_type) + return lambda df: Series([], name=mapping.target_field, dtype=col_type) diff --git a/llmint/assemble/pandas/transform/field/cast.py b/llmint/assemble/pandas/transform/field/cast.py deleted file mode 100644 index 2217de4..0000000 --- a/llmint/assemble/pandas/transform/field/cast.py +++ /dev/null @@ -1,7 +0,0 @@ -from pandas import Series, DataFrame - -from llmint.map.function import Map - - -def func(df: DataFrame, mapping: Map): - pass diff --git a/llmint/assemble/pandas/transform/field/copy.py b/llmint/assemble/pandas/transform/field/copy.py index a78009c..b47042c 100644 --- a/llmint/assemble/pandas/transform/field/copy.py +++ b/llmint/assemble/pandas/transform/field/copy.py @@ -1,8 +1,7 @@ -import re -from pandas import Series, DataFrame +from pandas import Series from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): - return Series(df[mapping.source_field], name=mapping.target_field) +def func(mapping: Map): + return lambda df: Series(df[mapping.source_field], name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/field/default.py b/llmint/assemble/pandas/transform/field/default.py index ed0d2e0..cdc540a 100644 --- a/llmint/assemble/pandas/transform/field/default.py +++ b/llmint/assemble/pandas/transform/field/default.py @@ -1,10 +1,10 @@ import re -from pandas import Series, DataFrame +from pandas import Series from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): +def func(mapping: Map): default_val = re.search(r'DEFAULT TO (.*)', mapping.transformation).group(1) - return Series([default_val] * len(df), name=mapping.target_field) + return lambda df: Series([default_val] * len(df), name=mapping.target_field) diff --git a/llmint/assemble/pandas/transform/field/delete.py b/llmint/assemble/pandas/transform/field/delete.py deleted file mode 100644 index 2217de4..0000000 --- a/llmint/assemble/pandas/transform/field/delete.py +++ /dev/null @@ -1,7 +0,0 @@ -from pandas import Series, DataFrame - -from llmint.map.function import Map - - -def func(df: DataFrame, mapping: Map): - pass diff --git a/llmint/assemble/pandas/transform/field/missing.py b/llmint/assemble/pandas/transform/field/missing.py index 2217de4..9c25dfb 100644 --- a/llmint/assemble/pandas/transform/field/missing.py +++ b/llmint/assemble/pandas/transform/field/missing.py @@ -1,7 +1,5 @@ -from pandas import Series, DataFrame - from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): - pass +def func(mapping: Map): + return lambda df: print(f"WARNING: {mapping.target_field} field cannot be automatically converted.") diff --git a/llmint/assemble/pandas/transform/field/rename.py b/llmint/assemble/pandas/transform/field/rename.py deleted file mode 100644 index 2217de4..0000000 --- a/llmint/assemble/pandas/transform/field/rename.py +++ /dev/null @@ -1,7 +0,0 @@ -from pandas import Series, DataFrame - -from llmint.map.function import Map - - -def func(df: DataFrame, mapping: Map): - pass diff --git a/llmint/assemble/pandas/transform/value/apply.py b/llmint/assemble/pandas/transform/value/apply.py index f831fab..fb370e1 100644 --- a/llmint/assemble/pandas/transform/value/apply.py +++ b/llmint/assemble/pandas/transform/value/apply.py @@ -4,13 +4,16 @@ from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): +def func(mapping: Map): apply_func = re.search(r'APPLY (.*)', mapping.transformation).group(1) - # assign all columns to their own variables - for col in df.columns: - exec(f'{col.replace(" ", "_")} = df[col]', locals(), globals()) + def apply(df: DataFrame): + # assign all columns to their own variables + for col in df.columns: + exec(f'{col.replace(" ", "_")} = df[col]', locals(), globals()) - exec(f'_output = {apply_func}', locals(), globals()) + exec(f'_output = {apply_func}', locals(), globals()) + + return Series(_output, name=mapping.target_field) - return Series(_output, name=mapping.target_field) + return apply diff --git a/llmint/assemble/pandas/transform/value/link.py b/llmint/assemble/pandas/transform/value/link.py deleted file mode 100644 index 2217de4..0000000 --- a/llmint/assemble/pandas/transform/value/link.py +++ /dev/null @@ -1,7 +0,0 @@ -from pandas import Series, DataFrame - -from llmint.map.function import Map - - -def func(df: DataFrame, mapping: Map): - pass diff --git a/llmint/assemble/pandas/transform/value/scale.py b/llmint/assemble/pandas/transform/value/scale.py index 72cfa3a..845d5f9 100644 --- a/llmint/assemble/pandas/transform/value/scale.py +++ b/llmint/assemble/pandas/transform/value/scale.py @@ -1,13 +1,12 @@ import re -from pandas import Series, DataFrame from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): +def func(mapping: Map): try: scale = float(re.search(r'SCALE BY (\d*.\d*)', mapping.transformation).group(1)) except ValueError: - return df[mapping.source_field].copy() + return lambda df: df[mapping.source_field].copy() - return df[mapping.source_field] * scale + return lambda df: df[mapping.source_field] * scale diff --git a/llmint/assemble/pandas/transform/value/shift.py b/llmint/assemble/pandas/transform/value/shift.py index abd1a3f..d5310c1 100644 --- a/llmint/assemble/pandas/transform/value/shift.py +++ b/llmint/assemble/pandas/transform/value/shift.py @@ -1,13 +1,12 @@ import re -from pandas import Series, DataFrame from llmint.map.function import Map -def func(df: DataFrame, mapping: Map): +def func(mapping: Map): try: shift = float(re.search(r'SHIFT BY (\d*.\d*)', mapping.transformation).group(1)) except ValueError: - return df[mapping.source_field].copy() + return lambda df: df[mapping.source_field].copy() - return df[mapping.source_field] + shift + return lambda df: df[mapping.source_field] + shift diff --git a/llmint/map/parameter.py b/llmint/map/parameter.py index fa3b1ec..e8bbd7f 100644 --- a/llmint/map/parameter.py +++ b/llmint/map/parameter.py @@ -7,16 +7,18 @@ "llmint.map.match", # field transformation "llmint.map.transform.field.add", - "llmint.map.transform.field.cast", + # "llmint.map.transform.field.cast", "llmint.map.transform.field.copy", "llmint.map.transform.field.default", - "llmint.map.transform.field.delete", - "llmint.map.transform.field.rename", + # "llmint.map.transform.field.delete", + # "llmint.map.transform.field.rename", "llmint.map.transform.field.missing", # value transformation "llmint.map.transform.value.apply", # "llmint.map.transform.value.gen", - "llmint.map.transform.value.link", + # "llmint.map.transform.value.link", "llmint.map.transform.value.scale", "llmint.map.transform.value.shift", ] + +reasoning = False diff --git a/llmint/map/prompt.py b/llmint/map/prompt.py index 81c70d6..b429e7c 100644 --- a/llmint/map/prompt.py +++ b/llmint/map/prompt.py @@ -69,3 +69,6 @@ user = "Source Schema: ' + {source_schema} + " \ "'\nTarget Schema: ' + {target_schema}" + +"""Reasoning prompt""" +reasoning_prompt = "In-depth reasoning as to why you chose this function" diff --git a/llmint/map/transform/field/add.py b/llmint/map/transform/field/add.py index 3dced81..f390ac2 100644 --- a/llmint/map/transform/field/add.py +++ b/llmint/map/transform/field/add.py @@ -1,35 +1,36 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "ADD" +description = "Add an optional target field" +properties = { + "target_field": (str, "Optional field in the target schema"), + "field_type": (str, "The type of the field to be added"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Add an optional target field", + "description": description, "parameters": { "type": "object", - "properties": { - "target_field": { - "type": "string", - "description": "Optional field in the target schema", - }, - "field_type": { - "type": "string", - "description": "The type of the field to be added", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "field_type", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(target_field, field_type, reasoning): +def func(target_field, field_type, reasoning=None): return Map(source_field=None, target_field=target_field, transformation=f'ADD {target_field} TYPE {field_type}', diff --git a/llmint/map/transform/field/copy.py b/llmint/map/transform/field/copy.py index 7ba4764..1a7cef9 100644 --- a/llmint/map/transform/field/copy.py +++ b/llmint/map/transform/field/copy.py @@ -1,36 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "COPY" +description = "Directly copies data from the source field to the target field without any transformation." +properties = { + "source_field": (str, "Field in the source schema"), + "target_field": (str, "Field in the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Directly copies data from the source field " - "to the target field without any transformation..", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field in the source schema", - }, - "target_field": { - "type": "string", - "description": "Field in the target schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "field_type", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, reasoning): + +def func(source_field, target_field, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'COPY', diff --git a/llmint/map/transform/field/default.py b/llmint/map/transform/field/default.py index 65035ce..79c521f 100644 --- a/llmint/map/transform/field/default.py +++ b/llmint/map/transform/field/default.py @@ -1,39 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "DEFAULT" +description = "Set the default of a target field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "default_value": (str, "Default value of the target field"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Set the default of a target field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "default_value": { - "type": "string", - "description": "Default value of the target field", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "default_value", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, default_value, reasoning): +def func(source_field, target_field, default_value, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'DEFAULT TO {default_value}', diff --git a/llmint/map/transform/field/delete.py b/llmint/map/transform/field/delete.py index 45146a4..3b89849 100644 --- a/llmint/map/transform/field/delete.py +++ b/llmint/map/transform/field/delete.py @@ -1,31 +1,35 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "DELETE" +description = "Delete a source field" +properties = { + "source_field": (str, "Field from the source schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Delete a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, reasoning): +def func(source_field, reasoning=None): return Map(source_field=source_field, target_field=None, transformation=f'DELETE', diff --git a/llmint/map/transform/field/missing.py b/llmint/map/transform/field/missing.py index ab43f17..ce5e11d 100644 --- a/llmint/map/transform/field/missing.py +++ b/llmint/map/transform/field/missing.py @@ -1,31 +1,35 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "MISSING" +description = "Indicates that the required target field is impossible to construct from the fields in the source schema" +properties = { + "target_field": (str, "Field from the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Indicates that the required target field is impossible to construct from the fields in the source schema", + "description": description, "parameters": { "type": "object", - "properties": { - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(target_field, reasoning): +def func(target_field, reasoning=None): return Map(source_field=None, target_field=target_field, transformation=f'MISSING', diff --git a/llmint/map/transform/field/rename.py b/llmint/map/transform/field/rename.py index 18ee733..eccacd1 100644 --- a/llmint/map/transform/field/rename.py +++ b/llmint/map/transform/field/rename.py @@ -1,35 +1,36 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "RENAME" +description = "Rename a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Rename a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, reasoning): +def func(source_field, target_field, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'RENAME TO {target_field}', diff --git a/llmint/map/transform/value/apply.py b/llmint/map/transform/value/apply.py index e8ffd14..8b9ccab 100644 --- a/llmint/map/transform/value/apply.py +++ b/llmint/map/transform/value/apply.py @@ -1,36 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "APPLY" +description = "Apply a function to the values of a source field" +properties = { + "target_field": (str, "Field from the target schema"), + "function": (str, "An expression involving source schema field(s) to apply, " + "replace any spaces in the schema fields with underscores"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Apply a function to the values of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "function": { - "type": "string", - "description": "An expression involving source schema field(s) to apply, " - "replace any spaces in the schema fields with underscores", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["target_field", "function", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(target_field, function, reasoning): +def func(target_field, function, reasoning=None): return Map(source_field=None, target_field=target_field, transformation=f'APPLY {function}', diff --git a/llmint/map/transform/value/gen.py b/llmint/map/transform/value/gen.py index fcfac22..5e3456a 100644 --- a/llmint/map/transform/value/gen.py +++ b/llmint/map/transform/value/gen.py @@ -1,31 +1,31 @@ +from libem.core.util import create_json_schema + +from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt + name = "GEN" +description = "Describes the equation needed to convert from source to target values" +properties = { + "source_field": (str, "Field from the target schema"), + "target_field": (str, "Field from the target schema"), + "conversion_equation": (str, "Mathematical equation used in conversion. " + "Let x be the source value and y be the target value."), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Describes the equation needed to convert from source to target values", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the target schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "conversion_equation": { - "type": "string", - "description": "Mathematical equation used in conversion. " - "Let x be the source value and y be the target value.", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "conversion_equation", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } diff --git a/llmint/map/transform/value/link.py b/llmint/map/transform/value/link.py index b00f0e7..9179351 100644 --- a/llmint/map/transform/value/link.py +++ b/llmint/map/transform/value/link.py @@ -1,42 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "LINK" +description = "Create a mapping between a value in the source field to a value in the target field, usually for enum type values" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "old_value": (str, "Source field value"), + "new_value": (str, "Target field value"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Create a mapping between a value in the source field to a value in the target field, usually for enum type values", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "old_value": { - "type": "string", - "description": "Source field value", - }, - "new_value": { - "type": "string", - "description": "Target field value", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "old_value", "new_value", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, old_value, new_value, reasoning): +def func(source_field, target_field, old_value, new_value, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'LINK {source_field} "{old_value}" TO "{new_value}"', diff --git a/llmint/map/transform/value/scale.py b/llmint/map/transform/value/scale.py index f90f546..1a2f767 100644 --- a/llmint/map/transform/value/scale.py +++ b/llmint/map/transform/value/scale.py @@ -1,39 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "SCALE" +description = "Scale the value of a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "factor": (str, "Factor to multiply the source field by"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Scale the value of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "factor": { - "type": "string", - "description": "Factor to multiply the source field by", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "factor", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, factor, reasoning): +def func(source_field, target_field, factor, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'SCALE BY {factor}', diff --git a/llmint/map/transform/value/shift.py b/llmint/map/transform/value/shift.py index da62dd0..04d9445 100644 --- a/llmint/map/transform/value/shift.py +++ b/llmint/map/transform/value/shift.py @@ -1,39 +1,37 @@ +from libem.core.util import create_json_schema + from llmint.map.function import Map +from llmint.map.parameter import reasoning +from llmint.map.prompt import reasoning_prompt name = "SHIFT" +description = "Shift the value of a source field" +properties = { + "source_field": (str, "Field from the source schema"), + "target_field": (str, "Field from the target schema"), + "value": (str, "Value to shift the source field by"), +} +if reasoning: + properties["reasoning"] = (str, reasoning_prompt) + schema = { "type": "function", "function": { "name": name, - "description": "Shift the value of a source field", + "description": description, "parameters": { "type": "object", - "properties": { - "source_field": { - "type": "string", - "description": "Field from the source schema", - }, - "target_field": { - "type": "string", - "description": "Field from the target schema", - }, - "value": { - "type": "string", - "description": "Value to shift the source field by", - }, - "reasoning": { - "type": "string", - "description": "In-depth reasoning as to why you chose this function", - }, - }, - "required": ["source_field", "target_field", "value", "reasoning"], - }, + "properties": create_json_schema( + **properties + )["properties"], + "required": list(properties.keys()), + } } } -def func(source_field, target_field, value, reasoning): +def func(source_field, target_field, value, reasoning=None): return Map(source_field=source_field, target_field=target_field, transformation=f'SHIFT BY {value}',