From 56934aca424898d950db2cb0c13b7dfff012f15b Mon Sep 17 00:00:00 2001 From: psychlone77 Date: Thu, 19 Mar 2026 02:59:46 +0530 Subject: [PATCH 1/9] Update parser.py to use relevant code of pglast 7.5; Add gitignore and requirements.txt. --- .gitignore | 3 + requirements.txt | 19 ++++ src/parser.py | 233 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 184 insertions(+), 71 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0061816 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +config/database.ini +__pycache__/ +.venv/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..aadff3f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,19 @@ +contourpy==1.3.3 +cplex==22.1.2.1 +cycler==0.12.1 +docplex==2.32.259 +fonttools==4.62.1 +hashable-list==0.2.0 +kiwisolver==1.5.0 +matplotlib==3.10.8 +Mosek==11.1.10 +numpy==2.4.3 +ordered-set==4.1.0 +packaging==26.0 +pglast==7.5 +pillow==12.1.1 +psycopg2-binary==2.9.11 +pyparsing==3.3.2 +python-dateutil==2.9.0.post0 +setuptools==82.0.1 +six==1.17.0 diff --git a/src/parser.py b/src/parser.py index 069171c..1de608d 100644 --- a/src/parser.py +++ b/src/parser.py @@ -28,7 +28,7 @@ def iterate(self, node): if isinstance(node, (tuple, ast.Node)): todo.append((Ancestor(), node)) else: - raise ValueError('Bad argument, expected a ast.Node instance or a tuple') + raise ValueError("Bad argument, expected a ast.Node instance or a tuple") while todo: ancestors, node = todo.popleft() @@ -93,8 +93,8 @@ def iterate(self, node): def visit_JoinExpr(self, ancestors, node): """ - we keep all the table name including renaming, and all the predicate - in the join condition + we keep all the table name including renaming, and all the predicate + in the join condition """ idx = 0 # left is table name @@ -137,12 +137,16 @@ def visit_SelectStmt(self, ancestors, node): if len(self.qual) 
== 1: node.whereClause = self.qual[0] elif len(self.qual) > 1: - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=()) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=() + ) for item in self.qual: node.whereClause.args += (item,) else: temp = node.whereClause - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=(temp,)) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=(temp,) + ) for item in self.qual: node.whereClause.args += (item,) @@ -162,7 +166,11 @@ def visit_ResTarget(self, ancestors, node): # all the written code does not have aggregation # may be revisited for max later if isinstance(node.val, ast.FuncCall): - if node.val.funcname[0].val == 'sum' or node.val.funcname[0].val == 'count' or node.val.funcname[0].val == 'max': + if ( + node.val.funcname[0].sval == "sum" + or node.val.funcname[0].sval == "count" + or node.val.funcname[0].sval == "max" + ): return visitors.Delete def visit_SelectStmt(self, ancestors, node): @@ -174,14 +182,16 @@ def visit_SelectStmt(self, ancestors, node): for item in node.targetList: if isinstance(item.val, ast.FuncCall): if len(item.val.funcname) == 1: - if item.val.funcname[0].val == 'count': - node.targetList += (ast.ResTarget(val=ast.A_Const(val=ast.Integer(1))),) - elif item.val.funcname[0].val == 'sum': + if item.val.funcname[0].sval == "count": + node.targetList += ( + ast.ResTarget(val=ast.A_Const(val=ast.Integer(1))), + ) + elif item.val.funcname[0].sval == "sum": temp = item.val.args[0] node.targetList += (ast.ResTarget(val=temp),) - elif item.val.funcname[0].val == 'max': + elif item.val.funcname[0].sval == "max": temp = item.val.args[0] - self.index = item.val.args[1].val.val + self.index = item.val.args[1].val.ival node.targetList += (ast.ResTarget(val=temp),) else: pass @@ -199,7 +209,7 @@ def get_primary_keys(pks, relations): if pk[0] in relation: left = pk[2].find("(") right = pk[2].find(")") - key = pk[2][left 
+ 1:right] + key = pk[2][left + 1 : right] res.append(pk[0] + "." + key) return res @@ -225,11 +235,17 @@ def visit_SelectStmt(self, ancestors, node): for r in self.keys: table_attribute = r.split(".") for name in renaming[table_attribute[0]]: - rename = 'id' + str(idx) - table = ast.String(value=rename) + rename = "id" + str(idx) + table = ast.String(sval=rename) attri = ast.ColumnRef(fields=(name, table_attribute[1])) - node.targetList += (ast.ResTarget(val=ast.FuncCall(funcname=(ast.String(value='concat'),), - args=(table, attri)), name=rename),) + node.targetList += ( + ast.ResTarget( + val=ast.FuncCall( + funcname=(ast.String(sval="concat"),), args=(table, attri) + ), + name=rename, + ), + ) # node.targetList += (ast.ResTarget(val=ast.ColumnRef(fields=(name, # table_attribute[1])), name=rename),) idx += 1 @@ -266,10 +282,10 @@ def __init__(self, fks): split = fk[2].split("REFERENCES ") left1 = split[0].find("(") right1 = split[0].find(")") - src_key = split[0][left1 + 1:right1] + src_key = split[0][left1 + 1 : right1] left2 = split[1].find("(") right2 = split[1].find(")") - dest_key = split[1][left2 + 1:right2] + dest_key = split[1][left2 + 1 : right2] dest_table = split[1][0:left2] self.fk_dic[(src_table, src_key)] = (dest_table, dest_key) @@ -278,32 +294,53 @@ def check_condition(self, node, src_rename, dest_rename, src_key, dest_key): # for each dest table, there are four cases for foreign key condition for dest in dest_rename: # r1.k1 = r2.k2 - c1 = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef( - fields=(ast.String(value=src_rename), ast.String(value=src_key[0].strip()))), - rexpr=ast.ColumnRef( - fields=(ast.String(value=dest), ast.String(value=dest_key[0].strip())))) + c1 = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef( + fields=( + ast.String(sval=src_rename), + ast.String(sval=src_key[0].strip()), + ) + ), + rexpr=ast.ColumnRef( + 
fields=(ast.String(sval=dest), ast.String(sval=dest_key[0].strip())) + ), + ) # r2.k2 = r1.k1 - c2 = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef( - fields=(ast.String(value=dest), ast.String(value=dest_key[0].strip()),)), - rexpr=ast.ColumnRef( - fields=(ast.String(value=src_rename), ast.String(value=src_key[0].strip()),))) + c2 = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef( + fields=( + ast.String(sval=dest), + ast.String(sval=dest_key[0].strip()), + ) + ), + rexpr=ast.ColumnRef( + fields=( + ast.String(sval=src_rename), + ast.String(sval=src_key[0].strip()), + ) + ), + ) # k1 = k2 - c3 = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef( - fields=(ast.String(value=src_key[0].strip()),)), - rexpr=ast.ColumnRef( - fields=(ast.String(value=dest_key[0].strip()),))) + c3 = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef(fields=(ast.String(sval=src_key[0].strip()),)), + rexpr=ast.ColumnRef(fields=(ast.String(sval=dest_key[0].strip()),)), + ) # k2 = k1 - c4 = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef( - fields=(ast.String(value=dest_key[0].strip()),)), - rexpr=ast.ColumnRef( - fields=(ast.String(value=src_key[0].strip()),))) + c4 = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef(fields=(ast.String(sval=dest_key[0].strip()),)), + rexpr=ast.ColumnRef(fields=(ast.String(sval=src_key[0].strip()),)), + ) r1 = check_expr(c1) r1(node.whereClause) @@ -334,11 +371,18 @@ def visit_SelectStmt(self, ancestors, node): for fk in self.fk_dic.keys(): # if src table is in the current query, and destination table is not in # we add the destination table into the query, and the join condition - if fk[0] in relation_dict.keys() and self.fk_dic[fk][0] not in 
relation_dict.keys(): + if ( + fk[0] in relation_dict.keys() + and self.fk_dic[fk][0] not in relation_dict.keys() + ): # renaming the upcoming table rename = self.fk_dic[fk][0] + str(0) # syntax node for this table - item = ast.RangeVar(relname=self.fk_dic[fk][0], inh=True, alias=ast.Alias(aliasname=rename)) + item = ast.RangeVar( + relname=self.fk_dic[fk][0], + inh=True, + alias=ast.Alias(aliasname=rename), + ) # add to the select statement node.fromClause += (item,) # update renaming @@ -348,11 +392,22 @@ def visit_SelectStmt(self, ancestors, node): src_key = fk[1].split(",") dest_key = self.fk_dic[fk][1].split(",") for i in range(len(src_key)): - c = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef(fields=(ast.String(value=relation_dict[fk[0]][0]), - ast.String(value=src_key[i].strip()))), - rexpr=ast.ColumnRef( - fields=(ast.String(value=rename), ast.String(value=dest_key[i].strip())))) + c = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef( + fields=( + ast.String(sval=relation_dict[fk[0]][0]), + ast.String(sval=src_key[i].strip()), + ) + ), + rexpr=ast.ColumnRef( + fields=( + ast.String(sval=rename), + ast.String(sval=dest_key[i].strip()), + ) + ), + ) # print(stream.RawStream()(c)) conditions += (c,) # add the join condition to the select statement @@ -360,18 +415,25 @@ def visit_SelectStmt(self, ancestors, node): if len(conditions) == 1: node.whereClause = conditions[0] elif len(conditions) > 1: - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=()) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=() + ) for condition in conditions: node.whereClause.args += (condition,) else: temp = node.whereClause - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=(temp,)) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=(temp,) + ) for condition in conditions: 
node.whereClause.args += (condition,) visited = True # if both src and dest tables are in the query # we check if the foreign key condition is in the whereClause - if fk[0] in relation_dict.keys() and self.fk_dic[fk][0] in relation_dict.keys(): + if ( + fk[0] in relation_dict.keys() + and self.fk_dic[fk][0] in relation_dict.keys() + ): src_rename = relation_dict[fk[0]] dest_rename = relation_dict[self.fk_dic[fk][0]] src_key = fk[1].split(",") @@ -379,34 +441,57 @@ def visit_SelectStmt(self, ancestors, node): # for each renaming of the src table # we check there is at least one dest table renaming connecting to this src table for src in src_rename: - if not (self.check_condition(node, src, dest_rename, src_key, dest_key)): + if not ( + self.check_condition( + node, src, dest_rename, src_key, dest_key + ) + ): # add another destination table renaming - rename = self.fk_dic[fk][0] + str(len(relation_dict[self.fk_dic[fk][0]])) + rename = self.fk_dic[fk][0] + str( + len(relation_dict[self.fk_dic[fk][0]]) + ) relation_dict[self.fk_dic[fk][0]].append(rename) - item = ast.RangeVar(relname=self.fk_dic[fk][0], inh=True, alias=ast.Alias(aliasname=rename)) + item = ast.RangeVar( + relname=self.fk_dic[fk][0], + inh=True, + alias=ast.Alias(aliasname=rename), + ) # add to the select statement node.fromClause += (item,) conditions = () for i in range(len(src_key)): - c = ast.A_Expr(kind=enums.A_Expr_Kind.AEXPR_OP, name=(ast.String(value="="),), - lexpr=ast.ColumnRef( - fields=(ast.String(value=src), - ast.String(value=src_key[i].strip()))), - rexpr=ast.ColumnRef( - fields=(ast.String(value=rename), - ast.String(value=dest_key[i].strip())))) + c = ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String(sval="="),), + lexpr=ast.ColumnRef( + fields=( + ast.String(sval=src), + ast.String(sval=src_key[i].strip()), + ) + ), + rexpr=ast.ColumnRef( + fields=( + ast.String(sval=rename), + ast.String(sval=dest_key[i].strip()), + ) + ), + ) conditions += (c,) # add the join 
condition to the select statement if node.whereClause is None: if len(conditions) == 1: node.whereClause = conditions elif len(conditions) > 1: - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=()) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=() + ) for condition in conditions: node.whereClause.args += (condition,) else: temp = node.whereClause - node.whereClause = ast.BoolExpr(boolop=enums.BoolExprType.AND_EXPR, args=(temp,)) + node.whereClause = ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, args=(temp,) + ) for condition in conditions: node.whereClause.args += (condition,) @@ -414,9 +499,10 @@ def visit_SelectStmt(self, ancestors, node): class add_table_name(visitors.Visitor): - ''' + """ add the table either renaming or not to the column - ''' + """ + def __init__(self, select_node, schema): # get the renaming of the current node renaming = get_rename() @@ -428,20 +514,22 @@ def visit_ColumnRef(self, ancestors, node): # get fields fields = node.fields if len(fields) == 1: - attribute = fields[0].val + attribute = fields[0].sval for table in self.rename_dict.keys(): table_attr = self.schema[table] if attribute in table_attr: # self.rename_dict[table] length should be 1, otherwise this # is an ambiguous column - node.fields = (ast.String(value=self.rename_dict[table][0]),) + node.fields + node.fields = ( + ast.String(sval=self.rename_dict[table][0]), + ) + node.fields class get_rename(visitors.Visitor): - ''' + """ get all the current renaming in tis select statement this is mainly a helper visitor for other functional visitor - ''' + """ def __init__(self): self.rename_dict = {} @@ -478,7 +566,9 @@ def visit_SelectStmt(self, ancestors, node): # remove the group column to the selection (targetList) self.root = node self.group = node.groupClause - new_selection = ast.FuncCall(funcname=(ast.String(value='concat'),), args=self.group) + new_selection = ast.FuncCall( + 
funcname=(ast.String(sval="concat"),), args=self.group + ) node.targetList = (ast.ResTarget(val=new_selection),) + node.targetList node.groupClause = None @@ -498,6 +588,7 @@ class check_type(visitors.Visitor): this visitor will check the type of the input query and then decide which algorithm to process the input query """ + def __init__(self, relations): self.subquery = False self.groupby = False @@ -512,7 +603,7 @@ def visit_SelectStmt(self, ancestors, node): for item in node.targetList: if isinstance(item.val, ast.FuncCall): if len(item.val.funcname) == 1: - if item.val.funcname[0].val == 'max': + if item.val.funcname[0].sval == "max": self.max = True else: # if there is a subquery, we have to get implicit join first @@ -522,7 +613,7 @@ def visit_SelectStmt(self, ancestors, node): for item in node.targetList: if isinstance(item.val, ast.FuncCall): if len(item.val.funcname) == 1: - if item.val.funcname[0].val == 'max': + if item.val.funcname[0].sval == "max": self.max = True # check l renaming = get_rename() @@ -541,5 +632,5 @@ def visit_SelectStmt(self, ancestors, node): self.groupby = True -if __name__ == '__main__': - pass \ No newline at end of file +if __name__ == "__main__": + pass From 7d8103eaf3585fab9ff46d10593ca8458301e69f Mon Sep 17 00:00:00 2001 From: psychlone77 Date: Thu, 19 Mar 2026 03:00:40 +0530 Subject: [PATCH 2/9] Revise README.md; add CPLEX setup guide to documentation --- README.md | 174 ++++++++++++++++++-------------------------- docs/cplex_setup.md | 61 ++++++++++++++++ 2 files changed, 133 insertions(+), 102 deletions(-) create mode 100644 docs/cplex_setup.md diff --git a/README.md b/README.md index a1fdc78..1e1347a 100644 --- a/README.md +++ b/README.md @@ -1,119 +1,89 @@ -# Differential Privacy over SQL - -## Table of Contents -* [About the Project](#about-the-project) -* [Prerequisites](#prerequisites) - * [Tools](#tools) - * [Python Dependency](#python-dependency) - * [Database Permission](#database-permission) -* [system 
structure](#system-structure) -* [Demo System](#demo-system) -* [Instruction for Collecting Result](#collect-result) -* [Future Plan](#future-plan) +# Differential Privacy over SQL (DPSQL) + +DPSQL is a system designed for answering SQL queries while satisfying differential privacy guarantees. ## About The Project -Differential Privacy over SQL (DPSQL) is a system for answering queries over differential privacy. -The file structure is as below -``` +The file and directory structure of the project is organized as follows: + +```text project -│ -└───config -└───docs -└───Profile -└───src -│ └───algorithm -└───Test -│ └───TPCH -│ └───Graph -└───Sample +├── config/ # Configuration files required for the system +├── docs/ # Reference information and documentation +├── Profile/ # Profile information/licenses (e.g., mosek.lic) +├── src/ # Main source code files +│ └── algorithm/ # Core algorithms integrated into the system (e.g., FastSJA, OptSJA) +├── Test/ # Queries used in system experiments (TPCH, Graph) +└── Sample/ # Scripts for database setup and collecting experiment results ``` -`./config` stores the configuration files users need for the system. -`./docs` stores the reference information users need to work with DPSQL: +## Prerequisites -`./Profile` stores the Profile information for using `mosek` in the system. +### Tools +* **[PostgreSQL](https://www.postgresql.org/)**: Database engine. +* **[Python3](https://www.python.org/download/releases/3.0/)**: Ensure version 3.0 or higher. +* **[Mosek](https://www.mosek.com/downloads/)**: License file must be placed in `./Profile`. +* **CPLEX (Full Edition)**: Required for large datasets. Note: Do not rely on `pip install cplex` alone, as it has a 1,000-variable limit. + * [Detailed CPLEX Installation & Python Linking Guide](docs/cplex_setup.md) -`./src` stores main source files. -* `./src/algorithm` stores 3 algorithm we integrated into this system. 
+### Python Dependencies -`./Test` stores the queries used in the experiments of the system. +Install the required Python packages using the provided `requirements.txt` file: -`./Sample` stores the script for setting up database and collecting experiment results. +```bash +pip install -r requirements.txt +``` +### Database Permissions +The user running the system must have read permissions for the target database schema. -## Prerequisites -### Tools -Before running this project, please install below tools -* [PostgreSQL](https://www.postgresql.org/) -* [Python3](https://www.python.org/download/releases/3.0/) -* [Cplex](https://www.ibm.com/analytics/cplex-optimizer) -* [Mosek](https://www.mosek.com/downloads/) and the licence is under `./Profile`. - -Please do not install `Cplex` dependency, which can only handle a small dataset, but download the `Cplex API` and import that to python with this [instruction](https://www.ibm.com/docs/zh/icos/12.9.0?topic=cplex-setting-up-python-api). -(We are aware that this link is expired and are working on a substitute solution.) - -### Python Dependency -Here are dependencies used in python programs: -* `matplotlib` -* `numpy` -* `sys` -* `os` -* `collections` -* `configparser` -* `math` -* `psycopg2` -* `pglast`v4.4 -* `argparser` - -### Database permission -The user should have the permission to read the schema of the database to use this system. - -## System structure -TODO - -## Demo System - -To run the system, run `main.py`. There are seven parameters - - `--d`: path to database initialization file; - - `--q`: path to query file; - - `--r`: path to private relation file; - - `--c`: path to the configuration file; - - `--o`: path to the output file; - - `--debug`: debug mode for more information; - - `--optimal`: choose to use optimal algorithm for SJA queries; - -One can use `--h` to get help for parameter instruction. 
- -For more information about input file, users can consult [here](./docs/system-input.md) - -For the SQL syntax used in this system, users can consult [here](./docs/query-syntax.md) - -Example: -``` -python main.py --d ./config/database.ini --q ./test.txt --r ./test_relation.txt --c ./config/parameter.config --o out.txt -``` +## Usage / Demo System -## collect result +The main entry point for the system is `main.py`. -1. install the dependency +### Command-Line Arguments +| Parameter | Description | +| :--- | :--- | +| `--d` | Path to the database initialization file | +| `--q` | Path to the query file | +| `--r` | Path to the private relation file | +| `--c` | Path to the configuration file | +| `--o` | Path to the output file | +| `--debug` | Enable debug mode for more detailed logging | +| `--optimal` | Use the optimal algorithm for SJA queries | -2. create an empty database in `PosgreSQL` -3. generate `tbl` data files by using dbgen from [TPCH website](https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp) -and store them in `/Sample/data/TPCH` -4. run script we provide in `/Sample/setupDBTPCH.py` -``` -python setupDBTPCH.py --db databasename -``` -5. run script we provide in `/Sample/collectResult.py` -```commandline -python collectResult.py -``` -6. find the result in `/Sample/result/TPCH` +*Use `python main.py --h` to view complete help instructions.* + +**Documentation Links:** +* [Input File Configuration](./docs/system-input.md) +* [Supported SQL Syntax](./docs/query-syntax.md) -## Future Plan +**Example Run:** +```bash +python main.py --d ./config/database.ini --q ./test.txt --r ./test_relation.txt --c ./config/parameter.config --o out.txt +``` -- Distinct count queries type (projection); -- User Interface -- Better user experience; -- Optimization; +## Collecting Results + +Follow these steps to set up the data and collect experiment results: + +1. 
**Install Dependencies**: Ensure tools and Python requirements are installed as per the [Prerequisites](#prerequisites). +2. **Database Setup**: Create an empty database in PostgreSQL. +3. **Data Generation**: Generate `.tbl` data files using `dbgen` from the [TPC-H website](https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp), and store them in `./Sample/data/TPCH`. +4. **Database Initialization**: Run the setup script provided in `./Sample/setupDBTPCH.py`: + ```bash + python Sample/setupDBTPCH.py --db + ``` +5. **Run Collection Script**: + ```bash + cd Sample + python collectResult.py + ``` +6. **View Results**: The output will be available in `./Sample/result/TPCH`. + +## Future Plans + +* Support for distinct count queries (projection). +* Develop a User Interface (UI). +* Improve overall user experience. +* General performance optimization. diff --git a/docs/cplex_setup.md b/docs/cplex_setup.md new file mode 100644 index 0000000..b2c9688 --- /dev/null +++ b/docs/cplex_setup.md @@ -0,0 +1,61 @@ +# CPLEX Full Version Setup Guide + +To handle large-scale database workloads (like TPC-H and SSB), you must use the Full Edition of CPLEX. The standard `pip install cplex` is a Community Edition limited to 1,000 variables and 1,000 constraints. + +## 1. Download the Installer + +* **Students/Academics:** Register at the [IBM Academic Initiative](https://ibm.biz/academic) using your university email to download CPLEX for free. +* **Commercial:** Download via the IBM Passport Advantage portal. +* **Version:** This project is tested with CPLEX Studio 22.1.1 or newer. + +## 2. Install the Studio + +### Linux (Fedora/Ubuntu) + +1. Make the installer executable: `chmod +x cplex_studioXXXX.linux_x86_64.bin` +2. Run with sudo: `sudo ./cplex_studioXXXX.linux_x86_64.bin` +3. Default path: `/opt/ibm/ILOG/CPLEX_StudioXXXX` + +### Windows + +1. Run the `.exe` installer as Administrator. +2. 
Default path: `C:\Program Files\IBM\ILOG\CPLEX_StudioXXXX` + +## 3. Python API Integration + +IBM no longer includes a python folder in the installation. Follow these steps to link the Full Version to your Python environment: + +1. **Activate your virtual environment:** + ```bash + source .venv/bin/activate # Linux + .venv\Scripts\activate # Windows + ``` + +2. **Install the base packages:** + ```bash + pip install cplex docplex + ``` + +3. **Link to Local Binaries:** + Run the docplex utility to upgrade your pip installation to the Full Version: + + **Linux:** + ```bash + docplex config --upgrade /opt/ibm/ILOG/CPLEX_StudioXXXX + ``` + + **Windows:** + ```powershell + docplex config --upgrade "C:\Program Files\IBM\ILOG\CPLEX_StudioXXXX" + ``` + +## 4. Verification + +Run the following to ensure the unlimited version is active: + +```python +import cplex +c = cplex.Cplex() +print(f"CPLEX Version: {c.get_version()}") +# If this succeeds without a "Promotional Version" warning, you are ready. +``` \ No newline at end of file From 1632c27ceebb35a701bc4613dbf08429abbc950a Mon Sep 17 00:00:00 2001 From: psychlone77 Date: Sat, 9 May 2026 17:37:15 +0530 Subject: [PATCH 3/9] Add query rewriting for subquery unnesting functionality; update README and add test cases Co-authored-by: Copilot --- README.md | 4 ++ src/parser.py | 44 ++++++++++++ src/process.py | 177 ++++++++++++++++++++++++++++++++---------------- test_exists.txt | 8 +++ test_nested.txt | 7 ++ 5 files changed, 183 insertions(+), 57 deletions(-) create mode 100644 test_exists.txt create mode 100644 test_nested.txt diff --git a/README.md b/README.md index 1e1347a..3961e0c 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,10 @@ Follow these steps to set up the data and collect experiment results: ``` 6. **View Results**: The output will be available in `./Sample/result/TPCH`. 
+## Query Rewriting & Subquery Unnesting + +DPSQL automatically rewrites and unnests subqueries to standard relational joins to ensure differential privacy mechanisms can be seamlessly applied. Through a custom Abstract Syntax Tree (AST) visitor (`UnnestSubqueries` in `src/parser.py`) built using `pglast`, the system traverses the AST and flattens nested `IN`, `ANY`, and `EXISTS` subqueries found in the `WHERE` clause into standard multi-table joins, while automatically preserving and linking the original filtering conditions. + ## Future Plans * Support for distinct count queries (projection). diff --git a/src/parser.py b/src/parser.py index 1de608d..39c853e 100644 --- a/src/parser.py +++ b/src/parser.py @@ -634,3 +634,47 @@ def visit_SelectStmt(self, ancestors, node): if __name__ == "__main__": pass + +class UnnestSubqueries(visitors.Visitor): + def __init__(self): + self.sub_from_clauses = () + self.sub_where_clauses = [] + + def visit_SubLink(self, ancestors, node): + if node.subLinkType in (enums.SubLinkType.ANY_SUBLINK, enums.SubLinkType.EXISTS_SUBLINK): + self.sub_from_clauses += node.subselect.fromClause + + if node.subLinkType == enums.SubLinkType.ANY_SUBLINK: + if node.subselect.whereClause: + self.sub_where_clauses.append(node.subselect.whereClause) + return ast.A_Expr( + kind=enums.A_Expr_Kind.AEXPR_OP, + name=(ast.String('='),), + lexpr=node.testexpr, + rexpr=node.subselect.targetList[0].val + ) + elif node.subLinkType == enums.SubLinkType.EXISTS_SUBLINK: + if node.subselect.whereClause: + return node.subselect.whereClause + else: + return ast.A_Const(val=ast.Integer(1)) + +def apply_unnest_subqueries(selectstmt): + while True: + unnester = UnnestSubqueries() + unnester(selectstmt) + if not unnester.sub_from_clauses: + break + + if selectstmt.fromClause is None: + selectstmt.fromClause = () + selectstmt.fromClause += unnester.sub_from_clauses + + for sub_where in unnester.sub_where_clauses: + if selectstmt.whereClause: + selectstmt.whereClause = 
ast.BoolExpr( + boolop=enums.BoolExprType.AND_EXPR, + args=(selectstmt.whereClause, sub_where) + ) + else: + selectstmt.whereClause = sub_where diff --git a/src/process.py b/src/process.py index 0b378ed..d134e62 100644 --- a/src/process.py +++ b/src/process.py @@ -11,8 +11,16 @@ import src.algorithm.MaxSJA1 import src.algorithm.MaxSJA2 import src.algorithm.OptSJA -from src.parser import userAdder, ImplicitJoin, complete_query, aggregationVisit, get_primary_keys, add_table_name, \ - group_by +from src.parser import ( + userAdder, + ImplicitJoin, + complete_query, + aggregationVisit, + get_primary_keys, + add_table_name, + group_by, + apply_unnest_subqueries, +) from src.util import pg_exec @@ -49,28 +57,35 @@ def rewrite(self, query, private_relations): selectstmt = root[0].stmt if not isinstance(selectstmt, ast.SelectStmt): raise Exception + apply_unnest_subqueries(selectstmt) ImplicitJoin()(selectstmt) add_table_name(selectstmt, self.schema)(selectstmt) aggregationVisit()(selectstmt) complete_query(self.fks)(selectstmt) userAdder(private_pk)(selectstmt) - self.rewrite_query = (stream.RawStream()(selectstmt)) + self.rewrite_query = stream.RawStream()(selectstmt) def process(self): - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - processor_num = int(self.parameters['processor_num']) - global_sensitivity = float(self.parameters['global_sensitivity']) - approximate_factor = float(self.parameters['approximate_factor']) - src.algorithm.FastSJA.processFastSJA(self.input_result, e=epsilon, b=beta, gs=global_sensitivity, - p_num=processor_num, afactor=approximate_factor) + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + processor_num = int(self.parameters["processor_num"]) + global_sensitivity = float(self.parameters["global_sensitivity"]) + approximate_factor = float(self.parameters["approximate_factor"]) + src.algorithm.FastSJA.processFastSJA( + self.input_result, + e=epsilon, + b=beta, + 
gs=global_sensitivity, + p_num=processor_num, + afactor=approximate_factor, + ) self.true_result, self.noise_result = src.algorithm.FastSJA.get_result() class OptSJA(FastSJA): def process(self): - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) src.algorithm.OptSJA.processOpt(self.input_result, e=epsilon, b=beta) self.true_result, self.noise_result = src.algorithm.OptSJA.get_result() @@ -83,30 +98,39 @@ def rewrite(self, query, private_relations): selectstmt = root[0].stmt if not isinstance(selectstmt, ast.SelectStmt): raise Exception + apply_unnest_subqueries(selectstmt) ImplicitJoin()(selectstmt) add_table_name(selectstmt, self.schema)(selectstmt) group_by()(selectstmt) aggregationVisit()(selectstmt) complete_query(self.fks)(selectstmt) userAdder(private_pk)(selectstmt) - self.rewrite_query = (stream.RawStream()(selectstmt)) + self.rewrite_query = stream.RawStream()(selectstmt) def process(self): - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - delta = float(self.parameters['delta']) - src.algorithm.MultiSJF.ProcessMultiQSJF(self.input_result, e=epsilon, b=beta, d=delta) - self.true_result, self.noise_result, self.error = src.algorithm.MultiSJF.get_result() + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + delta = float(self.parameters["delta"]) + src.algorithm.MultiSJF.ProcessMultiQSJF( + self.input_result, e=epsilon, b=beta, d=delta + ) + self.true_result, self.noise_result, self.error = ( + src.algorithm.MultiSJF.get_result() + ) class MultiSJA(MultiSJF): def process(self): - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - delta = float(self.parameters['delta']) - src.algorithm.MultiSJA.ProcessMultiQSJA(self.input_result, e=epsilon, b=beta, Del=delta) - self.true_result, self.noise_result, self.error = 
src.algorithm.MultiSJA.get_result() + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + delta = float(self.parameters["delta"]) + src.algorithm.MultiSJA.ProcessMultiQSJA( + self.input_result, e=epsilon, b=beta, Del=delta + ) + self.true_result, self.noise_result, self.error = ( + src.algorithm.MultiSJA.get_result() + ) class MaxSJA1(algorithm): @@ -120,6 +144,7 @@ def rewrite(self, query, private_relations): selectstmt = root[0].stmt if not isinstance(selectstmt, ast.SelectStmt): raise Exception + apply_unnest_subqueries(selectstmt) ImplicitJoin()(selectstmt) add_table_name(selectstmt, self.schema)(selectstmt) agg = aggregationVisit() @@ -127,18 +152,26 @@ def rewrite(self, query, private_relations): complete_query(self.fks)(selectstmt) userAdder(private_pk)(selectstmt) self.k = agg.index - self.rewrite_query = (stream.RawStream()(selectstmt)) + self.rewrite_query = stream.RawStream()(selectstmt) def process(self): if self.k == 0: self.k = len(self.input_result) - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - error_level = float(self.parameters['error_level']) - upper_bound = float(self.parameters['upper_bound']) - src.algorithm.MaxSJA1.processMaxSJA1(self.input_result, self.k, e=epsilon, b=beta, - error=error_level, ub=upper_bound) - self.true_result, self.noise_result, self.error = src.algorithm.MaxSJA1.get_result() + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + error_level = float(self.parameters["error_level"]) + upper_bound = float(self.parameters["upper_bound"]) + src.algorithm.MaxSJA1.processMaxSJA1( + self.input_result, + self.k, + e=epsilon, + b=beta, + error=error_level, + ub=upper_bound, + ) + self.true_result, self.noise_result, self.error = ( + src.algorithm.MaxSJA1.get_result() + ) class MaxSJA2(MaxSJA1): @@ -146,14 +179,23 @@ class MaxSJA2(MaxSJA1): def process(self): if self.k == 0: self.k = len(self.input_result) - epsilon = 
float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - processor_num = int(self.parameters['processor_num']) - error_level = float(self.parameters['error_level']) - upper_bound = float(self.parameters['upper_bound']) - src.algorithm.MaxSJA2.processMaxSJA2(self.input_result, self.k, e=epsilon, b=beta, - error=error_level, ub=upper_bound, p_num=processor_num) - self.true_result, self.noise_result, self.error = src.algorithm.MaxSJA2.get_result() + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + processor_num = int(self.parameters["processor_num"]) + error_level = float(self.parameters["error_level"]) + upper_bound = float(self.parameters["upper_bound"]) + src.algorithm.MaxSJA2.processMaxSJA2( + self.input_result, + self.k, + e=epsilon, + b=beta, + error=error_level, + ub=upper_bound, + p_num=processor_num, + ) + self.true_result, self.noise_result, self.error = ( + src.algorithm.MaxSJA2.get_result() + ) class MultiMax(algorithm): @@ -172,6 +214,7 @@ def rewrite(self, query, private_relations): selectstmt = root[0].stmt if not isinstance(selectstmt, ast.SelectStmt): raise Exception + apply_unnest_subqueries(selectstmt) ImplicitJoin()(selectstmt) add_table_name(selectstmt, self.schema)(selectstmt) group_by()(selectstmt) @@ -180,7 +223,7 @@ def rewrite(self, query, private_relations): complete_query(self.fks)(selectstmt) userAdder(private_pk)(selectstmt) self.k = agg.index - self.rewrite_query = (stream.RawStream()(selectstmt)) + self.rewrite_query = stream.RawStream()(selectstmt) def get_input_result(self): self.input_result = pg_exec(self.dbsetting, self.rewrite_query) @@ -200,26 +243,38 @@ def get_input_result(self): self.error = 0 def process(self): - epsilon = float(self.parameters['epsilon']) - beta = float(self.parameters['beta']) - error_level = float(self.parameters['error_level']) - upper_bound = float(self.parameters['upper_bound']) - processor_num = int(self.parameters['processor_num']) - delta = 
float(self.parameters['delta']) + epsilon = float(self.parameters["epsilon"]) + beta = float(self.parameters["beta"]) + error_level = float(self.parameters["error_level"]) + upper_bound = float(self.parameters["upper_bound"]) + processor_num = int(self.parameters["processor_num"]) + delta = float(self.parameters["delta"]) # advanced composition beta = beta / self.num_query - epsilon = (math.sqrt(2 * self.num_query * math.log(1 / delta) + 4 * epsilon * self.num_query) - math.sqrt( - 2 * self.num_query * math.log(1 / delta))) / (2 * self.num_query) + epsilon = ( + math.sqrt( + 2 * self.num_query * math.log(1 / delta) + 4 * epsilon * self.num_query + ) + - math.sqrt(2 * self.num_query * math.log(1 / delta)) + ) / (2 * self.num_query) if self.input_l == 1: for g_id in self.input_final_result.keys(): group = self.group_ids[g_id] next_input = self.input_final_result[g_id] if self.k == 0: - self.k = len(next_input)-1 - src.algorithm.MaxSJA1.processMaxSJA1(next_input, self.k, e=epsilon, b=beta, - error=error_level, ub=upper_bound) - true_result_k, noise_result_k, error_k = src.algorithm.MaxSJA1.get_result() + self.k = len(next_input) - 1 + src.algorithm.MaxSJA1.processMaxSJA1( + next_input, + self.k, + e=epsilon, + b=beta, + error=error_level, + ub=upper_bound, + ) + true_result_k, noise_result_k, error_k = ( + src.algorithm.MaxSJA1.get_result() + ) self.true_result.append((true_result_k, group)) self.noise_result.append((noise_result_k, group)) self.error += error_k @@ -228,11 +283,19 @@ def process(self): group = self.group_ids[g_id] next_input = self.input_final_result[g_id] if self.k == 0: - self.k = len(next_input)-1 - src.algorithm.MaxSJA2.processMaxSJA2(next_input, self.k, e=epsilon, b=beta, - error=error_level, ub=upper_bound, - p_num=processor_num) - true_result_k, noise_result_k, error_k = src.algorithm.MaxSJA2.get_result() + self.k = len(next_input) - 1 + src.algorithm.MaxSJA2.processMaxSJA2( + next_input, + self.k, + e=epsilon, + b=beta, + error=error_level, + 
ub=upper_bound, + p_num=processor_num, + ) + true_result_k, noise_result_k, error_k = ( + src.algorithm.MaxSJA2.get_result() + ) self.true_result.append((true_result_k, group)) self.noise_result.append((noise_result_k, group)) self.error += error_k diff --git a/test_exists.txt b/test_exists.txt new file mode 100644 index 0000000..96fff99 --- /dev/null +++ b/test_exists.txt @@ -0,0 +1,8 @@ +SELECT sum(l_extendedprice) +FROM lineitem +WHERE EXISTS ( + SELECT 1 + FROM orders + WHERE o_orderkey = l_orderkey + AND o_custkey = 123 +); diff --git a/test_nested.txt b/test_nested.txt new file mode 100644 index 0000000..fd1e67a --- /dev/null +++ b/test_nested.txt @@ -0,0 +1,7 @@ +SELECT sum(l_extendedprice) +FROM lineitem +WHERE l_orderkey IN ( + SELECT o_orderkey + FROM orders + WHERE o_custkey = 123 +); From 16fe45a0f6ce70656e41e60a75bb472c08d235b4 Mon Sep 17 00:00:00 2001 From: PASINDU151 Date: Sun, 10 May 2026 19:39:12 +0530 Subject: [PATCH 4/9] Add support for recursive queries --- main.py | 148 +++++++++++------ src/algorithm/FastSJA.py | 4 + src/algorithm/MaxSJA1.py | 2 + src/algorithm/MaxSJA2.py | 2 + src/algorithm/OptSJA.py | 2 + src/parser.py | 47 ++++-- src/process.py | 19 +++ src/recursive.py | 346 +++++++++++++++++++++++++++++++++++++++ src/util.py | 19 +-- 9 files changed, 513 insertions(+), 76 deletions(-) create mode 100644 src/recursive.py diff --git a/main.py b/main.py index cdb0538..c227bf5 100644 --- a/main.py +++ b/main.py @@ -8,108 +8,151 @@ from pglast import parser, prettify from pglast import ast import src.process +from src.recursive import is_recursive_query def get_project_root() -> Path: return Path(__file__).parent -def main(): - argparser = argparse.ArgumentParser(description='sql over DP') - argparser.add_argument('--db', '--d', type=str, default='./config/database.ini', - help='path to database initialization file') - argparser.add_argument('--query', '--q', type=str, default='./test.txt', help='path to query file') - 
argparser.add_argument('--relation', '--r', type=str, help='path to private relation file', default="./test_relation.txt") - argparser.add_argument('--config', '--c', type=str, help='path to the configuration file', default="./config/parameter.config") - argparser.add_argument('--output', '--o', type=str, help='path to output file', - default="./out.txt") - argparser.add_argument('--debug', action='store_true', help='debug mode, print more information') - argparser.add_argument('--optimal', action='store_true', help='optimal mode for SJA') - +def main(): + argparser = argparse.ArgumentParser(description="sql over DP") + argparser.add_argument( + "--db", + "--d", + type=str, + default="./config/database.ini", + help="path to database initialization file", + ) + argparser.add_argument( + "--query", "--q", type=str, default="./test.txt", help="path to query file" + ) + argparser.add_argument( + "--relation", + "--r", + type=str, + help="path to private relation file", + default="./test_relation.txt", + ) + argparser.add_argument( + "--config", + "--c", + type=str, + help="path to the configuration file", + default="./config/parameter.config", + ) + argparser.add_argument( + "--output", "--o", type=str, help="path to output file", default="./out.txt" + ) + argparser.add_argument( + "--debug", action="store_true", help="debug mode, print more information" + ) + argparser.add_argument( + "--optimal", action="store_true", help="optimal mode for SJA" + ) opt = argparser.parse_args() # load the config file dbsetting = config(opt.db) - global_para = config(opt.config, 'global') - fast_para = config(opt.config, 'FastSJA') - multi_para = config(opt.config, 'MultiQ') - max_para = config(opt.config, 'MaxSJA') + global_para = config(opt.config, "global") + fast_para = config(opt.config, "FastSJA") + multi_para = config(opt.config, "MultiQ") + max_para = config(opt.config, "MaxSJA") # load the input query query = "" - query_file = open(opt.query, 'r') + query_file = 
open(opt.query, "r") for line in query_file.readlines(): query = query + line if ";" in query: break # load the private relation - relation_file = open(opt.relation, 'r') + relation_file = open(opt.relation, "r") private_relations = "" for line in relation_file.readlines(): private_relations = private_relations + line + "," # first parsing for type check - root = parser.parse_sql(query) - selectstmt = root[0].stmt - if not isinstance(selectstmt, ast.SelectStmt): - raise Exception - check = check_type(private_relations) - check(selectstmt) + # + # The original check_type visitor assumes that every SelectStmt it visits + # has a fromClause. A WITH RECURSIVE query contains inner SELECT nodes + # such as constants/depth expressions that can violate that assumption. + # For recursive queries we choose the FastSJA path here and let + # process.rewrite(...) convert the recursive CTE into bounded row-level + # input before execution. + if is_recursive_query(query): + check = check_type(private_relations) + else: + root = parser.parse_sql(query) + selectstmt = root[0].stmt + if not isinstance(selectstmt, ast.SelectStmt): + raise Exception + check = check_type(private_relations) + check(selectstmt) filepath = get_project_root() - output_file = open(opt.output, 'w') - if opt.debug: - pg_test(dbsetting) + output_file = open(opt.output, "w") + if not pg_test(dbsetting): + raise SystemExit( + "Database connection failed. Check config/database.ini and make sure PostgreSQL is running/reachable." + ) # set up misc - multiprocessing.set_start_method("fork") + try: + multiprocessing.set_start_method("fork") + except (RuntimeError, ValueError): + # Windows does not support fork; keep the platform default. 
+ pass para = dict(global_para) + para["recursion_bound"] = global_para.get("recursion_bound", "3") # load misc pks = pg_single(dbsetting, str(filepath) + "/config/primary_keys.txt") - fks = pg_single(dbsetting, str(filepath) + "/config/foreign_keys.txt") - table_file = open(str(filepath) + "/config/table.txt", 'r') + fks = pg_single(dbsetting, str(filepath) + "/config/foreign_keys.txt") + table_file = open(str(filepath) + "/config/table.txt", "r") q = table_file.read() schema = get_schema(dbsetting, q) if check.max is not None and check.groupby: para.update(multi_para) para.update(max_para) - output_file.write('Query type: MultiMax' + "\n") + output_file.write("Query type: MultiMax" + "\n") process = src.process.MultiMax(check.l, pks, fks, schema, para, dbsetting) elif check.max is not None: para.update(max_para) # shiftedinverse1 if check.l == 1: - output_file.write('Query type: MaxSJA1' + "\n") + output_file.write("Query type: MaxSJA1" + "\n") process = src.process.MaxSJA1(pks, fks, schema, para, dbsetting) # shiftedinverse2 if check.l > 1: - output_file.write('Query type: MaxSJA2' + "\n") + output_file.write("Query type: MaxSJA2" + "\n") process = src.process.MaxSJA2(pks, fks, schema, para, dbsetting) elif check.groupby: - para.update(multi_para) - if check.selfjoin: - # multiSJA - output_file.write('Query type: multiSJA' + "\n") - process = src.process.MultiSJA(pks, fks, schema, para, dbsetting) - else: - # multiSJF - output_file.write('Query type: multiSJF' + "\n") - process = src.process.MultiSJF(pks, fks, schema, para, dbsetting) + para.update(multi_para) + if check.selfjoin: + # multiSJA + output_file.write("Query type: multiSJA" + "\n") + process = src.process.MultiSJA(pks, fks, schema, para, dbsetting) + else: + # multiSJF + output_file.write("Query type: multiSJF" + "\n") + process = src.process.MultiSJF(pks, fks, schema, para, dbsetting) else: - # R2T + # R2T - para.update(fast_para) - if opt.optimal: - output_file.write('Query type: OptSJA' + "\n") - 
process = src.process.OptSJA(pks, fks, schema, para, dbsetting) - else: - output_file.write('Query type: FastSJA' + "\n") - process = src.process.FastSJA(pks, fks, schema, para, dbsetting) + para.update(fast_para) + if opt.optimal: + output_file.write("Query type: OptSJA" + "\n") + process = src.process.OptSJA(pks, fks, schema, para, dbsetting) + else: + output_file.write("Query type: FastSJA" + "\n") + process = src.process.FastSJA(pks, fks, schema, para, dbsetting) start = time.time() process.rewrite(query, private_relations) + output_file.write("rewritten Query:" + "\n") + output_file.write(prettify(process.rewrite_query)) process.get_input_result() end1 = time.time() @@ -128,6 +171,9 @@ def main(): output_file.write("\n" + "error:") output_file.write(str(process.error)) + output_file.write("\n" + "actual result:") + output_file.write(str(process.true_result)) + output_file.write("\n" + "noise result:") output_file.write(str(process.noise_result)) output_file.write("\n" + "rewrite time:") @@ -137,5 +183,5 @@ def main(): # Press the green button in the gutter to run the script. 
-if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/algorithm/FastSJA.py b/src/algorithm/FastSJA.py index 37626d0..4c29492 100644 --- a/src/algorithm/FastSJA.py +++ b/src/algorithm/FastSJA.py @@ -70,6 +70,8 @@ def ReadInput(): for line in input_result: ids = line[1:] for id in ids: + if id is None: + continue temp_id = id if temp_id not in reorder_ids: reorder_ids[temp_id] = num_id @@ -84,6 +86,8 @@ def ReadInput(): #print(aggregation_value) # For each entity contribution to that join result for element in elements[1:]: + if element is None: + continue element = reorder_ids[element] if element in id_dic.keys(): element = id_dic[element] diff --git a/src/algorithm/MaxSJA1.py b/src/algorithm/MaxSJA1.py index a220cf1..25377e8 100644 --- a/src/algorithm/MaxSJA1.py +++ b/src/algorithm/MaxSJA1.py @@ -21,6 +21,8 @@ def ReadInput(): tuple_value = float(elements[0]) user_id = elements[1] + if user_id is None: + continue if user_id in id_dict.keys(): user_id = id_dict[user_id] diff --git a/src/algorithm/MaxSJA2.py b/src/algorithm/MaxSJA2.py index d38e38e..c7223a0 100644 --- a/src/algorithm/MaxSJA2.py +++ b/src/algorithm/MaxSJA2.py @@ -28,6 +28,8 @@ def ReadInput(): value = float(elements[0]) for element in elements[1:]: + if element is None: + continue user_id = element if user_id in id_dict.keys(): diff --git a/src/algorithm/OptSJA.py b/src/algorithm/OptSJA.py index 515c05b..0de5811 100644 --- a/src/algorithm/OptSJA.py +++ b/src/algorithm/OptSJA.py @@ -47,6 +47,8 @@ def ReadInput(): aggregation_value = float(elements[0]) # For each entity contribution to that join result for element in elements[1:]: + if element is None: + continue # element = int(element) # Re-order the IDs if element in id_dic.keys(): diff --git a/src/parser.py b/src/parser.py index 39c853e..c41e737 100644 --- a/src/parser.py +++ b/src/parser.py @@ -214,6 +214,29 @@ def get_primary_keys(pks, relations): return res +def _split_pk_columns(pk_text): + """Return individual PK 
column names from strings like 'id' or 'id1, id2'.""" + return [c.strip().strip('\"') for c in pk_text.split(",") if c.strip()] + + +def _build_tuple_id_expr(alias, columns, id_label): + """ + Build concat('id0', alias.col1, ':', alias.col2, ...) for tuple identity. + This fixes composite primary keys that were previously treated as one + quoted column name, e.g. lineitem."l_orderkey, l_linenumber". + """ + args = [ast.String(sval=id_label)] + for i, col in enumerate(columns): + if i > 0: + args.append(ast.String(sval=":")) + args.append( + ast.ColumnRef( + fields=(ast.String(sval=alias), ast.String(sval=col)) + ) + ) + return ast.FuncCall(funcname=(ast.String(sval="concat"),), args=tuple(args)) + + class userAdder(visitors.Visitor): def __init__(self, keys): @@ -233,21 +256,21 @@ def visit_SelectStmt(self, ancestors, node): renaming = renaming.rename_dict # add all the private keys into the select statement for r in self.keys: - table_attribute = r.split(".") - for name in renaming[table_attribute[0]]: + table_attribute = r.split(".", 1) + if len(table_attribute) != 2: + continue + table_name, pk_text = table_attribute + pk_columns = _split_pk_columns(pk_text) + if table_name not in renaming: + continue + for name in renaming[table_name]: rename = "id" + str(idx) - table = ast.String(sval=rename) - attri = ast.ColumnRef(fields=(name, table_attribute[1])) node.targetList += ( ast.ResTarget( - val=ast.FuncCall( - funcname=(ast.String(sval="concat"),), args=(table, attri) - ), + val=_build_tuple_id_expr(name, pk_columns, rename), name=rename, ), ) - # node.targetList += (ast.ResTarget(val=ast.ColumnRef(fields=(name, - # table_attribute[1])), name=rename),) idx += 1 @@ -535,6 +558,8 @@ def __init__(self): self.rename_dict = {} def visit_SelectStmt(self, ancestors, node): + if not node.fromClause: + return for item in node.fromClause: if isinstance(item, ast.RangeVar): if item.relname not in self.rename_dict.keys(): @@ -579,7 +604,7 @@ def __init__(self): self.node = 
None def visit_SelectStmt(self, ancestors, node): - if isinstance(node.fromClause[0], ast.RangeSubselect): + if node.fromClause and isinstance(node.fromClause[0], ast.RangeSubselect): self.node = node.fromClause[0].subquery @@ -598,6 +623,8 @@ def __init__(self, relations): self._relation = relations def visit_SelectStmt(self, ancestors, node): + if not node.fromClause: + return if isinstance(node.fromClause[0], ast.RangeSubselect): self.subquery = True for item in node.targetList: diff --git a/src/process.py b/src/process.py index d134e62..0d7622b 100644 --- a/src/process.py +++ b/src/process.py @@ -22,6 +22,7 @@ apply_unnest_subqueries, ) from src.util import pg_exec +from src.recursive import rewrite_bounded_recursive_query class algorithm(ABC): @@ -40,6 +41,16 @@ def __init__(self, pks, fks, schema, parameters, dbsetting): def get_input_result(self): self.input_result = pg_exec(self.dbsetting, self.rewrite_query) + def rewrite_recursive_if_needed(self, query, private_relations): + recursion_bound = int(self.parameters.get("recursion_bound", 10)) + rewritten = rewrite_bounded_recursive_query( + query, private_relations, self.pks, recursion_bound + ) + if rewritten is not None: + self.rewrite_query = rewritten + return True + return False + @abstractmethod def rewrite(self, query, private_relations): pass @@ -52,6 +63,8 @@ def process(self): class FastSJA(algorithm): def rewrite(self, query, private_relations): + if self.rewrite_recursive_if_needed(query, private_relations): + return private_pk = get_primary_keys(self.pks, private_relations) root = parser.parse_sql(query) selectstmt = root[0].stmt @@ -93,6 +106,8 @@ def process(self): class MultiSJF(algorithm): def rewrite(self, query, private_relations): + if self.rewrite_recursive_if_needed(query, private_relations): + return private_pk = get_primary_keys(self.pks, private_relations) root = parser.parse_sql(query) selectstmt = root[0].stmt @@ -139,6 +154,8 @@ def __init__(self, pks, fks, schema, parameters, 
dbsetting): self.k = None def rewrite(self, query, private_relations): + if self.rewrite_recursive_if_needed(query, private_relations): + return private_pk = get_primary_keys(self.pks, private_relations) root = parser.parse_sql(query) selectstmt = root[0].stmt @@ -209,6 +226,8 @@ def __init__(self, input_l, pks, fks, schema, parameters, dbsetting): self.num_query = None def rewrite(self, query, private_relations): + if self.rewrite_recursive_if_needed(query, private_relations): + return private_pk = get_primary_keys(self.pks, private_relations) root = parser.parse_sql(query) selectstmt = root[0].stmt diff --git a/src/recursive.py b/src/recursive.py new file mode 100644 index 0000000..3971c7e --- /dev/null +++ b/src/recursive.py @@ -0,0 +1,346 @@ +"""Bounded recursive CTE support for DPSQL. + +The original DPSQL rewrite pipeline expects a flat SelectStmt. Recursive CTEs +(`WITH RECURSIVE ...`) break that assumption because table aliases inside the +recursive term are not visible from the outer query. This module handles the +common linear-recursive pattern separately by producing the row-level input that +DPSQL algorithms need directly: + + WITH RECURSIVE r AS (...) + SELECT count(*) FROM r + +becomes roughly: + + WITH RECURSIVE r(..., id0, ...) AS (... id columns ...) + SELECT 1, id0, ... FROM r + +The recursive term is also bounded with `--recursion-bound` when the query does +not already contain a tighter depth predicate. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import Iterable, List, Optional, Sequence, Tuple + + +@dataclass(frozen=True) +class IdColumn: + table: str + alias: str + pk: str + name: str + + +def is_recursive_query(query: str) -> bool: + return bool(re.search(r"\bWITH\s+RECURSIVE\b", query, flags=re.IGNORECASE)) + + +def _strip_semicolon(query: str) -> str: + return query.strip().rstrip(";").strip() + + +def _split_cte(query: str) -> Optional[Tuple[str, List[str], str, str]]: + """Return (cte_name, column_names, cte_body, outer_select) for one CTE. + + This intentionally supports the project use case: a single recursive CTE + followed by a final SELECT. It is conservative and raises a helpful error + through the caller for unsupported shapes instead of silently producing + invalid SQL. + """ + q = _strip_semicolon(query) + m = re.match( + r"\s*WITH\s+RECURSIVE\s+(?P[A-Za-z_][\w]*)\s*(?P\([^)]*\))?\s+AS\s*\(", + q, + flags=re.IGNORECASE | re.DOTALL, + ) + if not m: + return None + + open_idx = m.end() - 1 + depth = 0 + close_idx = None + for i in range(open_idx, len(q)): + ch = q[i] + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0: + close_idx = i + break + if close_idx is None: + return None + + cols_raw = m.group("cols") + cols = [] + if cols_raw: + cols = [c.strip() for c in cols_raw[1:-1].split(",") if c.strip()] + + return m.group("name"), cols, q[open_idx + 1 : close_idx].strip(), q[close_idx + 1 :].strip() + + +def _split_union_all(cte_body: str) -> Tuple[str, str]: + # Split at top-level UNION ALL only. 
+ depth = 0 + upper = cte_body.upper() + i = 0 + while i < len(cte_body): + ch = cte_body[i] + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + elif depth == 0 and upper.startswith("UNION ALL", i): + return cte_body[:i].strip(), cte_body[i + len("UNION ALL") :].strip() + i += 1 + raise ValueError("Recursive CTE must contain a top-level UNION ALL.") + + +def _pk_lookup(pks: Sequence[Sequence[str]], private_relations: str) -> dict: + relation_set = {r.strip() for r in private_relations.split(",") if r.strip()} + out = {} + for pk in pks: + table = str(pk[0]) + if table not in relation_set: + continue + definition = str(pk[2]) + left = definition.find("(") + right = definition.find(")") + if left != -1 and right != -1 and right > left: + out[table] = definition[left + 1 : right].strip() + return out + + +def _infer_pk_from_recursive_sql(table: str, anchor_sql: str, recursive_sql: str) -> Optional[str]: + """Best-effort fallback when PostgreSQL primary key metadata is absent. + + DPSQL needs a stable tuple identifier for each private tuple contribution. + Some demo graph tables do not declare a DB primary key, so primary_keys.txt + returns nothing. For bounded graph recursion, the edge tuple is usually + identified by the columns used as alias.column in the anchor/recursive terms, + e.g. graph_edges(src, dst). + """ + cols: List[str] = [] + combined = anchor_sql + "\n" + recursive_sql + pat = re.compile( + rf"\b(?:FROM|JOIN)\s+{re.escape(table)}(?:\s+(?:AS\s+)?([A-Za-z_]\w*))?", + flags=re.IGNORECASE, + ) + aliases = [] + for m in pat.finditer(combined): + alias = m.group(1) or table + if alias.upper() in {"WHERE", "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ON", "GROUP", "ORDER", "UNION"}: + alias = table + aliases.append(alias) + + for alias in aliases: + for cm in re.finditer(rf"\b{re.escape(alias)}\.([A-Za-z_]\w*)\b", combined): + col = cm.group(1) + if col not in cols: + cols.append(col) + + # Prefer the natural edge key when present. 
+ lowered = {c.lower(): c for c in cols} + if "src" in lowered and "dst" in lowered: + return f"{lowered['src']}, {lowered['dst']}" + if cols: + return ", ".join(cols[:2]) + return None + + +def _fill_missing_pk_fallbacks( + pk_by_table: dict, + private_relations: str, + anchor_sql: str, + recursive_sql: str, +) -> dict: + """Fill missing private relation keys using recursive SQL as fallback.""" + out = dict(pk_by_table) + for table in [r.strip() for r in private_relations.split(",") if r.strip()]: + if table in out: + continue + inferred = _infer_pk_from_recursive_sql(table, anchor_sql, recursive_sql) + if inferred: + out[table] = inferred + return out + + +def _qualified_pk_expr(alias: str, pk: str) -> str: + """Return a SQL expression identifying a private tuple. + + Supports both single-column and composite primary keys: + - Single: _qualified_pk_expr("e", "id") → "e.id" + - Composite: _qualified_pk_expr("e", "src,dst") → "concat(e.src, ':', e.dst)" + + Composite keys are concatenated with ':' as separator to produce + a single stable tuple identifier for differential privacy. 
+ """ + cols = [c.strip() for c in pk.split(",") if c.strip()] + if not cols: + raise ValueError("Primary key definition did not contain any columns.") + + # Single column: return qualified name directly + if len(cols) == 1: + return f"{alias}.{cols[0]}" + + # Composite key: concat with ':' separator + qualified = [f"{alias}.{col}" for col in cols] + parts = [qualified[0]] + for col in qualified[1:]: + parts.append("':'") # String literal ':' to be concatenated + parts.append(col) + return "concat(" + ", ".join(parts) + ")" + + +def _find_private_aliases(sql: str, pk_by_table: dict, cte_name: str, start_index: int = 0) -> List[IdColumn]: + found: List[IdColumn] = [] + idx = start_index + for table, pk in pk_by_table.items(): + pat = re.compile( + rf"\b(?:FROM|JOIN)\s+{re.escape(table)}(?:\s+(?:AS\s+)?([A-Za-z_]\w*))?", + flags=re.IGNORECASE, + ) + for m in pat.finditer(sql): + alias = m.group(1) or table + if alias.upper() in {"WHERE", "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ON", "GROUP", "ORDER", "UNION"}: + alias = table + if alias.lower() == cte_name.lower(): + continue + found.append(IdColumn(table=table, alias=alias, pk=pk, name=f"id{idx}")) + idx += 1 + return found + + +def _insert_targets(select_sql: str, targets: Iterable[str]) -> str: + targets = list(targets) + if not targets: + return select_sql + m = re.search(r"\bFROM\b", select_sql, flags=re.IGNORECASE) + if not m: + raise ValueError("Could not add DPSQL id columns: SELECT term has no FROM clause.") + return select_sql[: m.start()].rstrip() + ", " + ", ".join(targets) + " " + select_sql[m.start() :].lstrip() + + +def _depth_alias_and_column(recursive_sql: str, cte_cols: Sequence[str], cte_name: str) -> Tuple[Optional[str], Optional[str]]: + # Prefer an explicit alias in patterns such as r.depth + 1. 
+ m = re.search(r"\b([A-Za-z_]\w*)\.([A-Za-z_]\w*)\s*\+\s*1\b", recursive_sql, flags=re.IGNORECASE) + if m and m.group(1).lower() != cte_name.lower(): + return m.group(1), m.group(2) + # Fall back to a common CTE column name. + for c in cte_cols: + if c.lower() in {"depth", "level", "hop", "hops"}: + return None, c + return None, None + + +def _apply_recursion_bound(recursive_sql: str, cte_cols: Sequence[str], cte_name: str, bound: int) -> str: + if bound < 1: + raise ValueError("recursion_bound must be >= 1.") + alias, depth_col = _depth_alias_and_column(recursive_sql, cte_cols, cte_name) + if not depth_col: + # The query may already be naturally bounded by another predicate. Keep it unchanged. + return recursive_sql + depth_ref = f"{alias}.{depth_col}" if alias else depth_col + predicate = f"{depth_ref} < {int(bound)}" + + # Always append the CLI bound. If the query already has a stricter bound, + # this is redundant; if it has a looser bound, this safely tightens it. + if re.search(r"\bWHERE\b", recursive_sql, flags=re.IGNORECASE): + return recursive_sql.rstrip() + f" AND {predicate}" + return recursive_sql.rstrip() + f" WHERE {predicate}" + + +def _outer_select_to_row_input(outer_select: str, cte_name: str, id_columns: Sequence[IdColumn]) -> str: + ids = ", ".join(c.name for c in id_columns) + suffix = f", {ids}" if ids else "" + # COUNT/SUM/MAX become row-level rows consumed by DPSQL algorithms. + m_count = re.match(rf"\s*SELECT\s+count\s*\(\s*\*\s*\)\s+FROM\s+{re.escape(cte_name)}\b.*", outer_select, flags=re.IGNORECASE | re.DOTALL) + if m_count: + return f"SELECT 1{suffix} FROM {cte_name}" + + m_sum = re.match(rf"\s*SELECT\s+sum\s*\((?P.*?)\)\s+FROM\s+{re.escape(cte_name)}\b.*", outer_select, flags=re.IGNORECASE | re.DOTALL) + if m_sum: + return f"SELECT {m_sum.group('expr')}{suffix} FROM {cte_name}" + + # Non-aggregate final select: append id columns so the DP layer still has user ids. 
+ return _insert_targets(outer_select, [c.name for c in id_columns]) + + +def rewrite_bounded_recursive_query( + query: str, + private_relations: str, + pks: Sequence[Sequence[str]], + recursion_bound: int, +) -> Optional[str]: + """Rewrite a bounded linear recursive CTE into DPSQL row-level input. + + Returns None when the query is not recursive. Raises ValueError for a + recursive query shape that is not supported by this project extension. + """ + if not is_recursive_query(query): + return None + + split = _split_cte(query) + if split is None: + raise ValueError("Only single WITH RECURSIVE cte AS (...) SELECT ... queries are supported.") + + cte_name, cte_cols, cte_body, outer_select = split + anchor_sql, recursive_sql = _split_union_all(cte_body) + pk_by_table = _pk_lookup(pks, private_relations) + pk_by_table = _fill_missing_pk_fallbacks(pk_by_table, private_relations, anchor_sql, recursive_sql) + if not pk_by_table: + raise ValueError( + "No primary key was found or inferred for the private recursive relation. " + "Either declare a DB primary key or use edge columns such as src/dst in the recursive SQL." + ) + + anchor_aliases = _find_private_aliases(anchor_sql, pk_by_table, cte_name, 0) + recursive_aliases = _find_private_aliases(recursive_sql, pk_by_table, cte_name, 0) + if not anchor_aliases or not recursive_aliases: + raise ValueError("Could not find private table references in both anchor and recursive CTE terms.") + if len(anchor_aliases) != 1 or len(recursive_aliases) != 1: + raise ValueError("Bounded recursion currently supports one private table reference per anchor/recursive term.") + + anchor_edge = anchor_aliases[0] + recursive_edge = recursive_aliases[0] + recursive_cte_alias, depth_col = _depth_alias_and_column(recursive_sql, cte_cols, cte_name) + if not recursive_cte_alias or not depth_col: + raise ValueError("Could not infer recursive alias/depth column. Use a pattern such as r.depth + 1.") + + # One id column per possible hop. 
This preserves the full path contribution: + # depth 1 -> id0, depth 2 -> id0,id1, ..., depth B -> id0..id(B-1). + id_names = [f"id{i}" for i in range(int(recursion_bound))] + anchor_targets = [ + f"concat('id0', {_qualified_pk_expr(anchor_edge.alias, anchor_edge.pk)}) AS id0", + *[f"NULL AS {name}" for name in id_names[1:]], + ] + recursive_targets = [f"{recursive_cte_alias}.id0 AS id0"] + current_id_expr = f"concat('id', {recursive_cte_alias}.{depth_col}, {_qualified_pk_expr(recursive_edge.alias, recursive_edge.pk)})" + for i, name in enumerate(id_names[1:], start=1): + recursive_targets.append( + f"CASE WHEN {recursive_cte_alias}.{depth_col} = {i} " + f"THEN {current_id_expr} ELSE {recursive_cte_alias}.{name} END AS {name}" + ) + + bounded_recursive_sql = _apply_recursion_bound(recursive_sql, cte_cols, cte_name, recursion_bound) + anchor_sql = _insert_targets(anchor_sql, anchor_targets) + bounded_recursive_sql = _insert_targets(bounded_recursive_sql, recursive_targets) + + cte_column_suffix = "" + if cte_cols: + cte_column_suffix = "(" + ", ".join(list(cte_cols) + id_names) + ")" + + row_input_select = _outer_select_to_row_input( + outer_select, + cte_name, + [IdColumn(table=anchor_edge.table, alias=cte_name, pk="", name=name) for name in id_names], + ) + return ( + f"WITH RECURSIVE {cte_name}{cte_column_suffix} AS (\n" + f" {anchor_sql}\n" + f" UNION ALL\n" + f" {bounded_recursive_sql}\n" + f")\n{row_input_select}" + ) diff --git a/src/util.py b/src/util.py index a4a7e4b..ceca915 100644 --- a/src/util.py +++ b/src/util.py @@ -7,37 +7,26 @@ # test the connection to pgsql def pg_test(dbsetting): - """ Connect to the PostgreSQL database server """ + """Connect to the PostgreSQL database server and return True/False.""" + conn = None try: - # read connection parameters params = dbsetting - print("testing database connection ") - # connect to the PostgreSQL server conn = psql.connect(**params) - - # create a cursor cur = conn.cursor() - - # execute a statement 
import ast
import configparser
import contextlib
import os
import subprocess
import sys
import uuid

import matplotlib.pyplot as plt  # not used below; kept from the original import list
import pandas as pd
import streamlit as st


def _extract_results(output_content):
    """Return ``(query_type, true_result, noise_result)`` strings from main.py output.

    The query type is the first output line with the ``Query type:`` label
    removed, or ``"Unknown"`` when that leaves nothing.  The result strings are
    the text after ``true result:`` / ``noise result:`` on lines starting with
    those labels (last matching line wins); they are empty when no line matches.
    """
    out_lines = output_content.split("\n")
    # str.split always yields at least one element, so test the stripped text
    # itself rather than the list length to reach the "Unknown" fallback.
    q_type = out_lines[0].replace("Query type:", "").strip() or "Unknown"

    true_res_str = ""
    noise_res_str = ""
    for line in out_lines:
        if line.startswith("true result:"):
            true_res_str = line.split("true result:")[1].strip()
        if line.startswith("noise result:"):
            noise_res_str = line.split("noise result:")[1].strip()
    return q_type, true_res_str, noise_res_str


def _show_results(output_content):
    """Render the backend output: a chart or metric plus the raw log expander."""
    st.success("Query Evaluated Successfully!")

    # Extract metrics directly from the text file output
    q_type, true_res_str, noise_res_str = _extract_results(output_content)

    # --- Visualization (Mirroring Fig 2c of the paper) ---
    st.subheader(f"3. Results Overview (Mechanism: {q_type})")

    try:
        # Try to parse python lists if output is group-by (e.g. [(val1, grp1), (val2, grp2)])
        true_vals = ast.literal_eval(true_res_str)
        noise_vals = ast.literal_eval(noise_res_str)

        if (
            isinstance(true_vals, list)
            and len(true_vals) > 0
            and isinstance(true_vals[0], tuple)
        ):
            # Group by query
            df = pd.DataFrame(
                {
                    "Group": [str(x[1]) for x in true_vals],
                    "True Result": [float(x[0]) for x in true_vals],
                    "Privatized Result": [float(x[0]) for x in noise_vals],
                }
            ).set_index("Group")

            st.bar_chart(df)
        else:
            # Single Aggregate
            st.metric(
                "Privatized Output",
                f"{float(noise_vals):,.4f}",
                delta=f"True: {float(true_vals):,.4f}",
                delta_color="off",
            )

    except Exception:
        # Deliberate best-effort: any parsing/plotting problem falls back to
        # showing the raw logs instead of breaking the page.
        st.warning(
            "Could not parse result for Bar Chart visualization. Displaying raw output."
        )
        st.text(output_content)

    with st.expander("View Debug Logs / Rewritten Query"):
        st.code(output_content)


st.set_page_config(layout="wide", page_title="DOP-SQL Interface")

# --- Header ---
st.title("DOP-SQL: Differentially Private SQL System")
st.markdown("A General-purpose, High-utility, and Extensible Private SQL System")

# --- Layout: Two columns for Inputs ---
col1, col2 = st.columns([1.2, 1])

with col1:
    st.subheader("1. Input Query")
    default_query = """select count(*) from supplier, lineitem, orders, customer, nation where supplier.S_SUPPKEY=lineitem.L_SUPPKEY and lineitem.L_ORDERKEY=orders.O_ORDERKEY and orders.O_CUSTKEY=customer.C_CUSTKEY and customer.C_NATIONKEY=nation.N_NATIONKEY and nation.N_NATIONKEY=supplier.S_NATIONKEY;
"""

    query = st.text_area("SQL Query", default_query, height=250)
    relations_input = st.text_input(
        "Primary Private Relations (comma-separated)", "customer"
    )

with col2:
    st.subheader("2. Parameter Configuration")

    st.markdown("**Global Parameters**")
    col2a, col2b = st.columns(2)
    with col2a:
        epsilon = st.number_input(
            "Privacy Budget (ε)", min_value=0.01, max_value=10.0, value=1.0, step=0.1
        )
    with col2b:
        beta = st.number_input(
            "Error Probability (β)",
            min_value=0.001,
            max_value=1.0,
            value=0.1,
            step=0.01,
        )

    p_num = st.number_input("Processor Count (Parallelism)", min_value=1, value=5)
    recursion_bound = st.number_input("Recursion Bound", min_value=1, value=3)
    with st.expander("Advanced Algorithm Parameters", expanded=False):
        c1, c2 = st.columns(2)
        with c1:
            fast_global_sensitivity = st.number_input(
                "FastSJA global_sensitivity", min_value=0, value=1000000
            )
            fast_approximate_factor = st.number_input(
                "FastSJA approximate_factor", min_value=0.0, value=0.0, format="%f"
            )
            delta = st.number_input(
                "Relaxation (δ) [MultiQ]", min_value=0.0, value=0.000001, format="%f"
            )
        with c2:
            max_upper_bound = st.number_input(
                "MaxSJA upper_bound", min_value=1, value=200
            )
            error_level = st.number_input(
                "Error Level [MaxSJA]", min_value=0.01, value=0.1
            )

# --- Execution ---
st.divider()

if st.button("Execute DP-SQL Query", type="primary"):
    with st.spinner("Rewriting query and applying DP mechanisms..."):
        # Scratch files get a per-run suffix so that concurrent sessions (or a
        # rerun started while a previous one is still executing) never read or
        # delete each other's files.  They stay relative to the working
        # directory, like the paths handed to main.py before.
        run_id = uuid.uuid4().hex
        query_path = f"ui_test_query_{run_id}.txt"
        relations_path = f"ui_test_relations_{run_id}.txt"
        config_path = f"ui_parameter_{run_id}.config"
        output_path = f"ui_out_{run_id}.txt"
        scratch_files = [query_path, relations_path, config_path, output_path]

        try:
            # 1. Prepare temporary files for main.py execution
            with open(query_path, "w") as f:
                f.write(query)

            # Skip empty names produced by stray or trailing commas.
            relation_names = [r.strip() for r in relations_input.split(",")]
            with open(relations_path, "w") as f:
                f.write("\n".join(name for name in relation_names if name))

            # 2. Generate configuration file dynamically
            config = configparser.ConfigParser()
            config.read("config/parameter.config")  # Load base config to keep DB defaults

            if not config.has_section("global"):
                config.add_section("global")
            config.set("global", "epsilon", str(epsilon))
            config.set("global", "beta", str(beta))
            config.set("global", "processor_num", str(p_num))
            config.set("global", "recursion_bound", str(recursion_bound))

            if not config.has_section("FastSJA"):
                config.add_section("FastSJA")
            # Use values from UI (defaults provided above)
            config.set("FastSJA", "global_sensitivity", str(fast_global_sensitivity))
            config.set("FastSJA", "approximate_factor", str(fast_approximate_factor))

            if not config.has_section("MultiQ"):
                config.add_section("MultiQ")
            config.set("MultiQ", "delta", str(delta))

            if not config.has_section("MaxSJA"):
                config.add_section("MaxSJA")
            config.set("MaxSJA", "error_level", str(error_level))
            config.set("MaxSJA", "upper_bound", str(max_upper_bound))

            with open(config_path, "w") as f:
                config.write(f)

            # 3. Call the existing backend (main.py)
            cmd = [
                sys.executable,
                "main.py",
                "--d",
                "config/database.ini",
                "--q",
                query_path,
                "--r",
                relations_path,
                "--c",
                config_path,
                "--o",
                output_path,
                "--debug",
            ]

            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode != 0:
                st.error("Execution Failed. Check database connection and syntax.")
                st.code(result.stderr)
            elif not os.path.exists(output_path):
                # A zero exit code without an output file would otherwise
                # crash on open(); report it instead.
                st.error("Execution finished but no output file was produced.")
                st.code(result.stdout)
            else:
                # 4. Parse the output text file generated by main.py
                with open(output_path, "r") as f:
                    output_content = f.read()

                _show_results(output_content)
        finally:
            # Cleanup temp files, even when something above raised.
            for temp_file in scratch_files:
                with contextlib.suppress(FileNotFoundError):
                    os.remove(temp_file)
src/algorithm/__pycache__/MultiSJA.cpython-310.pyc delete mode 100644 src/algorithm/__pycache__/MultiSJF.cpython-310.pyc delete mode 100644 src/algorithm/__pycache__/OptSJA.cpython-310.pyc delete mode 100644 src/algorithm/__pycache__/R2T.cpython-310.pyc delete mode 100644 src/algorithm/__pycache__/ShiftedInverse1.cpython-310.pyc delete mode 100644 src/algorithm/__pycache__/ShiftedInverse2.cpython-310.pyc diff --git a/config/parameter.config b/config/parameter.config index 14cc97f..8ae66bd 100644 --- a/config/parameter.config +++ b/config/parameter.config @@ -2,6 +2,7 @@ epsilon=1 beta=0.1 processor_num=5 +recursion_bound=3 [FastSJA] global_sensitivity=1000000 approximate_factor=0 diff --git a/src/__pycache__/parser.cpython-310.pyc b/src/__pycache__/parser.cpython-310.pyc deleted file mode 100644 index 041cc6e2e1a7f8bfc19443af4b673136a27d9206..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13861 zcmb_jTa0AKS?+V~)0gR)ot>S%_|{I0*mjoPwd2HzvoT)pC2?XVX2-FUCZ^Nd=k#p% z_HF8%UhfWPh?C7lesJOh2;uTDEAhYr;)#dw5)_38#0wA!gpS|=AtgkS5M-5*tYXaf z{dM{}Gcib<(W$QbtLm>hRsX$e`U?yB44w<$TJ}Hx@l57lS(tuo6rRExt{Rz)pYe@O zrfXCUgL$)KcCD%HD_c#knyv=_t}i^1$H}s z)2bE#$@zIe@+qX~7yRO9Gt~vuEchkVl+v0KAdCJIAWJD^5s(A^G9b$-WC@Ui{t6%~ zDdYekhx}DQR#V6_Acy@UfE-C72LUr;%tEPH2m+|jk zm$&l3_qbveBRRMcM75}Xxf9gtQKVXzhfx5qRSzTlc-LBCE9$Edc_-)%yOQyP%^@3k zXM2qxM0JD6rXLf9r|^c4A_+28!_QPr-|$T=kL6pwjbGcxf@4aqpGC>_JwJ!vtnaL6 zD*2@NOn1;}HCoY!`mJ6A8##WMC;oGI!&M|3nK)C^sUdmO&#Y%|8*3FSaa3#LN|e~0 zpow&ISnu365<3h!&D$BZh|YIUocd@OsPI&~9`<`TPMr_Jt5JV&>eAU$=bu~uu`a3k?*wN|ec)oKNf?jmsw*T@@Nhojnk`mQhVyVx@>a`bpXf8A!ccUrI<0G2QH9*!-x z+F6bZw+#9E2W3uNnLm#X#<`mY#yO1dFYOxP>*EnhGb7xJJ>Pyj!_^4i6Wiefu>~B9 z+l%qy&dRNfk%<@Ehw!Bt^=;%p1Y*VV?5lRHrkt`9KX03Zwyom}JGT0P7jVxZht z07pY$QsmI@F{vC;B={r`DU!BJ9NoqOylN9g;?@U)py#WsG<~7o83q@W>Z>F>=mQ-Y zx6=xv#6mI2ugjXPN8KnXY^eTlaIRAyhC#Bh4z_(J3Y2_=q7tZj=lZ><)f)y$7GjR_ zt!l4dZ4DB4IPkINNscm&U4p8ES#)kBZa3&&4pd@uNR+PLv+5$J 
zjQy@@OkvfI`hGvjaV;nk!)l=w)I8Oed5rNwy`QAFvSmNX%Dk*EU^q$y_!((>9An-nAjET;%y{ z8zqC#qM5gB$T3JVGjDjPSu$1)^A+=rylooGfGwpEVEGsO2i_^R@zA1VZLQ2mzH|y} z4Q}x0aggDLoWmRbH4?}@$h_EWL*BJ5aP)yV)237aM0ow^xYkAO!;n;{i7k;;L8ahP+^a{J|(@yBICBA>*J) z8_`nx03_k^XN-!q6^}WFd<~g!H3$Y^y^e66=+<(NTnRNY?l9EW2+NfN$6+6u=-cF?CpTuhU*VWIlNbNfzUiC1Fzlt|}8xq?onM+2= zEP+5hyi<9zWVs;Pt$Sw3ZuV35C%zv6Ikpl62qEwJW^C4AJ+!Gx^4nHy+L<;8s%bn9 z6Qh>dETGQZr|u~5Sc2yh5lFJ_+%$X(#PJ^HefucJsi7WZGO4xI$0E0#jl6cQo%c=O zd5tr$*aM?1a0YD)b101GP}qEyIQX&-M#8v%Y&VTL423;0blJP&ZnegA1tP8c6T)lt zntjz3!Abh<1%VGK54A&e(da7`gac9@D0k4QL)fP@Ij_IQ_}BEtk}QQlZ<|&3a=+h! z0JlM8VWpVlsUEfL*3?PDOA0Svf$~g6y44AKLW(IhKMf3|Tp`4yI4Y7DVT-PvuU+`q zbC=X9;JLkv?raFAZek-D%u!ylq={#uSYluZk0Z%DwC$Gl3t|Kr8Y?KR81@^GtJbQ{ zzqxha9>h4oSMg{!QUug$NxoKt%`k+$T}z6!8hK3Dr~?>LaoDOOGZL0i?p^gx1`sn|GEC%zT>c2KI9& zZzX&8?3mC)+D(|#Ro?b6X^TdJ?kdpsJU^xd}k$A*Mq7kvH-hp1e8J zFr`syXCWyKEbdU3WJ}-`0`izhU<2(vyt&AR!vcfAw_lStmCMFviYM*Kje1US6$sge zX#g22SYn%f_vz=NUo_K?vn!B<@C&J2EAEdem)V5pN+#L5IN8>K+fSK^-8^2Q2z_K% zCHDLH*@vH&TKrDx$Kq0PJ#QQ80)EufOwJ=&t1Jj%B>70T@L3qeqVjb+GJ?9uIshYJ|<_Fh{$_}xqZdllc@bF-jH-AV$LoW6`?cE zHEU^~2qj)eMn@ZBV<$tG_k0EpvAEiC29)^@&`aE2!0W+}En5BaTIvZYTTYEbW%2Uo)OH=`|z8sTU}D*YSocNEk$U$=yaYVkdJe^JR0>gd`_BwP78EJoFNaX-)cexPQEmnirO@zM^0e*!T& z(V&P~an+zn#S^8SJohQWN`D8qEP3ACS{@Izx9+4zKAQgC zMWnJ7Yx8W4$f^iwM1=Ba<-sUMFz_`q72ELBe1{(1=DnMXejW>1*!rlDG5sytu{2Ys zXuRBSjzBXUGqPy+HCwea^#T?nFE97+iV3LKTKRWVQ|uJn<8RGieg_V zsu47RQeLYRl04~3oV*|@=-(P#yGY5-y~wr}7)dL3cO5L%+emWf`ki67cPVJ94|CjS znNXlt3iHbS7l;AWj`7e_J%?g-aW0Ch0yc8fSvXRQC+mxbj9B5RPoVL3J4q*WP? 
z2gpc~aFhho>OBU$AV)$tvZ!t~uFS{SNOvNfwNNDskDeKpUIjE|X)-pfAEYMPyJD_g zgmAOz`X^2kW`j>B1;KaC>ebvhC@{Zi)SY+m>L|1+cJsl>7w^sW7CMY2jZ6!+@*qpI ztqqfGR_d5ap}E>*@qo2xZI2X=@!6kRB(c%Z%b;oM>!y{Rs*!aR6gl|7+WbZBR)t zCPBL#_7x8wXn6s;WBPV#ZRwJeT3aZ=+H$9?E!%g;?LG9?CAQD$64V9T=fpb8YZ!RU zlQNDbs8;9{##WqdnrAYbj__V~O$&S)K3j!L7cO47bYbn>h4r#}9uW0WCbY@HploM#pky6AlpC}$MDhO^SfTjg zPhrP?xSfsYHzCf{wvmIg>|6Ankatwc_ur+C8(|jPcLbg{O#h>S4h){tft`_+^4p-2H+K15z zVK~O%Y9Dz{wTQLNWQ_?$wR(gJbxP%cwrCziKFO0cM#hpbfi9>UY`%r0>O8H2`c)`+ zFiX{JJc9R8yzj*OSk+W0E0h(=>M@!;A&%VUm?&A)N@*NIUm-(X26iYDa~0iX%j=jwA4eJ+xRV{BWtVd8|ve8aZ3?w@1Kxv9*=R2ZeFTowL2 z564Unhb_o1ZIoLy%E7ZR$6q&98OO)2Z^9>qnZ9G)fP;PwF$eHdEHPB zTg2W`aO@av%)=ww*JeD8b9cIjZvc{wa}tYjUb9AqJY=hmmV;h5a4m7-g@eTj9y@I2*BQOqE;)9S( z3%5XV;o<2qBO9wRv*$+G* zL{d4Xn#^x7d6|j$w^x{>qEHniRp%^+Q>O_K*Vj=zmegU&Uni6%r1}(-SD4JH4$&I# zWW{4l-o@k;lg~1txU3wScaX0Fnks5(p@ zM^ep?wM014LW$8zj7Bv#u2b(}hfg#43=+8XsxhZ^B(8a$nq~(rT#Jr1Pugw#Eg276 ztLEEqI=_PRc!rsH>V6ENRd=k1if$g~^9)3-AYDPY)iqZoXQOu!ZQ0MWR*d835#w#f z67(3c@N|FKFyDm6vPL=gr=@HW<1my5w;q_&Q`&i(^6e|xdEb7> z&p*IKwqsu}{~DUSfj6Y>=~3x=Ua5Fm5#GQ%^Uy_o3a{!TfhS)=lqbEVMq1rEBd*9O z^rMg11H?PwzmI|RLhQB4sg+0r98}SM(u{n8IS~XR1LVUwYvE>)O^NxpGkK5+ zJ63b!D~GDfG7dII7ZK;yYjl1!4w3ya*N%Z&m$zC#(X_wH7LFo*g3AH-&#j}T<8kNR zaR@ab<1cY~5_y;;ON70KWM{NxLcvxsE6@gNW}Z3v6=K=?lu#ccgX5S$V~C@5T>Bc+ zEJFYcOvxRm$-OY?D6uP4cI&MkjvVXdD?taBGW5ML5IaHz<-V}MC>DULU1``t(>>-s zewgPh&*2r8)Sf#xTT{59H*>%9+HU~C9U=hL${kW|v+qr@q}DOTegy^67bx)qw`uz9mEtT3QT>T#R+MDfyK`;5#6KNeu}iIL8b^uXoq6)o1~ppWbzQ_OCZ=~ zW5vuvoUif(?T1_UOoGPgq3e|3-F9g+<|F_XwJ%vtN2S&*ECKn5B#QaaIVi3bICOh-K!i$^B`OsLJOzdL&P-F-B_nXRZMiF*VOISOVbU zqjT#pMSSX)IM@V_RwTLOZllCI_oXtK<>*?; zL^eIh3GFAF{tXjSucO7*q3JR9CpIqaIlK}w);A9sqDyaepX*4%T!v%-Vcs`0`CX=% z>doSrY|3w<;yWW`a-d=iK58ciCcNyI-f9%3lPVD%`C!42s6MXB>PyS>-DiT#6a`~(a2y3WKGEs;Cu;K37jqUew3YV}6@x zwf&GQ%K#1U%j!SsKpoQIn(R$ywA#42LT6>**hOcj()r_-l2~iJ(F)-KD$L^Y%n1Yp zQDSgVuF9OyC80~#$L%F`Li*=%MhYbk0c5MkqXPbWK!=U?#`@wM))#kSeKE!QL{ywR 
z8(xqT;ZkZ_;y({C<|0J&^%%;&y#&loj8Oee9Uhu0`_5HI-4whBHk;tCWoT_feNiA2 zd(+kY^}Mz^&tvV>I+DuD9KWgGVih-7pPPy@xX9w8Og_LwdQ(PC>~`&|{vKh!%j9>M zh`9Y4a=R^7QR`yLzl!?AxH?Z93(;>%R=-QzK8f{41!`IOv{LZiZBhs0243{w4$a!T3wl4Q`K3IxtTr8 zjMWi_JqCv1L>Mj%cXlt^aO2XSpg-YmU9p~kg$waJCo}6cRfs}X+>`f}H*cQDcg{W4 zFO~8Nu8$tM?4SRNqWqH@lb3KdM~8+VtOy7_cGtDaH(5yE8S&xxm$Is-4%DGTXSpORd=Cq_=9NI{8hA9g9kDG5ZY`03A9h}_*q}wQtIm;67R0m zHCb+KN3m!$<8BaN>-$pQeDS5aD)mMb<7N#SA_@f6Fe_+u>$c48p=ZStNXfd z;BEw_Z~E39G(h#C32OKXsL9k~YBP11LWQ=JWAzH&rVn-_sqcVTQS(~RZOA-0iw}+i5y_mt z9R+dR?zLn|1n*;484HW;-e52GL=f$Tu`~lA`a%|Bu@_{0#a_Q1WvydN8c`7Lh-K_t zREew*sS#NPIab62xc%$NjUPgY(MG2c^?SEAE(XzV+#hVbwYhQe>eidDZ$zTGF%bP` z5Jl$(x1{5F?Or?fJX$CsVyUygyn4Gj;_12UfhMeRbWxpT$IPWwVz93R26warVz5pO z6BP{94N=C8_bk?^b^_#B6=$eUpOBCTQA@OHl2K>T8<7-i>g`kGJ(2QFPHWO$vWx1p zKibHD|utP9^jhjR6yt$rq$!SLzN)k8I2HWZv_-eSZ(e=E}|qxOwy>j*z(9EX)q#nD68483W-^ttZQRQW#)fuim%c-bs~=w`E=8! z{Zok&u{v6@#NX6J-_$iSX~OaQO&Y>5r1kv)TQV6+cIl{)ov^xGM&;AhrNe3fC9p^d zeElw>xLBj!Lpv3}Ln^@HH?D{gBkP*@9#$96gVgil0@Z1&;zc5aVB!*p3u9q7hpn=l z1Z`|WKnd=T*PSPkSMW$gHbC-NQ`gno<>brHjl^KgieVwR(ypjPgcS}U8R!gLQitj40-q#yU%N80#Dk^fF1z3-riqJ18uO zU!wx(LY`$;6NNZA!|_o$25fZFaO z)Z0Xvy0R!wq^O%>#`!7GB?YB33M=z)I3V%s-(fz$N%k z`!3RVbm;3ihxt@dSV^ELB~}z?R#B30dn6x3%hTE+eu~c#FM!m`i8a1M9h8ii@UxrL z9LJR&nlp-Wey*a-5B^8HnWrtk$J%^Uo79%FTBCH#Cu4m%{~25iZS!}u^Q%Otb)Wck zbkY@30QFhiOOj@DEp{_D+)OTY)KKFlA$fLDJwx`0z$^7LZ1Rw%HTE+A0;Gmo&??Vp z6CLH-vSg7h-ogq=4uc?5+DV!xl({6G*!Ck?P5O4)J)Hm2gjx;C^=X+uJ5VOP+rrQs-fbCNt;Q>d zwUKvQ9ecO6&*j|~hUjk-r{_30@go|T0ZPKRW0Z)5H0{9h;Q53LOM*}0{5&5v86QS} z##$-k#~DbZEC4O~MdjV6_cfe2k;Wisx}l0E>3(P?eMiV7p@tynSMYQ#dIn+qbMRWe z_KdQxUN7-%JDa^vAZ88W2YH3`Ia=jzL}?QbrWdRkTwmlpcm5oE#a#ec4y~j_e{Wp*V||@AL|B zsZ;JOA=u;@qX?+LNiQCwJmfq2ety5OU&Ja1NQe2Mv!k&-DxDhIg>lSlcqhSdwb=-?CtM!_O{kJF}*jWy$~|rGd=eH6&xN3uNlTeoHXOvw5`XQL>r|=RS7i6cUMd ziI83n#%{BKH{q}k*kY{B3qcXj%qG&?)n zjnAc1@%--dZf$yzB+rKgJNA?OSDNJPDYQ-!_w=oa2c;-M5YkD9rtih=FhOz7Mm|BK zmx#PdgpT_hlP4-hESliyCCi$iuW5z 
zswP&XYN;jkP1RP*=v!(<-9W#j+iLZhFx$GJ)=qeTS+A&dwQ(Z)H}s9u)ic&#)9ZRk zFYC1val&x}-wki)t*_t{+v#xMepnX!gV4vZ44#OTvAh7*X9PVTYdZp>T*7UaG- z{uO5n#6Q33Se4Y@|HxFlNM0s;tP(0d;$|DxgAJ7+h7@o3{+vIDwevlD*Y?D_7we;o zbq*gmgKgM&&N_1bUWV`sJ#?8XGk2h~N2PTmqj!J%Nv3j_Kgq4m0L-7X$>#4kZQZyS zU40qWhsR}+&85{7GW5ZG<&EX?S+?`($A4p;rcY*OTsR-k?BT>W&iD9qcw}s8d_%v` z!y~7?+}Cb+kWVH)8BR*8eRKQ8P|qAL@ylE1K5ZLQnojDQ>WS5kobu@`H~G~1m@NAU zW_@I|^@*mvCPD&3^Uojleus!`?`dw!;<&e^?K4--dXGQqZ9PKVJ!^)&BR8IOXUD!V z8x1JxCe^@)XkrlyDOMvHwIV5G+!mW$@;25ak9quBej&aV-^i3^?bnj8aI=YHzBQ|X zMeIX?g^P!ZEWsO!?DrTrcEUm_@rqFijj5o-si^oFH+L|KXoQlAx7cT`MN%b--$tRK z7q{7cc6hUjcA1M6O0OatDEvS3PQ3FK)wZE#es>p%qNIKP{vs<%m+hBFh04Z7D1pyE zSY-R-iSDF^JoT~Bx#}++xxN7@_@Xu^(qtjiM+ z#sOpiObUoOlq^7+;%ozu{sfSw0a96|Z$O&9OwS?3C_pN9LRdiM{{pcQK=esKE+A&^ zAb|gY7!jV7h^T1aMkD?kRzJYUYgke0NM!<*2}#8(ehdG%TLG~OXrVNt`Y%^STkHgX zqfW9j|Hbb1C)*D{&4%UVXj%-2LD|QTfAeb)vgQ`3I^5r+aE@E{5`#=?v4`#vUmf4i1i z_a|fPhUIkjU`qDL_$Ej<(r!@ukvnI`PoUx5`;&5*PwWE>F3#W@LaYCvif68Afu>S^$PL40cn6h zRxt@ERv`?PsArJTW<}Vb_DC^1A~+xJfh-a`&0RD;t<%y^LpauR*OzV(fUy4tAO%wI zpW&<}zQ;l0R`8pMx#^NcIT%dyaWNRoZ+5%AS{Hj4zvAw=7|%DZ*2Aq`_rjU);4C11 zi@FUjN(d-oXKH(#re)C(99@ov8i~|g zUX48g1q7IM8K8@9nw4gmeK+0pH*{AniU0*#WLFdg3It8v?_5e_d6JU2_n!MY_rY_% za}Lvby{h5)*AwsZ_KTYKSE?*NCMs8vvOgl@8fOE|FH7b#Ro8u8m4NWjho#1MB`Ru^u2!l$nYE7=9N!0e~CNXMcL$4UPHOe zJzhuI;tjrp@(f?&XL$P)=C}D-zJivs(F$MP)5V8TJ7WD+-sJ4p`nq<+uqNA5sk8ni zPN8jfSYiDv9E>9nPB-A`czY0ascZ4kQMrngwUI>{9T!JsT*qPtH@38nS-3wP$#@ul z6p1dwsQE)v4W#Qx*EzQ7^ z6&kaS``gpA?@LFzWj$wWI2q{b9z%QW;V5whYk9{S-84>(gq75+E1)pWUR5jZY2q%| z%bDnz&5`wooU$I)Q+r&;%!liV;e7Obh0l#mLC61+vQDEo(~D$~Zug_Ej1QvVfUdfz z+`JXY!x4y)h=<`ID_j|lgE-;Q{i50%q}$;j7>4%?s~m6dgk716qMFHc6!0<4Fv9Ju zux2!77quu9gTsJ^pd;K)g)BTZVYWy|FU%|&>$I7qV4qBh{=UWkfid$5hKBVocpOl9Iz$@pWQtLJ2ZBfhdPsPYW56 zj%1+SK)Wf;8``hVPpsVH&}`D7iJjYMYfdV8W#Z&cZX7UiIX817cOW|^*I(Br=KiJq zwwfDGEQ#4iY~uE`Nmbgp%Nc3nqy}on6a3zqc;MiHzn6PCn^ums15JDYDOGLBl4MgS z*N>sWxlXNEOUGDUt*2v-Hk&ifwcN8b$Z73$DBcw~ky&5)37;s?z-DydM*tKjPq 
z-~G3x4!TOTG(go<>-VH9jUyITNy3GN8|>7IHc#&*_d>ygEK0H%q8&?!M3hL-QC8Ht zX_Ao0r%6_{!d_2Ay^w090%V03C!?_pM3jvOvM>j6CJSq4kcO!2MTc2ohoe!H@WRN( z!(v%PP>B6;Bo1d&I(2_-zS`m*0v>mLJLV^4G9LOh5v77hB8YkB+h_qnYbH@)psCPf z?pI_q98tRy4Wc1fXGQhlA^hcUoS~@92A-0|!o3rP{2e;p^SG>R6PZ~z_1Viz^*V+J zjWChu)j!t%<5Z~b=}p6do~8gja)m{p*t+c*&xL&Dxq%-`l^%8B0*=T*0DCVkF~I}KsyVx zyFgbL=$iCoT^cn_HqOH~bMsRpT!P+OvMHC381%FP{kNvA9`2DehFg=0Z01b${Q`Kv za%T+~f!&^ghS|7(H@EW2x;Aa0M%r89I;_s}iZ-cm3%YDWFWX02?zYvcIYW7t#y~jx1vxEed}6@baVr! z+hEnvKjZi8{@-W}czAmz_`h1#fTQ-Lruu3?;Cfz%7C4(T*Rnvob-yydtnmxB2kzC zFDYD_-K311EkXJqT9lC|B}ml)J)3TLe?PNp|OO<1&a&T2LQ)nS#3jN^$;js zQCxxlihvj7yE}0LBY@4FMi_5#r}w}n6UfZfCNL8yV-WIs@JhIo5$?m8 zxbU@IXr+_8K&2-!ztLaHE4jnjr#ie5nr$BA#@g_qfF~wO=ykt|QPsT04H^Y3^N{LD zjT7E$;H?vHtg}2>%Dw4w?kXIvgIZElW4g@29ryjg{%zt43_O$9;Zrq^3^l@@wz)$5 z+@792p;k_))fvUOg?Q$-;IY|eV>$rcM8VAWMBPMPtEPnZ$YXd=FQLVK4OAA~7;XF|%(20W7}5f`cT z60*YTjt0?v-@2{}_arFu-3&3BVp3RT;Q)SAEL-Tws4&w6KCOkwHqFFU;`bOb-#rLL z93ot%V2pFWMIL1+0=SKt6d^1@rcl^#68~-*?d-(eSh<$QNAbvSP&1q-glqu4VL2=k z0sua1Y0(I{*$K?VBWL?V5og@M|-x-WR?WW{9N79Gu+=Fus=a-_j?GW~h zbXpK3A?loieTfYUFUYb9ik5I0gfqT_Voc7vU!`CcMnI0~*2}VzQrI631OZ)XyvQo4 zAhWb!%9fEC>nwlUGX4^ILG~#Wc`wOJ3kIbn8@!?@tb@HtntO-na)j`)9wS=MQgwdG)O&txIA% ze~2dGRR-e*2r?8301%kwEHrXGM5q}eqzvucgoEoy8^M_^ZTw2n#5n}AwsemG8M1n$ zg*8&1j=rhZ?RbUW>lSvDkkE?9!Ct&c87(A!NZB>YXmRla$|%rKRt=woTzEc@zH9^o zW$4Bt@d`$Pb5>NsnPXv%lnX*X7|PqHAiZeJ9F*Blgr^aMXB5#pu+OFmJFQ;FEF*&| z6Zg^CMSA2pxgHW$vOqgJ;?g^)bads*2(tbQA6@N2Z9s`{x$m2_II(gJafpI`KMbL= zLC~>EAW4WzN*RWhAFi{e%rF$|FcX)l^_R#B3*tID6dzKu5$lZDPlDM}G=c02RYO#&qIX`lqb?g<0^Zx*cfd^&) diff --git a/src/algorithm/__pycache__/MaxSJA1.cpython-310.pyc b/src/algorithm/__pycache__/MaxSJA1.cpython-310.pyc deleted file mode 100644 index 191339ebf26c9570ebab567c54a622318caccb21..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3613 zcma)9&u=706|SoOIXz>0ylcl}Y?82Rk~CSZ6NqGyn1y9mNLeiwyAC;w6ph+bJ+rf( znI2cy7(4Ww90WN;f+L)4Bn~Ta;>Lf#nPXi!M0-GjD+q<}Rgb-6vj^I$H&xZI>&JWF 
z``&XlHy1HH^&1bQSz_#OG?;x{41R!?A3{hbd7m}QU*6<)ESf?J>3qhTj&!AmUsw7v zz^^AmS;4O_BU#07AeZEVtbfLvppZ|@?VHmCTk~xW3OG? z;^pVqo9(noo? z3a?SnWxN4%qq(4qu{a&t3tw*1=RGN?b+PHSk>-4h(18C>QPe!A3D2>xk zho&+d4_ae7x$*N%=~No8Hw*S3NUejzJFjq$O8g}Bo=&|4VR%o-*dvZJ*l(8fY=lf2b|67zHIk{4bqDJDS`%zyoSA_d_^$mP*iD@Xx=wkQK@&W>Uz$Lu8Mm@Ab1G0xavA>KM_cT6-|3G}Tn8Ott!m-oA17Qk~e#Qq0{3v7^3D{Z}E3XI*yH^0mj~;oWnBc?7+jT)$BbVeW`W{1uqH#^?R9U^@Fs~jA!4Ckqy91p@_s2SgDi|x)=u-hHMQAOyv_;F#3Lb) z74mh$*uND4@6ttSPa5r3#sf zvW{h%g^6Fdr867Y=}L=my9yZ5uCn|;tN7IyD|j<2d~UxB zrr*&GL+oBF0@%x*x8y(q#lat>lg#P)(*@}kp+u%X6Q8qRvJSU(N?3&VVW&w2x2fPa zDx!k8_n){+9be6#fpLq7&Q6gIuz5>tQiUBcS5zA;S=85FjP!M~dNOCZ6f2SK_QkKc zLM>u7#Y;6@XuLnOqerJ`AP}f>xU#b|`;Jbaz3C}l`et@N8y4XVhjf9)*ITFJhh;(X z6`o6(j>3~_Knj+en5PTReiBml_!U_RLHQeTAa-XW1to&G{f^;B&rs;)$$;|myE8Eq zZBtp$236nrJ*`SxG}x!FrAMP|h-BVN`m=S%xo*6qZa}k)Q%c?HnGT$<7Z=y{9&AAFSqm%*DUij=SvF1cue+01+)kTfh!L=^Rm31Vyb=$9tTj1V0e+%Ud&D2rSfTF~! z;Nk|@*u}ef{s#IR-2IPwpLQW{eEGZI((~u($dJips5265F7hZx& zCP6KeIE93@yPfc2RQ#8hRBFEdu*X^k1(^nLvz+wQ8zs=h5w343-riS_t7_Dt2Ib^!&WPH z%Uu2eeMZJt+YkVr1v^<7)zNWoYfzi7uNcK1{$kBxGDur3<0h(m_|h|l+k;Gw`{^BW dVooVOWR$f~0dItFhO6OXv=zP{h7In1^t+Y+q7|e>ZlvW~l zso9|;tIPHvr%eh3XpSvVfI9TxKcScY4L#4bhr-963baVm-pbP16jtmEGx2#Z$;MR48B!am$UfR>g8VIm;_6Y*a~Rb1`;ve#|+` zIn}P?3t2A34L0JrkJbvc?mw?RUDAH;A94DPkjvf4c&*&c{S^k;z{=Kg2YKI)8f9pvJOhh^zV_W?ui$}!f5nkq)FO0Uqb48Cws6X+^hYI>%~?YZi6$-}y2n@m^r zj1K@_E+g4yIlDXakr?rj$ORNs8add!(-mJj z`@z20WDl7lgK4@(T>rN5hFPRCQ@R)ShEZmCY#VLz?;C4Bp~kh{FiVsBYqz3oPp5;m zJ8!JrdVllBx7RY&SqppHsfzVZf9>7yVDp_fUcNrKZz}7(IMbc9Ke+i_T9`m`nH@fR zCko};$zZ6julLeU*voDrb0Yb{xsA~#shizcfJ_cMSwZok*ih6^Y=Rp%)=~d-(TZeE zu-*&%TQa=)DoS?xfE}(rb^1cCDU7e8d)RGcoW1CNbJo;iT4`HF*|4XLOY1aVx0i+} zKms}o2ZJb)#?!+=FEXVpRXUQ!$s%o>UX++}GVHet{d%;@%v7wJ+jgu9wc4(k@!~{A z2d1_^jMV+fl+CK~W7(E*N1GBd5La~6ty0mcH5tqbV0lSof~XhuqeNqER;Fz{JE8F> z4aTSCv}5_W!pc+Klu2XjNL=6LuE5I!CqS;o8z|L8g;xa1JV3r7@SNRi|7 zLS4lG=gyNn-^Xh`{}89E*_a=b7xG;(hKcTqU3b?je05xsY?AY*xnP#?`{R;kWR=JfOZn 
zjncR(1?6-j58#oG#heSkoO3<$@>JTT-2UDDThs9`PR1`rd~*Dvho0=l?gIG>Ijb@~ zw}kaBvu$7)b_V3yy|_D{i`Y~mi?3=7j<-e zFEVA=dfSQ_ulNN0IEnl5VPxvm)=RhBnO0HQHw`LBN~P+gW{zq;2~`p&Qz=_l_*`}( zCz64>h=m$AO`=RKQ`S3-R0_vYVX_^mXKYodRCr%qqI~&YsN!&|2S{^5Da{=DN2uDJ zG{JJhn0&-8sFOzBZrq8j*Eomq02@hr*jj+kP{(OvE^cF)(>=GXM>VUhnN$C5_Xfu4 z3=UKibE|LC0^Cm8%Z#hiPmrQZre2~pask74)io**3>QfOTDJ$AG_`si_1R4%fjbB1 zg2UmrEb0??tXU_lqpo4wYFvC(^D4Z~Yy1javcc!!h!vsU#tBg{5I6wwB zgVTsR9IotEpOSyyV+QSt~ zSGKdsV4xUlRI+s?T#Qr)V$gIp9$E0Fq**d|!IJX5HR&x-sTQIdZ z<0Mq~H*tWSou+T}1pimeR+Yk?94_oIQX5C>RhTTGVrel{$xcr1~k_xsF0kjAd@bHI$*b zhFXADPA(+3_dV`6D42syfZuE~MXays*>SB^t5sAKwjOkLqRw8sn>C%5*v-^?Xlc!z zu}G`S=n0%(_+SUlwUhSboni9E$=!W~-Wk~-;rAkU|EuWQ?ckk0w2vu%e>}l&i9M_m zk~`Yf-m1k`T_OXT#~b=O7?+P3F(KkS(>(E>nuPi2I+z^S{=DsZRL~Wmtght@TDS%# zGhjE@bvg{Iv^H>E^vG?G{XbD&TFj=?Z+aW*9Vq=2r-92Q9V(!aaq-ETi|R)Z0MtZr z4_Evki+g}82bgQBLvRz--b#lcDAfXWd*C-c<8DPdG>#63X0{Jz6Ax6{iLwkBZ2Wh_ zBm|rqmq>+rm!`i%$+MKaMw87<#DU_lwgQ&@_lqH{)c`-SNGx$TjT0hKmQ-2F16w8& zYF!|p25b{mX{X&b?DvN>wy$BFq0H0^G#z=*8FZ|jHQ3*wEhBDsWu^uS67WSJVd0E5 zpyVR3ur5GZ#50777C8w+7m2i>Ms?BkoOu%-P0gFsj^m`jglVD(mq1fsq6l&tz&!*A zp#!6b$&X7#S^H%4WXRUkAd3Kl?SsZx%(LX(9%M5p6e=-=xw-CA>Tw4w*=2xF8ufv zE04?NoV*8l$I2r;Cx06r?Bfk=KbN959Mlr#@W&PGsj7i2t5|gvEUsWtbU4I|R>1%0 znD)CQJK=Y9Luevu(5j?M+tk)RwdJS+@wz;2W9g^X;*Cv+$G`Q9O5(l6w+yAwRK~unltqiXJ{= z&=Q>TOF_Zysi(kysE6SP4>`zO*avrQf9DiX6a_l}qVZ_(Wp?;nbT9zFOmrOfo)(`r z=g)ZP&(KGGNXgGBA%|7Jpv1x@A*EWPgcfWZjEt~w*f*XZ{u% zRx0=j2JbeQgybKg}(N^bIk-x&-uhcJT7CMW6{P7R;{N?pW z5+9q$7SYG86(Kf-*=|r0C0>Hg)B`k~gKd;8WZR7sFVwi1wCI9Yt6!nmbc&0FNPV%G z(*@X^J-!M>GR4xb(F#c06<2+Vj|tlCKH{Qw8@Dvot9+V_C`Wb8hDs>^ElP-csJlpv z2PQDMuWnF<)fWXG1#R7=9No63>a&}h&Y6V$sNFtw+fxU{pE&(g4tvo}I`Ql>k^lr1 c7DPWMcr~~lECzn?N-$Ts5nKs;TzFsq4>vX36aWAK diff --git a/src/algorithm/__pycache__/MultiSJA.cpython-310.pyc b/src/algorithm/__pycache__/MultiSJA.cpython-310.pyc deleted file mode 100644 index 5f7c35c8faf94cf0f4746657105ac31343066f64..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 
literal 6276 zcmb7I|8E>gR`05=?&jRc(vqi6UA|s)it*a?Wy); z#xv71)isF|RZAc*h+ohh5;_SG()bt9{)U7Q+MnPjbU(l!KpKG$ zGDc~eLxjPbYZw|4#+baU2NF78G8;oaO< zv7V@5o2aLM9F<%6(gqUkXtvLKoc>$RhwSc}tB4^FTCJUKKW?>he-x>Bs6TX62@N*+ zBVi`Qc0r8@F@-1V5*GlIep3Q9zLt@briHSWs*sLz%xhlsVlkU##wgG zI|nRdr&_t;(%lv6ykT38VO3|#Src~^gN$dCbnxN#4>#8YEENr9G6+kPVRt7{gIo+? z!QAa9oi#USQO>q=);^iO3eYnT-ibzclWrQ59n%_;8e3pB7BInluI52_ZY~QTP)9qL zA7aKGC&EsO7;=u>MK17p_aAiyHWj^o69=tZwSY zfgS!*Gvk>0b?oe{uCZ6HIT0HQqG9a1sbQvdJHtQGUNNSrSxw81J*HkQ${5$T8dyDPUuZ!*#>ZTbN_HLRi0gZ7T*D{EpB-y< zHET$gH4AAN=VFF^md&5^1+ALRnfYQZNE_^zc}V%DX_|R6zv!HEvL@=^&?VE{#TY6! z&8P&*8`vS|bO}48B<;p!qQ~dx!5v^pywlywYu({!0z`>ZfKBd4qftDR zxz`=acn>Ez9_C^rLcO$MyiJV4#@rP zFh-w#JOIR|p}Vo$%LR>^vu@6MAZ&F*fn@hZ_$l9yBYB%7EyKjtF^P);*z>tR7o2DT zu&Durs;~xMW;I?xojzgHz!f~;0b4?w;7vwlY6&>?LXHnT@fE#l;Ax`XuyU7}_>>>k z%EHQPvPHC2NR!_Hs{aD?YZKGnxqfO;puo!zi6L&s#P2fMyfm1vHC8=q$G=GqsWsyrx9T@A-L7( zub~_H#dGj5_-M~F?M}r3_WO5#kioM|J?-}VUP%WB97xNjWnIzLeVk^AUKSUC7ZSed zfTPbJ1Fzj)z1Qf#LoGVCJclx)^K@L}$F*j`hjE2;sXWtV(J?@Cl7Ti)il5l#7j27< z?ayZOu>DHLoz9yAT^CG&S<6`f>KPN5k_mupG%I(yh`OM<@oehbHUZ!87_Dcum21~* zK`&V(`*v2==S&$mThR-8nM#)aPF5wv##h%XL^V~5bTy1yo35I`+N?fZm4Zs+KPReo zOywLnk1R^9p5=3==S_v^hLGH~x?U(K)|wafrIS&xCSuw7JmrAq@u%ZIk{o!wpbI_U z%>w;XG)o}i-3$nOr1NY}m#rP3Kibb`)t76je|k1U(iYe_U+~c?n#Rj!1CIsz>Dd$% zJDaB6N6^+Zq<_uH8j#4wpFXGe&u0O2oilS}wb^blK`|C)cd3}wezDq3c;92rV~tLv z53#zhVC*mSu1MF+k%ckld!T+1)K`129j(Hb=#+xG2CCotGEr&28SLw<3Hr}~{(A3c zO_TPMzB*kun2WYC38FRl6Vu$`?>QVNUEsfuah|VjIX{Py(;qyBWcq&_-+QOH2_Y=U z-=F5N{-<>J>B9Owl~547K1kAd_iDe}w(;M`IOz6w6a)VI%~|2E3U~Dtbq4L~EG6fW zvS?0MCwWI4$@BOA==;D3rlFUV%;uG@dXSEK%WfO7ILP-`fxI6 zVTRpkyAQYN#xSzbWGHugcMnOQlF6QGlFnbHWZf!0^Zl(|^)(`hD4lF0MK}?P$K-E) z5C-uew#p#FOZGyIR)CO-$tqtXb$BcvF&gT| z#?n>XG~hG%2@MDqWUk?e23;HH@$nUn$kOnA4nEi5;)S@oGLvE8pNiOfJU=eaVmu1( z?4eq#+I3Y|h(KF2_cmL%TgvX`VuxO)CfcUCf4h}pi(-#Ndv+JD3A?6t9m$=;ZQF&W zc^8XjuUuzuB?D~9_<@=X+mVjt<|I~6{uxXuodLT>uq?$T6?Td7PsO2n2My2Jll=Vg 
zJ?s<2B9djeL2KM0HKGs+Uro13_-O%)0nN0x??{0as);$v*bSc_%A_ikM-U;vBLY>?i|jC)?rXepDt) z7rnJ3OVnFe3#-Gz75H)={n-@vm10dyxoD+zk!~SHmY;8<5l%+aL2qu_#LJ4S=MP+X zq^e%nhyTEh5|Kl+Xm@b+?lXFN=>F^B;i1L;XS9!=eF@2f&L92({r>T0=kNZOem}hV zjE-Wl+t22uc$9Yg$xzXLvgn_S?HIe-`VPANs})M^>Zlj)iGVbdrcSp(N zoW%-HPL8S~3H>47AEJH%?zDB)FB5-hv!%Oz8L!naNR-Aodk~f$6ttYZ6?!;ONk3;3 zwLwxmT#8z6!C&G0eajLav>rw}tiByh(zG?CSBn?1uy%hkd~^0>^B?Gx{t6P|t^%%# z3j|<|F96zt|Hfqj!CL^pQHkpO$II0(gAyEG=LD$@z^M-)Rlf?+ z>K>Ah3nPDY^W)$Bx4)SF_;3E~Mw^U0`w_exW8Z(GVw?zw7U4xB!dV~$v}=l;MsE-i zBJ!vVLk zQt^ycxlT9VHh}fUEy(^6$!t1LuON16$7yQc?l$T1?s`9INB#5$3bPJBQ5p4Xka9B8 zhg|(S2s!JV(Cp}H8)O<*5k&uA#ih=YPlP2r-j!f0LZJwBS)-MqSR}a;c0iI z=x(}Jp{ESEKJ_kYYR)!p#Qo6U#QVt3PVU)WVZ~-G1>czaww#956Ixn$*%!QO^TnmU z3^S&$V+`TqFYuh{`A1I(^pHS*x<;?!q_5I7pTELiWhz03=Nv8SC)a#B$r_NZ`Hnlg z*1Kv4RMnxROUa31wq$*Ng&vHHXO_1~FWR3Kp+?B)k$MnyhpiS~tqD!kcPM!u2|~*f z1>gPdwtdf5H>kxz^))J4u=Nmmj-C`NP$sHHZF;V-fqVho7KANc(FBjj9YzB@j=xmn r;QAnulYV>y>N@EaBsGVgQ;L7X4Mb209tU2q8PqE4!D~Uu2QK~(NO(}f diff --git a/src/algorithm/__pycache__/MultiSJF.cpython-310.pyc b/src/algorithm/__pycache__/MultiSJF.cpython-310.pyc deleted file mode 100644 index 2c9522b67d67783a532aa0b104441639374dcdca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3642 zcmb7G&5s;M6|buPo|*k#j5q7VfQ*%if;U1)G16kkiDMK60k>JJ!3B(amCnOGG5hp+babWqq>Ye>?90A?x*H!giy?WoT zim9mv!}E*R+VW?!jQx=&Cm#=!xA2u80!SwLfNj)&zQNsGY{DAel-IIg>|y|9lH*jkt1R&gQzkSC$)KBNiZt5cPKPN8`Aq=rJ~pulfWsV` z9FBm)$|}<#G%E$6EVa264EbqIS1OK|b^4&61<;sJ! 
zEb_-I*R%4WE=DVNuB}|ZwSN2NN~yXl>7ZArzTO(Hd}ln+{q+xSTpB&LjnzTF)ZJn@ zx^kWlCMjmwlb7#gseC^ljrFC~LD5YI*{`ym(~MXMbOmx zJ&@YwAs^D2M9niW9Av{R*N|?_@`si`IAKPzn@x^f5@K3z0QiZ|eID}`pXT$t!KcMp z-V(ItV?IxR%vwAac=;LNT6`9?4L&Ot#k`Hjpn8RifBSxmU*!IQ`Wo1us&eF6 za_2kv%2fc3Dj>NOd!k}m>{1mIhMi^0$i|AFzzWne@1d`j4XUW&P09kl&H_d5@8QGq zhpTNL-b`~@3~e;d`VG0wGm4LHkhyV!c(DotcB z3ST#BDM%7@_6D5~*+{}89La-KhlBLZ2d&*Hbbnt@??GntxX3hdZe$`jsJYhv<-x(h zK9$;i=LHv|zxg#if4}OA_uc(hfmKa_UXA$6)%MJ>Gi{WO%Ko6pUHGauKxh5?nND3S zI(u7hRQy1_LYP2i1E5?0T%v_AN93mi7YOavp_A|0;BIFN)y%)2qSjM2*$8rqdduHc zmuVlru@T&GG@ee!8(;%$`Fvco?u_$mlU|TwU->#f9GpRcpzsi`n8XMZ3KRh-{&~3( za&a(?!Vn8zD9SUJx}SdXF+IP!`ssT={rM+<{oRu*U9#Kcp%@}TT^#)h1H)1zP>S16 zimH$z$#xiLxSt?dbeKlQPh;GkaC^aBg{O&b=%!1*OI4&3bbsGOOO$YcXVF=uSU}pP zi3xV6K8tm?g78I{ngc}+?oa3R^zMvKa7REN*K_r)$@p|*I=nB<)0pBljU5GI2luMT zQABQhOcA**AFmx&wZkwuReva;Jba(&^oG}+{@7*DgraI?`S$UeBD1w-*Y3;vp zYA+}%Ug}!}-Xd^?0ClrEY8NoFb1Fmgch`u?m1D{!i=0fGy3ioWlqytRi>WGGFTG!? zDPrcbmC{$8afm1Vk@^Bg?U-&Y)Yo~YzD_7EU3P9H*jqES4ac{mP@7*^x@VvFrU9e! 
z0)X$~6}}fXLzk?Su+-ttp_*Ud4e=H4B3Iu7;i-C#xeGX#hh%W7=UQxfsCRs=?WtQ> zQ@06Fr#Rtt7Zc<#+vsJJ``J%0FDZx{zM^Z0jXRxT+Rr;3u%dPKK7kJa+ReHIy1EwN z@-}*0o6dQJIWwQor7BZhf8%sQPHiW0953ppNSSLpg&o&|`Rt5;t9cHD?(8q4vk JaRdV${0C-f^Q-^> diff --git a/src/algorithm/__pycache__/OptSJA.cpython-310.pyc b/src/algorithm/__pycache__/OptSJA.cpython-310.pyc deleted file mode 100644 index d040193f43031533e22ca3c5bc9f80fd28b09c3e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4213 zcmai1&2t<_74M#&nVlVtq}`QlOR*Dw5FkrPgdIo-n8d~=PEtk*wv(b3sAaT0l4rHM zv+5ZsmaCTo3Ky!Vf*VjAMmKWd&YAy#L!YSPK>1%tRe;~?+1*GcRm@bse$(^%_1Am9 z_c~i#3>ltZ{OOi__7Y?Nq>q!2i;o*9`L97FlYGEBRn0rxemfn<)}kY9?RH#SdmZ1_ zwN6bsQhdxhfpn#Zx_*aAU)DZmvX;30u)lC5I*|-y9sd?(C>KyK$w)4uUY4t}A)6ob zPD8fj3R;?y-)HSJPcR?0*&{l81Q-wH7D|2{q+lhNTsp_jgcVLHFb_wwHFlmI^9euk z*U^~auB*;#=+=CJVFY(map-tLWCsP$Ki+J+##ix3WpJ*c>t zYqzXv|Cn>OcL`@y(=PFa0O#4(r6MTA5s&MVPDNCwq8u!-9U4EC-O>Kgtjg@8(MPe8 z-8>oPX^|eJ#i3c~WusBjE7EL~n^wHDqmrGNzPbnTU_Z%Cl#a&xMOP*H{-7}4_8^N3 ziteS~4myQj(t>vn$h{ofXyYARQ$pNCwFe zvbhOApO0z&&vZ;}DrhaIB6RwuelLmTJ2cZX_{^_?xQi}#9lS7yz|zQRIFShLSHL5u z1uk;H@en2A>N#{fWit3?w2e1V@>P(8kxRp!;WBVEAqBaCslzJMVfr{xJ@SRwLtUv@ zi+ceKEPx%u+F%{5&v%(>NpM|wTq%wj2Ws56rR{7!fZCOvr08Z_eLA0mq&g8JiddO2 ziPhk+o5N9z7jI>%XgkKulfky}?7=;~K|#0MiwA>l_Z>_lC%+4@Y9Fh1|NXc%pZ?1; zu}ut*KBIV;bh{?(c88hV9}pjPamw+a`lG%AO((`8#fnZuT?Ba>B`0_c;F`9jC-60U zR=~_1@?C6{hQsW7$}a#hg+nGifispKw#?OT-zU@W`~9F_w+;}N3k3&EdBu$L@jvP> z!kK)@f8)SO!g5Iq$ka&2a=}!lrh}(s>cz6gm~iZ964LlOB&V0o8q;+hUT1r^rO;xP zO%|Y|fpO}3LAuj%>|8X%-$;srqh6d!MS8K40+=kJETc5$W3?xtUMib#{O}w=movRw zHg#yZM%k3s#jCksX3a^Hq;>NPBpqto``zB-x$ft%4!a8UJXiI6Y8;`P(@`;2(V`Vw zEq~ejGtItSDOUBeUeb*t>>}pomSysQ7wWXL?t7&Oq|yn50Xd5yJf5T+zU-T zhf`Vw#;dAe7(cS!Z=BKCxY-D>SwDzX8gC82idahH0UNU1c!-QUiD?n)#i|Ri%L^5! zKt4tAq8c=GgGT6O$@X^IOD!sj$LZKCQ8Pj)ydGFPti%$fGBs;ykpv)ql#b?7wyi>M zpm5a_SiO_xMS=+c6Sm^su60gznWk`srVx%}1=oj2Zi2xW+{283J}Hbp4GgXX3SbMe z!2tjq)jov{bqU?uD~2np%|zRAUUUzj2R}Q@2t>5#dBhRD@uQB5&Ar+Y`&(DBd0YK! 
z#9r&TIH{|K_dB)fw65BgYqQU5#_eT;+_)F{{ z!{3pdER>6u^NJ#^)+m&dWp!(L@4aG zuT-9WmKtc&73Briz11tkeUr$yh`dVVH6q_8@;Z_45cwXFH;9mHs_zn^yji_T#QN+F z;%*Z8J`oD`>Nb%d5V=Ey0->VFtKJ1M{#d2Mc%bf5%MXcM0%9HDLUq-7Z{FJ)=Tmnc_dw|atf%fs9t}~ z$I?`aChB_Xp+(wdkmdqsw(Vf5e6#H+>$KZwJ~7q_6Q<0#nwW>Z$VpKkR38=k(w9nz zZy6twhza&k5x@>}CFn2tzwzK$<3sE1#zg|3R7XvlVd}Hcn49JM>5#I3Tc?60!Me4D p>bz)ucKk1#Z*aZJpWmQ^%V~frNX)4S!qr!TAb2}?F#lZnm0f*( zt7kfLNs&4%Be7rutLkO;Y}g@w0z14wk&r;yAvQ=6VffBl2Mzu`%!mjYA`7a+^D!Xu%S9xregFYrKwfnfttf zvdx=(3FR5S#?SE1CoJgjvwQ_DXX6#Vx~q#1<4(*5tGvb8uk>~8kYP=BSEcUy7dVBs z*=42m(`Yz}LAbq;XOpdA+@r3OkB-VUq`ZSH*66r6D&sm9Gq`bA>zbwald()j$w#s1 zF^pO~G}S=5j+FluS)t9eBW=nGZNNCgE`LxksiS6G)ucJFK-mRb)pnV$tyNJ^!wBrB~GpwuX~|uI)0k*B^}%SFx6Vq|r^|)JRxK&AJK-*c5d9FDUCa%QO90hS}C2?#X074)^J*OXudT zP#%myj72hvhI#49Xc8tVj~|q^{xI8$hT$lBP+HY^dpqjMOq8`;X5)}gXoeAO<)t;J zF}tkCkr*C?Gz1;dRwiWWs|oW(x_W8m@o-!C*xF~BSV8gSs~hjb8-T48DdhlW0>)kiRL!IMVuB#E^6(1MLRd zO=;fHetCXs6&8n9lm1NY!bV$b>J-k@E!@J`XW~j>7DnMhZcMJfp-s)bOM4wPH=0@! zvk%$S>ub}Rv58!3Tf8@C!C`jpuL4o ztyoLPSY55BV~#eTGbyyfw=~FT{S7GHRV_6O=mX7=kZa-xtvalzNGodK>l5F-ccc!w zMzl0Q)m7^&(v!v^i)tj@!om%9>t%;$_tX24;9(x8c>=LcBt#-kCFnRW>%A;ZN!+tE zFWXVSFXDbgwMzK&(ofRyM1~^HCqr49!z7obwLQ!tRCeQoytJe7I8J$K!G9Hbo z-HnIw2(0t6_UI77^4HE#I`e_2WU=(_#1Vg&j&~WCm2V<58>T*gnWb~AGT<8fjNw4~Sz0F+RM~zy2yy7LWd`z|tG|-n(vV9eV)Mk)tt{>@B4PVuO zqyf3swDZ~#o3azVIwZ*0HO2V?Hlbj3eB-WJp_d?S=_BF!{7q=6EuxfXqtcnA$#y12 zr8$b^PH7CYe%B~jRI;s-^-f=hdovnuW=S58z$Yiz@mU+Epwrp9cpVLoorU0pY_K}Z zJ}d|H5a)sX>(VH+BOSWK3Y-m6+@=!x#-Oh{=`0d-c4i%EGy6!3oD;Nrg7!|(wG(t* z`m!O7x+a_FVVAl2sSzzfZ!Ou9%ZChl>OlXkS-X#WB#q(L)RC=%slJ~B4_M)@0VA;6 zGte*__wN-};jC-3Hfp534%cCEmRGc?!!78t4ZZ9fYK7NPs}>C9SsK#;nouw0&}NVJ zqLRTgTaIvo`GSnp`h5 zc@8_r%8(pm+v3ir75ab^PII0t>fY~<@5QCLs^^00`7_t2b=;0GTbPfpw5lEIxWCO) zcjL)(yO#L3T&->*?MTIi?E9~ouxS4{ySk;+rjspd1#U&59QxL^3hC$uPPf6Lqkqot zxxK&B81V3SP4IuMssTssX+M#iokSGI)RvKe zbuUd6dG{jlB$eVj=o2p^E8BBBei9{@Mu5CzEV5pl=SkWxo%fc^rq0bWvgG`mF^ zIa-4BL9{6&FG`TA1A4}JGR#s@rnqXSqE8?!mTm4qC74w_$Xl# 
zShqvvfXHOX<4{!sZzqy-awQUKmxQcgwoPLRjZ2o4YybdG#Io9k#Oe`H+EHAA|B8SI z6nNW73L}8cokke{2j&)8K~h5sh%GBQ;UbDAA3+K zq)`HufyO3ELS+kHg9)!5CmL8Yl@FYOtNaOXR>Nr-(9)aL73E5w{36_kJN4jad(cX^ z@PJBBVSaP4R5*pp*{3?Z5t?lt;l?`fp@1hQO6c{Vg;BMl&J7v`Ec1~XNX=v3Yv8RD zZ>+OCT`K(9a^Wc)Zh%@+RCBh>!5#Pg!QO4+3Jg3`G~iP;4-GZKo^`lF{KB4{J*HNU zsnt2fxP@5eH{h{0Sf8D%xTZj@dA+g#_Uyd8fV*e`V=n^bTj3?hLMwa%)}s|Z3AAg4 z--0Bx!f#`}rwC7hpv3uK;PmwDnIl-1!PA3h2hSb1J%6OhZlS}LTpqko7(}mYDt;;- z)#T;y1#Ze0zj53@PmP1*RnZbR6&Anz^|gVZuc}sB{iPFJL079|#5wSt*XQeB`9C9H zKQ(f`gGE%;bv6OG2I8nvpTN_&Nj0$pSCP;aut2g|fiodxcLSbDoQjLodI?!+^~S^a zL10~1g?Ai~1zwK$Ofe~~s&D~6DvmAnbX=NQ3ZK?ORGa1E8u5DqS>WwQB8d9{Gt&D;v~se7S+q8$S^Di0Av1DJc7ZsGlJ#&0dB`*c91)brMJY9WL zt=siWz27VCI3=MKPlCO8lQLRJe4nznD5J&2_b8)4Ls>O^5^~}BBKoos3{;^Ti^Z!L z1rselAqEPeD4QwDK|V1K}CO-#JC|E^M=D!bYptHLIwg#>4}3_K+TX zj<1FUm8{RUj+pcgDqUUqF@h}ri4U%JP%TcPTkZKKEl#XlLmZ%>Uk@XwYZ!K|3Pe)k zl2L}C)ko{DDH9B(I>^NpYW)SW(t@at55$MmFs6*6ui(Nd7w<~G6N#Aj=zmYH{J7Gd zR}32OEp#&vmhL7a!l&zxX{-7W6e_8&p-ginPjpS{5MmvaazppO@%--9>YvIF=tTMR f$jH@s^pdD|4ELgY9V)SfUpYT?pL6XMi1Pmcj?@F> diff --git a/src/algorithm/__pycache__/ShiftedInverse1.cpython-310.pyc b/src/algorithm/__pycache__/ShiftedInverse1.cpython-310.pyc deleted file mode 100644 index 12366c998341bec87444b85903bc7bd94ec6104b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3629 zcma)9&2J<}6|btU{+JKj<6V0_#wH0_lcbZ?I)O+l60;$!g#=nz>^kHyQZ#B$^~}zC zW_ny*W7eVPoN~?%A`=u?DG5zvm^!<& z3cm2LZ^Z-#-}a@xuXs{bY?m2~D$!)P74|L1;&4ZEocS4vk2)mJA8HG{yLj^mqQI?I z(T&B9rVme9%&qL3bf`Uao%q)Wn?K4^oo_x)@@#apxt-?wCL3=){BU#ohr18%Z|1tQ znGAZF?wd!$&D}@+u1VGRM~9e`zH@tgWTUM?GTc+i-J7&8?Gv$2*B+(`w8j&2duxz& zl0kkKjZ!ZbHq@zGzR;;`y-V;vGs#Vs*ldriGxii5&1zUpGS0D+KMi|dwW7*+C8vmGZ8cc!BLKNN> zJ6OehX*`TR0RE)m%4@QHuHVOZpK#z4j=Vd}W5B4S)>IyB0FmM(JY^JrUx;a7q9Oo# z8O72uzgXpCtl_N!kpubC-qhskeed%fVWxQ7wvwqL0WNehGc?_!{HlTAj3 zV8Q#=gS%Tn>E71d`4fR^cuq7ApqG>DH);{gUFB7N1Lm&td9L5W 
z@FnvSfG*7Y19XVU5e_ApuLGtqE>cAUq|QOV9*ho?p9E#!gk*FRlG!LL#McR9|JDS&OBba*iP&ky{0Xl8|KUivOt9i&8VCWdh*=y;&e~Mbm2v%^~CQyFL3rVY$ZaV0au5t)>tAG*RD$D<~N>F>gf&0k{E8tjO%qSj=|qPh0GXRedglR3wwScz=6 zFMh!_Y7(a@UZ^3V@!rgiKAob8K%ffZ%Ff>GH#&jw&rb2eFSGl(u!v?jqz5#<(moO2 zEen$`@m$Ju6rNNIQn2L2JUw{!lZdj%&&f&%%3q2Du{#qfC=tZ%w=IA26a`|% zkRZM)naiae3UBLSWxY-<-%+3qkcU(@wBcmb?`C>vsYJ=N4YG+r{;*!Mmpgih>yG(a z^Cgd-=e1USh%)4&H$TXJ43@p-|eP49f2=ei>1oUkmHz^H&)%2*HK*?g2b^x znKLi!_KnhRZ_}+D0o7)#^b?Hc--FnwHbk8_z_k+P$~uzUx@$MYEpTsxzkzawMj9w- zKvCi)aB&lCEb(ogzlQb(m;cf4(JthTzyA7H^!(w2XMaSmL&ehhLv5;6@8Qkgg(yJR zg`c33Nl?oqULg}%@WeO4F%(VcWaMOpBq8uA--)Z5{sAf7=e+6M=N##(2CBJSws1`9 z&K3voTv=K2ealnJ_w+ZRX2oRhDpB7!BkC(P@9&|WXG@o9`OEcbdecd7P*7AJIc&4JQ{O8tM=0KlK4*;bbzn=dK+yUwcEp_ zKWevQS?2WjX&N~}Z$SWn4)kPcR7ppYeD7C~9)~<$nQ2oc8ko diff --git a/src/algorithm/__pycache__/ShiftedInverse2.cpython-310.pyc b/src/algorithm/__pycache__/ShiftedInverse2.cpython-310.pyc deleted file mode 100644 index ecbf89ccc3d208f79e2e5dfee0b58cef71559f26..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5436 zcmaJ_&2J<}6|bu9p6>bZc-G&JHzZ0{Q6?MK5QPLq)`lc2L}25Db$~ErHEB=v*lo{r zkE?4o_R<3fZy-U6lpH}q8Xq|9pTLE`fpZ#=% z)+$L?dieUXBz=4X8OSoeWm%C`d@HghXYj4cx}3$gCRgOVT)4+uGjdTbp{5?q%H@5b z-i&6B8OQ8f_HC{{z-riLhtkuhij0T%Exg%toUxdVAU)zcY$WhHD7%{7V7EUQd79_m zF&~vs?`x5lBwuAH2gqSJpNLhGdv;XL%cDwO$=TRBVRx8X&sknsWur%r67(Oi@IicQ$bnm0VN$uP9GN;Qs@@m>=`6GEN8c8J`^K961jkaN+SoGce>(B z=O8!`TkJklWH3$Fh-=?6-Y|<)W=eO$-Z07xk8Pt({{8y;PpENyFU->9X#GZ%?dx=~ ze(R0(8}DuX_~v@1I_qI?CsncD?XPd`#$6rBx05^Q6TNh8aAYbQy*SgIv_H819a^A3 z@(jECMk*vqaXb1M1%g$>eXshizkfJ_d1Sw-=_*i_U|Y=#>*)=~d# z@rq9u9huDP^gG9*mPk+Bm%^G38{~Zx?#@XqB0%SarAUSQTouT{Gjw ziHr_S?O+(Gqsf%bs_|plmT^a$5;721bmXm4(Wx~V%nD$6No0bk7xkk=V{KNZZ9F@n z@h1(&r{%O``LM#uQ{9wFV;e|Z-{r2r%L6AsuErZE)kK9?1j;->z9I0UT;qY56LUzB z;|oGPhXF2}Cn123*CN1OoUUeLenQ^J_rw^cx+nJBJ+JWDaY?dC&Y$LjX|7!4bY)z{ z3vW8%_||juIT0gK)a7g$<&gsvaq)V1fh4dZvCI|-S!Q#1DIDc0ki{EU3onQN!VmL+ 
z`T{*ls?_xz%uL%2(~xBw~&k6k>2Nm&~0xK%hPdjrW%Qlpz;& zbbKc=W!QS#iW#r?1pPRP`|;h#)TynP?zA(lqOfloRF0HN)oINf)qEVPBu=JMwyyBG z>{L!912vC@8aGX%Os!DXyBnz#j-$e4CsI$?s!*x$NL{9U`A(?faJvU+b3!T29QjA6 z+MP7Pa>AH=#4f0lM%`}QiLKW-cjEyzlJ>B*0H2|b)5Og0V42fBx2;DttF4(c|84gM z#_0?WRTFco7ia-)C+%g%Rq4k_(IrzaQX9E|;d|;T6$p-tqyVj(gDsj`y^i|qI+DPh zgLA>*@LLh}i96P;6V_4Jux&LizN&c@UgtG_1uog(3vk4WP;X<14o!0U(Cl<`SSK7H zgBwEZGxZ8NBjsUke9gK|?r6BBy;xUp3B&M5ZW3rWcW_O*xl47pR^6QKd+Np9!L{w- zs->&j*s7MQ|U!r-7X-ss^Xp9NugEl12=7btK1v3)1d~Uw;D-w z2wXoWH*0IoMtSID)FzhF$D5Jq1N6`RFXf14M{qDbevg|%E6?@4Z0VAe}YU>&n|&+9=wYY zo#}{+kafll9qe7wbLa&Q*5%Uu+#-MOPW?i|00n zdkHtkqid^T#Wh+RI4^qSw#UIADK9N%)9E+8P4z=4{S2pp%OxEuppkL$$(l>*M-Twi zL~##S{2+^afGY=>YpO$V6V={Mhaf1`0(E=fH$CHSM>;f)4u@v84`ve&RN9HM3>a+u zcfuqDoEn!%g?fjkzeUNDl)Ofh%}m6B;;^;?mi_mNA*|H^Ke0$GaW9P%B2ku9S;_-j zCKGC1BA^Cr6IE%a-8Ss^2Q;>?VVt4N)Uz}ldCxg?terR5-=QrdZg*v-1_~1JB_Ltp zj5VO-60oo?Kv~2Sgo+k92}75Nw4g?H(e;9P6CO>?o79fuqyU9!q6n8kQ(&S9b{fDv z1PY-8qld|lOGR1xWb|an*3=-2K!Y8C##qd=B;FomGbj`)F@?HBiB&D#ZuT(MPbt?z z(iB!%>S?NYNEfq{Ljbl}?fwNaSpx}iHj7LAG8E$eN9~~U!bWSTAWx?evqNl~p#T5- zF_ZunM*Mr(Mqltnzu-}@U~!r+BC|8Tg$Soisv`bk{dRU#qUZ?zRazzAL^QVy&sr(` z_+=}P%jJT+2YJWJBfTJh6CUj24QxM`qBk7WGUo8d73`_1fh=oSbrmeGU{Q29#EVwJ z|LB+wx+FW{ceGkWopzMPM@A`CailA@AGp=o2%p*6ZS;uQX@>!3uYr$YR$4DbtH(AH zJ@}J!!Wms3&q_p;(pzOG;xqPJLK=#|gFK+9aa6`S%J|yIcAu+%VlNH!s-Mthi}-ed z5z@lgJh0d*^oY8=y28Q&yCx+)Pg)OW$N(uurnT&gFStT*XeGsCC0t0nc5X(G`mW&Q z>3(`>X6Jkxtw4K|RXn!k{Cw6ef^+yrX8Lq{EZ!_U<#`k@+@$25L|n*E>?Lf&-GHKp zPZ_iXr~Fb-kbCAS@E_`7_~CsHG8gv2UEAM20~AGp&cA3p8hnM_eJVN}fL|s$4ttM_ zPn!$pJoIPiqduVI=ai7cs$WoI;gXP2EmJ}ZHV#HcSUBt(&myrY0sds#M>Lki-Bk6} zO%f{=`~-w`JF~FRtf>#Fp6LG4L~Ev{dN2JLP(LkSDx_uH?M5m}fbHf|A#$#& zAfnDdc0pW`_#k2!AuHJPsk&Y&)D_cC2sCh$oayMVsK~yLSy);4W zhyE<_v6*ZMecf6SYE!uF1|?DACFoAwL(>KLM%hBQRRHlrjT=gfF8HDpgp$Q2G44dR9 From e2b5b7e40cd5c72faacb1614db7be0971603c1ce Mon Sep 17 00:00:00 2001 From: PavithaDissanayake Date: Tue, 12 May 2026 23:22:17 +0530 Subject: [PATCH 7/9] Add app.py metrics for rewrite and 
processing time in query execution results --- app.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index cd3943a..72c42d6 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,7 @@ import pandas as pd import matplotlib.pyplot as plt import sys +import re st.set_page_config(layout="wide", page_title="DOP-SQL Interface") @@ -147,17 +148,31 @@ true_res_str = "" noise_res_str = "" + rewrite_time_str = "N/A" + process_time_str = "N/A" for line in out_lines: if line.startswith("true result:"): true_res_str = line.split("true result:")[1].strip() if line.startswith("noise result:"): noise_res_str = line.split("noise result:")[1].strip() + if line.startswith("rewrite time:"): + rewrite_time_str = line.split("rewrite time:")[1].strip() + if line.startswith("process time:"): + process_time_str = line.split("process time:")[1].strip() # --- Visualization (Mirroring Fig 2c of the paper) --- st.subheader(f"3. Results Overview (Mechanism: {q_type})") try: + # Clean up numpy specific formats before parsing + if true_res_str: + true_res_str = re.sub(r"np\.float64\((.*?)\)", r"\1", true_res_str) + if noise_res_str: + noise_res_str = re.sub( + r"np\.float64\((.*?)\)", r"\1", noise_res_str + ) + # Try to parse python lists if output is group-by (e.g. 
[(val1, grp1), (val2, grp2)]) true_vals = ast.literal_eval(true_res_str) noise_vals = ast.literal_eval(noise_res_str) @@ -175,17 +190,29 @@ "Privatized Result": [float(x[0]) for x in noise_vals], } ).set_index("Group") - + st.dataframe(df) st.bar_chart(df) else: # Single Aggregate - st.metric( - "Privatized Output", - f"{float(noise_vals):,.4f}", - delta=f"True: {float(true_vals):,.4f}", - delta_color="off", + true_val_f = float(true_vals) + noise_val_f = float(noise_vals) + rel_error = ( + abs(true_val_f - noise_val_f) / abs(true_val_f) + if true_val_f != 0 + else 0 ) + col_metric1, col_metric2 = st.columns(2) + with col_metric1: + st.metric( + "Privatized Output", + f"{noise_val_f:,.4f}", + delta=f"True: {true_val_f:,.4f}", + delta_color="off", + ) + with col_metric2: + st.metric("Relative Error", f"{rel_error:.4%}") + except Exception as e: # Fallback if output parsing fails (shows raw logs) st.warning( @@ -193,6 +220,28 @@ ) st.text(output_content) + st.divider() + t_col1, t_col2 = st.columns(2) + try: + t_col1.metric( + "Rewrite Time", + ( + f"{float(rewrite_time_str):.4f} s" + if rewrite_time_str != "N/A" + else "N/A" + ), + ) + t_col2.metric( + "Processing Time", + ( + f"{float(process_time_str):.4f} s" + if process_time_str != "N/A" + else "N/A" + ), + ) + except Exception: + pass + with st.expander("View Debug Logs / Rewritten Query"): st.code(output_content) From c9c56a21e564a8714ed6ed855c281ae723d57ccd Mon Sep 17 00:00:00 2001 From: Gayan-Kaushalya Date: Tue, 12 May 2026 23:22:41 +0530 Subject: [PATCH 8/9] Enhance debugging output in query processing and rewriting steps --- main.py | 8 ++++---- src/process.py | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index c227bf5..825fbcd 100644 --- a/main.py +++ b/main.py @@ -151,8 +151,6 @@ def main(): start = time.time() process.rewrite(query, private_relations) - output_file.write("rewritten Query:" + "\n") - output_file.write(prettify(process.rewrite_query)) 
process.get_input_result() end1 = time.time() @@ -163,9 +161,9 @@ def main(): if opt.debug: output_file.write("original Query:" + "\n") output_file.write(prettify(query)) - output_file.write("\n" + "rewritten Query:" + "\n") + output_file.write("\n\n" + "rewritten Query:" + "\n") output_file.write(prettify(process.rewrite_query)) - output_file.write("\n" + "true result:") + output_file.write("\n\n" + "true result:") output_file.write(str(process.true_result)) if process.error is not None: output_file.write("\n" + "error:") @@ -181,6 +179,8 @@ def main(): output_file.write("\n" + "process time:") output_file.write(str(end2 - end1)) + print("Processing completed. Check output file for results.") + # Press the green button in the gutter to run the script. if __name__ == "__main__": diff --git a/src/process.py b/src/process.py index 0d7622b..a472a3e 100644 --- a/src/process.py +++ b/src/process.py @@ -70,7 +70,11 @@ def rewrite(self, query, private_relations): selectstmt = root[0].stmt if not isinstance(selectstmt, ast.SelectStmt): raise Exception + print("Original Query:") + print(stream.RawStream()(selectstmt)) apply_unnest_subqueries(selectstmt) + print("After unnesting subqueries:") + print(stream.RawStream()(selectstmt)) ImplicitJoin()(selectstmt) add_table_name(selectstmt, self.schema)(selectstmt) aggregationVisit()(selectstmt) From 6f0dc455083cdd13a0717ea37ce6cfa7733e6051 Mon Sep 17 00:00:00 2001 From: Gayan-Kaushalya Date: Tue, 12 May 2026 23:23:14 +0530 Subject: [PATCH 9/9] Add evaluation script and test queries for recursive and nested queries --- evaluate.py | 205 ++++++++++++++++++++++++++++++++++++++++++++++++ test_nested.txt | 2 +- test_rec.txt | 12 +++ 3 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 evaluate.py create mode 100644 test_rec.txt diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 0000000..1a54139 --- /dev/null +++ b/evaluate.py @@ -0,0 +1,205 @@ +import subprocess +import configparser +import os +import 
ast +import re +import sys +import pandas as pd +import argparse + + +def parse_output(output_content): + """ + Parses the out.txt content generated by main.py + and extracts metrics along with calculating relative errors. + """ + out_lines = output_content.split("\n") + + q_type = "Unknown" + for line in out_lines: + if line.startswith("Query type:"): + q_type = line.replace("Query type:", "").strip() + break + + true_res_str, noise_res_str = "", "" + rewrite_time_str, process_time_str = "N/A", "N/A" + + for line in out_lines: + if line.startswith("true result:"): + true_res_str = line.split("true result:")[1].strip() + if line.startswith("noise result:"): + noise_res_str = line.split("noise result:")[1].strip() + if line.startswith("rewrite time:"): + rewrite_time_str = line.split("rewrite time:")[1].strip() + if line.startswith("process time:"): + process_time_str = line.split("process time:")[1].strip() + + # Clean numpy types formatting + if true_res_str: + true_res_str = re.sub(r"np\.float64\((.*?)\)", r"\1", true_res_str) + if noise_res_str: + noise_res_str = re.sub(r"np\.float64\((.*?)\)", r"\1", noise_res_str) + + rel_error = None + try: + true_vals = ast.literal_eval(true_res_str) + noise_vals = ast.literal_eval(noise_res_str) + + # If result is a list of tuples (e.g., from Group By queries) + if ( + isinstance(true_vals, list) + and len(true_vals) > 0 + and isinstance(true_vals[0], tuple) + ): + errors = [] + for t, n in zip(true_vals, noise_vals): + t_val = float(t[0]) + n_val = float(n[0]) + if t_val != 0: + errors.append(abs(t_val - n_val) / abs(t_val)) + else: + errors.append(0) + # Calculate Average Relative Error for Group By + rel_error = sum(errors) / len(errors) if errors else 0 + else: + # Single aggregate result + true_val_f = float(true_vals) + noise_val_f = float(noise_vals) + if true_val_f != 0: + rel_error = abs(true_val_f - noise_val_f) / abs(true_val_f) + else: + rel_error = 0 + except Exception: + pass # Leaving relative error as None 
if unparsable + + return { + "query_type": q_type, + "true_result": true_res_str, + "noise_result": noise_res_str, + "relative_error": rel_error, + "rewrite_time": rewrite_time_str, + "process_time": process_time_str, + } + + +def run_experiment( + query_file, relation_file, base_config_file, section, param, values, output_csv +): + """ + Runs the main.py pipeline over a list of parameter values, logs them, + and exports them to a CSV. + """ + print(f"Starting evaluation: Varying {section}.{param} for {query_file}") + results = [] + + config = configparser.ConfigParser() + config.read(base_config_file) + + for val in values: + print(f" -> Testing {param} = {val}...", end="", flush=True) + + # Add section if not present & set specific config variable + if not config.has_section(section): + config.add_section(section) + config.set(section, param, str(val)) + + # Temporary config/output files specifically for evaluating + temp_config = f"temp_eval_{param}.config" + temp_out = f"temp_eval_out_{param}.txt" + + with open(temp_config, "w") as f: + config.write(f) + + cmd = [ + sys.executable, + "main.py", + "--d", + "config/database.ini", + "--q", + query_file, + "--r", + relation_file, + "--c", + temp_config, + "--o", + temp_out, + "--debug", + ] + + # Run main.py with modified configs + run_result = subprocess.run(cmd, capture_output=True, text=True) + + if os.path.exists(temp_out): + with open(temp_out, "r") as f: + out_content = f.read() + + parsed = parse_output(out_content) + parsed["changed_parameter"] = param + parsed["parameter_value"] = val + parsed["success"] = run_result.returncode == 0 + if not parsed["success"]: + parsed["error_log"] = run_result.stderr.strip() + + results.append(parsed) + os.remove(temp_out) + print(" Done") + else: + print(" Failed") + results.append( + { + "changed_parameter": param, + "parameter_value": val, + "success": False, + "error_log": run_result.stderr.strip(), + } + ) + + if os.path.exists(temp_config): + os.remove(temp_config) 
+ + # Dump to CSV + df = pd.DataFrame(results) + + # Reorder columns slightly for better readability + cols = [ + "success", + "changed_parameter", + "parameter_value", + "query_type", + "relative_error", + "true_result", + "noise_result", + "rewrite_time", + "process_time", + ] + # add remaining columns (like error_log if any failed) + cols.extend([c for c in df.columns if c not in cols]) + df = df[cols] + + df.to_csv(output_csv, index=False) + print(f"Results successfully saved to {output_csv}\n") + + +if __name__ == "__main__": + # --- Example 1: Vary Epsilon for a Nested Query --- + run_experiment( + query_file="test_rec.txt", + relation_file="test_relation.txt", + base_config_file="config/parameter.config", + section="global", + param="epsilon", + values=[0.1, 0.2, 0.5, 0.75, 1.0, 2.0, 5.0, 10.0], + output_csv="eval_rec6_epsilon.csv", + ) + + # # --- Example 2: Vary Recursion Bound for a Recursive Query --- + # # Using 'test.txt' as a placeholder since it might contain a recursive statement + # run_experiment( + # query_file="test_rec.txt", + # relation_file="test_relation.txt", + # base_config_file="config/parameter.config", + # section="global", + # param="recursion_bound", + # values=[2, 3, 4, 5], + # output_csv="eval_recursion_bound.csv", + # diff --git a/test_nested.txt b/test_nested.txt index fd1e67a..b08dd86 100644 --- a/test_nested.txt +++ b/test_nested.txt @@ -3,5 +3,5 @@ FROM lineitem WHERE l_orderkey IN ( SELECT o_orderkey FROM orders - WHERE o_custkey = 123 + WHERE o_custkey > 1 AND o_custkey < 200 ); diff --git a/test_rec.txt b/test_rec.txt new file mode 100644 index 0000000..05ef0e0 --- /dev/null +++ b/test_rec.txt @@ -0,0 +1,12 @@ +WITH RECURSIVE order_path AS ( + SELECT o_orderkey, o_custkey, 1 AS depth + FROM orders + WHERE o_orderkey > 1 AND o_orderkey < 500 + + UNION ALL + + SELECT orders.o_orderkey, orders.o_custkey, r.depth + 1 + FROM orders, order_path r + WHERE orders.o_custkey = r.o_custkey +) +SELECT COUNT(*) FROM order_path; \ No 
newline at end of file