-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdb_calls.php
More file actions
237 lines (214 loc) · 7.1 KB
/
db_calls.php
File metadata and controls
237 lines (214 loc) · 7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
<?php
include_once 'db_config.php';
// Result of the field-type inference. The "generateStatistics" branch below
// sets $isNumerical and $isCategorical; $isGeographical and
// $isUnknownDataType are only initialised in this file (presumably read by
// the included templates — TODO confirm).
$isNumerical = false;
$isCategorical = false;
$isGeographical = false;
$isUnknownDataType = false;

// Grouping state: whether the result is aggregated per group, and the list of
// columns selectGroupByField() considered as group-by candidates.
$groupByMode = false;
$groupByCandidateFields = array();
$aggregator_field = null;

// Task requested by the client; "nothing" matches none of the handlers below.
$task = isset($_POST['task']) ? $_POST['task'] : "nothing";
// Pattern fragment for one unquoted MySQL identifier: ASCII letters, digits,
// "_", "$" and U+0080..U+FFFF. Table names supplied by the client are checked
// against it before being spliced into SQL, because identifiers cannot be
// bound as query parameters.
$identifierPattern = '[0-9A-Za-z_$\x{0080}-\x{FFFF}]+';

// Dispatch on the requested task. Every branch writes an HTML fragment
// straight to the response.
if ($task === "getUniqueFieldValues") {
    // Lists the distinct values of one column as <li> options. Each option's
    // name attribute is "table~field~value".
    $table = isset($_POST['table']) ? (string) $_POST['table'] : '';
    $field = isset($_POST['field']) ? (string) $_POST['field'] : '';
    $name = "$table~$field~";

    // The table name is used unquoted, so accept only a plain or
    // schema-qualified identifier; anything else yields an empty list.
    $tableIsSafe = preg_match(
        '/\A' . $identifierPattern . '(?:\.' . $identifierPattern . ')?\z/u',
        $table
    ) === 1;

    $result = array();
    if ($tableIsSafe && $field !== '') {
        // The field is back-quoted, so doubling embedded backticks keeps the
        // whole value inside the identifier.
        $strSQL = "SELECT DISTINCT(`" . str_replace('`', '``', $field) . "`) FROM $table;";
        $result = execute($strSQL);
    }

    // Escape for HTML: values come from the database and the request and are
    // placed in both attribute and text context.
    $escapedName = htmlspecialchars($name, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    ?>
<ul class='options'>
<?php foreach ($result as $value):
    $escapedValue = htmlspecialchars((string) $value[$field], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>
<li name="<?php echo $escapedName . $escapedValue; ?>"><?php echo $escapedValue; ?>
</li>
<?php endforeach; ?>
</ul>
<?php
} else if ($task === "getJoinFields") {
    // Renders one radio button per distinct column name found in the selected
    // tables; the first button is pre-checked.
    $table = isset($_POST['tables']) ? (array) $_POST['tables'] : array();

    // Table names end up inside single-quoted SQL literals; keep only plain
    // identifiers so a name can never terminate the literal.
    $safeTables = array();
    foreach ($table as $candidate) {
        if (is_string($candidate)
            && preg_match('/\A' . $identifierPattern . '\z/u', $candidate) === 1) {
            $safeTables[] = $candidate;
        }
    }

    // implode() takes the separator first; the reversed legacy order was
    // removed in PHP 8. The explicit alias pins the result key to lower case
    // regardless of how the server reports information_schema column headers.
    $sql = "select distinct(column_name) as column_name from information_schema.columns where table_schema = '$DBName' AND table_name in ('" . implode("','", $safeTables) . "') order by table_name,ordinal_position";
    $result = execute($sql);
    $default = "checked";
    foreach ($result as $row) {
        $columnName = htmlspecialchars((string) $row['column_name'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        ?>
<input type="radio" name="joinFields" <?php echo $default; ?> onclick="updateJoinField(this)" value="<?php echo $columnName; ?>" />
<?php echo $columnName; ?>
<br />
<?php
        $default = "";
    }
} else if ($task === "generateStatistics") {
    // Request inputs. They are set up before query_creator.php is included;
    // that script is not visible here and is presumably what consumes them and
    // defines $table, $field, $value and $joinQuery, none of which is assigned
    // in this file — TODO confirm.
    // NOTE(review): $chooseField and $groupByField are interpolated into the
    // SQL below unvalidated, as are the query parts query_creator.php builds;
    // validate them there as well.
    $names = explode("|", isset($_POST["name"]) ? $_POST["name"] : "");
    $joinField = isset($_POST["joinField"]) ? $_POST["joinField"] : null;
    $chooseField = isset($_POST['chooseField']) ? $_POST['chooseField'] : null;
    $chooseType = isset($_POST['chooseType']) ? $_POST['chooseType'] : null;
    $prevTable = "";
    $queryFrom = "";
    $queryWhere = "";
    // A missing or empty groupByField means "let the engine pick one".
    $groupByField = (isset($_POST['groupByField']) && $_POST['groupByField'] !== '')
        ? $_POST['groupByField']
        : null;
    include_once 'query_creator.php';

    // Rule-based inference, as intended by the original author:
    //   - categorical chosen field: count rows per distinct value
    //     (pie chart above 15 groups, bar chart otherwise);
    //   - numerical chosen field: an X axis is needed, so average the field
    //     per group-by column. The column is auto-selected unless the user
    //     supplied one; a geographic group-by column maps to a map, anything
    //     else to a bar chart.
    // Only the query selection happens here; the chart choice is made in the
    // included templates, which are not visible from this file.
    if (isNumerical($table, $queryFrom, $queryWhere, $chooseField)) {
        echo "<p>Infered Visualization Field Type: <b>Numerical</b></p>";
        $isCategorical = false;
        $isNumerical = true;
        $groupByMode = true;
        // $val_bool is not read in this file; kept in case an included
        // template relies on it.
        $val_bool = is_null($groupByField);
        if ($val_bool) {
            $groupByField = selectGroupByField($table, $chooseField, $queryFrom, $queryWhere, $field, $value);
        }
    } else {
        echo "<p>Infered Visualization Field Type: <b>Categorical</b></p>";
        $isCategorical = true;
        $isNumerical = false;
        $groupByMode = false;
    }

    // Frequency of each distinct value of the chosen field. Built in both
    // modes, executed only in categorical mode.
    $mainQuery = "SELECT temp." . $chooseField. ", COUNT(0) as value " .
        "FROM (" . $joinQuery .") AS temp ".
        "GROUP BY temp." . $chooseField . " ORDER BY value DESC";

    if ($groupByMode) {
        // Average of the numerical field per group.
        $aggregateQuery = "SELECT temp.$groupByField, avg(temp.$chooseField) as value " .
            "FROM (SELECT * FROM $queryFrom WHERE $queryWhere) AS temp " .
            "GROUP BY temp.$groupByField";
        $result = execute($aggregateQuery);
    } else {
        $result = execute($mainQuery);
    }

    include_once 'viz_generator.php';
    include_once 'groupbyselector.php';
    include_once 'viz_type_changer.php';
}
/**
 * Heuristically decides whether a column holds numeric data.
 *
 * Runs "SELECT $tab.$selectedField FROM $from WHERE $where" ordered by the
 * column descending and inspects a sample of the rows at a fixed stride of
 * floor(rows / 11). The stride is never below 1, so result sets with fewer
 * than 11 rows are inspected in full instead of only their first row.
 * Values recognised by isNotApplicableValue() are ignored.
 *
 * The arguments are interpolated into the SQL as-is; callers must pass
 * trusted identifiers and clauses.
 *
 * @param string $tab           table that qualifies the column
 * @param string $from          FROM clause body
 * @param string $where         WHERE clause body
 * @param string $selectedField column to inspect
 * @return bool false as soon as a sampled value is neither "not applicable"
 *              nor numeric; true otherwise (including an empty result set)
 */
function isNumerical($tab, $from, $where, $selectedField) {
    $sql = "SELECT " . $tab . "." . $selectedField .
        " FROM " . $from .
        " WHERE " . $where .
        " ORDER BY $tab.$selectedField DESC";
    // Re-index so sampling by position works whatever keys execute() returns.
    $rows = array_values(execute($sql));
    $total = count($rows);

    // A stride of 0 would pin the cursor on row 0 for small result sets.
    $stride = max(1, (int) floor($total / 11));

    for ($position = 0; $position < $total; $position += $stride) {
        // Cast first: a NULL column value must not reach trim().
        $trimmed = trim((string) $rows[$position][0]);
        if (!isNotApplicableValue($trimmed) && !is_numeric($trimmed)) {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether a value is a "no data" marker rather than real content.
 *
 * Recognised markers, compared case-insensitively after trimming surrounding
 * whitespace: null, the empty string, "na", "n/a", "null" and
 * "not applicable".
 *
 * @param mixed $val value to classify
 * @return bool true when the value is one of the markers above
 */
function isNotApplicableValue($val) {
    // Test for null before any string conversion; afterwards it can no longer
    // be told apart from the empty string.
    if (is_null($val)) {
        return true;
    }
    $normalized = strtolower(trim((string) $val));
    return in_array($normalized, array("", "na", "n/a", "null", "not applicable"), true);
}
// function isCategorical($selectedField) {
// $catFields = array("ethnicity","gender", "county", "subgroup");
// foreach ($catFields as $cf) {
// $lc = strtolower($selectedField);
// if (strpos(strtolower($selectedField), $cf) !== false) {
// return true;
// }
// }
// return false;
// }
/**
 * Reports whether a field name denotes a geographic dimension.
 *
 * The name is trimmed and lower-cased, then has to equal one of "city",
 * "county", "district", "state" or "country" exactly; partial matches do
 * not count.
 *
 * @param string $selectedField field name to test
 * @return bool true for a geographic field name
 */
function isGeographical($selectedField) {
    $normalized = strtolower(trim($selectedField));
    $knownGeoNames = array("city", "county", "district", "state", "country");
    return in_array($normalized, $knownGeoNames, true);
}
/**
 * Auto-selects the column to group a numerical field by.
 *
 * Every column of $t other than $where_field that isNumerical() rejects is
 * appended to the global $groupByCandidateFields. For each candidate the
 * rows matching "$where_field = '$where_value'" are grouped by it, the
 * distinct $cf values are counted per group, and the standard deviation of
 * those counts is taken. The candidate with the smallest deviation wins;
 * ties keep the first one seen. Candidates whose deviation is NULL (no rows
 * matched) never win over a candidate with a real deviation; if no candidate
 * has one, the first candidate is returned.
 *
 * The choice is also echoed as an HTML paragraph.
 *
 * NOTE(review): $where_value is placed inside a quoted SQL literal without
 * escaping, and the deviation query filters on $where_field/$where_value
 * rather than on $from/$where — confirm both against the caller.
 *
 * @param string $t           table whose columns are considered
 * @param string $cf          chosen (numerical) field
 * @param string $from        FROM clause body, forwarded to isNumerical()
 * @param string $where       WHERE clause body, forwarded to isNumerical()
 * @param string $where_field column used as the equality filter
 * @param string $where_value value the filter column must equal
 * @return string|null selected column, or null when there is no candidate
 */
function selectGroupByField($t, $cf, $from, $where, $where_field, $where_value) {
    global $groupByCandidateFields, $DBName;

    // Restrict the lookup to the application's schema when it is known, as
    // the getJoinFields query does; otherwise a same-named table in another
    // schema would contribute columns.
    $getColSql = "SELECT column_name FROM information_schema.columns WHERE table_name = '$t'";
    if (!empty($DBName)) {
        $getColSql .= " AND table_schema = '$DBName'";
    }
    $cols = execute($getColSql);

    $gbf = null;            // best candidate so far
    $minStdDev = null;      // its deviation; null until one has been measured
    $firstCandidate = null; // fallback when no candidate has a deviation

    foreach ($cols as $col) {
        $column = $col[0];
        if ((string) $where_field === (string) $column
            || isNumerical($t, $from, $where, $column)) {
            continue;
        }

        $stdDevSql = "SELECT STDDEV(ct.c) as stddev FROM ".
            "(SELECT count(distinct $cf) AS c FROM $t".
            " WHERE $where_field = '$where_value' GROUP BY $column) AS ct;";
        $stdDevLoc = execute($stdDevSql);

        $groupByCandidateFields[] = $column;
        if ($firstCandidate === null) {
            $firstCandidate = $column;
        }

        // STDDEV over zero rows is NULL. Under PHP's loose ordering NULL
        // sorts below every number, so it must be skipped explicitly.
        $stdDev = isset($stdDevLoc[0][0]) ? $stdDevLoc[0][0] : null;
        if (!is_numeric($stdDev)) {
            continue;
        }
        $stdDev = (float) $stdDev;
        if ($minStdDev === null || $stdDev < $minStdDev) {
            $minStdDev = $stdDev;
            $gbf = $column;
        }
    }

    if ($gbf === null) {
        $gbf = $firstCandidate;
    }

    $shown = htmlspecialchars((string) $gbf, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo "<p>Inference Engine Auto-selected Group By Field: <b>$shown</b></p>";
    return $gbf;
}
?>