-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathZOMATO_DATA_EXPLORATION.sql
More file actions
187 lines (124 loc) · 5.03 KB
/
ZOMATO_DATA_EXPLORATION.sql
File metadata and controls
187 lines (124 loc) · 5.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
-- Switch the session to the Zomato database.
USE [Zomato];

-- Column names and data types of the Zomato_Dataset table.
SELECT
    COLUMN_NAME,
    DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'Zomato_Dataset';

-- Distinct (database, table) pairs that expose at least one column.
-- INFORMATION_SCHEMA views describe the current database only.
SELECT DISTINCT
    TABLE_CATALOG,
    TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS;

-- Full column metadata for everything visible through INFORMATION_SCHEMA.
SELECT *
FROM INFORMATION_SCHEMA.COLUMNS;

-- Raw look at the dataset.
SELECT *
FROM [dbo].[Zomato_Dataset];
-- CHECKING FOR DUPLICATES
-- Lists only the RestaurantID values that occur more than once; an empty
-- result means the column holds no duplicates.
-- COUNT(*) counts rows, so a NULL RestaurantID repeated across rows is
-- reported as well (COUNT([RestaurantID]) would show 0 for that group).
SELECT
    [RestaurantID],
    COUNT(*) AS OCCURRENCES
FROM [dbo].[Zomato_Dataset]
GROUP BY [RestaurantID]
HAVING COUNT(*) > 1
ORDER BY OCCURRENCES DESC;
-- COUNTRY CODE COLUMN
-- Lookup table holding COUNTRY_CODE / COUNTRY pairs.
SELECT *
FROM [dbo].[country_code];

-- Country codes that occur in the dataset.
SELECT DISTINCT [CountryCode]
FROM [dbo].[Zomato_Dataset];

SELECT *
FROM [dbo].[Zomato_Dataset];

-- Preview of the code -> country name mapping that the UPDATE below applies.
SELECT DISTINCT
    A.[CountryCode],
    B.[COUNTRY]
FROM [dbo].[Zomato_Dataset] AS A
INNER JOIN [dbo].[country_code] AS B
    ON A.[CountryCode] = B.[COUNTRY_CODE]
ORDER BY A.[CountryCode];

-- Add the column that will hold the country name.
-- The batch separator is required: a batch is compiled as a whole, so an
-- UPDATE that references COUNTRY_NAME in the same batch as the ALTER TABLE
-- fails with "Invalid column name" before anything runs.
ALTER TABLE [dbo].[Zomato_Dataset] ADD COUNTRY_NAME VARCHAR(50);
GO

-- Fill COUNTRY_NAME from the lookup table (UPDATE with JOIN).
-- Rows whose CountryCode has no match in country_code keep COUNTRY_NAME = NULL.
UPDATE A
SET COUNTRY_NAME = B.[COUNTRY]
FROM [dbo].[Zomato_Dataset] AS A
INNER JOIN [dbo].[country_code] AS B
    ON A.[CountryCode] = B.[COUNTRY_CODE];

-- Inspect the result.
SELECT *
FROM [dbo].[Zomato_Dataset];
-- CITY COLUMN
-- City names that contain a literal '?' character (candidates for repair).
SELECT DISTINCT [City]
FROM [dbo].[Zomato_Dataset]
WHERE [City] LIKE '%?%';

-- Preview of the repaired names: every '?' becomes 'i'.
SELECT REPLACE([City], '?', 'i')
FROM [Zomato_Dataset]
WHERE [City] LIKE '%?%';

-- Apply the same replacement in place, touching only the affected rows.
-- NOTE(review): assumes every '?' stands for the letter 'i' - confirm against the source data.
UPDATE [dbo].[Zomato_Dataset]
SET [City] = REPLACE([City], '?', 'i')
FROM [Zomato_Dataset]
WHERE [City] LIKE '%?%';

-- Number of rows per city, listed country by country.
SELECT
    [COUNTRY_NAME],
    [City],
    COUNT([City]) AS TOTAL_REST
FROM [dbo].[Zomato_Dataset]
GROUP BY
    [COUNTRY_NAME],
    [City]
ORDER BY
    [COUNTRY_NAME],
    [City],
    TOTAL_REST DESC;
-- LOCALITY COLUMN
-- Rows where COUNTRY_NAME = 'INDIA': count per (city, locality), plus a
-- running total of those counts accumulated through each city's localities.
SELECT
    [City],
    [Locality],
    COUNT([Locality]) AS COUNT_LOCALITY,
    SUM(COUNT([Locality])) OVER (
        PARTITION BY [City]
        ORDER BY [City], [Locality]
    ) AS ROLL_COUNT
FROM [dbo].[Zomato_Dataset]
WHERE [COUNTRY_NAME] = 'INDIA'
GROUP BY
    [Locality],
    [City]
ORDER BY
    [City],
    [Locality],
    COUNT_LOCALITY DESC;
-- DROP COLUMNS [Address] and [LocalityVerbose]
-- ([Locality] is not dropped here.)
ALTER TABLE [dbo].[Zomato_Dataset]
    DROP COLUMN [Address];

ALTER TABLE [dbo].[Zomato_Dataset]
    DROP COLUMN [LocalityVerbose];
-- CUISINES COLUMN
-- Missing-value check: how many rows have a NULL or blank cuisine.
-- COUNT(*) is used on purpose: COUNT([Cuisines]) skips NULLs, so the NULL
-- group would always be reported with a count of 0.
SELECT
    [Cuisines],
    COUNT(*) AS MISSING_COUNT
FROM [dbo].[Zomato_Dataset]
WHERE [Cuisines] IS NULL
   OR [Cuisines] = ' '
GROUP BY [Cuisines]
ORDER BY MISSING_COUNT DESC;

-- Number of rows per cuisine value, rarest first.
-- COUNT(*) again so that a NULL cuisine group shows its real row count.
SELECT
    [Cuisines],
    COUNT(*) AS CuisinesCount
FROM [dbo].[Zomato_Dataset]
GROUP BY [Cuisines]
ORDER BY CuisinesCount ASC;
-- CURRENCY COLUMN
-- Number of rows with a non-NULL currency, per currency, most frequent first.
SELECT
    [Currency],
    COUNT([Currency]) AS TransactionCount
FROM [dbo].[Zomato_Dataset]
GROUP BY [Currency]
ORDER BY TransactionCount DESC;
-- YES/NO COLUMNS
-- Distinct values present in each flag column.
SELECT DISTINCT [Has_Table_booking]
FROM [dbo].[Zomato_Dataset];

SELECT DISTINCT [Has_Online_delivery]
FROM [dbo].[Zomato_Dataset];

SELECT DISTINCT [Is_delivering_now]
FROM [dbo].[Zomato_Dataset];

SELECT DISTINCT [Switch_to_order_menu]
FROM [dbo].[Zomato_Dataset];

-- DROP COLUMN [Switch_to_order_menu]
ALTER TABLE [dbo].[Zomato_Dataset]
    DROP COLUMN [Switch_to_order_menu];

-- PRICE RANGE COLUMN
-- Distinct price range values present in the data.
SELECT DISTINCT [Price_range]
FROM [dbo].[Zomato_Dataset];
-- VOTES COLUMN (CHECKING MIN, MAX, AVG OF VOTE COLUMN)
-- Convert the column to INT once; the statements after the batch separator
-- are then compiled against the converted column and need no per-query cast.
ALTER TABLE [dbo].[Zomato_Dataset] ALTER COLUMN [Votes] INT;
GO

-- AVG over an INT column uses integer division and silently truncates the
-- mean, so the average is computed on a DECIMAL cast instead.
SELECT
    MIN([Votes]) AS MIN_VT,
    AVG(CAST([Votes] AS DECIMAL(18, 2))) AS AVG_VT,
    MAX([Votes]) AS MAX_VT
FROM [dbo].[Zomato_Dataset];

-- COST COLUMN
-- NOTE(review): FLOAT is approximate; if exact amounts matter, a DECIMAL type
-- may be the better target - confirm before changing the column type.
ALTER TABLE [dbo].[Zomato_Dataset] ALTER COLUMN [Average_Cost_for_two] FLOAT;
GO

-- Min / average / max cost for two, per currency.
-- The values are aggregated as stored: casting them back to INT would drop
-- any fractional part and truncate the average.
SELECT
    [Currency],
    MIN([Average_Cost_for_two]) AS MIN_CST,
    AVG([Average_Cost_for_two]) AS AVG_CST,
    MAX([Average_Cost_for_two]) AS MAX_CST
FROM [dbo].[Zomato_Dataset]
--WHERE [Currency] LIKE '%U%'  -- optional filter while exploring
GROUP BY [Currency];
-- RATING COLUMN
-- Min / average / max rating.
-- DECIMAL without precision and scale means DECIMAL(18, 0), which rounds
-- every rating to a whole number before it is used; DECIMAL(3, 1) keeps one
-- decimal place.
-- NOTE(review): assumes ratings have at most one decimal digit and stay below 100 - confirm.
-- MIN and MAX are cast as well so they compare numbers even if the column
-- is still stored as text at this point.
SELECT
    MIN(CAST([Rating] AS DECIMAL(3, 1))) AS MIN_RATING,
    ROUND(AVG(CAST([Rating] AS DECIMAL(3, 1))), 1) AS AVG_RATING,
    MAX(CAST([Rating] AS DECIMAL(3, 1))) AS MAX_RATING
FROM [dbo].[Zomato_Dataset];

-- Ratings of 4 and above. With the scale preserved, a value such as 3.5 is
-- no longer rounded up to 4 and wrongly included.
SELECT
    CAST([Rating] AS DECIMAL(3, 1)) AS NUM
FROM [dbo].[Zomato_Dataset]
WHERE CAST([Rating] AS DECIMAL(3, 1)) >= 4;
-- Convert [Rating] to a numeric type that keeps one decimal place.
-- A bare DECIMAL is DECIMAL(18, 0): it would permanently round every stored
-- rating to a whole number, and the 2.5 / 3.5 / 4.5 thresholds below would
-- then be compared against rounded values.
-- NOTE(review): assumes ratings have at most one decimal digit and stay below 100 - confirm.
ALTER TABLE [dbo].[Zomato_Dataset] ALTER COLUMN [Rating] DECIMAL(3, 1);
GO

-- Ratings of 4 and above, now compared numerically without a cast.
SELECT
    [Rating]
FROM [dbo].[Zomato_Dataset]
WHERE [Rating] >= 4;

-- Preview of the rating buckets.
-- Ratings below 1 (and NULL ratings) match no branch and yield NULL.
SELECT
    [Rating],
    CASE
        WHEN [Rating] >= 1   AND [Rating] < 2.5 THEN 'POOR'
        WHEN [Rating] >= 2.5 AND [Rating] < 3.5 THEN 'GOOD'
        WHEN [Rating] >= 3.5 AND [Rating] < 4.5 THEN 'GREAT'
        WHEN [Rating] >= 4.5                    THEN 'EXCELLENT'
        ELSE NULL
    END AS RATE_CATEGORY
FROM [dbo].[Zomato_Dataset];
-- Add the column that will store the rating bucket.
-- The batch separator is required: a batch is compiled as a whole, so an
-- UPDATE that references RATE_CATEGORY in the same batch as the ALTER TABLE
-- fails with "Invalid column name" before anything runs.
ALTER TABLE [dbo].[Zomato_Dataset] ADD RATE_CATEGORY VARCHAR(20);
GO

SELECT *
FROM [dbo].[Zomato_Dataset];

-- UPDATING THE NEWLY ADDED COLUMN FROM AN EXISTING COLUMN (UPDATE WITH CASE-WHEN)
-- Intentionally has no WHERE clause: every row is bucketed.
-- Ratings below 1 (and NULL ratings) match no branch and are left as NULL.
UPDATE [dbo].[Zomato_Dataset]
SET [RATE_CATEGORY] =
    CASE
        WHEN [Rating] >= 1   AND [Rating] < 2.5 THEN 'POOR'
        WHEN [Rating] >= 2.5 AND [Rating] < 3.5 THEN 'GOOD'
        WHEN [Rating] >= 3.5 AND [Rating] < 4.5 THEN 'GREAT'
        WHEN [Rating] >= 4.5                    THEN 'EXCELLENT'
        ELSE NULL
    END;