Obsessed with Boba? Analyzing Bubble Tea Shops in NYC Using the Yelp Fusion API

Exploratory Data Analysis

# # imports for Google Colab Sessions
# !apt install gdal-bin python-gdal python3-gdal 
# # Install rtree - Geopandas requirement
# !apt install python3-rtree 
# # Install Geopandas
# !pip install git+git://github.com/geopandas/geopandas.git
# # Install descartes - Geopandas requirement
# !pip install descartes 

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
# remote copy of the dataset on GitHub; usable from a Google Colab session
url = 'https://raw.githubusercontent.com/mebauer/boba-nyc/master/teabook/boba-nyc.csv'
df = pd.read_csv(filepath_or_buffer=url)

# # alternative: read a local copy of the data instead
# df = pd.read_csv('boba-nyc.csv')
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# preview last five rows
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
409 QBebLBqhiuDlWasPgp6s4Q tbaar-flushing-5 TBaar https://s3-media1.fl.yelpcdn.com/bphoto/bgao-b... False https://www.yelp.com/biz/tbaar-flushing-5?adju... 2 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 3.5 {'latitude': 40.759357, 'longitude': -73.830396} ['pickup', 'delivery'] NaN {'address1': '135-52 Roosevelt Ave', 'address2... NaN NaN 15054.917573 40.759357 -73.830396 POINT (-73.83039599999999 40.759357)
410 7apqk24GsFC9dptoD5dDZg uncleman-cafe-new-york Uncleman Cafe NaN False https://www.yelp.com/biz/uncleman-cafe-new-yor... 1 [{'alias': 'desserts', 'title': 'Desserts'}, {... 1.0 {'latitude': 40.71781, 'longitude': -73.99795} [] NaN {'address1': '122 Mulberry St', 'address2': No... 1.212967e+10 (212) 966-6206 1414.238245 40.717810 -73.997950 POINT (-73.99795 40.71781)
411 Vz6hbICqQfyMWq9wzAKakw tbaar-flushing-4 TBaar https://s3-media1.fl.yelpcdn.com/bphoto/phUYWa... False https://www.yelp.com/biz/tbaar-flushing-4?adju... 6 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7613116, 'longitude': -73.8308... [] NaN {'address1': '37-11 Main St', 'address2': '', ... NaN NaN 15147.410738 40.761312 -73.830812 POINT (-73.8308122 40.7613116)
412 YBgAPbviWYtVP2gAUhc7_A t-baar-flushing TBaar https://s3-media4.fl.yelpcdn.com/bphoto/8pk-_Q... False https://www.yelp.com/biz/t-baar-flushing?adjus... 8 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 3.5 {'latitude': 40.7587509, 'longitude': -73.830162} [] $ {'address1': '135-36 40th Rd', 'address2': '',... NaN NaN 15045.970192 40.758751 -73.830162 POINT (-73.830162 40.7587509)
413 f5UVQXvDpOrrIZdCjsw47w maxins-cafe-flushing Maxin's Cafe https://s3-media1.fl.yelpcdn.com/bphoto/S3vWvD... False https://www.yelp.com/biz/maxins-cafe-flushing?... 15 [{'alias': 'bakeries', 'title': 'Bakeries'}, {... 3.5 {'latitude': 40.7585, 'longitude': -73.83064} [] $ {'address1': '135-24 40th Rd', 'address2': '',... 1.718887e+10 (718) 886-9200 14997.842205 40.758500 -73.830640 POINT (-73.83064 40.7585)
# report the dimensions of the dataframe
rows = len(df.index)
columns = len(df.columns)
print('number of rows: {}\nnumber of columns: {}'.format(rows, columns))
number of rows: 414
number of columns: 19
# review concise summary of data
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 19 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             414 non-null    object 
 1   alias          414 non-null    object 
 2   name           414 non-null    object 
 3   image_url      409 non-null    object 
 4   is_closed      414 non-null    bool   
 5   url            414 non-null    object 
 6   review_count   414 non-null    int64  
 7   categories     414 non-null    object 
 8   rating         414 non-null    float64
 9   coordinates    414 non-null    object 
 10  transactions   414 non-null    object 
 11  price          258 non-null    object 
 12  location       414 non-null    object 
 13  phone          362 non-null    float64
 14  display_phone  362 non-null    object 
 15  distance       414 non-null    float64
 16  latitude       414 non-null    float64
 17  longitude      414 non-null    float64
 18  geometry       414 non-null    object 
dtypes: bool(1), float64(5), int64(1), object(12)
memory usage: 58.7+ KB
# identifying number of nulls and percentage of total per column
# ser1: null count per column, largest first
ser1 = df.isnull().sum().sort_values(ascending=False)
# ser2: the same counts expressed as a percentage of all rows (2 decimals)
ser2 = ((ser1 / len(df)) * 100).round(2)

# side-by-side table; `keys` supplies the column labels
pd.concat([ser1, ser2], axis=1, keys=['null_count', 'null_perc'])
null_count null_perc
price 156 37.68
display_phone 52 12.56
phone 52 12.56
image_url 5 1.21
id 0 0.00
transactions 0 0.00
longitude 0 0.00
latitude 0 0.00
distance 0 0.00
location 0 0.00
coordinates 0 0.00
alias 0 0.00
rating 0 0.00
categories 0 0.00
review_count 0 0.00
url 0 0.00
is_closed 0 0.00
name 0 0.00
geometry 0 0.00
# descriptive statistics of numeric columns
review_count rating phone distance latitude longitude
count 414.000000 414.000000 3.620000e+02 414.000000 414.000000 414.000000
mean 89.722222 3.985507 1.613962e+10 8980.359386 40.716170 -73.941997
std 116.765312 0.608481 2.375761e+09 5684.076763 0.058255 0.084129
min 1.000000 1.000000 1.201432e+10 953.875131 40.537609 -74.166080
25% 16.250000 3.500000 1.347732e+10 3954.891025 40.689505 -73.994920
50% 49.000000 4.000000 1.718286e+10 8254.243639 40.731610 -73.975096
75% 109.000000 4.500000 1.718886e+10 13019.588280 40.757300 -73.880179
max 740.000000 5.000000 1.934235e+10 25132.157979 40.886959 -73.702410
# descriptive statistics of string/object columns
count unique top freq
id 414 414 Q3fmTHT7zilDWtfzLK9lMA 1
alias 414 414 truedan-new-york-3 1
name 414 259 Kung Fu Tea 26
image_url 409 409 https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... 1
url 414 414 https://www.yelp.com/biz/truedan-new-york-3?ad... 1
categories 414 177 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 115
coordinates 414 409 {'latitude': 40.71697, 'longitude': -73.99471} 2
transactions 414 5 ['pickup', 'delivery'] 229
price 258 3 $ 166
location 414 411 {'address1': '90-15 Queens Blvd', 'address2': ... 2
display_phone 362 357 (718) 395-8297 2
geometry 414 409 POINT (-73.99471 40.71697) 2
# sanity check: every value in the `id` column should appear exactly once
id_is_unique = df['id'].is_unique
print('id is unique: {}'.format(id_is_unique))
id is unique: True
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# identify number of unique bubble tea shop entries
#
# Build a two-column frame: business `name` and the number of rows (`counts`)
# carrying that name. Both column labels are set explicitly instead of
# renaming whatever `value_counts().reset_index()` happens to emit:
# pandas < 2.0 labels the columns ['index', 'name'], while pandas >= 2.0
# labels them ['name', 'count'], so a rename map keyed on the old labels
# would turn the business-name column into 'counts' on newer versions.
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='counts')
)

print('number of unique bubble tea shops: {}'.format(len(names_counts)))
# save file
name_counts_file_path = '../teaapp/name_counts.csv'
# view dataframe
number of unique bubble tea shops: 259
name counts
0 Kung Fu Tea 26
1 Vivi Bubble Tea 25
2 Gong Cha 24
3 CoCo Fresh Tea & Juice 10
4 Möge Tee 9
... ... ...
254 Chokolat Patisserie & Culture Tea Bar 1
255 Siips Bubble 1
256 Yummy Boba Desserts 1
257 Bread & Joe 1
258 Maxin's Cafe 1

259 rows × 2 columns

index name
0 Kung Fu Tea 26
1 Vivi Bubble Tea 25
2 Gong Cha 24
3 CoCo Fresh Tea & Juice 10
4 Möge Tee 9
... ... ...
254 Chokolat Patisserie & Culture Tea Bar 1
255 Siips Bubble 1
256 Yummy Boba Desserts 1
257 Bread & Joe 1
258 Maxin's Cafe 1

259 rows × 2 columns

# Number of rows per business name, as a frame with columns `names` and
# `counts`. The labels are assigned explicitly so the result does not depend
# on the pandas version: `value_counts().reset_index()` yields
# ['index', 'name'] before pandas 2.0 but ['name', 'count'] from 2.0 on,
# which would make a rename keyed on 'index'/'name' mislabel the columns.
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('names')
    .reset_index(name='counts')
)

# create an 8x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(8, 6))

# NOTE(review): no plotting call is visible in this cell — presumably the
# `names_counts` data was drawn onto `ax` here; confirm against the notebook.

# title for the current axes
plt.title('Number of bubble tea shops by business in nyc', fontsize=15)
# mean `review_count` per business name, sorted from highest to lowest,
# rounded to two decimals and flattened back into a regular dataframe
review_count_df = (
    df.groupby(by='name')['review_count']
    .mean()
    .sort_values(ascending=False)
    .round(2)
    .reset_index()
)

name review_count
0 Ten Ren's Tea Time 656.0
1 Hanco's 519.0
2 Ajisen Ramen 438.0
3 Teado Tea Shop 428.0
4 520 Dessert 358.0
# create an 8x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(8, 6))

# NOTE(review): no plotting call is visible in this cell — presumably
# `review_count_df` was drawn onto `ax` here; confirm against the notebook.

# title for the current axes
plt.title('Average number of reviews per business in nyc', fontsize=15)
# the 20 rows (individual locations) with the largest `review_count`
most_reviewed = df.sort_values(by='review_count', ascending=False).iloc[:20]

id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
170 bn0zZ9nT_j1INOwfpxWmEw tiger-sugar-flushing tigersugar https://s3-media2.fl.yelpcdn.com/bphoto/hai87B... False https://www.yelp.com/biz/tiger-sugar-flushing?... 740 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.75922023369396, 'longitude': -... ['pickup', 'delivery'] $$ {'address1': '40-10 Main St', 'address2': 'Ste... 1.347732e+10 (347) 732-4001 15055.920826 40.759220 -73.830305 POINT (-73.83030496537685 40.75922023369396)
90 NqnYnZtajJ4Y-Lmxa26VdA hancos-brooklyn Hanco's https://s3-media3.fl.yelpcdn.com/bphoto/_vQWWX... False https://www.yelp.com/biz/hancos-brooklyn?adjus... 704 [{'alias': 'vietnamese', 'title': 'Vietnamese'... 4.0 {'latitude': 40.6870318621828, 'longitude': -7... ['pickup', 'delivery'] $ {'address1': '134 Smith St', 'address2': '', '... 1.718859e+10 (718) 858-6818 2073.182530 40.687032 -73.990429 POINT (-73.9904292380982 40.6870318621828)
51 WxNlqGSsj_2TCodkx9Sa4A vivi-bubble-tea-new-york-6 Vivi Bubble Tea https://s3-media2.fl.yelpcdn.com/bphoto/Fw3SYB... False https://www.yelp.com/biz/vivi-bubble-tea-new-y... 658 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.715099, 'longitude': -73.997535} ['pickup', 'delivery'] $ {'address1': '49 Bayard St', 'address2': '', '... 1.212567e+10 (212) 566-6833 1114.073275 40.715099 -73.997535 POINT (-73.997535 40.715099)
45 SqVHETmH6bf3rPKmdftvvw ten-rens-tea-time-new-york-2 Ten Ren's Tea Time https://s3-media1.fl.yelpcdn.com/bphoto/UChdn5... False https://www.yelp.com/biz/ten-rens-tea-time-new... 656 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.7163819, 'longitude': -73.9982... ['delivery'] $$ {'address1': '73 Mott St', 'address2': '', 'ad... 1.212733e+10 (212) 732-7178 1261.450313 40.716382 -73.998268 POINT (-73.99826830000001 40.7163819)
266 ZLo3HRSSvN--Mw4eCBLxCg mango-mango-dessert-flushing-3 Mango Mango Dessert https://s3-media3.fl.yelpcdn.com/bphoto/X7bnQ9... False https://www.yelp.com/biz/mango-mango-dessert-f... 622 [{'alias': 'desserts', 'title': 'Desserts'}, {... 4.0 {'latitude': 40.7603809317017, 'longitude': -7... ['pickup', 'delivery'] $ {'address1': '136-28 39th Ave', 'address2': ''... 1.917563e+10 (917) 563-1847 15199.500511 40.760381 -73.829452 POINT (-73.8294516357502 40.7603809317017)
# create an 8x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(8, 6))

# NOTE(review): no plotting call is visible in this cell — presumably
# `most_reviewed` was drawn onto `ax` here; confirm against the notebook.

# title for the current axes
plt.title('Most reviews per business location in nyc', fontsize=15)
count    414.000000
mean       3.985507
std        0.608481
min        1.000000
25%        3.500000
50%        4.000000
75%        4.500000
max        5.000000
Name: rating, dtype: float64
# create an 8x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(8, 6))

# NOTE(review): no plotting call is visible in this cell — presumably a
# count of the `rating` column was drawn onto `ax` here; confirm.

# title for the current axes
plt.title('Count of Yelp ratings per business location in nyc', fontsize=15)
# frequency of each non-null `price` value, as a two-column dataframe
price_df = (
    df['price']
    .dropna()
    .value_counts()
    .reset_index()
)
# label the columns positionally: first the price value, then its frequency
price_df.columns = ['price', 'counts']

price counts
0 $ 166
1 $$ 91
2 $$$$ 1
# replace each price string with the number of '$' characters it contains
# (the dollar sign is escaped because the pattern is a regular expression)
price_df['price'] = price_df['price'].str.count(pat='\\$')

price counts
0 1 166
1 2 91
2 4 1
# create an 8x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(8, 6))

# NOTE(review): no plotting call is visible in this cell — presumably
# `price_df` was drawn onto `ax` here; confirm against the notebook.

# title for the current axes
plt.title('Yelp price level (1 = $) per business location in NYC', fontsize=15)
# shapefile export served by the NYC Open Data portal; read straight from
# the URL into a GeoDataFrame (note: this rebinds `url`)
url = 'https://data.cityofnewyork.us/api/geospatial/cpf4-rkhq?method=export&format=Shapefile'
neighborhoods = gpd.read_file(filename=url)

boro_code boro_name county_fip ntacode ntaname shape_area shape_leng geometry
0 4.0 Queens 081 QN51 Murray Hill 5.248828e+07 33266.904856 POLYGON ((-73.80379 40.77561, -73.80099 40.775...
1 4.0 Queens 081 QN27 East Elmhurst 1.972685e+07 19816.711894 POLYGON ((-73.86110 40.76366, -73.85993 40.762...
2 4.0 Queens 081 QN41 Fresh Meadows-Utopia 2.777485e+07 22106.431272 POLYGON ((-73.77758 40.73019, -73.77849 40.729...
3 1.0 Manhattan 061 MN17 Midtown-Midtown South 3.019153e+07 27032.700375 POLYGON ((-73.97301 40.76428, -73.97141 40.763...
4 2.0 Bronx 005 BX09 Soundview-Castle Hill-Clason Point-Harding Park 5.198380e+07 67340.977626 MULTIPOLYGON (((-73.88064 40.81852, -73.88098 ...
<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich
# reproject the neighborhood geometries to EPSG:4326, the same CRS the
# shop points are created with
neighborhoods = neighborhoods.to_crs(crs='EPSG:4326')

id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# turn the plain dataframe into a GeoDataFrame: one point per row built
# from its longitude (x) and latitude (y), tagged as EPSG:4326
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(x=df['longitude'], y=df['latitude']),
    crs=4326,
)

id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.99622 40.71917)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.99710 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.98454 40.72060)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775 40.72880)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919 40.73552)
join_df = gpd.sjoin(gdf, 

id alias name image_url is_closed url review_count categories rating coordinates ... longitude geometry index_right boro_code boro_name county_fip ntacode ntaname shape_area shape_leng
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ... -73.996220 POINT (-73.99622 40.71917) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
16 DbhBca8LzskjS0B-L4QfZg tiger-sugar-new-york tigersugar https://s3-media3.fl.yelpcdn.com/bphoto/5wNVyj... False https://www.yelp.com/biz/tiger-sugar-new-york?... 515 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.717891007822274, 'longitude': ... ... -73.999780 POINT (-73.99978 40.71789) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
29 bxCjNLVDQggNTbpviV0RMw lazy-sundaes-new-york-4 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/iDul5E... False https://www.yelp.com/biz/lazy-sundaes-new-york... 65 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7189689, 'longitude': -74.0006... ... -74.000677 POINT (-74.00068 40.71897) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
60 jsJdz6pvyYZjtut9vkUwBw yaya-tea-grand-new-york-2 Yaya Tea Grand https://s3-media2.fl.yelpcdn.com/bphoto/iutBCc... False https://www.yelp.com/biz/yaya-tea-grand-new-yo... 295 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7192268371582, 'longitude': -7... ... -73.996277 POINT (-73.99628 40.71923) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
72 uge-Lp_kY1J6Z4CzzceULw honestea-new-york Honestea https://s3-media1.fl.yelpcdn.com/bphoto/77X7Xd... False https://www.yelp.com/biz/honestea-new-york?adj... 104 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 3.5 {'latitude': 40.71789, 'longitude': -73.99692} ... -73.996920 POINT (-73.99692 40.71789) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527

5 rows × 27 columns

# number of joined rows (counted via `id`) per neighborhood, largest first;
# `shape_area` is part of the group key so it survives the aggregation
join_df = (
    join_df.groupby(by=['ntaname', 'shape_area'])['id']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={'id': 'counts'})
)

# density: counts divided by the area scaled down by 5280 ** 2 (= 27,878,400,
# the number of square feet in a square mile)
# NOTE(review): this assumes `shape_area` is in square feet — confirm
join_df['counts_squaremile'] = join_df['counts'].div(
    join_df['shape_area'].div(5280 ** 2)
)

ntaname shape_area counts counts_squaremile
0 Flushing 3.786424e+07 40 29.450898
1 Chinatown 1.450188e+07 28 53.827184
2 Sunset Park East 2.711007e+07 23 23.651849
3 Hunters Point-Sunnyside-West Maspeth 1.024191e+08 19 5.171787
4 Elmhurst 3.267480e+07 17 14.504535
# create a 10x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(10, 6))
# top 20 neighborhoods ranked by number of locations
data = join_df.sort_values(by='counts', ascending=False).head(20)

# NOTE(review): no plotting call is visible in this cell — presumably `data`
# was drawn onto `ax` here; confirm against the notebook.

# title for the current axes
plt.title('Most bubble tea locations per neighborhood in NYC', fontsize=15)

# write the current figure to disk as a PNG at 200 dpi
plt.savefig('busineses-per-neighborhood.png', dpi=200)
# create a 10x6 inch figure holding a single axes
fig, ax = plt.subplots(figsize=(10, 6))
# top 20 neighborhoods ranked by locations per square mile
data = join_df.sort_values(by='counts_squaremile', ascending=False).head(20)

# NOTE(review): no plotting call is visible in this cell — presumably `data`
# was drawn onto `ax` here; confirm against the notebook.

# figure-level title, positioned manually via the x/y figure coordinates
plt.suptitle('Most bubble tea locations per square mile by neighborhood in NYC',
             y=.96, x=.60)
plt.xlabel('count per square mile')

# Save under its own file name. This cell previously wrote to
# 'busineses-per-neighborhood.png', the same path the per-neighborhood count
# figure is saved to earlier in the notebook, silently overwriting it.
plt.savefig('busineses-per-squaremile-by-neighborhood.png', dpi=200)