Obsessed with Boba? Analyzing Bubble Tea Shops in NYC Using the Yelp Fusion API

Obsessed with Boba? Analyzing Bubble Tea Shops in NYC Using the Yelp Fusion API

Exploratory Data Analysis

# # imports for Google Colab Sessions
# !apt install gdal-bin python-gdal python3-gdal 
# # Install rtree - Geopandas requirement
# !apt install python3-rtree 
# # Install Geopandas
# !pip install git+https://github.com/geopandas/geopandas.git
# # Install descartes - Geopandas requirement
# !pip install descartes 

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
sns.set(color_codes=True)
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\__init__.py:89: UserWarning: pyproj unable to set database path.
  _pyproj_global_context_initialize()
# remote copy of the dataset (raw CSV on GitHub); also reachable from Google Colab
url = (
    'https://raw.githubusercontent.com/'
    'mebauer/boba-nyc/master/teabook/boba-nyc.csv'
)
df = pd.read_csv(url)

# # local path to data
# df = pd.read_csv('boba-nyc.csv')
df.head()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# preview last five rows
df.tail()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
409 QBebLBqhiuDlWasPgp6s4Q tbaar-flushing-5 TBaar https://s3-media1.fl.yelpcdn.com/bphoto/bgao-b... False https://www.yelp.com/biz/tbaar-flushing-5?adju... 2 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 3.5 {'latitude': 40.759357, 'longitude': -73.830396} ['pickup', 'delivery'] NaN {'address1': '135-52 Roosevelt Ave', 'address2... NaN NaN 15054.917573 40.759357 -73.830396 POINT (-73.83039599999999 40.759357)
410 7apqk24GsFC9dptoD5dDZg uncleman-cafe-new-york Uncleman Cafe NaN False https://www.yelp.com/biz/uncleman-cafe-new-yor... 1 [{'alias': 'desserts', 'title': 'Desserts'}, {... 1.0 {'latitude': 40.71781, 'longitude': -73.99795} [] NaN {'address1': '122 Mulberry St', 'address2': No... 1.212967e+10 (212) 966-6206 1414.238245 40.717810 -73.997950 POINT (-73.99795 40.71781)
411 Vz6hbICqQfyMWq9wzAKakw tbaar-flushing-4 TBaar https://s3-media1.fl.yelpcdn.com/bphoto/phUYWa... False https://www.yelp.com/biz/tbaar-flushing-4?adju... 6 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7613116, 'longitude': -73.8308... [] NaN {'address1': '37-11 Main St', 'address2': '', ... NaN NaN 15147.410738 40.761312 -73.830812 POINT (-73.8308122 40.7613116)
412 YBgAPbviWYtVP2gAUhc7_A t-baar-flushing TBaar https://s3-media4.fl.yelpcdn.com/bphoto/8pk-_Q... False https://www.yelp.com/biz/t-baar-flushing?adjus... 8 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 3.5 {'latitude': 40.7587509, 'longitude': -73.830162} [] $ {'address1': '135-36 40th Rd', 'address2': '',... NaN NaN 15045.970192 40.758751 -73.830162 POINT (-73.830162 40.7587509)
413 f5UVQXvDpOrrIZdCjsw47w maxins-cafe-flushing Maxin's Cafe https://s3-media1.fl.yelpcdn.com/bphoto/S3vWvD... False https://www.yelp.com/biz/maxins-cafe-flushing?... 15 [{'alias': 'bakeries', 'title': 'Bakeries'}, {... 3.5 {'latitude': 40.7585, 'longitude': -73.83064} [] $ {'address1': '135-24 40th Rd', 'address2': '',... 1.718887e+10 (718) 886-9200 14997.842205 40.758500 -73.830640 POINT (-73.83064 40.7585)
# report the dataframe's dimensions
rows, columns = df.shape
print(f'number of rows: {rows}\nnumber of columns: {columns}')
number of rows: 414
number of columns: 19
# review concise summary of data
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 19 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             414 non-null    object 
 1   alias          414 non-null    object 
 2   name           414 non-null    object 
 3   image_url      409 non-null    object 
 4   is_closed      414 non-null    bool   
 5   url            414 non-null    object 
 6   review_count   414 non-null    int64  
 7   categories     414 non-null    object 
 8   rating         414 non-null    float64
 9   coordinates    414 non-null    object 
 10  transactions   414 non-null    object 
 11  price          258 non-null    object 
 12  location       414 non-null    object 
 13  phone          362 non-null    float64
 14  display_phone  362 non-null    object 
 15  distance       414 non-null    float64
 16  latitude       414 non-null    float64
 17  longitude      414 non-null    float64
 18  geometry       414 non-null    object 
dtypes: bool(1), float64(5), int64(1), object(12)
memory usage: 58.7+ KB
# identifying the number of nulls per column and their share of all rows
# (as a percentage, rounded to 2 decimals), largest first
ser1 = df.isnull().sum().sort_values(ascending=False)
ser2 = ((ser1 / len(df)) * 100).round(2)

pd.concat(
    [ser1.rename('null_count'), ser2.rename('null_perc')],
    axis=1,
)
null_count null_perc
price 156 37.68
display_phone 52 12.56
phone 52 12.56
image_url 5 1.21
id 0 0.00
transactions 0 0.00
longitude 0 0.00
latitude 0 0.00
distance 0 0.00
location 0 0.00
coordinates 0 0.00
alias 0 0.00
rating 0 0.00
categories 0 0.00
review_count 0 0.00
url 0 0.00
is_closed 0 0.00
name 0 0.00
geometry 0 0.00
# descriptive statistics of numeric columns
df.describe()
review_count rating phone distance latitude longitude
count 414.000000 414.000000 3.620000e+02 414.000000 414.000000 414.000000
mean 89.722222 3.985507 1.613962e+10 8980.359386 40.716170 -73.941997
std 116.765312 0.608481 2.375761e+09 5684.076763 0.058255 0.084129
min 1.000000 1.000000 1.201432e+10 953.875131 40.537609 -74.166080
25% 16.250000 3.500000 1.347732e+10 3954.891025 40.689505 -73.994920
50% 49.000000 4.000000 1.718286e+10 8254.243639 40.731610 -73.975096
75% 109.000000 4.500000 1.718886e+10 13019.588280 40.757300 -73.880179
max 740.000000 5.000000 1.934235e+10 25132.157979 40.886959 -73.702410
# descriptive statistics of string/object columns
df.describe(include=['O']).T
count unique top freq
id 414 414 Q3fmTHT7zilDWtfzLK9lMA 1
alias 414 414 truedan-new-york-3 1
name 414 259 Kung Fu Tea 26
image_url 409 409 https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... 1
url 414 414 https://www.yelp.com/biz/truedan-new-york-3?ad... 1
categories 414 177 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 115
coordinates 414 409 {'latitude': 40.71697, 'longitude': -73.99471} 2
transactions 414 5 ['pickup', 'delivery'] 229
price 258 3 $ 166
location 414 411 {'address1': '90-15 Queens Blvd', 'address2': ... 2
display_phone 362 357 (718) 395-8297 2
geometry 414 409 POINT (-73.99471 40.71697) 2
# sanity check: the `id` column should contain no duplicate values
id_is_unique = df['id'].is_unique
print(f'id is unique: {id_is_unique}')
id is unique: True
df.head()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# identify number of unique bubble tea shop entries
# Count how many rows (locations) share each business name, most common first.
# The column labels are set explicitly via rename_axis / reset_index(name=...)
# instead of renaming whatever defaults value_counts().reset_index() produces:
# those defaults differ between pandas 1.x ('index', 'name') and pandas 2.x
# ('name', 'count'), so a rename mapping only works on one of them.
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='counts')
)

print('number of unique bubble tea shops: {}'.format(len(names_counts)))
# save file (the positional index is written as the first, unnamed column)
name_counts_file_path = '../teaapp/name_counts.csv'
names_counts.to_csv(name_counts_file_path)
# view dataframe
names_counts
number of unique bubble tea shops: 259
name counts
0 Kung Fu Tea 26
1 Vivi Bubble Tea 25
2 Gong Cha 24
3 CoCo Fresh Tea & Juice 10
4 Möge Tee 9
... ... ...
254 Chokolat Patisserie & Culture Tea Bar 1
255 Siips Bubble 1
256 Yummy Boba Desserts 1
257 Bread & Joe 1
258 Maxin's Cafe 1

259 rows × 2 columns

df['name'].value_counts().reset_index(drop=False)
index name
0 Kung Fu Tea 26
1 Vivi Bubble Tea 25
2 Gong Cha 24
3 CoCo Fresh Tea & Juice 10
4 Möge Tee 9
... ... ...
254 Chokolat Patisserie & Culture Tea Bar 1
255 Siips Bubble 1
256 Yummy Boba Desserts 1
257 Bread & Joe 1
258 Maxin's Cafe 1

259 rows × 2 columns

# Locations per business name, most common first, with columns
# ['names', 'counts']. Labels are assigned explicitly so the result is the
# same on pandas 1.x and 2.x (the default labels produced by
# value_counts().reset_index() changed in pandas 2.0).
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('names')
    .reset_index(name='counts')
)

fig, ax = plt.subplots(figsize=(8, 6))

# horizontal bars for the ten names with the most locations
sns.barplot(x='counts',
            y="names",
            data=names_counts.head(10),
            ax=ax)

plt.title('Number of bubble tea shops by business in nyc', fontsize=15)
plt.tight_layout()
_images/boba-analysis-nyc_13_0.png
# mean review count per business name, highest first, rounded to 2 decimals
review_count_df = (
    df.groupby(by='name')['review_count']
    .mean()
    .sort_values(ascending=False)
    .round(2)
    .reset_index()
)

review_count_df.head()
name review_count
0 Ten Ren's Tea Time 656.0
1 Hanco's 519.0
2 Ajisen Ramen 438.0
3 Teado Tea Shop 428.0
4 520 Dessert 358.0
# horizontal bar chart of the 20 names with the highest average review count
fig, ax = plt.subplots(figsize=(8, 6))

sns.barplot(
    data=review_count_df.head(20),
    x="review_count",
    y="name",
    ax=ax,
)

ax.set_title('Average number of reviews per business in nyc', fontsize=15)
fig.tight_layout()
C:\Users\datal\AppData\Local\Temp\ipykernel_20044\1422840523.py:9: UserWarning: Glyph 27792 (\N{CJK UNIFIED IDEOGRAPH-6C90}) missing from current font.
  plt.tight_layout()
C:\Users\datal\AppData\Local\Temp\ipykernel_20044\1422840523.py:9: UserWarning: Glyph 30333 (\N{CJK UNIFIED IDEOGRAPH-767D}) missing from current font.
  plt.tight_layout()
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\pylabtools.py:151: UserWarning: Glyph 27792 (\N{CJK UNIFIED IDEOGRAPH-6C90}) missing from current font.
  fig.canvas.print_figure(bytes_io, **kw)
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\pylabtools.py:151: UserWarning: Glyph 30333 (\N{CJK UNIFIED IDEOGRAPH-767D}) missing from current font.
  fig.canvas.print_figure(bytes_io, **kw)
_images/boba-analysis-nyc_15_1.png
# the 20 individual rows (locations) with the highest review counts
by_reviews_desc = df.sort_values(by='review_count', ascending=False)
most_reviewed = by_reviews_desc.head(20)

most_reviewed.head()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
170 bn0zZ9nT_j1INOwfpxWmEw tiger-sugar-flushing tigersugar https://s3-media2.fl.yelpcdn.com/bphoto/hai87B... False https://www.yelp.com/biz/tiger-sugar-flushing?... 740 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.75922023369396, 'longitude': -... ['pickup', 'delivery'] $$ {'address1': '40-10 Main St', 'address2': 'Ste... 1.347732e+10 (347) 732-4001 15055.920826 40.759220 -73.830305 POINT (-73.83030496537685 40.75922023369396)
90 NqnYnZtajJ4Y-Lmxa26VdA hancos-brooklyn Hanco's https://s3-media3.fl.yelpcdn.com/bphoto/_vQWWX... False https://www.yelp.com/biz/hancos-brooklyn?adjus... 704 [{'alias': 'vietnamese', 'title': 'Vietnamese'... 4.0 {'latitude': 40.6870318621828, 'longitude': -7... ['pickup', 'delivery'] $ {'address1': '134 Smith St', 'address2': '', '... 1.718859e+10 (718) 858-6818 2073.182530 40.687032 -73.990429 POINT (-73.9904292380982 40.6870318621828)
51 WxNlqGSsj_2TCodkx9Sa4A vivi-bubble-tea-new-york-6 Vivi Bubble Tea https://s3-media2.fl.yelpcdn.com/bphoto/Fw3SYB... False https://www.yelp.com/biz/vivi-bubble-tea-new-y... 658 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.715099, 'longitude': -73.997535} ['pickup', 'delivery'] $ {'address1': '49 Bayard St', 'address2': '', '... 1.212567e+10 (212) 566-6833 1114.073275 40.715099 -73.997535 POINT (-73.997535 40.715099)
45 SqVHETmH6bf3rPKmdftvvw ten-rens-tea-time-new-york-2 Ten Ren's Tea Time https://s3-media1.fl.yelpcdn.com/bphoto/UChdn5... False https://www.yelp.com/biz/ten-rens-tea-time-new... 656 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.7163819, 'longitude': -73.9982... ['delivery'] $$ {'address1': '73 Mott St', 'address2': '', 'ad... 1.212733e+10 (212) 732-7178 1261.450313 40.716382 -73.998268 POINT (-73.99826830000001 40.7163819)
266 ZLo3HRSSvN--Mw4eCBLxCg mango-mango-dessert-flushing-3 Mango Mango Dessert https://s3-media3.fl.yelpcdn.com/bphoto/X7bnQ9... False https://www.yelp.com/biz/mango-mango-dessert-f... 622 [{'alias': 'desserts', 'title': 'Desserts'}, {... 4.0 {'latitude': 40.7603809317017, 'longitude': -7... ['pickup', 'delivery'] $ {'address1': '136-28 39th Ave', 'address2': ''... 1.917563e+10 (917) 563-1847 15199.500511 40.760381 -73.829452 POINT (-73.8294516357502 40.7603809317017)
# horizontal bar chart of review counts, one bar per location alias
fig, ax = plt.subplots(figsize=(8, 6))

sns.barplot(
    data=most_reviewed,
    x="review_count",
    y="alias",
    ax=ax,
)

ax.set_title('Most reviews per business location in nyc', fontsize=15)
fig.tight_layout()
_images/boba-analysis-nyc_17_0.png
df['rating'].describe()
count    414.000000
mean       3.985507
std        0.608481
min        1.000000
25%        3.500000
50%        4.000000
75%        4.500000
max        5.000000
Name: rating, dtype: float64
# bar chart of how many rows carry each distinct rating value
fig, ax = plt.subplots(figsize=(8, 6))

# draw on the axes created above explicitly (as the other plot cells do)
# rather than relying on matplotlib's implicit "current axes"
sns.countplot(data=df,
              x="rating",
              ax=ax)

plt.title('Count of Yelp ratings per business location in nyc', fontsize=15)
plt.tight_layout()
_images/boba-analysis-nyc_19_0.png
# tally rows by price tier, skipping rows where no price is listed
price_df = (
    df['price']
    .dropna()
    .value_counts()
    .reset_index()
)
price_df.columns = ['price', 'counts']

price_df
price counts
0 $ 166
1 $$ 91
2 $$$$ 1
# turn each '$' tier string into an integer by counting its dollar signs
# ('$' is escaped because str.count treats the pattern as a regex)
dollar_signs = price_df['price'].str.count(r'\$')
price_df['price'] = dollar_signs

price_df
price counts
0 1 166
1 2 91
2 4 1
# vertical bar chart: number of locations at each numeric price level
fig, ax = plt.subplots(figsize=(8, 6))

sns.barplot(
    data=price_df,
    x="price",
    y="counts",
    ax=ax,
)

ax.set_title('Yelp price level (1 = $) per business location in NYC', fontsize=15)
fig.tight_layout()
_images/boba-analysis-nyc_22_0.png
# shapefile export from data.cityofnewyork.us, read straight from the URL
# (presumably the city's neighborhood boundaries - the result is stored as
# `neighborhoods`; confirm against the dataset page)
url = (
    'https://data.cityofnewyork.us/api/geospatial/cpf4-rkhq'
    '?method=export&format=Shapefile'
)
neighborhoods = gpd.read_file(url)

neighborhoods.head()
boro_code boro_name county_fip ntacode ntaname shape_area shape_leng geometry
0 4.0 Queens 081 QN51 Murray Hill 5.248828e+07 33266.904856 POLYGON ((-73.80379 40.77561, -73.80099 40.775...
1 4.0 Queens 081 QN27 East Elmhurst 1.972685e+07 19816.711894 POLYGON ((-73.86110 40.76366, -73.85993 40.762...
2 4.0 Queens 081 QN41 Fresh Meadows-Utopia 2.777485e+07 22106.431272 POLYGON ((-73.77758 40.73019, -73.77849 40.729...
3 1.0 Manhattan 061 MN17 Midtown-Midtown South 3.019153e+07 27032.700375 POLYGON ((-73.97301 40.76428, -73.97141 40.763...
4 2.0 Bronx 005 BX09 Soundview-Castle Hill-Clason Point-Harding Park 5.198380e+07 67340.977626 MULTIPOLYGON (((-73.88064 40.81852, -73.88098 ...
neighborhoods.crs
<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich
# Reproject the neighborhood geometries to EPSG:4326 and rebind the name to
# the reprojected copy.
# NOTE(review): in the recorded run this call raised
# "CRSError ... proj_create: no database context specified", and an earlier
# pyproj warning said it was "unable to set database path". That looks like
# a PROJ installation problem in that environment rather than a problem with
# this call - confirm by re-running in a working environment.
neighborhoods = neighborhoods.to_crs('EPSG:4326')

# display the resulting CRS
neighborhoods.crs
---------------------------------------------------------------------------
CRSError                                  Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_20044\3555093168.py in <module>
----> 1 neighborhoods = neighborhoods.to_crs('EPSG:4326')
      2 
      3 neighborhoods.crs

~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\geodataframe.py in to_crs(self, crs, epsg, inplace)
   1273         else:
   1274             df = self.copy()
-> 1275         geom = df.geometry.to_crs(crs=crs, epsg=epsg)
   1276         df.geometry = geom
   1277         df.crs = geom.crs

~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\geoseries.py in to_crs(self, crs, epsg)
   1118         """
   1119         return GeoSeries(
-> 1120             self.values.to_crs(crs=crs, epsg=epsg), index=self.index, name=self.name
   1121         )
   1122 

~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\array.py in to_crs(self, crs, epsg)
    766             )
    767         if crs is not None:
--> 768             crs = CRS.from_user_input(crs)
    769         elif epsg is not None:
    770             crs = CRS.from_epsg(epsg)

~\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\crs\crs.py in from_user_input(cls, value, **kwargs)
    477         if isinstance(value, cls):
    478             return value
--> 479         return cls(value, **kwargs)
    480 
    481     def get_geod(self) -> Optional[Geod]:

~\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\crs\crs.py in __init__(self, projparams, **kwargs)
    324             self._local.crs = projparams
    325         else:
--> 326             self._local.crs = _CRS(self.srs)
    327 
    328     @property

pyproj\_crs.pyx in pyproj._crs._CRS.__init__()

CRSError: Invalid projection: EPSG:4326: (Internal Proj Error: proj_create: no database context specified)
df.head()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.9962197 40.7191742)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.9971 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.984539 40.720597)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775000000001 40.7288)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919100000001 40.735516)
# build point geometries from the longitude (x) / latitude (y) columns and
# wrap the dataframe as a GeoDataFrame tagged with EPSG code 4326
shop_points = gpd.points_from_xy(df.longitude, df.latitude)
gdf = gpd.GeoDataFrame(df, crs=4326, geometry=shop_points)

gdf.head()
id alias name image_url is_closed url review_count categories rating coordinates transactions price location phone display_phone distance latitude longitude geometry
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ['pickup', 'delivery'] $$ {'address1': '208 Grand St', 'address2': None,... 1.646559e+10 (646) 559-2886 1535.241614 40.719174 -73.996220 POINT (-73.99622 40.71917)
1 7-bx74TooPuZKZDNW4WFcQ chun-yang-tea-new-york Chun Yang Tea https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... False https://www.yelp.com/biz/chun-yang-tea-new-yor... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.71617, 'longitude': -73.9971} ['pickup', 'delivery'] $$ {'address1': '26B Elizabeth St', 'address2': '... 1.212420e+10 (212) 420-0123 1212.877370 40.716170 -73.997100 POINT (-73.99710 40.71617)
2 3aypSFXLfkAL4dhHVFobKg lazy-sundaes-new-york-6 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... False https://www.yelp.com/biz/lazy-sundaes-new-york... 32 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.5 {'latitude': 40.720597, 'longitude': -73.984539} ['pickup', 'delivery'] $$ {'address1': '23 Clinton St', 'address2': None... NaN NaN 1877.016204 40.720597 -73.984539 POINT (-73.98454 40.72060)
3 bVJQEeRNi34-3XN_F1AZEg xing-fu-tang-new-york Xing Fu Tang https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... False https://www.yelp.com/biz/xing-fu-tang-new-york... 91 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7288, 'longitude': -73.98775} ['pickup', 'delivery'] NaN {'address1': '133 2nd Ave', 'address2': '', 'a... NaN NaN 2656.450427 40.728800 -73.987750 POINT (-73.98775 40.72880)
4 zupVwJAFYkHDwrsQd2ktXA fiftylan-union-square-new-york-3 FIFTYLAN Union Square https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... False https://www.yelp.com/biz/fiftylan-union-square... 60 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.735516, 'longitude': -73.989191} ['pickup', 'delivery'] NaN {'address1': '32 Union Square E', 'address2': ... 1.646767e+10 (646) 767-0085 3367.900728 40.735516 -73.989191 POINT (-73.98919 40.73552)
# Spatial join: attach to every row of `gdf` the attributes of each
# `neighborhoods` geometry it intersects.
# `predicate` is the current name of the keyword formerly called `op`;
# geopandas emits a FutureWarning for `op` and will remove it in a future
# release.
join_df = gpd.sjoin(
    gdf,
    neighborhoods,
    predicate='intersects',
)

join_df.head()
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\interactiveshell.py:3377: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if (await self.run_code(code, result,  async_=asy)):
id alias name image_url is_closed url review_count categories rating coordinates ... longitude geometry index_right boro_code boro_name county_fip ntacode ntaname shape_area shape_leng
0 Q3fmTHT7zilDWtfzLK9lMA truedan-new-york-3 Truedan https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... False https://www.yelp.com/biz/truedan-new-york-3?ad... 99 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.5 {'latitude': 40.7191742, 'longitude': -73.9962... ... -73.996220 POINT (-73.99622 40.71917) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
16 DbhBca8LzskjS0B-L4QfZg tiger-sugar-new-york tigersugar https://s3-media3.fl.yelpcdn.com/bphoto/5wNVyj... False https://www.yelp.com/biz/tiger-sugar-new-york?... 515 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] 4.0 {'latitude': 40.717891007822274, 'longitude': ... ... -73.999780 POINT (-73.99978 40.71789) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
29 bxCjNLVDQggNTbpviV0RMw lazy-sundaes-new-york-4 Lazy Sundaes https://s3-media3.fl.yelpcdn.com/bphoto/iDul5E... False https://www.yelp.com/biz/lazy-sundaes-new-york... 65 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7189689, 'longitude': -74.0006... ... -74.000677 POINT (-74.00068 40.71897) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
60 jsJdz6pvyYZjtut9vkUwBw yaya-tea-grand-new-york-2 Yaya Tea Grand https://s3-media2.fl.yelpcdn.com/bphoto/iutBCc... False https://www.yelp.com/biz/yaya-tea-grand-new-yo... 295 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 4.0 {'latitude': 40.7192268371582, 'longitude': -7... ... -73.996277 POINT (-73.99628 40.71923) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527
72 uge-Lp_kY1J6Z4CzzceULw honestea-new-york Honestea https://s3-media1.fl.yelpcdn.com/bphoto/77X7Xd... False https://www.yelp.com/biz/honestea-new-york?adj... 104 [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... 3.5 {'latitude': 40.71789, 'longitude': -73.99692} ... -73.996920 POINT (-73.99692 40.71789) 94 1.0 Manhattan 061 MN24 SoHo-TriBeCa-Civic Center-Little Italy 2.503268e+07 26855.031527

5 rows × 27 columns

# collapse the joined rows to one row per (ntaname, shape_area) pair with
# the number of ids in it, largest count first
join_df = (
    join_df.groupby(by=['ntaname', 'shape_area'])['id']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={'id': 'counts'})
)

# 27878400 = 5280 * 5280, i.e. square feet in one square mile
# (assumes shape_area is expressed in square feet - TODO confirm)
SQFT_PER_SQMILE = 27878400
area_squaremile = join_df['shape_area'] / SQFT_PER_SQMILE
join_df['counts_squaremile'] = join_df['counts'] / area_squaremile

join_df.head()
ntaname shape_area counts counts_squaremile
0 Flushing 3.786424e+07 40 29.450898
1 Chinatown 1.450188e+07 28 53.827184
2 Sunset Park East 2.711007e+07 23 23.651849
3 Hunters Point-Sunnyside-West Maspeth 1.024191e+08 19 5.171787
4 Elmhurst 3.267480e+07 17 14.504535
# top 20 neighborhoods by number of locations, drawn as horizontal bars
fig, ax = plt.subplots(figsize=(10, 6))
data = join_df.sort_values(by='counts', ascending=False).head(20)

sns.barplot(
    data=data,
    x="counts",
    y="ntaname",
    ax=ax,
)

ax.set_title('Most bubble tea locations per neighborhood in NYC', fontsize=15)
# replace the column-name axis labels seaborn sets by default
ax.set_ylabel('neighborhood')
ax.set_xlabel('count')

fig.tight_layout()
# write the figure to disk next to the notebook
fig.savefig('busineses-per-neighborhood.png', dpi=200)
_images/boba-analysis-nyc_30_0.png
# top 20 neighborhoods by locations per square mile, drawn as horizontal bars
fig, ax = plt.subplots(figsize=(10, 6))
data = join_df.sort_values(by='counts_squaremile', ascending=False).head(20)

sns.barplot(x="counts_squaremile",
            y="ntaname",
            data=data,
            ax=ax)

# figure-level title, positioned manually via x/y in figure coordinates
plt.suptitle('Most bubble tea locations per square mile by neighborhood in NYC',
             fontsize=15,
             y=.96, x=.60)
plt.ylabel('neighborhood')
plt.xlabel('count per square mile')

plt.tight_layout()
# Save under its own filename: this cell previously reused
# 'busineses-per-neighborhood.png', silently overwriting the raw-count
# chart saved by the preceding plot.
plt.savefig('businesses-per-squaremile-neighborhood.png', dpi=200)
_images/boba-analysis-nyc_31_0.png