Obsessed with Boba? Analyzing Bubble Tea Shops in NYC Using the Yelp Fusion API
Obsessed with Boba? Analyzing Bubble Tea Shops in NYC Using the Yelp Fusion API¶
Exploratory Data Analysis
# # imports for Google Colab Sessions
# !apt install gdal-bin python-gdal python3-gdal
# # Install rtree - Geopandas requirement
# !apt install python3-rtree
# # Install Geopandas
# !pip install git+https://github.com/geopandas/geopandas.git
# # Install descartes - Geopandas requirement
# !pip install descartes
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(color_codes=True)
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\__init__.py:89: UserWarning: pyproj unable to set database path.
_pyproj_global_context_initialize()
# load the bubble tea business listings from the project's GitHub repository
# google colab path to data
url = 'https://raw.githubusercontent.com/mebauer/boba-nyc/master/teabook/boba-nyc.csv'
# NOTE(review): the df.info() output further down shows `phone` parsed as
# float64; pass dtype={'phone': str} here if the raw digits are ever needed.
df = pd.read_csv(url)
# # local path to data
# df = pd.read_csv('boba-nyc.csv')
# preview first five rows
df.head()
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Q3fmTHT7zilDWtfzLK9lMA | truedan-new-york-3 | Truedan | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | False | https://www.yelp.com/biz/truedan-new-york-3?ad... | 99 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7191742, 'longitude': -73.9962... | ['pickup', 'delivery'] | $$ | {'address1': '208 Grand St', 'address2': None,... | 1.646559e+10 | (646) 559-2886 | 1535.241614 | 40.719174 | -73.996220 | POINT (-73.9962197 40.7191742) |
1 | 7-bx74TooPuZKZDNW4WFcQ | chun-yang-tea-new-york | Chun Yang Tea | https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... | False | https://www.yelp.com/biz/chun-yang-tea-new-yor... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.71617, 'longitude': -73.9971} | ['pickup', 'delivery'] | $$ | {'address1': '26B Elizabeth St', 'address2': '... | 1.212420e+10 | (212) 420-0123 | 1212.877370 | 40.716170 | -73.997100 | POINT (-73.9971 40.71617) |
2 | 3aypSFXLfkAL4dhHVFobKg | lazy-sundaes-new-york-6 | Lazy Sundaes | https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... | False | https://www.yelp.com/biz/lazy-sundaes-new-york... | 32 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.5 | {'latitude': 40.720597, 'longitude': -73.984539} | ['pickup', 'delivery'] | $$ | {'address1': '23 Clinton St', 'address2': None... | NaN | NaN | 1877.016204 | 40.720597 | -73.984539 | POINT (-73.984539 40.720597) |
3 | bVJQEeRNi34-3XN_F1AZEg | xing-fu-tang-new-york | Xing Fu Tang | https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... | False | https://www.yelp.com/biz/xing-fu-tang-new-york... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7288, 'longitude': -73.98775} | ['pickup', 'delivery'] | NaN | {'address1': '133 2nd Ave', 'address2': '', 'a... | NaN | NaN | 2656.450427 | 40.728800 | -73.987750 | POINT (-73.98775000000001 40.7288) |
4 | zupVwJAFYkHDwrsQd2ktXA | fiftylan-union-square-new-york-3 | FIFTYLAN Union Square | https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... | False | https://www.yelp.com/biz/fiftylan-union-square... | 60 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.735516, 'longitude': -73.989191} | ['pickup', 'delivery'] | NaN | {'address1': '32 Union Square E', 'address2': ... | 1.646767e+10 | (646) 767-0085 | 3367.900728 | 40.735516 | -73.989191 | POINT (-73.98919100000001 40.735516) |
# show the final five records of the dataframe
df.tail(n=5)
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
409 | QBebLBqhiuDlWasPgp6s4Q | tbaar-flushing-5 | TBaar | https://s3-media1.fl.yelpcdn.com/bphoto/bgao-b... | False | https://www.yelp.com/biz/tbaar-flushing-5?adju... | 2 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 3.5 | {'latitude': 40.759357, 'longitude': -73.830396} | ['pickup', 'delivery'] | NaN | {'address1': '135-52 Roosevelt Ave', 'address2... | NaN | NaN | 15054.917573 | 40.759357 | -73.830396 | POINT (-73.83039599999999 40.759357) |
410 | 7apqk24GsFC9dptoD5dDZg | uncleman-cafe-new-york | Uncleman Cafe | NaN | False | https://www.yelp.com/biz/uncleman-cafe-new-yor... | 1 | [{'alias': 'desserts', 'title': 'Desserts'}, {... | 1.0 | {'latitude': 40.71781, 'longitude': -73.99795} | [] | NaN | {'address1': '122 Mulberry St', 'address2': No... | 1.212967e+10 | (212) 966-6206 | 1414.238245 | 40.717810 | -73.997950 | POINT (-73.99795 40.71781) |
411 | Vz6hbICqQfyMWq9wzAKakw | tbaar-flushing-4 | TBaar | https://s3-media1.fl.yelpcdn.com/bphoto/phUYWa... | False | https://www.yelp.com/biz/tbaar-flushing-4?adju... | 6 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.0 | {'latitude': 40.7613116, 'longitude': -73.8308... | [] | NaN | {'address1': '37-11 Main St', 'address2': '', ... | NaN | NaN | 15147.410738 | 40.761312 | -73.830812 | POINT (-73.8308122 40.7613116) |
412 | YBgAPbviWYtVP2gAUhc7_A | t-baar-flushing | TBaar | https://s3-media4.fl.yelpcdn.com/bphoto/8pk-_Q... | False | https://www.yelp.com/biz/t-baar-flushing?adjus... | 8 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 3.5 | {'latitude': 40.7587509, 'longitude': -73.830162} | [] | $ | {'address1': '135-36 40th Rd', 'address2': '',... | NaN | NaN | 15045.970192 | 40.758751 | -73.830162 | POINT (-73.830162 40.7587509) |
413 | f5UVQXvDpOrrIZdCjsw47w | maxins-cafe-flushing | Maxin's Cafe | https://s3-media1.fl.yelpcdn.com/bphoto/S3vWvD... | False | https://www.yelp.com/biz/maxins-cafe-flushing?... | 15 | [{'alias': 'bakeries', 'title': 'Bakeries'}, {... | 3.5 | {'latitude': 40.7585, 'longitude': -73.83064} | [] | $ | {'address1': '135-24 40th Rd', 'address2': '',... | 1.718887e+10 | (718) 886-9200 | 14997.842205 | 40.758500 | -73.830640 | POINT (-73.83064 40.7585) |
# count the records and the columns, then report both
rows = len(df)
columns = len(df.columns)
print('number of rows: {}\nnumber of columns: {}'.format(rows, columns))
number of rows: 414
number of columns: 19
# review concise summary of data: column dtypes, non-null counts and memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 414 non-null object
1 alias 414 non-null object
2 name 414 non-null object
3 image_url 409 non-null object
4 is_closed 414 non-null bool
5 url 414 non-null object
6 review_count 414 non-null int64
7 categories 414 non-null object
8 rating 414 non-null float64
9 coordinates 414 non-null object
10 transactions 414 non-null object
11 price 258 non-null object
12 location 414 non-null object
13 phone 362 non-null float64
14 display_phone 362 non-null object
15 distance 414 non-null float64
16 latitude 414 non-null float64
17 longitude 414 non-null float64
18 geometry 414 non-null object
dtypes: bool(1), float64(5), int64(1), object(12)
memory usage: 58.7+ KB
# identify the number of nulls per column and their share of all rows (in percent)
ser1 = df.isnull().sum().sort_values(ascending=False)
# derive the percentage from the counts computed above, rounded to two decimals
ser2 = (ser1 / len(df) * 100).round(2)
# place both measures side by side, one row per column of df
pd.concat(
    [ser1.rename('null_count'), ser2.rename('null_perc')],
    axis=1,
)
null_count | null_perc | |
---|---|---|
price | 156 | 37.68 |
display_phone | 52 | 12.56 |
phone | 52 | 12.56 |
image_url | 5 | 1.21 |
id | 0 | 0.00 |
transactions | 0 | 0.00 |
longitude | 0 | 0.00 |
latitude | 0 | 0.00 |
distance | 0 | 0.00 |
location | 0 | 0.00 |
coordinates | 0 | 0.00 |
alias | 0 | 0.00 |
rating | 0 | 0.00 |
categories | 0 | 0.00 |
review_count | 0 | 0.00 |
url | 0 | 0.00 |
is_closed | 0 | 0.00 |
name | 0 | 0.00 |
geometry | 0 | 0.00 |
# descriptive statistics of numeric columns
# (`phone` shows up here only because it was parsed as float64)
df.describe()
review_count | rating | phone | distance | latitude | longitude | |
---|---|---|---|---|---|---|
count | 414.000000 | 414.000000 | 3.620000e+02 | 414.000000 | 414.000000 | 414.000000 |
mean | 89.722222 | 3.985507 | 1.613962e+10 | 8980.359386 | 40.716170 | -73.941997 |
std | 116.765312 | 0.608481 | 2.375761e+09 | 5684.076763 | 0.058255 | 0.084129 |
min | 1.000000 | 1.000000 | 1.201432e+10 | 953.875131 | 40.537609 | -74.166080 |
25% | 16.250000 | 3.500000 | 1.347732e+10 | 3954.891025 | 40.689505 | -73.994920 |
50% | 49.000000 | 4.000000 | 1.718286e+10 | 8254.243639 | 40.731610 | -73.975096 |
75% | 109.000000 | 4.500000 | 1.718886e+10 | 13019.588280 | 40.757300 | -73.880179 |
max | 740.000000 | 5.000000 | 1.934235e+10 | 25132.157979 | 40.886959 | -73.702410 |
# summary (count, unique, top, freq) of the string/object columns,
# transposed so that each column of df becomes a row
df.describe(include=['O']).transpose()
count | unique | top | freq | |
---|---|---|---|---|
id | 414 | 414 | Q3fmTHT7zilDWtfzLK9lMA | 1 |
alias | 414 | 414 | truedan-new-york-3 | 1 |
name | 414 | 259 | Kung Fu Tea | 26 |
image_url | 409 | 409 | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | 1 |
url | 414 | 414 | https://www.yelp.com/biz/truedan-new-york-3?ad... | 1 |
categories | 414 | 177 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 115 |
coordinates | 414 | 409 | {'latitude': 40.71697, 'longitude': -73.99471} | 2 |
transactions | 414 | 5 | ['pickup', 'delivery'] | 229 |
price | 258 | 3 | $ | 166 |
location | 414 | 411 | {'address1': '90-15 Queens Blvd', 'address2': ... | 2 |
display_phone | 362 | 357 | (718) 395-8297 | 2 |
geometry | 414 | 409 | POINT (-73.99471 40.71697) | 2 |
# verify that the `id` column holds no repeated values
print('id is unique: {}'.format(df.loc[:, 'id'].is_unique))
id is unique: True
# preview first five rows again before the per-business analysis
df.head()
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Q3fmTHT7zilDWtfzLK9lMA | truedan-new-york-3 | Truedan | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | False | https://www.yelp.com/biz/truedan-new-york-3?ad... | 99 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7191742, 'longitude': -73.9962... | ['pickup', 'delivery'] | $$ | {'address1': '208 Grand St', 'address2': None,... | 1.646559e+10 | (646) 559-2886 | 1535.241614 | 40.719174 | -73.996220 | POINT (-73.9962197 40.7191742) |
1 | 7-bx74TooPuZKZDNW4WFcQ | chun-yang-tea-new-york | Chun Yang Tea | https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... | False | https://www.yelp.com/biz/chun-yang-tea-new-yor... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.71617, 'longitude': -73.9971} | ['pickup', 'delivery'] | $$ | {'address1': '26B Elizabeth St', 'address2': '... | 1.212420e+10 | (212) 420-0123 | 1212.877370 | 40.716170 | -73.997100 | POINT (-73.9971 40.71617) |
2 | 3aypSFXLfkAL4dhHVFobKg | lazy-sundaes-new-york-6 | Lazy Sundaes | https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... | False | https://www.yelp.com/biz/lazy-sundaes-new-york... | 32 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.5 | {'latitude': 40.720597, 'longitude': -73.984539} | ['pickup', 'delivery'] | $$ | {'address1': '23 Clinton St', 'address2': None... | NaN | NaN | 1877.016204 | 40.720597 | -73.984539 | POINT (-73.984539 40.720597) |
3 | bVJQEeRNi34-3XN_F1AZEg | xing-fu-tang-new-york | Xing Fu Tang | https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... | False | https://www.yelp.com/biz/xing-fu-tang-new-york... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7288, 'longitude': -73.98775} | ['pickup', 'delivery'] | NaN | {'address1': '133 2nd Ave', 'address2': '', 'a... | NaN | NaN | 2656.450427 | 40.728800 | -73.987750 | POINT (-73.98775000000001 40.7288) |
4 | zupVwJAFYkHDwrsQd2ktXA | fiftylan-union-square-new-york-3 | FIFTYLAN Union Square | https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... | False | https://www.yelp.com/biz/fiftylan-union-square... | 60 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.735516, 'longitude': -73.989191} | ['pickup', 'delivery'] | NaN | {'address1': '32 Union Square E', 'address2': ... | 1.646767e+10 | (646) 767-0085 | 3367.900728 | 40.735516 | -73.989191 | POINT (-73.98919100000001 40.735516) |
# identify number of unique bubble tea shop entries
# rename_axis + reset_index(name=...) produces the columns ['name', 'counts']
# on both pandas 1.x and 2.x; renaming 'index'/'name' after a bare
# reset_index() only worked with the pre-2.0 column labels and mislabels the
# columns on pandas 2.x.
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='counts')
)
print('number of unique bubble tea shops: {}'.format(len(names_counts)))
# save file
# NOTE(review): the row index is written as an unnamed first column; confirm
# that whatever reads this csv expects it.
name_counts_file_path = '../teaapp/name_counts.csv'
names_counts.to_csv(name_counts_file_path)
# view dataframe
names_counts
number of unique bubble tea shops: 259
name | counts | |
---|---|---|
0 | Kung Fu Tea | 26 |
1 | Vivi Bubble Tea | 25 |
2 | Gong Cha | 24 |
3 | CoCo Fresh Tea & Juice | 10 |
4 | Möge Tee | 9 |
... | ... | ... |
254 | Chokolat Patisserie & Culture Tea Bar | 1 |
255 | Siips Bubble | 1 |
256 | Yummy Boba Desserts | 1 |
257 | Bread & Joe | 1 |
258 | Maxin's Cafe | 1 |
259 rows × 2 columns
# raw look at the value counts turned into a dataframe (index kept as a column)
df['name'].value_counts().reset_index()
index | name | |
---|---|---|
0 | Kung Fu Tea | 26 |
1 | Vivi Bubble Tea | 25 |
2 | Gong Cha | 24 |
3 | CoCo Fresh Tea & Juice | 10 |
4 | Möge Tee | 9 |
... | ... | ... |
254 | Chokolat Patisserie & Culture Tea Bar | 1 |
255 | Siips Bubble | 1 |
256 | Yummy Boba Desserts | 1 |
257 | Bread & Joe | 1 |
258 | Maxin's Cafe | 1 |
259 rows × 2 columns
# count locations per business name; rename_axis + reset_index(name=...) gives
# the columns ['names', 'counts'] regardless of the pandas version, whereas
# renaming 'index'/'name' depended on the pre-2.0 value_counts() labels
names_counts = (
    df['name']
    .value_counts()
    .rename_axis('names')
    .reset_index(name='counts')
)

# horizontal bar chart of the ten business names with the most locations
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x='counts',
            y="names",
            data=names_counts.head(10),
            ax=ax)
plt.title('Number of bubble tea shops by business in nyc', fontsize=15)
plt.tight_layout()
# mean review count per business name, highest first, rounded to two decimals
review_count_df = (
    df.groupby(by='name')['review_count']
    .mean()
    .sort_values(ascending=False)
    .round(2)
    .reset_index()
)
review_count_df.head()
name | review_count | |
---|---|---|
0 | Ten Ren's Tea Time | 656.0 |
1 | Hanco's | 519.0 |
2 | Ajisen Ramen | 438.0 |
3 | Teado Tea Shop | 428.0 |
4 | 520 Dessert | 358.0 |
# horizontal bar chart of the 20 business names with the highest mean review count
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=review_count_df.head(20),
    x="review_count",
    y="name",
    ax=ax,
)
ax.set_title('Average number of reviews per business in nyc', fontsize=15)
fig.tight_layout()
C:\Users\datal\AppData\Local\Temp\ipykernel_20044\1422840523.py:9: UserWarning: Glyph 27792 (\N{CJK UNIFIED IDEOGRAPH-6C90}) missing from current font.
plt.tight_layout()
C:\Users\datal\AppData\Local\Temp\ipykernel_20044\1422840523.py:9: UserWarning: Glyph 30333 (\N{CJK UNIFIED IDEOGRAPH-767D}) missing from current font.
plt.tight_layout()
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\pylabtools.py:151: UserWarning: Glyph 27792 (\N{CJK UNIFIED IDEOGRAPH-6C90}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\pylabtools.py:151: UserWarning: Glyph 30333 (\N{CJK UNIFIED IDEOGRAPH-767D}) missing from current font.
fig.canvas.print_figure(bytes_io, **kw)
# the 20 individual locations with the most reviews, highest first
most_reviewed = df.sort_values(by='review_count', ascending=False).iloc[:20]
most_reviewed.head()
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
170 | bn0zZ9nT_j1INOwfpxWmEw | tiger-sugar-flushing | tigersugar | https://s3-media2.fl.yelpcdn.com/bphoto/hai87B... | False | https://www.yelp.com/biz/tiger-sugar-flushing?... | 740 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.0 | {'latitude': 40.75922023369396, 'longitude': -... | ['pickup', 'delivery'] | $$ | {'address1': '40-10 Main St', 'address2': 'Ste... | 1.347732e+10 | (347) 732-4001 | 15055.920826 | 40.759220 | -73.830305 | POINT (-73.83030496537685 40.75922023369396) |
90 | NqnYnZtajJ4Y-Lmxa26VdA | hancos-brooklyn | Hanco's | https://s3-media3.fl.yelpcdn.com/bphoto/_vQWWX... | False | https://www.yelp.com/biz/hancos-brooklyn?adjus... | 704 | [{'alias': 'vietnamese', 'title': 'Vietnamese'... | 4.0 | {'latitude': 40.6870318621828, 'longitude': -7... | ['pickup', 'delivery'] | $ | {'address1': '134 Smith St', 'address2': '', '... | 1.718859e+10 | (718) 858-6818 | 2073.182530 | 40.687032 | -73.990429 | POINT (-73.9904292380982 40.6870318621828) |
51 | WxNlqGSsj_2TCodkx9Sa4A | vivi-bubble-tea-new-york-6 | Vivi Bubble Tea | https://s3-media2.fl.yelpcdn.com/bphoto/Fw3SYB... | False | https://www.yelp.com/biz/vivi-bubble-tea-new-y... | 658 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.715099, 'longitude': -73.997535} | ['pickup', 'delivery'] | $ | {'address1': '49 Bayard St', 'address2': '', '... | 1.212567e+10 | (212) 566-6833 | 1114.073275 | 40.715099 | -73.997535 | POINT (-73.997535 40.715099) |
45 | SqVHETmH6bf3rPKmdftvvw | ten-rens-tea-time-new-york-2 | Ten Ren's Tea Time | https://s3-media1.fl.yelpcdn.com/bphoto/UChdn5... | False | https://www.yelp.com/biz/ten-rens-tea-time-new... | 656 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.7163819, 'longitude': -73.9982... | ['delivery'] | $$ | {'address1': '73 Mott St', 'address2': '', 'ad... | 1.212733e+10 | (212) 732-7178 | 1261.450313 | 40.716382 | -73.998268 | POINT (-73.99826830000001 40.7163819) |
266 | ZLo3HRSSvN--Mw4eCBLxCg | mango-mango-dessert-flushing-3 | Mango Mango Dessert | https://s3-media3.fl.yelpcdn.com/bphoto/X7bnQ9... | False | https://www.yelp.com/biz/mango-mango-dessert-f... | 622 | [{'alias': 'desserts', 'title': 'Desserts'}, {... | 4.0 | {'latitude': 40.7603809317017, 'longitude': -7... | ['pickup', 'delivery'] | $ | {'address1': '136-28 39th Ave', 'address2': ''... | 1.917563e+10 | (917) 563-1847 | 15199.500511 | 40.760381 | -73.829452 | POINT (-73.8294516357502 40.7603809317017) |
# horizontal bar chart of review counts for the most reviewed locations,
# labelled by their `alias` value
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=most_reviewed,
    x="review_count",
    y="alias",
    ax=ax,
)
ax.set_title('Most reviews per business location in nyc', fontsize=15)
fig.tight_layout()
# summary statistics of the `rating` column across all business locations
df['rating'].describe()
count 414.000000
mean 3.985507
std 0.608481
min 1.000000
25% 3.500000
50% 4.000000
75% 4.500000
max 5.000000
Name: rating, dtype: float64
# count of business locations per rating value
fig, ax = plt.subplots(figsize=(8, 6))
# pass the axes explicitly, like the other plots in this notebook, instead of
# relying on matplotlib's implicit "current axes"
sns.countplot(data=df,
              x="rating",
              ax=ax)
plt.title('Count of Yelp ratings per business location in nyc', fontsize=15)
plt.tight_layout()
# number of locations per price label, ignoring rows without a price
price_df = (
    df['price']
    .dropna()
    .value_counts()
    .rename_axis('price')
    .reset_index(name='counts')
)
price_df
price | counts | |
---|---|---|
0 | $ | 166 |
1 | $$ | 91 |
2 | $$$$ | 1 |
# convert each price label to the number of '$' characters it contains
# NOTE: the column is overwritten with integers, so run this cell only once
# per price_df build
price_df['price'] = price_df['price'].str.count(r'\$')
price_df
price | counts | |
---|---|---|
0 | 1 | 166 |
1 | 2 | 91 |
2 | 4 | 1 |
# bar chart of the number of locations at each price level
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=price_df,
    x="price",
    y="counts",
    ax=ax,
)
ax.set_title('Yelp price level (1 = $) per business location in NYC', fontsize=15)
fig.tight_layout()
# read the neighborhood polygons (shapefile export from data.cityofnewyork.us)
# note: this rebinds `url`, which previously held the csv location
url = 'https://data.cityofnewyork.us/api/geospatial/cpf4-rkhq?method=export&format=Shapefile'
neighborhoods = gpd.read_file(url)
# preview first five rows
neighborhoods.head()
boro_code | boro_name | county_fip | ntacode | ntaname | shape_area | shape_leng | geometry | |
---|---|---|---|---|---|---|---|---|
0 | 4.0 | Queens | 081 | QN51 | Murray Hill | 5.248828e+07 | 33266.904856 | POLYGON ((-73.80379 40.77561, -73.80099 40.775... |
1 | 4.0 | Queens | 081 | QN27 | East Elmhurst | 1.972685e+07 | 19816.711894 | POLYGON ((-73.86110 40.76366, -73.85993 40.762... |
2 | 4.0 | Queens | 081 | QN41 | Fresh Meadows-Utopia | 2.777485e+07 | 22106.431272 | POLYGON ((-73.77758 40.73019, -73.77849 40.729... |
3 | 1.0 | Manhattan | 061 | MN17 | Midtown-Midtown South | 3.019153e+07 | 27032.700375 | POLYGON ((-73.97301 40.76428, -73.97141 40.763... |
4 | 2.0 | Bronx | 005 | BX09 | Soundview-Castle Hill-Clason Point-Harding Park | 5.198380e+07 | 67340.977626 | MULTIPOLYGON (((-73.88064 40.81852, -73.88098 ... |
# inspect the coordinate reference system reported for the neighborhood polygons
neighborhoods.crs
<Geographic 2D CRS: GEOGCS["WGS84(DD)",DATUM["WGS84",SPHEROID["WGS84", ...>
Name: WGS84(DD)
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- undefined
Datum: WGS84
- Ellipsoid: WGS84
- Prime Meridian: Greenwich
# reproject the neighborhood polygons to EPSG:4326
# NOTE(review): in the recorded run this raised CRSError ("no database context
# specified"), after pyproj had warned at import time that it was unable to set
# its database path; this looks like an environment (PROJ data) problem rather
# than a code problem. Confirm before relying on the reprojected frame.
neighborhoods = neighborhoods.to_crs('EPSG:4326')

neighborhoods.crs
---------------------------------------------------------------------------
CRSError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_20044\3555093168.py in <module>
----> 1 neighborhoods = neighborhoods.to_crs('EPSG:4326')
2
3 neighborhoods.crs
~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\geodataframe.py in to_crs(self, crs, epsg, inplace)
1273 else:
1274 df = self.copy()
-> 1275 geom = df.geometry.to_crs(crs=crs, epsg=epsg)
1276 df.geometry = geom
1277 df.crs = geom.crs
~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\geoseries.py in to_crs(self, crs, epsg)
1118 """
1119 return GeoSeries(
-> 1120 self.values.to_crs(crs=crs, epsg=epsg), index=self.index, name=self.name
1121 )
1122
~\anaconda3\envs\boba-nyc\lib\site-packages\geopandas\array.py in to_crs(self, crs, epsg)
766 )
767 if crs is not None:
--> 768 crs = CRS.from_user_input(crs)
769 elif epsg is not None:
770 crs = CRS.from_epsg(epsg)
~\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\crs\crs.py in from_user_input(cls, value, **kwargs)
477 if isinstance(value, cls):
478 return value
--> 479 return cls(value, **kwargs)
480
481 def get_geod(self) -> Optional[Geod]:
~\anaconda3\envs\boba-nyc\lib\site-packages\pyproj\crs\crs.py in __init__(self, projparams, **kwargs)
324 self._local.crs = projparams
325 else:
--> 326 self._local.crs = _CRS(self.srs)
327
328 @property
pyproj\_crs.pyx in pyproj._crs._CRS.__init__()
CRSError: Invalid projection: EPSG:4326: (Internal Proj Error: proj_create: no database context specified)
# preview the business records again before building point geometries
df.head()
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Q3fmTHT7zilDWtfzLK9lMA | truedan-new-york-3 | Truedan | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | False | https://www.yelp.com/biz/truedan-new-york-3?ad... | 99 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7191742, 'longitude': -73.9962... | ['pickup', 'delivery'] | $$ | {'address1': '208 Grand St', 'address2': None,... | 1.646559e+10 | (646) 559-2886 | 1535.241614 | 40.719174 | -73.996220 | POINT (-73.9962197 40.7191742) |
1 | 7-bx74TooPuZKZDNW4WFcQ | chun-yang-tea-new-york | Chun Yang Tea | https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... | False | https://www.yelp.com/biz/chun-yang-tea-new-yor... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.71617, 'longitude': -73.9971} | ['pickup', 'delivery'] | $$ | {'address1': '26B Elizabeth St', 'address2': '... | 1.212420e+10 | (212) 420-0123 | 1212.877370 | 40.716170 | -73.997100 | POINT (-73.9971 40.71617) |
2 | 3aypSFXLfkAL4dhHVFobKg | lazy-sundaes-new-york-6 | Lazy Sundaes | https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... | False | https://www.yelp.com/biz/lazy-sundaes-new-york... | 32 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.5 | {'latitude': 40.720597, 'longitude': -73.984539} | ['pickup', 'delivery'] | $$ | {'address1': '23 Clinton St', 'address2': None... | NaN | NaN | 1877.016204 | 40.720597 | -73.984539 | POINT (-73.984539 40.720597) |
3 | bVJQEeRNi34-3XN_F1AZEg | xing-fu-tang-new-york | Xing Fu Tang | https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... | False | https://www.yelp.com/biz/xing-fu-tang-new-york... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7288, 'longitude': -73.98775} | ['pickup', 'delivery'] | NaN | {'address1': '133 2nd Ave', 'address2': '', 'a... | NaN | NaN | 2656.450427 | 40.728800 | -73.987750 | POINT (-73.98775000000001 40.7288) |
4 | zupVwJAFYkHDwrsQd2ktXA | fiftylan-union-square-new-york-3 | FIFTYLAN Union Square | https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... | False | https://www.yelp.com/biz/fiftylan-union-square... | 60 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.735516, 'longitude': -73.989191} | ['pickup', 'delivery'] | NaN | {'address1': '32 Union Square E', 'address2': ... | 1.646767e+10 | (646) 767-0085 | 3367.900728 | 40.735516 | -73.989191 | POINT (-73.98919100000001 40.735516) |
# build a GeoDataFrame whose geometry is one point per business, created from
# the longitude (x) and latitude (y) columns and tagged with CRS 4326
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(x=df['longitude'], y=df['latitude']),
    crs=4326,
)
gdf.head()
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | transactions | price | location | phone | display_phone | distance | latitude | longitude | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Q3fmTHT7zilDWtfzLK9lMA | truedan-new-york-3 | Truedan | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | False | https://www.yelp.com/biz/truedan-new-york-3?ad... | 99 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7191742, 'longitude': -73.9962... | ['pickup', 'delivery'] | $$ | {'address1': '208 Grand St', 'address2': None,... | 1.646559e+10 | (646) 559-2886 | 1535.241614 | 40.719174 | -73.996220 | POINT (-73.99622 40.71917) |
1 | 7-bx74TooPuZKZDNW4WFcQ | chun-yang-tea-new-york | Chun Yang Tea | https://s3-media1.fl.yelpcdn.com/bphoto/JrGwrZ... | False | https://www.yelp.com/biz/chun-yang-tea-new-yor... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.71617, 'longitude': -73.9971} | ['pickup', 'delivery'] | $$ | {'address1': '26B Elizabeth St', 'address2': '... | 1.212420e+10 | (212) 420-0123 | 1212.877370 | 40.716170 | -73.997100 | POINT (-73.99710 40.71617) |
2 | 3aypSFXLfkAL4dhHVFobKg | lazy-sundaes-new-york-6 | Lazy Sundaes | https://s3-media3.fl.yelpcdn.com/bphoto/kEfCYC... | False | https://www.yelp.com/biz/lazy-sundaes-new-york... | 32 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.5 | {'latitude': 40.720597, 'longitude': -73.984539} | ['pickup', 'delivery'] | $$ | {'address1': '23 Clinton St', 'address2': None... | NaN | NaN | 1877.016204 | 40.720597 | -73.984539 | POINT (-73.98454 40.72060) |
3 | bVJQEeRNi34-3XN_F1AZEg | xing-fu-tang-new-york | Xing Fu Tang | https://s3-media2.fl.yelpcdn.com/bphoto/cyFHFE... | False | https://www.yelp.com/biz/xing-fu-tang-new-york... | 91 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7288, 'longitude': -73.98775} | ['pickup', 'delivery'] | NaN | {'address1': '133 2nd Ave', 'address2': '', 'a... | NaN | NaN | 2656.450427 | 40.728800 | -73.987750 | POINT (-73.98775 40.72880) |
4 | zupVwJAFYkHDwrsQd2ktXA | fiftylan-union-square-new-york-3 | FIFTYLAN Union Square | https://s3-media3.fl.yelpcdn.com/bphoto/UIwR20... | False | https://www.yelp.com/biz/fiftylan-union-square... | 60 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.735516, 'longitude': -73.989191} | ['pickup', 'delivery'] | NaN | {'address1': '32 Union Square E', 'address2': ... | 1.646767e+10 | (646) 767-0085 | 3367.900728 | 40.735516 | -73.989191 | POINT (-73.98919 40.73552) |
# spatially join each business point to the neighborhood polygon it intersects
# `predicate` replaces the deprecated `op` keyword, which geopandas flags with
# a FutureWarning and will remove in a future release
join_df = gpd.sjoin(gdf,
                    neighborhoods,
                    predicate='intersects')
join_df.head()
C:\Users\datal\anaconda3\envs\boba-nyc\lib\site-packages\IPython\core\interactiveshell.py:3377: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
if (await self.run_code(code, result, async_=asy)):
id | alias | name | image_url | is_closed | url | review_count | categories | rating | coordinates | ... | longitude | geometry | index_right | boro_code | boro_name | county_fip | ntacode | ntaname | shape_area | shape_leng | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Q3fmTHT7zilDWtfzLK9lMA | truedan-new-york-3 | Truedan | https://s3-media2.fl.yelpcdn.com/bphoto/BmUZXO... | False | https://www.yelp.com/biz/truedan-new-york-3?ad... | 99 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.5 | {'latitude': 40.7191742, 'longitude': -73.9962... | ... | -73.996220 | POINT (-73.99622 40.71917) | 94 | 1.0 | Manhattan | 061 | MN24 | SoHo-TriBeCa-Civic Center-Little Italy | 2.503268e+07 | 26855.031527 |
16 | DbhBca8LzskjS0B-L4QfZg | tiger-sugar-new-york | tigersugar | https://s3-media3.fl.yelpcdn.com/bphoto/5wNVyj... | False | https://www.yelp.com/biz/tiger-sugar-new-york?... | 515 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}] | 4.0 | {'latitude': 40.717891007822274, 'longitude': ... | ... | -73.999780 | POINT (-73.99978 40.71789) | 94 | 1.0 | Manhattan | 061 | MN24 | SoHo-TriBeCa-Civic Center-Little Italy | 2.503268e+07 | 26855.031527 |
29 | bxCjNLVDQggNTbpviV0RMw | lazy-sundaes-new-york-4 | Lazy Sundaes | https://s3-media3.fl.yelpcdn.com/bphoto/iDul5E... | False | https://www.yelp.com/biz/lazy-sundaes-new-york... | 65 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.0 | {'latitude': 40.7189689, 'longitude': -74.0006... | ... | -74.000677 | POINT (-74.00068 40.71897) | 94 | 1.0 | Manhattan | 061 | MN24 | SoHo-TriBeCa-Civic Center-Little Italy | 2.503268e+07 | 26855.031527 |
60 | jsJdz6pvyYZjtut9vkUwBw | yaya-tea-grand-new-york-2 | Yaya Tea Grand | https://s3-media2.fl.yelpcdn.com/bphoto/iutBCc... | False | https://www.yelp.com/biz/yaya-tea-grand-new-yo... | 295 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 4.0 | {'latitude': 40.7192268371582, 'longitude': -7... | ... | -73.996277 | POINT (-73.99628 40.71923) | 94 | 1.0 | Manhattan | 061 | MN24 | SoHo-TriBeCa-Civic Center-Little Italy | 2.503268e+07 | 26855.031527 |
72 | uge-Lp_kY1J6Z4CzzceULw | honestea-new-york | Honestea | https://s3-media1.fl.yelpcdn.com/bphoto/77X7Xd... | False | https://www.yelp.com/biz/honestea-new-york?adj... | 104 | [{'alias': 'bubbletea', 'title': 'Bubble Tea'}... | 3.5 | {'latitude': 40.71789, 'longitude': -73.99692} | ... | -73.996920 | POINT (-73.99692 40.71789) | 94 | 1.0 | Manhattan | 061 | MN24 | SoHo-TriBeCa-Civic Center-Little Italy | 2.503268e+07 | 26855.031527 |
5 rows × 27 columns
# collapse the joined rows to one row per neighborhood with its number of
# locations, most locations first (this rebinds join_df to the aggregate)
join_df = (
    join_df.groupby(by=['ntaname', 'shape_area'])['id']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='counts')
)
# 27878400 = 5280 ** 2, the number of square feet in a square mile
# (assumes shape_area is expressed in square feet - TODO confirm)
join_df['counts_squaremile'] = join_df['counts'] / (join_df['shape_area'] / 27878400)
join_df.head()
ntaname | shape_area | counts | counts_squaremile | |
---|---|---|---|---|
0 | Flushing | 3.786424e+07 | 40 | 29.450898 |
1 | Chinatown | 1.450188e+07 | 28 | 53.827184 |
2 | Sunset Park East | 2.711007e+07 | 23 | 23.651849 |
3 | Hunters Point-Sunnyside-West Maspeth | 1.024191e+08 | 19 | 5.171787 |
4 | Elmhurst | 3.267480e+07 | 17 | 14.504535 |
# bar chart of the 20 neighborhoods with the most bubble tea locations
fig, ax = plt.subplots(figsize=(10, 6))
data = join_df.sort_values(by='counts', ascending=False).head(20)
sns.barplot(data=data, x="counts", y="ntaname", ax=ax)
ax.set_title('Most bubble tea locations per neighborhood in NYC', fontsize=15)
ax.set_ylabel('neighborhood')
ax.set_xlabel('count')
fig.tight_layout()
# write the figure to disk at 200 dpi
fig.savefig('busineses-per-neighborhood.png', dpi=200)
# bar chart of the 20 neighborhoods with the most bubble tea locations per
# square mile
fig, ax = plt.subplots(figsize=(10, 6))
data = join_df.sort_values(by='counts_squaremile', ascending=False).head(20)
sns.barplot(x="counts_squaremile",
            y="ntaname",
            data=data,
            ax=ax)
# figure-level title, positioned manually with x/y
plt.suptitle('Most bubble tea locations per square mile by neighborhood in NYC',
             fontsize=15,
             y=.96, x=.60)
plt.ylabel('neighborhood')
plt.xlabel('count per square mile')
plt.tight_layout()
# save under its own file name: the per-neighborhood count figure is already
# written to 'busineses-per-neighborhood.png', and reusing that name here
# silently overwrote it
plt.savefig('busineses-per-neighborhood-squaremile.png', dpi=200)