# NOTE(review): `st.cache` is the legacy Streamlit caching decorator; kept so
# the snippet still matches the Streamlit version the app was written for.
@st.cache(allow_output_mutation=True)
def load_data():
    """
    Download the pickled data bundle from Google Drive and load it.

    The file is fetched with ``gdown`` into ``one1.pkl`` (relative to the
    current working directory) and then unpickled into four objects.

    Returns
    -------
    tuple
        ``(m2, to2, xsAll2, yAll2)`` in the order stored in the pickle:
        the model, the house data object, the model inputs and the
        values the model predicts.
    """
    import gdown

    url = 'https://drive.google.com/uc?id=1OD2l7ynVzLlqY92gYiCx5xq32-D1wJMe'
    output = 'one1.pkl'
    gdown.download(url, output)

    # NOTE(review): unpickling runs arbitrary code from the file, so this is
    # only safe while the Google Drive file above stays under our control.
    with open(output, 'rb') as f:
        m2, to2, xsAll2, yAll2 = pickle.load(f)
    return m2, to2, xsAll2, yAll2
# Show a placeholder while the bundle is loaded, then overwrite the same
# element with a confirmation once load_data() has returned.
data_load_state = st.text('Loading data...')
m1, to1, xsAll1, yAll1 = load_data()
data_load_state.text("Loaded data (using st.cache)")
Creating a property prediction App
Using Python and Streamlit to create a property price app using machine learning
Overview
This is an app for predicting house prices in Swansea using Streamlit
The app uses the following for predictions:
- House price sales (price and date)
- Name of street and number
- Location of property (Latitude and Longitude)
- Data on Census regions
More details of the prediction side of the app, using Random Forests and neural networks, are dealt with here.
A video of the app is shown below or can be accessed via property app
The intention of the app is to provide a simple user interface to allow price predictions
The Python Script for the App
Loading the data
The following function is used to load the data.
The data consists of 4 parts:
1. m = the model (this is by far the biggest part in terms of data size)
2. to = a data frame object of the house data
3. xsAll = a form of ‘to’ used by the model (the input values of the model)
4. yAll = a form of ‘to’ holding the values the model is trying to predict (house price values)
The data is saved as a pickle file in a Jupyter Notebook, then uploaded to Google Drive, as the file size has to be less than 100 MB on GitHub.
gdown.download(url, output)
downloads the data from GoogleDrive
@st.cache(allow_output_mutation=True)
This line at the top of the Python file means the data is cached, so it doesn’t need to be reloaded every time the script reruns
Updating data so prediction is for today
The following function changes the date of sale details for each property so that they are today’s date
def add_datepart(df, field_name, prefix=None, drop=True, time=False):
Getting the predictions
The following function takes the address selected and outputs the predicted property price
get_predTodayNotExact(m,address,toTEMP,xs_final,y)
Help with reducing code for selecting data
The following function is used to select different details from the dataframes. It is really just a helper to get around the slightly unusual way data is selected in ‘to’ and DataFrames
def doSelect(typee,option2,typeeOut,toTEMP):
The user interface parts of the app
optionSELECT = st.sidebar.selectbox( 'Select how to search', choice)
This is a select box located in the LHS sidebar. The options for the user are as follows, and the choice dictates which boxes they see:
choice=['Post Code','Region', 'Street']
For example, if ‘Post Code’ is selected, the following selectbox is shown with all postcode options for the first part of the postcode:
option2 = st.sidebar.selectbox( 'Select Postcode', indexPCSA)
Whatever option selected the individual address must be selected:
address = st.sidebar.selectbox( 'Select Address', AdSel)
Predictions and Output
Once the address is selected, the predictions are found by calling the prediction function:
Pri1, Pri2, typa=get_predTodayNotExact(m,address,(to),(xsAll),(yAll))
And then displayed in the main screen with the following code:
'You selected: ', option2, 'and', address
'Property type is ',typa
stra = 'The predicted price is: ' st.subheader(stra) st.header('£'+ str(Pri1[0])+'k')
Creating the App
The Python .py file, along with a requirements file, is uploaded to a GitHub repository. The one for this app is at GitHub: PropertyStreamlit
The requirements file includes all the modules not included in python that need to be loaded. For this function the file is as follows:
numpy
pandas
matplotlib
gdown
scikit-learn
fastai
Then simply log into Streamlit, select the repository and the Python file, and hit go to get the app working
Full Python function
import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
import copy
from sklearn.ensemble import RandomForestRegressor
# Page title shown at the top of the app.
st.title('Swansea Property Price Predictor')
# NOTE(review): `st.cache` is the legacy Streamlit caching decorator; kept so
# the script still matches the Streamlit version the app was written for.
@st.cache(allow_output_mutation=True)
def load_data():
    """
    Download the pickled data bundle from Google Drive and load it.

    The file is fetched with ``gdown`` into ``one1.pkl`` (relative to the
    current working directory) and then unpickled into four objects.

    Returns
    -------
    tuple
        ``(m2, to2, xsAll2, yAll2)`` in the order stored in the pickle:
        the model, the house data object, the model inputs and the
        values the model predicts.
    """
    import gdown

    url = 'https://drive.google.com/uc?id=1OD2l7ynVzLlqY92gYiCx5xq32-D1wJMe'
    output = 'one1.pkl'
    gdown.download(url, output)

    # NOTE(review): unpickling runs arbitrary code from the file, so this is
    # only safe while the Google Drive file above stays under our control.
    with open(output, 'rb') as f:
        m2, to2, xsAll2, yAll2 = pickle.load(f)
    return m2, to2, xsAll2, yAll2
# Show a placeholder while the bundle is loaded, then overwrite the same
# element with a confirmation once load_data() has returned.
data_load_state = st.text('Loading data...')
m1, to1, xsAll1, yAll1 = load_data()
data_load_state.text("Loaded data (using st.cache)")
def add_datepart(df, field_name, prefix=None, drop=True, time=False):
    """Helper function that adds columns relevant to a date in the column `field_name` of `df`.

    `df` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the date column.
    field_name : str
        Name of the date column; converted with `pd.to_datetime` when it is
        not already a datetime column.
    prefix : str, optional
        Prefix for the new column names. Defaults to `field_name` with a
        trailing 'date'/'Date' removed.
    drop : bool
        When true, remove `field_name` from `df` after the new columns exist.
    time : bool
        When true, also add 'Hour', 'Minute' and 'Second' columns.

    Returns
    -------
    pandas.DataFrame
        `df` with one column per date attribute plus `<prefix>Elapsed`
        (whole seconds since the Unix epoch, NaN where the date is missing).
    """
    import re
    import pandas as pd
    import numpy as np

    def ifnone(a, b):
        "`b` if `a` is None else `a`"
        return b if a is None else a

    def make_date(df, date_field):
        "Make sure `df[date_field]` is of the right date type."
        # is_datetime64_any_dtype accepts naive and tz-aware columns and does
        # not raise on extension dtypes the way np.issubdtype does.
        if not pd.api.types.is_datetime64_any_dtype(df[date_field]):
            # `infer_datetime_format` is deprecated in pandas 2.x, so it is
            # no longer passed.
            df[date_field] = pd.to_datetime(df[date_field])

    make_date(df, field_name)
    field = df[field_name]
    prefix = ifnone(prefix, re.sub('[Dd]ate$', '', field_name))
    attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear', 'Is_month_end', 'Is_month_start',
            'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start']
    if time:
        attr = attr + ['Hour', 'Minute', 'Second']
    # Pandas removed `dt.week` in v1.1.10
    week = field.dt.isocalendar().week.astype(field.dt.day.dtype) if hasattr(field.dt, 'isocalendar') else field.dt.week
    for n in attr:
        df[prefix + n] = getattr(field.dt, n.lower()) if n != 'Week' else week
    mask = ~field.isna()
    # Cast to second resolution before taking the integer value so the result
    # is epoch seconds whatever unit (ns/us/ms/s) the column is stored in.
    seconds = field.values.astype('datetime64[s]').astype(np.int64)
    df[prefix + 'Elapsed'] = np.where(mask, seconds, np.nan)
    if drop:
        df.drop(field_name, axis=1, inplace=True)
    return df
def get_predTodayNotExact(m,address,toTEMP,xs_final,y,Street=None,HouseNo=None):
    """
    Given model m, address, initial pd of houses to, and adjusted pd xs_final
    output is house price prediction

    The date columns of `xs_final` are first rebuilt for today's date, so the
    prediction is for a sale made today.

    Parameters
    ----------
    m : model exposing `predict`
        NOTE(review): predictions are passed through `np.exp`, so the model
        presumably predicts log-price -- confirm.
    address : str
        Address label to look up in `toTEMP.classes['Address']`.
    toTEMP : object with `.classes` and column access (the `to` data object)
    xs_final : pandas.DataFrame
        Model inputs; must contain 'Elapsed', 'Year' and 'Type' columns.
    y : pandas.Series
        Target values indexed like `toTEMP`.
    Street, HouseNo : optional
        Street label and house number for the nearest-house fallback, used
        when the exact address lookup fails. The fallback read these names
        without ever defining them, so it could only raise NameError; they
        are now explicit, optional arguments.
        NOTE(review): the fallback also needs 'Street' and 'HouseNo' columns
        in `xs_final` -- confirm they exist.

    Returns
    -------
    tuple
        `(preda, prev, typa)`: predicted price in thousands (array, rounded
        to 1 dp), the recorded price in thousands (0 in the fallback), and
        the property type label.

    Raises
    ------
    LookupError
        If the exact lookup fails and `Street`/`HouseNo` were not supplied.
    """
    # convert to current date
    colsAll = xs_final.columns
    xsNoDate = xs_final.drop(columns=['Elapsed', 'Year'])
    xsNoDate['Date'] = pd.to_datetime("today")
    xsNoDate = add_datepart(xsNoDate, 'Date')
    xs_finalTEMP = xsNoDate.loc[:, colsAll]

    try:
        # each address has a unique number
        # find which number the given address is (take 1st if more than 1)
        address_code = [pos for pos, name in enumerate(toTEMP.classes['Address'])
                        if name == address][0]
        # 1 address can have multiple sales so we need index in dataframes
        row = toTEMP[toTEMP['Address'] == address_code].index[0]

        preda = np.round(np.exp(m.predict(xs_finalTEMP.loc[row:row])) / 1000, 1)
        prev = np.round(np.exp(y.loc[row]) / 1000, 1)

        typeAll = toTEMP.classes['Type']
        typa = typeAll[xs_finalTEMP.loc[row:row, 'Type']][0]
    except Exception as err:
        if Street is None or HouseNo is None:
            raise LookupError(
                'No exact match for address %r and no Street/HouseNo given '
                'for the nearest-house fallback' % (address,)
            ) from err

        street_code = [pos for pos, name in enumerate(toTEMP.classes['Street'])
                       if name == Street][0]
        xsTemp = xs_finalTEMP[xs_finalTEMP['Street'] == street_code].copy()
        xsTemp.reset_index(inplace=True, drop=True)

        # find nearest house by houseno
        No = np.array(xsTemp['HouseNo'])
        gaps = np.abs(No - HouseNo)
        nearest_no = No[gaps == np.min(gaps)][0]

        # get index of the nearest house and pretend it has the requested number
        row = [pos for pos, no in enumerate(xsTemp.HouseNo) if no == nearest_no][0]
        xsTemp.loc[row:row, 'HouseNo'] = HouseNo

        # Kept as an array (no trailing [0]) so both branches return the same
        # shape; the caller indexes the result with [0].
        preda = np.round(np.exp(m.predict(xsTemp.loc[row:row])) / 1000, 1)
        prev = 0

        typeAll = toTEMP.classes['Type']
        typa = typeAll[xsTemp.loc[row:row, 'Type']][0]

    return preda, prev, typa
def doSelect(typee,option2,typeeOut,toTEMP):
    """
    Return the unique `typeeOut` labels of all rows whose `typee` label is `option2`.

    Parameters
    ----------
    typee : str
        Column to filter on (e.g. 'Postcode').
    option2 : str
        Label selected for `typee` (e.g. 'SA1 0EA').
    typeeOut : str
        Column whose labels are returned (e.g. 'Address').
    toTEMP : object
        Data object exposing `.classes[col]` (label per integer code),
        `toTEMP[col]` (integer codes per row) and `.iloc`.

    Returns
    -------
    numpy.ndarray
        Sorted unique labels from `toTEMP.classes[typeeOut]`.

    Raises
    ------
    IndexError
        If `option2` is not one of `toTEMP.classes[typee]`.
    """
    labels_in = toTEMP.classes[typee]
    labels_out = toTEMP.classes[typeeOut]

    # this finds index of postcode for example SA1 0EA = 62
    selected_code = [pos for pos, label in enumerate(labels_in) if label == option2][0]

    # positions of all rows carrying that code
    row_positions = np.flatnonzero(np.asarray(toTEMP[typee]) == selected_code)

    # codes of the output column for those rows
    out_codes = toTEMP.iloc[row_positions][typeeOut]

    # convert the codes to actual labels and keep the unique values
    return np.unique(labels_out[out_codes])
# First parts of the postcodes offered in the 'Select Area' box.
pcodesSA = ['SA1', 'SA2', 'SA3', 'SA4', 'SA5', 'SA6', 'SA7', 'SA8',
            'SA9', 'SA10', 'SA11', 'SA12', 'SA13', 'SA14', 'SA15', 'SA18']
# Ways the user can search for a property.
choice = ['Post Code', 'Region', 'Street']

# Work on copies so the objects returned by the cached loader are not touched.
to = copy.copy(to1)
m = copy.copy(m1)
xsAll = copy.copy(xsAll1)
yAll = copy.copy(yAll1)

# These are the list of all addresses etc by actual name
AdAll = to.classes['Address']
pcAll = to.classes['Postcode']
regionAll = to.classes['Region']
streetAll = to.classes['Street']

# An optionbox- Select How search
optionSELECT = st.sidebar.selectbox(
    'Select how to search',
    choice)

if optionSELECT == 'Post Code':
    # An optionbox- Select Postcode Start e.g. SA1
    option = st.sidebar.selectbox(
        'Select Area',
        pcodesSA)

    # Select Postcode All
    # This finds a set of postcodes given by optionbox
    indexPCSA = [ij for ij in pcAll if ij.split(' ')[0] == option]

    # optionbox to select particular postcode
    # Outcome e.g. SA1 0EA
    option2 = st.sidebar.selectbox(
        'Select Postcode',
        indexPCSA)
    AdSel = doSelect(typee='Postcode', option2=option2, typeeOut='Address', toTEMP=to)

elif optionSELECT == 'Region':
    option2 = st.sidebar.selectbox(
        'Select Region',
        regionAll)
    StreetSel = doSelect(typee='Region', option2=option2, typeeOut='Street', toTEMP=to)

    option3 = st.sidebar.selectbox(
        'Select Street',
        StreetSel)
    AdSel = doSelect(typee='Street', option2=option3, typeeOut='Address', toTEMP=to)

elif optionSELECT == 'Street':
    option2 = st.sidebar.selectbox(
        'Select Street',
        streetAll)
    AdSel = doSelect(typee='Street', option2=option2, typeeOut='Address', toTEMP=to)

# Whatever search route was taken, the individual address is picked last.
address = st.sidebar.selectbox(
    'Select Address',
    AdSel)

Pri1, Pri2, typa = get_predTodayNotExact(m, address, to, xsAll, yAll)

#tell user what they selected
# The two bare expressions below are intentional: they rely on Streamlit
# "magic" to render stand-alone expressions in the page.
'You selected: ', option2, 'and', address
'Property type is ', typa

stra = 'The predicted price is: '
st.subheader(stra)
st.header('£' + str(Pri1[0]) + 'k')