Climate Change and Elk Farming Calendar - PBL 02 - Azure Data Scientist Associate
- Tung San
- Aug 1, 2021
- 3 min read

Problem:
A customer manages a farm in Washington State. The farm, the longest-running elk farm, has been operated for generations and has always followed a traditional farming calendar. Following the calendar used to keep the elk healthy, but the health of the elk has slowly worsened over the decades.
It's known that the farm’s elk should be fed grain only when the night temperature is very cold, i.e. below 32°F (0°C). According to the traditional farming calendar, the farm should stop grain-feeding after January 31st, when the average night temperature rises above freezing.
Using some historical weather data, we want to determine whether the climate has changed, and whether the farming calendar needs to be updated.
Import dataset and libraries
from pathlib import Path

import pandas
import wget

# Two helper modules (plotting and optimiser) and the Seattle weather dataset,
# all hosted in the Microsoft Learn "introduction to machine learning" repo.
url1 = "https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py"
url2 = "https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py"
url3 = "https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/Data/seattleWeather_1948-2017.csv"

# Download each file into the working directory, skipping any that is already
# present so that re-running this cell does not fetch everything again
# (NOTE(review): wget.download is believed to save a renamed "name (1).ext"
# duplicate when the target exists -- confirm against the wget package docs).
for url in (url1, url2, url3):
    local_name = url.rsplit("/", 1)[-1]
    if not Path(local_name).exists():
        wget.download(url)
Filter data
# Load the Seattle weather CSV; the "date" column is parsed into datetimes
data = pandas.read_csv('seattleWeather_1948-2017.csv', parse_dates=['date'])
# Keep only the January rows. The .dt accessor builds the boolean mask in one
# vectorised step instead of looping over every date in Python.
# .copy() gives an independent frame so columns added later do not touch `data`.
jan = data[data["date"].dt.month == 1].copy()
# Notebook-style display of the filtered frame
jan

Visualize data
import graphing # custom graphing code. See our GitHub repository for details
# Let's take a quick look at our data:
# plot the minimum (night) temperature of every January day against its date
graphing.scatter_2D(jan, label_x="date", label_y="min_temperature", title="January Temperatures (°F)")

Normalize data for ML training
# Express each date as fractional years relative to the mean year of the data
# (the column name assumes that mean year is 1982 -- TODO confirm).
# The mean year is computed once up front rather than once per row.
mean_year = jan["date"].mean().year
jan["years_since_1982"] = (
    jan["date"].dt.year + jan["date"].dt.dayofyear / 365.25
) - mean_year
# Standardise the temperature (subtract the mean, divide by the standard
# deviation) so that it has a smaller range of values
jan["normalised_temperature"] = (
    jan["min_temperature"] - jan["min_temperature"].mean()
) / jan["min_temperature"].std()
# Graph the rescaled data
graphing.scatter_2D(jan, label_x="years_since_1982", label_y="normalised_temperature", title="January Temperatures (Normalised)")

Create an empty linear model to-be trained
class MyModel:
    '''
    A straight-line model: temperature = date * slope + intercept.
    '''

    def __init__(self):
        '''
        Builds an untrained model with both line parameters set to zero.
        '''
        # The intercept shifts the whole line up or down
        self.intercept = 0
        # The slope sets how steeply the line rises or falls
        self.slope = 0

    def predict(self, date):
        '''
        Returns the temperature the line gives for the supplied date(s).
        '''
        scaled = date * self.slope
        return scaled + self.intercept


# The single model instance that the rest of the script trains and plots
model = MyModel()
Visualize the empty model
# Show the starting parameters (intercept first, then slope)
print(f"Model parameters before training: {model.intercept}, {model.slope}")
# Look at how well the model does before training:
# model.predict is handed to the plot as the trendline function
print("Model visualised before training:")
graphing.scatter_2D(jan, "years_since_1982", "normalised_temperature", trendline=model.predict)

Define a cost function - Sum of squared differences
def cost_function(actual_temperatures, estimated_temperatures):
    '''
    Calculates the difference between actual and estimated temperatures.
    Returns the difference, and also returns the sum of the squared
    differences (the cost). The cost is a sum, not an average.

    actual_temperatures: One or more temperatures recorded in the past.
    estimated_temperatures: Corresponding temperature(s) estimated by the model.

    Inputs must support elementwise subtraction (e.g. plain numbers, NumPy
    arrays or pandas Series).
    '''
    # Calculate the difference between actual temperatures and those
    # estimated by the model
    difference = estimated_temperatures - actual_temperatures
    squared = difference ** 2

    # A single temperature yields a scalar, which the builtin sum() cannot
    # iterate over; in that case the squared difference already is the cost.
    try:
        iter(squared)
    except TypeError:
        return difference, squared

    # Convert to a single number that tells us how well the model did
    # (smaller numbers are better)
    cost = sum(squared)
    return difference, cost
Import an optimizer for linear model
from m0b_optimizer import MyOptimizer
# Create an optimiser. Per the course material, the optimizer uses gradient
# descent to find the optimal linear-regression coefficients.
optimizer = MyOptimizer()
Define training method in each iteration
def train_one_iteration(model_inputs, true_temperatures, last_cost: float):
    '''
    Performs one training step on the module-level `model`.

    model_inputs: One or more dates to feed to the model
    true_temperatures: The temperatures actually recorded on those dates
    last_cost: The cost reported by the previous iteration

    Returns a pair:
        A boolean saying whether training should carry on
        The cost of this iteration (smaller is better)
    '''
    # Ask the model for its current estimates, then score them
    predictions = model.predict(model_inputs)
    residuals, cost = cost_function(true_temperatures, predictions)

    # No improvement over the previous iteration: stop, leaving the
    # parameters untouched
    if cost >= last_cost:
        return False, cost

    # Otherwise let the optimiser propose parameter changes and apply them
    intercept_step, slope_step = optimizer.get_parameter_updates(model_inputs, cost, residuals)
    model.slope += slope_step
    model.intercept += intercept_step
    return True, cost
Testing an iteration
import math
# Parameters as they stand before the trial step
print(f"Model parameters before training:\t\t{model.intercept:.8f},\t{model.slope:.8f}")
# Run exactly one training step. An infinite previous cost means any finite
# cost counts as an improvement, so the parameters get updated.
# Note: this changes the shared `model`, so later training continues from here.
continue_loop, cost = train_one_iteration(
    model_inputs = jan["years_since_1982"],
    true_temperatures = jan["normalised_temperature"],
    last_cost = math.inf
)
# Parameters after the single step
print(f"Model parameters after 1 iteration of training:\t{model.intercept:.8f},\t{model.slope:.8f}")
Start training
import math

print("Training beginning...")

# Begin with an infinite "previous cost" so that no finite cost can trigger
# an abort on the first comparison
last_cost = math.inf
i = 0
while True:
    # One training step: reports whether to carry on, plus this step's cost
    continue_loop, last_cost = train_one_iteration(
        model_inputs=jan["years_since_1982"],
        true_temperatures=jan["normalised_temperature"],
        last_cost=last_cost,
    )
    # Progress report every 400 iterations
    if i % 400 == 0:
        print("Iteration:", i)
    i += 1
    # Leave once the step reports that the cost stopped improving
    if not continue_loop:
        break

print("Training complete!")
print(f"Model parameters after training:\t{model.intercept:.8f},\t{model.slope:.8f}")
# Plot the data again, now with the trained line on top
graphing.scatter_2D(jan, "years_since_1982", "normalised_temperature", trendline=model.predict)


Conclusion:
The January night temperature has been rising since 1948. The farm should have stopped grain-feeding their elk earlier than January 31st!
Comentários