This tutorial includes the following steps:
- Loading the required libraries.
- Creating classes for data and prediction.
- Generating a sample dataset.
- Building a regression model with a generalized additive regressor.
- Evaluating accuracy.
- Predicting a new set of data with a fitted model.
The ML.NET package can be installed with the NuGet Package Manager. For installation details, please refer to the previous post. We load the libraries below.
using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System; using System.Collections.Generic; using System.Linq; using C = System.Console;
Data container class
The training and test data should be collected into class objects. We create a UnitData class to hold the input data and a UnitPrediction class to hold the predicted values.
/// <summary>
/// One data record: three numeric readings, a text status and the Level value.
/// </summary>
public class UnitData
{
    // Each field carries a Column attribute with its ordinal, "0" through "4".
    [Column("0")]
    public float UnitA;

    [Column("1")]
    public float UnitS;

    [Column("2")]
    public float Volume;

    // Text category; GenerateUData assigns "Warning", "Alert" or "Normal".
    [Column("3")]
    public string Status;

    // Target value; the pipeline copies "Level" into the "Label" column.
    [Column("4")]
    public float Level;
}

/// <summary>
/// Prediction record produced by the trained model.
/// </summary>
public class UnitPrediction
{
    // Bound to the column named "Score" through the ColumnName attribute
    // (presumably the regressor's output column - confirm against the ML.NET docs).
    [ColumnName("Score")]
    public float Level;
}
Creating Sample Dataset
We create sample data with the GenerateUData method below. You can use your own dataset too; in that case, define the class fields according to the content of your data.
// Shared generator used when the caller does not supply one. Creating a new Random
// on every call can give back-to-back calls the same time-based seed (notably on
// .NET Framework), which would make the test set a copy of the first training records.
// Note: Random is not thread-safe; this sample only uses it from a single thread.
private static readonly Random s_random = new Random();

/// <summary>
/// Generates <paramref name="count"/> synthetic <see cref="UnitData"/> records.
/// </summary>
/// <param name="count">Number of records to create; zero or a negative value yields an empty list.</param>
/// <param name="random">
/// Optional random source (pass a seeded instance for reproducible data).
/// When null, a generator shared across calls is used.
/// </param>
/// <returns>A new list holding the generated records.</returns>
internal static List<UnitData> GenerateUData(int count, Random random = null)
{
    var rng = random ?? s_random;
    var records = new List<UnitData>(Math.Max(count, 0));

    for (int i = 0; i < count; i++)
    {
        // Upper bounds of Random.Next are exclusive: UnitA 0..19, UnitS 0..9, Volume 4..29.
        var record = new UnitData
        {
            UnitA = rng.Next(0, 20),
            UnitS = rng.Next(0, 10),
            Volume = rng.Next(4, 30)
        };

        // Derive the status category from the three readings.
        if ((record.UnitA > 16 || record.UnitS > 7) && record.Volume > 23)
        {
            record.Status = "Warning";
        }
        else if ((record.UnitA < 5 || record.UnitS < 3) && record.Volume < 15)
        {
            record.Status = "Alert";
        }
        else
        {
            record.Status = "Normal";
        }

        // Target = sum of the readings minus a small random offset (1 or 2).
        record.Level = (record.UnitA + record.UnitS + record.Volume) - rng.Next(1, 3);
        records.Add(record);
    }

    return records;
}
Next, we create the training and test data. The training set contains 2000 records and the test set contains 20 records.
// Generate the training and test sets.
var trainData = GenerateUData(2000);
var testData = GenerateUData(20);

// Preview the first 20 training records as CSV.
C.WriteLine("Dataset content:");
C.WriteLine("id,UnitA,UnitS,Volume,Status,Level");

// The header starts with an "id" column, so every row is prefixed with a
// 1-based row number to keep the header and the data aligned.
int rowId = 1;
foreach (var item in trainData.Take(20))
{
    C.WriteLine(string.Join(",", rowId, item.UnitA, item.UnitS, item.Volume, item.Status, item.Level));
    rowId++;
}
The training data must be converted into a collection data source, which can be done with the CollectionDataSource class.
// Wrap the in-memory training list in a collection data source so that it can be
// added to the learning pipeline as its data-loading component.
var trainCollection = CollectionDataSource.Create(trainData);
Building a model
Next, we create a learning pipeline object and add all the components of the model. ML.NET provides many regression algorithms that can be used in regression analysis; here, I use the GeneralizedAdditiveModelRegressor class.
// Assemble the pipeline with a collection initializer; the items are added
// in the order in which they are listed.
var learningPipe = new LearningPipeline
{
    // data source
    trainCollection,

    // copy the target column "Level" into "Label"
    new ColumnCopier(("Level", "Label")),

    // encode the text column "Status" as a one-hot vector
    new CategoricalOneHotVectorizer("Status"),

    // combine the input columns into a single "Features" column
    new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume", "Status"),

    // regression algorithm
    new GeneralizedAdditiveModelRegressor()
};
The components in the pipeline above are trained with the Train() method.
// Announce the training step, then train the pipeline. UnitData is the input
// record type and UnitPrediction is the type the model returns from Predict.
C.WriteLine("\nTraining a model:");
var model = learningPipe.Train<UnitData, UnitPrediction>();
The RegressionEvaluator class helps us evaluate the regression model. We check the RMS (root-mean-square error) and R-squared (coefficient of determination) values of the model.
// Evaluate on the held-out test records rather than on the training data:
// metrics computed on the data the model was fitted to are optimistically biased.
var evaluator = new RegressionEvaluator();
var testCollection = CollectionDataSource.Create(testData);
var metrics = evaluator.Evaluate(model, testCollection);

C.WriteLine("\nModel Evaluation:");
C.WriteLine("RMS: " + metrics.Rms);
// RSquared is the coefficient of determination (R^2), not a "root" value.
C.WriteLine("R Squared: " + metrics.RSquared);
Test data prediction
Finally, we predict the test data. To check the original and predicted data, we print them both together.
// Score every test record with the trained model.
var predicted = model.Predict(testData);

// Pair each test record with its prediction; named members replace Item1..Item6.
var results = testData.Zip(predicted, (actual, scored) => new
{
    actual.UnitA,
    actual.UnitS,
    actual.Volume,
    actual.Status,
    LevelOriginal = actual.Level,
    LevelPredicted = scored.Level
});

C.WriteLine("\nPredicted result:");
C.WriteLine("id,UnitA,UnitS,Volume,Status,LevelOriginal,LevelPredicted");

// Emit one CSV row per pair, prefixed with a 1-based id.
int rowNumber = 1;
foreach (var row in results)
{
    C.WriteLine(string.Join(",", rowNumber, row.UnitA, row.UnitS, row.Volume,
        row.Status, row.LevelOriginal, row.LevelPredicted));
    rowNumber++;
}
We have briefly learned how to use a regression model with ML.NET in C#. Thank you for reading!
The full source code is listed below.
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System;
using System.Collections.Generic;
using System.Linq;
using C = System.Console;

namespace mltest
{
    /// <summary>
    /// One data record: three numeric readings, a text status and the Level value.
    /// </summary>
    public class UnitData
    {
        [Column("0")]
        public float UnitA;

        [Column("1")]
        public float UnitS;

        [Column("2")]
        public float Volume;

        // Text category: "Warning", "Alert" or "Normal".
        [Column("3")]
        public string Status;

        // Target value; the pipeline copies it into the "Label" column.
        [Column("4")]
        public float Level;
    }

    /// <summary>
    /// Prediction record produced by the trained model.
    /// </summary>
    public class UnitPrediction
    {
        // Bound to the column named "Score" through the ColumnName attribute.
        [ColumnName("Score")]
        public float Level;
    }

    class Program
    {
        // Shared generator used when the caller does not supply one. Creating a new
        // Random on every call can give back-to-back calls the same time-based seed
        // (notably on .NET Framework), which would make the test set a copy of the
        // first training records. Random is not thread-safe; this program is single-threaded.
        private static readonly Random s_random = new Random();

        /// <summary>
        /// Generates data, trains a generalized additive regression model,
        /// evaluates it on the test set and prints the test predictions.
        /// </summary>
        static void Main(string[] args)
        {
            // generate train and test data
            var trainData = GenerateUData(2000);
            var testData = GenerateUData(20);

            C.WriteLine("Dataset content:");
            C.WriteLine("id,UnitA,UnitS,Volume,Status,Level");

            // The header starts with an "id" column, so every row is prefixed with a
            // 1-based row number to keep the header and the data aligned.
            int rowId = 1;
            foreach (var item in trainData.Take(20))
            {
                C.WriteLine(string.Join(",", rowId, item.UnitA, item.UnitS, item.Volume, item.Status, item.Level));
                rowId++;
            }

            // convert lists to collection data sources
            var trainCollection = CollectionDataSource.Create(trainData);
            var testCollection = CollectionDataSource.Create(testData);

            // create object for machine learning task
            var learningPipe = new LearningPipeline();

            // add components
            learningPipe.Add(trainCollection);
            learningPipe.Add(new ColumnCopier(("Level", "Label")));
            learningPipe.Add(new CategoricalOneHotVectorizer("Status"));
            learningPipe.Add(new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume", "Status"));

            // add regression algorithm
            learningPipe.Add(new GeneralizedAdditiveModelRegressor());

            // train the model
            C.WriteLine("\nTraining a model:");
            var model = learningPipe.Train<UnitData, UnitPrediction>();

            // Model evaluation and accuracy check. The held-out test set is used:
            // metrics computed on the training data are optimistically biased.
            var evaluator = new RegressionEvaluator();
            var metrics = evaluator.Evaluate(model, testCollection);

            C.WriteLine("\nModel Evaluation:");
            C.WriteLine("RMS: " + metrics.Rms);
            // RSquared is the coefficient of determination (R^2), not a "root" value.
            C.WriteLine("R Squared: " + metrics.RSquared);

            // predict test data
            var predicted = model.Predict(testData);

            // print testdata and predicted output
            var results = testData.Zip(predicted, (t, p) =>
                Tuple.Create(t.UnitA, t.UnitS, t.Volume, t.Status, t.Level, p.Level));

            int i = 1;
            C.WriteLine("\nPredicted result:");
            C.WriteLine("id,UnitA,UnitS,Volume,Status,LevelOriginal,LevelPredicted");
            foreach (var item in results)
            {
                C.WriteLine(string.Join(",", i, item.Item1, item.Item2, item.Item3,
                    item.Item4, item.Item5, item.Item6));
                i++;
            }

            // keep the console window open until Enter is pressed
            C.ReadLine();
        }

        /// <summary>
        /// Generates <paramref name="count"/> synthetic <see cref="UnitData"/> records.
        /// </summary>
        /// <param name="count">Number of records to create; zero or a negative value yields an empty list.</param>
        /// <param name="random">
        /// Optional random source (pass a seeded instance for reproducible data).
        /// When null, a generator shared across calls is used.
        /// </param>
        /// <returns>A new list holding the generated records.</returns>
        internal static List<UnitData> GenerateUData(int count, Random random = null)
        {
            var rng = random ?? s_random;
            var pdlist = new List<UnitData>(Math.Max(count, 0));

            for (int i = 0; i < count; i++)
            {
                // Upper bounds of Random.Next are exclusive: UnitA 0..19, UnitS 0..9, Volume 4..29.
                var pd = new UnitData
                {
                    UnitA = rng.Next(0, 20),
                    UnitS = rng.Next(0, 10),
                    Volume = rng.Next(4, 30)
                };

                // Derive the status category from the three readings.
                if ((pd.UnitA > 16 || pd.UnitS > 7) && pd.Volume > 23)
                {
                    pd.Status = "Warning";
                }
                else if ((pd.UnitA < 5 || pd.UnitS < 3) && pd.Volume < 15)
                {
                    pd.Status = "Alert";
                }
                else
                {
                    pd.Status = "Normal";
                }

                // Target = sum of the readings minus a small random offset (1 or 2).
                pd.Level = (pd.UnitA + pd.UnitS + pd.Volume) - rng.Next(1, 3);
                pdlist.Add(pd);
            }

            return pdlist;
        }
    }
}
Very nice! Thanks for the post.
ReplyDeleteYou are welcome!
Delete