First, we create a Console project in Visual Studio and install the ML.NET package. The NuGet Package Manager helps us install the package in Visual Studio.
After installing the Microsoft.ML package, we can import the following ML namespaces:
using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System; using System.Collections.Generic; using System.Linq;
Preparing test data
Next, we generate train and test data for this classification example. The data should be held in a class, so we define the ProcessData class as a container for the train and test data, and the ProcessPrediction class for the output data, as follows:
/// <summary>
/// One input record: three numeric measurements and a class label.
/// </summary>
public class ProcessData
{
    [Column("0")]
    public float UnitA;

    [Column("1")]
    public float UnitS;

    [Column("2")]
    public float Volume;

    // Carries the column name "Label", which the pipeline steps refer to.
    [Column("3"), ColumnName("Label")]
    public string Label;
}

/// <summary>
/// Prediction output record; bound to the "PredictedLabel" column.
/// </summary>
public class ProcessPrediction
{
    [ColumnName("PredictedLabel")]
    public string PredictedLabels;
}
To generate sample data, I use the GeneratePData() method. It creates both the train and the test data. You can check the data generation logic in the method body.
// One generator shared by every call. Creating a new Random per call can give
// identical sequences when two calls happen in quick succession on runtimes
// that seed from the system clock, which would make the test set repeat the
// first training records.
private static readonly Random s_random = new Random();

/// <summary>
/// Generates random process records for training or testing.
/// </summary>
/// <param name="count">Number of records to create; zero or a negative value yields an empty list.</param>
/// <param name="test">
/// When <c>false</c> (default) each record is labeled "Warning", "Alert" or "Normal"
/// by the rules below; when <c>true</c> the label is left as an empty string.
/// </param>
/// <returns>A new list containing the generated records.</returns>
private static List<ProcessData> GeneratePData(int count, bool test = false)
{
    var pdlist = new List<ProcessData>();

    for (int i = 0; i < count; i++)
    {
        // Upper bounds of Random.Next are exclusive.
        var pd = new ProcessData
        {
            UnitA = s_random.Next(0, 120),
            UnitS = s_random.Next(0, 60),
            Volume = s_random.Next(40, 280)
        };

        if (test)
        {
            // Test records carry no ground-truth label.
            pd.Label = string.Empty;
        }
        else if ((pd.UnitA > 75 || pd.UnitS > 30) && pd.Volume > 230)
        {
            pd.Label = "Warning";
        }
        else if ((pd.UnitA < 25 || pd.UnitS < 20) && pd.Volume < 100)
        {
            pd.Label = "Alert";
        }
        else
        {
            pd.Label = "Normal";
        }

        pdlist.Add(pd);
    }

    return pdlist;
}
To check data content, we write a Print() method to show elements in a list.
/// <summary>
/// Writes the records to the console as comma-separated rows, preceded by a header line.
/// </summary>
/// <param name="results">Records to print; each row is prefixed with a running id starting at 1.</param>
private static void Print(IEnumerable<ProcessData> results)
{
    int i = 1;

    // The original statement had an extra closing parenthesis and did not compile.
    Console.WriteLine("id,UnitA,UnitS,Volume,Label");

    foreach (var item in results)
    {
        Console.WriteLine(string.Join(",", i, item.UnitA, item.UnitS, item.Volume, item.Label));
        i++;
    }
}
We create train data (2000 items) and test data (50 items) with GeneratePData() method.
var trainData = GeneratePData(2000); Print(trainData.Take(20));
id,UnitA,UnitS,Volume,Label
1,4,13,133,Normal
2,90,0,271,Warning
3,37,12,130,Normal
4,47,11,189,Normal
5,102,47,101,Normal
6,11,38,259,Warning
7,46,46,271,Warning
....
var testData = GeneratePData(50, test: true); Print(testData.Take(20));
id,UnitA,UnitS,Volume,Label
1,42,21,79,
2,7,26,258,
3,28,19,130,
4,84,17,124,
5,80,9,99,
6,64,22,153,
7,67,10,76,
....
Building a model
The train and test data are ready. Next, we build a model with the LearningPipeline class of the ML.NET package.
// Create an empty pipeline; the data source, transforms and trainer are added to it in the following steps.
var learningPipe = new LearningPipeline();
The list must be converted into a CollectionDataSource before it can be added to the learningPipe object.
// Wrap the in-memory training list in a data source and add it to the pipeline.
var trainCollection = CollectionDataSource.Create(trainData);
learningPipe.Add(trainCollection);
Converting labels into the numeric index.
// Convert the string values of the "Label" column into a numeric index.
learningPipe.Add(new Dictionarizer("Label"));
Identifying feature columns.
// Combine the three input columns into a single "Features" column.
learningPipe.Add(new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));
Adding a classifying algorithm
// Add the classification trainer (stochastic dual coordinate ascent) with its default settings.
learningPipe.Add(new StochasticDualCoordinateAscentClassifier());
Adding a predicted label converter.
// Convert the value in the "PredictedLabel" column back to the original label value.
learningPipe.Add(new PredictedLabelColumnOriginalValueConverter()
{ PredictedLabelColumn = "PredictedLabel" });
Training the model
// Train the pipeline; ProcessData is the input record type and ProcessPrediction the output record type.
var model = learningPipe.Train<ProcessData, ProcessPrediction>();
Now, we add ClassificationEvaluator() to check the model accuracy, then we run our program to verify the model. Through the evaluator, we can obtain accuracy, loss, confusion matrix and other parameters that describe the model performance.
// Evaluate the trained model and print two of the resulting metrics.
// NOTE: the metrics are computed on trainCollection, the same data the model was
// trained on, so they are likely optimistic compared with held-out data.
var evaluator = new ClassificationEvaluator();
var metrics = evaluator.Evaluate(model, trainCollection);
Console.WriteLine("AccuracyMicro: " + metrics.AccuracyMicro);
Console.WriteLine("LogLoss: " + metrics.LogLoss);
Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.
Using 4 threads to train.
Automatically choosing a check frequency of 4.
Auto-tuning parameters: maxIterations = 748.
Auto-tuning parameters: L2 = 2.688172E-05.
Auto-tuning parameters: L1Threshold (L1/L2) = 0.
Using best model from iteration 44.
Not training a calibrator because it is not needed.
AccuracyMicro: 0.8915
LogLoss: 0.244822749619737
Predicting test data
// Run the trained model over the unlabeled test records.
var predicted = model.Predict(testData);
Finally, we collect test data and predicted data and print the result.
var results = testData.Zip(predicted, (t, p) => new ProcessData { UnitA = t.UnitA, UnitS = t.UnitS, Volume = t.Volume, Label = p.PredictedLabels }).ToList(); Print(results);
id,UnitA,UnitS,Volume,Label
1,108,12,233,Normal
2,82,49,89,Normal
3,41,0,83,Alert
4,101,47,84,Normal
5,35,34,200,Normal
6,109,7,132,Normal
7,8,2,133,Normal
8,39,16,72,Alert
9,99,33,245,Warning
10,80,41,228,Normal
....
In this post, we have learned how to use ML.NET for a data classification problem.
Thank you for reading! I hope you have found the post useful. A regression example is also explained in the link below.
Regression example with ML.NET in C#
Sentiment Analysis Example with ML.NET in C#
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System;
using System.Collections.Generic;
using System.Linq;

namespace mltest
{
    /// <summary>
    /// One input record: three numeric measurements and a class label.
    /// </summary>
    public class ProcessData
    {
        [Column("0")]
        public float UnitA;

        [Column("1")]
        public float UnitS;

        [Column("2")]
        public float Volume;

        // Carries the column name "Label", which the pipeline steps refer to.
        [Column("3")]
        [ColumnName("Label")]
        public string Label;
    }

    /// <summary>
    /// Prediction output record; bound to the "PredictedLabel" column.
    /// </summary>
    public class ProcessPrediction
    {
        [ColumnName("PredictedLabel")]
        public string PredictedLabels;
    }

    class Program
    {
        // One generator shared by every GeneratePData call. Creating a new Random per
        // call can give identical sequences when two calls happen in quick succession
        // on runtimes that seed from the system clock.
        private static readonly Random s_random = new Random();

        /// <summary>
        /// Generates data, trains a classifier, prints its metrics and prints the
        /// predictions for the test records.
        /// </summary>
        static void Main(string[] args)
        {
            // generate train and test data
            var trainData = GeneratePData(2000);
            Print(trainData.Take(20));
            var testData = GeneratePData(50, test: true);

            // create object for machine learning task
            var learningPipe = new LearningPipeline();
            var trainCollection = CollectionDataSource.Create(trainData);
            learningPipe.Add(trainCollection);

            // convert labels into numeric index.
            learningPipe.Add(new Dictionarizer("Label"));

            // combine the input columns into a single "Features" column
            learningPipe.Add(
                new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));

            // classifying algorithm
            learningPipe.Add(new StochasticDualCoordinateAscentClassifier());

            // converts predicted label column value.
            learningPipe.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            // model training
            var model = learningPipe.Train<ProcessData, ProcessPrediction>();

            // model evaluation and accuracy check
            // NOTE: the metrics are computed on trainCollection, the same data the model
            // was trained on, so they are likely optimistic compared with held-out data.
            var evaluator = new ClassificationEvaluator();
            var metrics = evaluator.Evaluate(model, trainCollection);
            Console.WriteLine("AccuracyMicro: " + metrics.AccuracyMicro);
            Console.WriteLine("LogLoss: " + metrics.LogLoss);

            // predict test data
            var predicted = model.Predict(testData);

            // collect testdata and predicted Label
            var results = testData.Zip(predicted, (t, p) => new ProcessData
            {
                UnitA = t.UnitA,
                UnitS = t.UnitS,
                Volume = t.Volume,
                Label = p.PredictedLabels
            }).ToList();

            // printing output results
            Print(results);

            // keep the console window open until Enter is pressed
            Console.ReadLine();
        }

        /// <summary>
        /// Writes the records to the console as comma-separated rows, preceded by a header line.
        /// </summary>
        /// <param name="results">Records to print; each row is prefixed with a running id starting at 1.</param>
        private static void Print(IEnumerable<ProcessData> results)
        {
            int i = 1;
            Console.WriteLine(string.Join(",", "id", "UnitA", "UnitS", "Volume", "Label"));

            foreach (var item in results)
            {
                Console.WriteLine(string.Join(",", i, item.UnitA, item.UnitS, item.Volume, item.Label));
                i++;
            }
        }

        /// <summary>
        /// Generates random process records for training or testing.
        /// </summary>
        /// <param name="count">Number of records to create; zero or a negative value yields an empty list.</param>
        /// <param name="test">
        /// When <c>false</c> (default) each record is labeled "Warning", "Alert" or "Normal"
        /// by the rules below; when <c>true</c> the label is left as an empty string.
        /// </param>
        /// <returns>A new list containing the generated records.</returns>
        private static List<ProcessData> GeneratePData(int count, bool test = false)
        {
            var pdlist = new List<ProcessData>();

            for (int i = 0; i < count; i++)
            {
                // Upper bounds of Random.Next are exclusive.
                var pd = new ProcessData
                {
                    UnitA = s_random.Next(0, 120),
                    UnitS = s_random.Next(0, 60),
                    Volume = s_random.Next(40, 280)
                };

                if (test)
                {
                    // Test records carry no ground-truth label.
                    pd.Label = string.Empty;
                }
                else if ((pd.UnitA > 75 || pd.UnitS > 30) && pd.Volume > 230)
                {
                    pd.Label = "Warning";
                }
                else if ((pd.UnitA < 25 || pd.UnitS < 20) && pd.Volume < 100)
                {
                    pd.Label = "Alert";
                }
                else
                {
                    pd.Label = "Normal";
                }

                pdlist.Add(pd);
            }

            return pdlist;
        }
    }
}
No comments:
Post a Comment