From 8bc297cb8a0886f36750eb60d47dbaf8cfd23e9a Mon Sep 17 00:00:00 2001 From: lewardo Date: Thu, 31 Aug 2023 15:39:12 +0100 Subject: [PATCH 01/12] `dataseries` initial ref --- doc/DataSeries.rst | 163 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 doc/DataSeries.rst diff --git a/doc/DataSeries.rst b/doc/DataSeries.rst new file mode 100644 index 0000000..29a17dc --- /dev/null +++ b/doc/DataSeries.rst @@ -0,0 +1,163 @@ +:digest: A set of data series associated with identifiers. +:species: data +:sc-categories: UGens>FluidManipulation +:sc-related: Classes/Dictionary +:see-also: LabelSet, DataSet, DTW, +:max-seealso: dict +:description: FluidDataSeries is a container associating series of data points with identifiers. + + +:control name: + + The name of the FluidDataSeries. This is unique between all FluidDataSeries. + + +:message addFrame: + + :arg identifier: The identifier for the series to add to. + + :arg buffer: A |buffer| containing the data for the frame (only the first channel is used). + + Add a new frame to the end of a series, creates the series if it does not exist. Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. + + +:message addSeries: + + :arg identifier: The identifier for the series to add. + + :arg buffer: A |buffer| containing the data for the series (each channel is a distinct time frame). + + Add a new series from a buffer. Sets the dimensionality of the DataSeries if it is the first series added, otherwise if the buffer is too short an error will be reported. If the identifier already exists an error will be reported. + + +:message getFrame: + + :arg identifier: The identifier for the series to get from. + + :arg time: which time frame to get. + + :arg buffer: A |buffer| to write the frame to (only the first channel is used, will be resized). + + Get a frame from a series. 
If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + + +:message getSeries: + + :arg identifier: The identifier for the series to get. + + :arg buffer: A |buffer| containing the data for the series (each channel is a distinct time frame, will be resized). + + Get a series. If the identifier doesn't exist an error will be reported. + + +:message setFrame: + + :arg identifier: The identifier for the series to set a frame in. + + :arg time: which time frame to set. + + :arg buffer: A |buffer| containing the data for the frame (only the first channel is used). + + Updates a time frame in a series, or adds it to the end if there is no frame at that time point. Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. + + +:message setSeries: + + :arg identifier: The identifier for the series to set. + + :arg buffer: A |buffer| containing the data for the series (each channel is a distinct time frame). + + Updates a time series, or adds it if it doesn't exist. Sets the dimensionality of the DataSeries if it is the first series added, otherwise if the buffer is too short an error will be reported. + + +:message updateFrame: + + :arg identifier: The identifier for the series to update a frame in. + + :arg time: which time frame to update. + + :arg buffer: A |buffer| containing the data for the frame (only the first channel is used). + + Updates an existing frame. If the buffer is too short an error will be reported. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + + +:message updateSeries: + + :arg identifier: The identifier for the series to update. + + :arg buffer: A |buffer| containing the data for the series (each channel is a distinct time frame). + + Updates a new series. If the buffer is too short an error will be reported. 
If the identifier doesn't exist an error will be reported. + + +:message deleteFrame: + + :arg identifier: The identifier for the series to delete a frame from. + + :arg time: which time frame to remove. + + Delete a frame from a series, deletes the series if it is the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + + +:message deleteSeries: + + :arg identifier: The identifier for the series to delete. + + Delete a series. If the identifier doesn't exist an error will be reported. + + +:message getDataSet: + + :arg dataSet: The Dataset to write the slice to. Will overwrite and resize. + + :arg time: which time frame to extract. + + Get a dataset with the `time`th frame of every series, for examples create a :fluid-obj:`DataSet` with every first frame of every point. If an identifier doesn't have enough points it is merely not added to the output dataset. + + +:message clear: + + Empty the data series of all series and frames. + + +:message getIds: + + :arg labelSet: The FluidLabelSet to export to. Its content will be replaced. + + Export the dataset identifiers to a :fluid-obj:`LabelSet`. + + +:message merge: + + :arg sourceDataSet: The source DataSet to be merged. + + :arg overwrite: A flag to allow overwrite points with the same identifier. + + Merge sourceDataSeries in the current DataSeries. It will replace the value of points with the same identifier if overwrite is set to 1. + + +:message print: + + Post an abbreviated content of the DataSeries in the window by default, but you can supply a custom action instead. 
+ + +:message read: + + :arg filename: (optional) filename to save to + + Read a saved object in JSON format from disk, will prompt for file location if not filename not provided + + +:message write: + + Save the contents of the object to a JSON file on disk to the file specified, will prompt for file location if not filename not provided + + +:message load: + + Load the state of this object from a Dictionary. + + +:message dump: + + Dump the state of this object as a Dictionary. From 6a3c41479db17b58e2b366ec409b1ef89a3a618d Mon Sep 17 00:00:00 2001 From: lewardo Date: Mon, 11 Sep 2023 08:13:38 +0100 Subject: [PATCH 02/12] squash merge `data-series` into `lstm-rnn` --- doc/DataSeries.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/DataSeries.rst b/doc/DataSeries.rst index 29a17dc..4b766cc 100644 --- a/doc/DataSeries.rst +++ b/doc/DataSeries.rst @@ -38,14 +38,14 @@ :arg buffer: A |buffer| to write the frame to (only the first channel is used, will be resized). - Get a frame from a series. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + Get a frame from a series. Negative indexing starts from the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. :message getSeries: :arg identifier: The identifier for the series to get. - :arg buffer: A |buffer| containing the data for the series (each channel is a distinct time frame, will be resized). + :arg buffer: A |buffer| to write the series to (each channel is a distinct time frame, will be resized). Get a series. If the identifier doesn't exist an error will be reported. @@ -58,7 +58,7 @@ :arg buffer: A |buffer| containing the data for the frame (only the first channel is used). - Updates a time frame in a series, or adds it to the end if there is no frame at that time point. 
Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. + Updates a time frame in a series, or adds it to the end if there is no frame at that time point. Negative indexing starts from the last frame. Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. :message setSeries: @@ -78,7 +78,7 @@ :arg buffer: A |buffer| containing the data for the frame (only the first channel is used). - Updates an existing frame. If the buffer is too short an error will be reported. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + Updates an existing frame. Negative indexing starts from the last frame. If the buffer is too short an error will be reported. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. :message updateSeries: @@ -96,7 +96,7 @@ :arg time: which time frame to remove. - Delete a frame from a series, deletes the series if it is the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. + Delete a frame from a series, deletes the series if it is the only frame. Negative indexing starts from the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. :message deleteSeries: @@ -108,11 +108,11 @@ :message getDataSet: - :arg dataSet: The Dataset to write the slice to. Will overwrite and resize. - :arg time: which time frame to extract. - Get a dataset with the `time`th frame of every series, for examples create a :fluid-obj:`DataSet` with every first frame of every point. If an identifier doesn't have enough points it is merely not added to the output dataset. + :arg dataSet: The Dataset to write the slice to. 
Will overwrite and resize. + + Get a dataset with the `time`th frame of every series, i.e. can create a :fluid-obj:`DataSet` with every Nth frame of every series. Negative indexing starts from the last frame. If an identifier doesn't have enough frames it is merely not added to the output dataset. :message clear: @@ -124,12 +124,12 @@ :arg labelSet: The FluidLabelSet to export to. Its content will be replaced. - Export the dataset identifiers to a :fluid-obj:`LabelSet`. + Export the dataseries identifiers to a :fluid-obj:`LabelSet`. :message merge: - :arg sourceDataSet: The source DataSet to be merged. + :arg sourceDataSeries: The source DataSeries to be merged. :arg overwrite: A flag to allow overwrite points with the same identifier. From d7c82fee0b524e93e7a1479b4709d1768c0f9857 Mon Sep 17 00:00:00 2001 From: lewardo Date: Mon, 11 Sep 2023 13:24:46 +0100 Subject: [PATCH 03/12] initial ref --- doc/LSTMClassifier.rst | 58 ++++++++++++++++++++++++++++++++++++++++ doc/LSTMRegressor.rst | 60 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 doc/LSTMClassifier.rst create mode 100644 doc/LSTMRegressor.rst diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst new file mode 100644 index 0000000..ae726cc --- /dev/null +++ b/doc/LSTMClassifier.rst @@ -0,0 +1,58 @@ +:digest: Series Classification with an LSTM +:species: data +:sc-categories: Machine learning, Classification, LSTM +:sc-related: +:see-also: LSTMRegressor, LSTMForecast, DataSeries, LabelSet +:description: + + Perform classification between a :fluid-obj:`DataSeries` and a :fluid-obj:`LabelSet` using a long-short term memory recurrent neural network (LSTM) + +:discussion: + + For a thorough explanation of how this object works and more information on the parameters, visit the page on **MLP Training** (https://learn.flucoma.org/learn/mlp-training) and **MLP Parameters** (https://learn.flucoma.org/learn/mlp-parameters). 
+ + Also visit the classification tutorial, this is for the :fluid-obj:`MLPRegressor`, but it is good to understand regression conceptually: (https://learn.flucoma.org/learn/classification-neural-network/) + + Conceptually equivalent to the :fluid-obj:`MLPClassifier`, but where that maps a :fluid-obj:`DataSet` to another, recurrent networks can encode time-based patterns and learn those much more efficiently, so map a :fluid-obj:`DataSeries` to a :fluid-obj:`DataSet` + +:control hiddenSize: + + An array of numbers that specifies the internal structure of the neural network. Each number in the list represents one hidden layer of the neural network, the value of which is the number of neurons in that layer. Changing this will reset the neural network, clearing any learning that has happened. + +:control maxIter: + + The number of epochs to train for when ``fit`` is called on the object. An epoch consists of training on all the data points one time. Note the the number of epochs will be much lower here than with the MLP objects (try around 5) + +:control learnRate: + + A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. + +:control batchSize: + + The number of data points to use in between adjustments of the LSTM's internal parameters during training. 
+ +:message fit: + + :arg sourceDataSeries: Source data + + :arg targetLabelSet: Target labels + + Train the network to map between a source :fluid-obj:`DataSeries` and target :fluid-obj:`LabelSet` + +:message predict: + + :arg sourceDataSeries: Input data + + :arg targetLabelSet: :fluid-obj:`LabelSet` to write the predicted labels into + + Predict labels for a :fluid-obj:`DataSeries` (given a trained network) + +:message predictPoint: + + :arg sourceBuffer: Input point + + Predict a label for a single data point in a |buffer| + +:message clear: + + This will erase all the learning done in the neural network. \ No newline at end of file diff --git a/doc/LSTMRegressor.rst b/doc/LSTMRegressor.rst new file mode 100644 index 0000000..db084c0 --- /dev/null +++ b/doc/LSTMRegressor.rst @@ -0,0 +1,60 @@ +:digest: Series Regression with an LSTM +:species: data +:sc-categories: Machine learning, Regression, LSTM +:sc-related: +:see-also: LSTMClassifier, LSTMForecast, DataSeries, DataSet +:description: + + Perform regression between a :fluid-obj:`DataSeries` and a :fluid-obj:`DataSet` using a long-short term memory recurrent neural network (LSTM) + +:discussion: + + For a thorough explanation of how this object works and more information on the parameters, visit the page on **Recurrent Neural Networks** (https://learn.flucoma.org/learn/recurrent-networks). + + Also visit the regression tutorial, this is for the :fluid-obj:`MLPRegressor`, but it is good to understand regression conceptually: (https://learn.flucoma.org/learn/regression-neural-network/) + + Conceptually equivalent to the :fluid-obj:`MLPRegressor`, but where that maps a :fluid-obj:`DataSet` to another, recurrent networks can encode time-based patterns and learn those much more efficiently. + +:control hiddenSize: + + Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. 
Changing this will reset the neural network, clearing any learning that has happened. + +:control maxIter: + + The number of epochs to train for when ``fit`` is called on the object. An epoch consists of training on all the data points one time. Note that the number of epochs will be much lower here than with the MLP objects (try around 5) as every frame in every series is processed. + +:control learnRate: + + A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. + +:control batchSize: + + The number of data points to use in between adjustments of the LSTM's internal parameters during training. + +:message fit: + + :arg sourceDataSeries: Source data + + :arg targetDataSet: Target data + + Train the network to map between a source :fluid-obj:`DataSeries` and target :fluid-obj:`DataSet` + +:message predict: + + :arg sourceDataSeries: Input data + + :arg targetDataSet: Output data + + Apply the learned mapping to a :fluid-obj:`DataSeries` (given a trained network) + +:message predictPoint: + + :arg sourceBuffer: Input series + + :arg targetBuffer: Output point + + Predict an output point for a single series in a |buffer| + +:message clear: + + This will erase all the learning done in the neural network. 
From 0166e102984c030686651b4af6db224a1f4e1daa Mon Sep 17 00:00:00 2001 From: lewardo Date: Mon, 11 Sep 2023 13:27:12 +0100 Subject: [PATCH 04/12] specialise to classifier --- doc/LSTMClassifier.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst index ae726cc..81cbe7e 100644 --- a/doc/LSTMClassifier.rst +++ b/doc/LSTMClassifier.rst @@ -9,19 +9,19 @@ :discussion: - For a thorough explanation of how this object works and more information on the parameters, visit the page on **MLP Training** (https://learn.flucoma.org/learn/mlp-training) and **MLP Parameters** (https://learn.flucoma.org/learn/mlp-parameters). + For a thorough explanation of how this object works and more information on the parameters, visit the page on **Recurrent Neural Networks** (https://learn.flucoma.org/learn/recurrent-networks). Also visit the classification tutorial, this is for the :fluid-obj:`MLPRegressor`, but it is good to understand regression conceptually: (https://learn.flucoma.org/learn/classification-neural-network/) - Conceptually equivalent to the :fluid-obj:`MLPClassifier`, but where that maps a :fluid-obj:`DataSet` to another, recurrent networks can encode time-based patterns and learn those much more efficiently, so map a :fluid-obj:`DataSeries` to a :fluid-obj:`DataSet` + Conceptually equivalent to the :fluid-obj:`MLPClassifier`, but where that maps a :fluid-obj:`DataSet` to a :fluid-obj:`LabelSet`, recurrent networks can encode time-based patterns and learn those much more efficiently, so map a :fluid-obj:`DataSeries` to a :fluid-obj:`LabelSet` :control hiddenSize: - An array of numbers that specifies the internal structure of the neural network. Each number in the list represents one hidden layer of the neural network, the value of which is the number of neurons in that layer. Changing this will reset the neural network, clearing any learning that has happened. 
+ Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. Changing this will reset the neural network, clearing any learning that has happened. :control maxIter: - The number of epochs to train for when ``fit`` is called on the object. An epoch consists of training on all the data points one time. Note the the number of epochs will be much lower here than with the MLP objects (try around 5) + The number of epochs to train for when ``fit`` is called on the object. An epoch consists of training on all the data points one time. Note every frame is processed so the the number of epochs will be much lower here than with the MLP objects (try around 5) :control learnRate: From a965080d4d305f30069d47af7500bc8ac2162011 Mon Sep 17 00:00:00 2001 From: lewardo Date: Mon, 11 Sep 2023 13:36:33 +0100 Subject: [PATCH 05/12] forecast initial ref --- doc/LSTMClassifier.rst | 2 +- doc/LSTMForecast.rst | 60 ++++++++++++++++++++++++++++++++++++++++++ doc/LSTMRegressor.rst | 2 +- 3 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 doc/LSTMForecast.rst diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst index 81cbe7e..05fd73b 100644 --- a/doc/LSTMClassifier.rst +++ b/doc/LSTMClassifier.rst @@ -29,7 +29,7 @@ :control batchSize: - The number of data points to use in between adjustments of the LSTM's internal parameters during training. + The number of data series to use in between adjustments of the LSTM's internal parameters during training. 
:message fit: diff --git a/doc/LSTMForecast.rst b/doc/LSTMForecast.rst new file mode 100644 index 0000000..ee85613 --- /dev/null +++ b/doc/LSTMForecast.rst @@ -0,0 +1,60 @@ +:digest: Series Forecasting with an LSTM +:species: data +:sc-categories: Machine Learning, LSTM, Prediction +:sc-related: +:see-also: LSTMClassifier, LSTMRegressor, DataSeries +:description: + + Predict/forecast a continuation to an input :fluid-obj:`DataSeries` using a long short-term memory recurrent neural network (LSTM) + +:discussion: + + For a thorough explanation of how this object works and more information on the parameters, visit the page on **Recurrent Neural Networks** (https://learn.flucoma.org/learn/recurrent-networks). + + This object is not like anything that has been seen yet - it takes a single :fluid-obj:`DataSeries` and learns to predict continuations to the series with the same 'style'. It is currently rather limited in its ability, but will receive improvements in predicting ability in the future! + +:control hiddenSize: + + Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. Changing this will reset the neural network, clearing any learning that has happened. + +:control maxIter: + + The number of epochs to train for when ``fit`` is called on the object. An epoch consists of training on all the data points one time. Note every frame is processed so the number of epochs will be much lower here than with the MLP objects (try around 5) + +:control learnRate: + + A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. + +:control batchSize: + + The number of data series to use in between adjustments of the LSTM's internal parameters during training. 
+ +:message fit: + + :arg sourceDataSeries: Source data + + Train the network to learn to continue a :fluid-obj:`DataSeries` + +:message predict: + + :arg sourceDataSeries: Input data + + :arg targetDataSeries: Where to output the forecasted data + + :arg forecastLength: how many frames to predict into the future, if left blank it will return the same number of frames provided for each series + + Predict continuations for a :fluid-obj:`DataSeries` (given a trained network) + +:message predictPoint: + + :arg sourceBuffer: Input series + + :arg targetBuffer: Where to output the forecasted data + + :arg forecastLength: how many frames to predict into the future, if left blank it will return the same number of frames provided + + Predict a continuation to the data in a |buffer| (given a trained network) + +:message clear: + + This will erase all the learning done in the neural network. \ No newline at end of file diff --git a/doc/LSTMRegressor.rst b/doc/LSTMRegressor.rst index db084c0..e827875 100644 --- a/doc/LSTMRegressor.rst +++ b/doc/LSTMRegressor.rst @@ -29,7 +29,7 @@ :control batchSize: - The number of data points to use in between adjustments of the LSTM's internal parameters during training. + The number of data series to use in between adjustments of the LSTM's internal parameters during training. 
:message fit: From 6c1153c89a10186ed22df26380dea4f865f1171d Mon Sep 17 00:00:00 2001 From: lewardo Date: Mon, 11 Sep 2023 13:58:24 +0100 Subject: [PATCH 06/12] merge dataseries changes --- doc/DataSeries.rst | 28 +++++----------------------- include/FluidParameterDump.hpp | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/doc/DataSeries.rst b/doc/DataSeries.rst index 4b766cc..583f5a0 100644 --- a/doc/DataSeries.rst +++ b/doc/DataSeries.rst @@ -2,16 +2,13 @@ :species: data :sc-categories: UGens>FluidManipulation :sc-related: Classes/Dictionary -:see-also: LabelSet, DataSet, DTW, +:see-also: LabelSet, DataSet, DTW :max-seealso: dict -:description: FluidDataSeries is a container associating series of data points with identifiers. - - +:description: FluidDataSeries is a container associating series of data points with identifiers :control name: The name of the FluidDataSeries. This is unique between all FluidDataSeries. - :message addFrame: :arg identifier: The identifier for the series to add to. @@ -20,7 +17,6 @@ Add a new frame to the end of a series, creates the series if it does not exist. Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. - :message addSeries: :arg identifier: The identifier for the series to add. @@ -29,7 +25,6 @@ Add a new series from a buffer. Sets the dimensionality of the DataSeries if it is the first series added, otherwise if the buffer is too short an error will be reported. If the identifier already exists an error will be reported. - :message getFrame: :arg identifier: The identifier for the series to get from. @@ -40,7 +35,6 @@ Get a frame from a series. Negative indexing starts from the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. - :message getSeries: :arg identifier: The identifier for the series to get. 
@@ -49,7 +43,6 @@ Get a series. If the identifier doesn't exist an error will be reported. - :message setFrame: :arg identifier: The identifier for the series to set a frame in. @@ -60,7 +53,6 @@ Updates a time frame in a series, or adds it to the end if there is no frame at that time point. Negative indexing starts from the last frame. Sets the dimensionality of the DataSeries if it is the first frame added, otherwise if the buffer is too short an error will be reported. - :message setSeries: :arg identifier: The identifier for the series to set. @@ -69,7 +61,6 @@ Updates a time series, or adds it if it doesn't exist. Sets the dimensionality of the DataSeries if it is the first series added, otherwise if the buffer is too short an error will be reported. - :message updateFrame: :arg identifier: The identifier for the series to update a frame in. @@ -80,7 +71,6 @@ Updates an existing frame. Negative indexing starts from the last frame. If the buffer is too short an error will be reported. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. - :message updateSeries: :arg identifier: The identifier for the series to update. @@ -89,7 +79,6 @@ Updates a new series. If the buffer is too short an error will be reported. If the identifier doesn't exist an error will be reported. - :message deleteFrame: :arg identifier: The identifier for the series to delete a frame from. @@ -98,14 +87,12 @@ Delete a frame from a series, deletes the series if it is the only frame. Negative indexing starts from the last frame. If the identifier doesn't exist or if that series doesnt have a frame for that time point an error will be reported. - :message deleteSeries: :arg identifier: The identifier for the series to delete. Delete a series. If the identifier doesn't exist an error will be reported. - :message getDataSet: :arg time: which time frame to extract. 
@@ -114,19 +101,16 @@ Get a dataset with the `time`th frame of every series, i.e. can create a :fluid-obj:`DataSet` with every Nth frame of every series. Negative indexing starts from the last frame. If an identifier doesn't have enough frames it is merely not added to the output dataset. - :message clear: Empty the data series of all series and frames. - :message getIds: :arg labelSet: The FluidLabelSet to export to. Its content will be replaced. Export the dataseries identifiers to a :fluid-obj:`LabelSet`. - :message merge: :arg sourceDataSeries: The source DataSeries to be merged. @@ -140,24 +124,22 @@ Post an abbreviated content of the DataSeries in the window by default, but you can supply a custom action instead. - :message read: - :arg filename: (optional) filename to save to + :arg filename: optional, filename to read from Read a saved object in JSON format from disk, will prompt for file location if not filename not provided - :message write: - Save the contents of the object to a JSON file on disk to the file specified, will prompt for file location if not filename not provided + :arg filename: optional, filename to save to + Save the contents of the object to a JSON file on disk to the file specified, will prompt for file location if no filename is provided :message load: Load the state of this object from a Dictionary. - :message dump: Dump the state of this object as a Dictionary. 
diff --git a/include/FluidParameterDump.hpp b/include/FluidParameterDump.hpp index 557a93b..9627e6d 100644 --- a/include/FluidParameterDump.hpp +++ b/include/FluidParameterDump.hpp @@ -41,6 +41,10 @@ namespace dataset { class DataSetClient; } +namespace dataseries { +class DataSeriesClient; +} + namespace labelset { class LabelSetClient; } @@ -260,6 +264,11 @@ std::string getArgType(SharedClientRef) return "DataSet"; } +std::string getArgType(SharedClientRef) +{ + return "DataSeries"; +} + std::string getArgType(SharedClientRef) { return "LabelSet"; @@ -270,6 +279,11 @@ std::string getArgType(SharedClientRef&) return "Input DataSet"; } +std::string getArgType(SharedClientRef) +{ + return "Input DataSeries"; +} + std::string getArgType(SharedClientRef&) { return "Input LabelSet"; @@ -410,6 +424,13 @@ class ParameterDump { return "dataset"; } + + static std::string + getParamType(const SharedClientRef::ParamType&) + { + return "dataseries"; + } + static std::string getParamType(const SharedClientRef::ParamType&) { From d856fb7fb8eb63a7e13707e223e5d7dcbca02570 Mon Sep 17 00:00:00 2001 From: tremblap Date: Mon, 25 Sep 2023 16:25:59 +0100 Subject: [PATCH 07/12] update to tip of DataSeries --- doc/DataSeries.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/DataSeries.rst b/doc/DataSeries.rst index 583f5a0..a54bcef 100644 --- a/doc/DataSeries.rst +++ b/doc/DataSeries.rst @@ -119,6 +119,21 @@ Merge sourceDataSeries in the current DataSeries. It will replace the value of points with the same identifier if overwrite is set to 1. +:message kNearest: + + :arg buffer: A |buffer| containing a data point to match against. + + :arg k: The number of nearest neighbours to return. + + Returns the identifiers of the ``k`` points nearest to the one passed in distance order (closest first). Note that this is a brute force distance measure, and inefficient for repeated queries against large dataseries. 
+ +:message kNearestDist: + + :arg buffer: A |buffer| containing a data point to match against. The number of frames in the buffer must match the dimensionality of the DataSeries. + + :arg k: The number of nearest neighbours to return. The identifiers will be sorted, beginning with the nearest. + + Returns the distances to the ``k`` points nearest to the one passed in descending order. Note that this is a brute force distance measure, and inefficient for repeated queries against large dataseries. :message print: From 69ddc67b304de19384ae89538972fb783a161f63 Mon Sep 17 00:00:00 2001 From: tremblap Date: Wed, 27 Sep 2023 10:02:00 +0100 Subject: [PATCH 08/12] predictPoint is predictSeries --- doc/LSTMClassifier.rst | 6 +++--- doc/LSTMForecast.rst | 2 +- doc/LSTMRegressor.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst index 05fd73b..be204b2 100644 --- a/doc/LSTMClassifier.rst +++ b/doc/LSTMClassifier.rst @@ -47,11 +47,11 @@ Predict labels for a :fluid-obj:`DataSeries` (given a trained network) -:message predictPoint: +:message predictSeries: - :arg sourceBuffer: Input point + :arg sourceBuffer: Input series - Predict a label for a single data point in a |buffer| + Predict a label for a single data series in a |buffer| :message clear: diff --git a/doc/LSTMForecast.rst b/doc/LSTMForecast.rst index ee85613..665bb9f 100644 --- a/doc/LSTMForecast.rst +++ b/doc/LSTMForecast.rst @@ -45,7 +45,7 @@ Predict continuations for a :fluid-obj:`DataSeries` (given a trained network) -:message predictPoint: +:message predictSeries: :arg sourceBuffer: Input series diff --git a/doc/LSTMRegressor.rst b/doc/LSTMRegressor.rst index e827875..abffb4b 100644 --- a/doc/LSTMRegressor.rst +++ b/doc/LSTMRegressor.rst @@ -47,13 +47,13 @@ Apply the learned mapping to a :fluid-obj:`DataSet` (given a trained network) -:message predictPoint: +:message predictSeries: :arg sourceBuffer: Input series :arg targetBuffer: Output point - 
Predict a label for a single data point in a |buffer| + Predict a point for a single data series in a |buffer| :message clear: From 7ee4a0efadbcc16da9d36e87d6db9f687ae13ca1 Mon Sep 17 00:00:00 2001 From: tremblap Date: Sun, 1 Oct 2023 18:30:04 +0100 Subject: [PATCH 09/12] add deeper network support and momentum and validation --- doc/LSTMClassifier.rst | 12 ++++++++++-- doc/LSTMForecast.rst | 12 ++++++++++-- doc/LSTMRegressor.rst | 12 ++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst index be204b2..c733954 100644 --- a/doc/LSTMClassifier.rst +++ b/doc/LSTMClassifier.rst @@ -15,9 +15,9 @@ Conceptually equivalent to the :fluid-obj:`MLPClassifier`, but where that maps a :fluid-obj:`DataSet` to a :fluid-obj:`LabelSet`, recurrent networks can encode time-based patterns and learn those much more efficiently, so map a :fluid-obj:`DataSeries` to a :fluid-obj:`LabelSet` -:control hiddenSize: +:control hiddenLayers: - Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. Changing this will reset the neural network, clearing any learning that has happened. + An array of numbers that specifies the internal structure of the neural network. Each number in the list represents one hidden layer of the neural network, the value of which is the number of neurons in that layer. Changing this will reset the neural network, clearing any learning that has happened. :control maxIter: @@ -27,10 +27,18 @@ A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. +:control momentum: + + A scalar that applies a portion of previous adjustments to a current adjustment being made by the neural network during training. 
+ :control batchSize: The number of data series to use in between adjustments of the LSTM's internal parameters during training. +:control validation: + + A percentage (represented as a decimal) of the data points to randomly select, set aside, and not use for training (this "validation set" is reselected on each ``fit``). These points will be used after each epoch to check how the neural network is performing. If it is found to be no longer improving, training will stop, even if a ``fit`` has not reached its ``maxIter`` number of epochs. + :message fit: :arg sourceDataSeries: Source data diff --git a/doc/LSTMForecast.rst b/doc/LSTMForecast.rst index 665bb9f..662a4a2 100644 --- a/doc/LSTMForecast.rst +++ b/doc/LSTMForecast.rst @@ -13,9 +13,9 @@ This object is not like anything that has been seen yet - it takes a single :fluid-obj:`DataSeries` and learns to predict continuations to the series with the same 'style'. It is currently rather limited in its ability, but will recieve improvements in predicting ability in the future! -:control hiddenSize: +:control hiddenLayers: - Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. Changing this will reset the neural network, clearing any learning that has happened. + An array of numbers that specifies the internal structure of the neural network. Each number in the list represents one hidden layer of the neural network, the value of which is the number of neurons in that layer. Changing this will reset the neural network, clearing any learning that has happened. :control maxIter: @@ -25,10 +25,18 @@ A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. 
+:control momentum: + + A scalar that applies a portion of previous adjustments to a current adjustment being made by the neural network during training. + :control batchSize: The number of data series to use in between adjustments of the LSTM's internal parameters during training. +:control validation: + + A percentage (represented as a decimal) of the data points to randomly select, set aside, and not use for training (this "validation set" is reselected on each ``fit``). These points will be used after each epoch to check how the neural network is performing. If it is found to be no longer improving, training will stop, even if a ``fit`` has not reached its ``maxIter`` number of epochs. + :message fit: :arg sourceDataSeries: Source data diff --git a/doc/LSTMRegressor.rst b/doc/LSTMRegressor.rst index abffb4b..ae35cde 100644 --- a/doc/LSTMRegressor.rst +++ b/doc/LSTMRegressor.rst @@ -15,9 +15,9 @@ Conceptually equivalent to the :fluid-obj:`MLPRegressor`, but where that maps a :fluid-obj:`DataSet` to another, recurrent networks can encode time-based patterns and learn those much more efficiently. -:control hiddenSize: +:control hiddenLayers: - Single number that specifies the size of the intermediate recurrent layer network. This roughly equates to how well it can learn complex series, in exchange for model size and training time. Changing this will reset the neural network, clearing any learning that has happened. + An array of numbers that specifies the internal structure of the neural network. Each number in the list represents one hidden layer of the neural network, the value of which is the number of neurons in that layer. Changing this will reset the neural network, clearing any learning that has happened. :control maxIter: @@ -27,10 +27,18 @@ A scalar for indicating how much the neural network should adjust its internal parameters during training. This is the most important parameter to adjust while training a neural network. 
+:control momentum: + + A scalar that applies a portion of previous adjustments to a current adjustment being made by the neural network during training. + :control batchSize: The number of data series to use in between adjustments of the LSTM's internal parameters during training. +:control validation: + + A percentage (represented as a decimal) of the data points to randomly select, set aside, and not use for training (this "validation set" is reselected on each ``fit``). These points will be used after each epoch to check how the neural network is performing. If it is found to be no longer improving, training will stop, even if a ``fit`` has not reached its ``maxIter`` number of epochs. + :message fit: :arg sourceDataSeries: Source data From b9817b7ff1ed5ab70041d1392c8625a5d9e5b7db Mon Sep 17 00:00:00 2001 From: tremblap Date: Sun, 10 Mar 2024 17:13:37 +0000 Subject: [PATCH 10/12] rename forecaster --- doc/{LSTMForecast.rst => LSTMForecaster.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/{LSTMForecast.rst => LSTMForecaster.rst} (100%) diff --git a/doc/LSTMForecast.rst b/doc/LSTMForecaster.rst similarity index 100% rename from doc/LSTMForecast.rst rename to doc/LSTMForecaster.rst From 7ab50a804f005a0bbd759755fb6da894d0e6af8d Mon Sep 17 00:00:00 2001 From: tremblap Date: Sun, 10 Mar 2024 17:14:02 +0000 Subject: [PATCH 11/12] add placeholder examples --- example-code/sc/DataSeries.scd | 5 +++++ example-code/sc/LSTMClassifier.scd | 5 +++++ example-code/sc/LSTMForecaster.scd | 5 +++++ example-code/sc/LSTMRegressor.scd | 5 +++++ 4 files changed, 20 insertions(+) create mode 100644 example-code/sc/DataSeries.scd create mode 100644 example-code/sc/LSTMClassifier.scd create mode 100644 example-code/sc/LSTMForecaster.scd create mode 100644 example-code/sc/LSTMRegressor.scd diff --git a/example-code/sc/DataSeries.scd b/example-code/sc/DataSeries.scd new file mode 100644 index 0000000..211b722 --- /dev/null +++ b/example-code/sc/DataSeries.scd @@ -0,0 
+1,5 @@ +code:: + +//soon + +:: \ No newline at end of file diff --git a/example-code/sc/LSTMClassifier.scd b/example-code/sc/LSTMClassifier.scd new file mode 100644 index 0000000..211b722 --- /dev/null +++ b/example-code/sc/LSTMClassifier.scd @@ -0,0 +1,5 @@ +code:: + +//soon + +:: \ No newline at end of file diff --git a/example-code/sc/LSTMForecaster.scd b/example-code/sc/LSTMForecaster.scd new file mode 100644 index 0000000..211b722 --- /dev/null +++ b/example-code/sc/LSTMForecaster.scd @@ -0,0 +1,5 @@ +code:: + +//soon + +:: \ No newline at end of file diff --git a/example-code/sc/LSTMRegressor.scd b/example-code/sc/LSTMRegressor.scd new file mode 100644 index 0000000..211b722 --- /dev/null +++ b/example-code/sc/LSTMRegressor.scd @@ -0,0 +1,5 @@ +code:: + +//soon + +:: \ No newline at end of file From d94013d867c52570e8be0e25b770ad250173ca8a Mon Sep 17 00:00:00 2001 From: tremblap Date: Fri, 15 Mar 2024 14:23:20 +0000 Subject: [PATCH 12/12] correct cross reference to forecaster --- doc/LSTMClassifier.rst | 2 +- doc/LSTMRegressor.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/LSTMClassifier.rst b/doc/LSTMClassifier.rst index c733954..f5cf7b6 100644 --- a/doc/LSTMClassifier.rst +++ b/doc/LSTMClassifier.rst @@ -2,7 +2,7 @@ :species: data :sc-categories: Machine learning, Classification, LSTM :sc-related: -:see-also: LSTMRegressor, LSTMForecast, DataSeries, LabelSet +:see-also: LSTMRegressor, LSTMForecaster, DataSeries, LabelSet :description: Perform classification between a :fluid-obj:`DataSeries` and a :fluid-obj:`LabelSet` using a long-short term memory recurrent neural network (LSTM) diff --git a/doc/LSTMRegressor.rst b/doc/LSTMRegressor.rst index ae35cde..11ee230 100644 --- a/doc/LSTMRegressor.rst +++ b/doc/LSTMRegressor.rst @@ -2,7 +2,7 @@ :species: data :sc-categories: Machine learning, Regression, LSTM :sc-related: -:see-also: LSTMClassifier, LSTMForecast, DataSeries, DataSet +:see-also: LSTMClassifier, LSTMForecaster, 
DataSeries, DataSet :description: Perform regression between a :fluid-obj:`DataSeries` and a :fluid-obj:`DataSet` using a long-short term memory recurrent neural network (LSTM)