Time Lags Using the Windowing for Training and Sliding Window Validation Operator
Hi there,
I am new to RapidMiner and am having issues running a model to predict appliance usage based on a time series. The readings are at 10-minute intervals. Some additional pre-processing steps are also included.
The process is failing to run and I can't figure out how to fix or improve it. I would welcome any suggestions.
Thanks.
Current process...
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.001">
  <!--
    RapidMiner process: forecast the "Appliances" attribute from a time series.

    Main chain (see the connect elements at the end of the outer process):
      Retrieve -> Nominal to Date -> Local Outlier Factor (LOF) -> Filter Examples
      -> Set Role -> Windowing for Training -> Sliding W. Validation

    Delivered results:
      result 1 = validation "training" pass-through port, reduced by Select Attributes
      result 2 = model from Sliding W. Validation
      result 3 = averaged performance from Sliding W. Validation
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the raw data. The trailing blank in the entry name and operator name is part of the identifier. -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Retrieve Energy Set Version 2 " width="90" x="45" y="136">
        <parameter key="repository_entry" value="//Algorithm Module - College/Data Science Project/Energy Set Version 2 "/>
      </operator>
      <!--
        Parse the nominal "date" attribute into a date_time value.
        Fixed: the pattern previously used "h" (hour in am/pm, 1-12) without an am/pm marker.
        That cannot describe a full day of readings and maps 12:xx onto 0:xx.
        "H" is hour in day (0-23).
        NOTE(review): assumes the source strings are 24-hour with no AM/PM suffix.
        If they do carry a suffix, use "MM/dd/yyyy h:mm a" instead. Confirm against the data.
      -->
      <operator activated="true" class="nominal_to_date" compatibility="8.1.001" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="136">
        <parameter key="attribute_name" value="date"/>
        <parameter key="date_type" value="date_time"/>
        <parameter key="date_format" value="MM/dd/yyyy H:mm"/>
      </operator>
      <!-- Outlier scoring with the operator's default parameters (none are overridden here). -->
      <operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.4.001" expanded="true" height="103" name="Local Outlier Factor (LOF)" width="90" x="313" y="34"/>
      <!--
        The filter condition is "outlier greater than 1.145" and invert_filter is true,
        so the examples that do NOT satisfy the condition are the ones passed on.
        NOTE(review): removing rows before windowing presumably leaves gaps in the
        10-minute series, so consecutive rows are no longer equally spaced. Confirm this is intended.
      -->
      <operator activated="true" class="filter_examples" compatibility="8.1.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="outlier.gt.1\.145"/>
        </list>
      </operator>
      <!-- Give "date" the id role. -->
      <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="581" y="34">
        <parameter key="attribute_name" value="date"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!--
        Windowing: window of 144 rows, moved 6 rows per step, label created from "Appliances".
        At the 10-minute spacing described in the post, 144 rows correspond to 24 hours.
      -->
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing for Training" width="90" x="715" y="34">
        <parameter key="window_size" value="144"/>
        <parameter key="step_size" value="6"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="Appliances"/>
      </operator>
      <!--
        Sliding window validation over the windowed examples.
        First subprocess = training, second subprocess = testing.
        Training window width 144, training window step size 1, horizon 5.
      -->
      <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Sliding W. Validation" width="90" x="581" y="187">
        <parameter key="training_window_width" value="144"/>
        <parameter key="training_window_step_size" value="1"/>
        <parameter key="horizon" value="5"/>
        <!-- Training: SVM with default parameters; its estimated performance is forwarded on "through 1". -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="8.1.001" expanded="true" height="124" name="SVM" width="90" x="246" y="85"/>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <connect from_op="SVM" from_port="estimated performance" to_port="through 1"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>
        <!-- Testing: apply the model, reduce the attribute set, then compute regression performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
            <list key="application_parameters"/>
          </operator>
          <!-- Fixed: "Date" changed to "date" so the name matches the attribute used by Nominal to Date and Set Role. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="date|label|prediction(label)"/>
          </operator>
          <operator activated="true" class="performance_regression" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="380" y="136"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="source_through 2" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Reduces the example set coming from the validation "training" output port.
        Fixed: "Date" changed to "date" to match the attribute name used earlier in the process.
        NOTE(review): no operator visible in this process creates an attribute named "Measure-0".
        After windowing the lagged attributes are presumably named after the source attribute
        (for example "Appliances-0"). Verify the name against the windowed example set.
      -->
      <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="916" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="label|Measure-0|date"/>
      </operator>
      <!-- Wiring of the main chain and of the three delivered results. -->
      <connect from_op="Retrieve Energy Set Version 2 " from_port="output" to_op="Nominal to Date" to_port="example set input"/>
      <connect from_op="Nominal to Date" from_port="example set output" to_op="Local Outlier Factor (LOF)" to_port="example set"/>
      <connect from_op="Local Outlier Factor (LOF)" from_port="example set" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
      <connect from_op="Windowing for Training" from_port="example set output" to_op="Sliding W. Validation" to_port="training"/>
      <connect from_op="Sliding W. Validation" from_port="model" to_port="result 2"/>
      <connect from_op="Sliding W. Validation" from_port="training" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Sliding W. Validation" from_port="averagable 1" to_port="result 3"/>
      <connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="252"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="21"/>
    </process>
  </operator>
</process>
Answers
Hi @pix123,
Can you share your dataset so that we can reproduce the error?
Regards,
Lionel
Thanks Lionel, please find attached.
Hi again @pix123,
Here you can find a process to forecast "Appliances".
1. In this version of the process, the window size is 20. You can change this setting by:
- setting this parameter inside the Set Predictions_Params subprocess operator,
and
- modifying the parameters of the Select Attributes and Rename operators inside the Loop operator.
2. To improve this process, you can test different models (the Deep Learning model was chosen arbitrarily),
and you can use the Optimize Parameters operator to find the combination of parameters that gives the best performance for your model.
The process:
I hope it helps,
Regards,
Lionel
Hi @lionelderkrikor, apologies for the delay. Thank you for this; it has been helpful.
My machine currently has performance issues due to the memory needed to run a large process.
Is there an efficient way to run a prediction model without the need to window?
Thanks again.