Unable to optimize parameters in Fit Trend-SVM model
Hello,
I am trying to optimize the SVM parameter C using grid optimization in an attempt to fit the trend of a stock market as well as possible. I get the error message: Trend(label) attribute is duplicate. What am I doing wrong?
Here is the process:
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.000-BETA">
<!--
  RapidMiner process. Outer chain, in connection order:
  Read Excel, Generate ID, Sort, Select Attributes, Windowing,
  Optimize Parameters (Grid). The optimizer's inner subprocess runs
  Fit Trend (with an SVM learner), Set Role and Performance (3).
-->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.3.000-BETA" expanded="true" name="Process">
<process expanded="true">
<!-- Reads cell range A1:I1670 from a local workbook; row 0 is annotated as the name row and nine columns are declared below. -->
<operator activated="true" class="read_excel" compatibility="7.3.000-BETA" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
<parameter key="excel_file" value="C:\Users\Manolis\Desktop\Book1.xlsx"/>
<parameter key="imported_cell_range" value="A1:I1670"/>
<parameter key="first_row_as_names" value="false"/>
<list key="annotations">
<parameter key="0" value="Name"/>
</list>
<list key="data_set_meta_data_information">
<parameter key="0" value="Trade Date.true.polynominal.attribute"/>
<parameter key="1" value="High.true.real.attribute"/>
<parameter key="2" value="Low.true.real.attribute"/>
<parameter key="3" value="Open.true.real.attribute"/>
<parameter key="4" value="Close.true.numeric.attribute"/>
<parameter key="5" value="Volume.true.integer.attribute"/>
<parameter key="6" value="Prev\. Close.true.numeric.attribute"/>
<parameter key="7" value="Total Turnover.true.numeric.attribute"/>
<parameter key="8" value="Num\. Of Trans\..true.integer.attribute"/>
</list>
</operator>
<!-- Adds an id attribute (default settings), which the following Sort uses as its key. -->
<operator activated="true" class="generate_id" compatibility="7.3.000-BETA" expanded="true" height="82" name="Generate ID" width="90" x="112" y="187"/>
<!-- Sorts by id in decreasing order; presumably this reverses the sheet's row order so the series runs oldest to newest. TODO confirm against the workbook. -->
<operator activated="true" class="sort" compatibility="7.3.000-BETA" expanded="true" height="82" name="Sort" width="90" x="313" y="187">
<parameter key="attribute_name" value="id"/>
<parameter key="sorting_direction" value="decreasing"/>
</operator>
<!-- Keeps only the listed subset of attributes for the windowing step. -->
<operator activated="true" class="select_attributes" compatibility="7.3.000-BETA" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="442">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Volume|Total Turnover|Open|Num. Of Trans.|Low|High|Close"/>
</operator>
<!-- Series extension windowing: window size 5, with a label created from the Close attribute. -->
<operator activated="true" class="series:windowing" compatibility="7.2.000" expanded="true" height="82" name="Windowing" width="90" x="447" y="544">
<parameter key="window_size" value="5"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="Close"/>
</operator>
<!--
  Grid search over the C parameter of the inner operator named SVM.
  NOTE(review): the value [0;1;100;linear] presumably means min 0, max 1,
  100 steps on a linear scale. Confirm against the operator documentation.
-->
<operator activated="true" class="optimize_parameters_grid" compatibility="7.3.000-BETA" expanded="true" height="103" name="Optimize Parameters (Grid)" width="90" x="648" y="544">
<list key="parameters">
<parameter key="SVM.C" value="[0;1;100;linear]"/>
</list>
<process expanded="true">
<!--
  Fit Trend works on the attribute named label and, per the Set Role step
  further down, yields an attribute called trend(label).
  NOTE(review): the stack trace posted with this process reports
  "Duplicate attribute name: trend(label)" raised from this operator while
  it runs inside the parameter loop. Presumably every grid iteration gets
  the same example set, so from the second iteration on trend(label) is
  already present. Confirm; if so, feeding Fit Trend a fresh copy of the
  data on each iteration (for example via Materialize Data) should avoid it.
-->
<operator activated="true" class="series:fit_trend" compatibility="7.2.000" expanded="true" height="68" name="Fit Trend" width="90" x="313" y="289">
<parameter key="attribute" value="label"/>
<process expanded="true">
<!-- Deactivated learner (activated="false") with no connections in this subprocess. -->
<operator activated="false" class="h2o:generalized_linear_model" compatibility="7.3.000-BETA" expanded="true" height="103" name="Generalized Linear Model (2)" width="90" x="179" y="238">
<list key="beta_constraints"/>
<list key="expert_parameters"/>
</operator>
<!-- Deactivated learner (activated="false") with no connections in this subprocess. -->
<operator activated="false" class="neural_net" compatibility="7.3.000-BETA" expanded="true" height="82" name="Neural Net" width="90" x="380" y="289">
<list key="hidden_layers"/>
</operator>
<!-- The only active learner: radial kernel SVM. The C value stored here (0.01) is the parameter targeted by the SVM.C grid above. -->
<operator activated="true" class="support_vector_machine" compatibility="7.3.000-BETA" expanded="true" height="124" name="SVM" width="90" x="514" y="391">
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="5.0"/>
<parameter key="kernel_cache" value="200000"/>
<parameter key="C" value="0.01"/>
<parameter key="convergence_epsilon" value="0.01"/>
<parameter key="max_iterations" value="10000000"/>
<parameter key="L_pos" value="3.0"/>
<parameter key="L_neg" value="3.0"/>
<parameter key="epsilon" value="0.01"/>
<parameter key="epsilon_plus" value="0.01"/>
</operator>
<!-- Deactivated learner (activated="false") with no connections in this subprocess. -->
<operator activated="false" class="h2o:deep_learning" compatibility="7.3.000-BETA" expanded="true" height="82" name="Deep Learning (2)" width="90" x="313" y="136">
<parameter key="activation" value="Tanh"/>
<enumeration key="hidden_layer_sizes">
<parameter key="hidden_layer_sizes" value="3"/>
<parameter key="hidden_layer_sizes" value="2"/>
<parameter key="hidden_layer_sizes" value="3"/>
<parameter key="hidden_layer_sizes" value="2"/>
</enumeration>
<enumeration key="hidden_dropout_ratios"/>
<list key="expert_parameters"/>
<list key="expert_parameters_"/>
</operator>
<!-- Wiring: the incoming example set trains the SVM, and the SVM model is this subprocess's output. -->
<connect from_port="example set" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
</process>
</operator>
<!-- Gives trend(label) the prediction role before the data reaches Performance (3). -->
<operator activated="true" class="set_role" compatibility="7.3.000-BETA" expanded="true" height="82" name="Set Role" width="90" x="514" y="187">
<parameter key="attribute_name" value="trend(label)"/>
<parameter key="target_role" value="prediction"/>
<list key="set_additional_roles"/>
</operator>
<!-- Regression performance with default settings; its output is the performance result the grid search receives. -->
<operator activated="true" class="performance_regression" compatibility="7.3.000-BETA" expanded="true" height="82" name="Performance (3)" width="90" x="715" y="340"/>
<!-- Inner wiring: input 1, Fit Trend, Set Role, Performance (3), performance output. -->
<connect from_port="input 1" to_op="Fit Trend" to_port="example set"/>
<connect from_op="Fit Trend" from_port="example set with trend" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Performance (3)" to_port="labelled data"/>
<connect from_op="Performance (3)" from_port="performance" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<!-- Outer wiring: the operators above are chained in order and the optimizer's performance output becomes result 1. -->
<connect from_op="Read Excel" from_port="output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Sort" to_port="example set input"/>
<connect from_op="Sort" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Windowing" to_port="example set input"/>
<connect from_op="Windowing" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Thanks
MM
PS: here is the stack trace
Exception: java.lang.IllegalArgumentException
Message: Duplicate attribute name: trend(label)
Stack trace:
com.rapidminer.example.SimpleAttributes.register(SimpleAttributes.java:124)
com.rapidminer.example.SimpleAttributes.add(SimpleAttributes.java:203)
com.rapidminer.example.AbstractAttributes.addRegular(AbstractAttributes.java:94)
com.rapidminer.operator.preprocessing.series.filter.Trend.doWork(Trend.java:153)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:76)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:812)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:807)
java.security.AccessController.doPrivileged(Native Method)
com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:807)
com.rapidminer.operator.meta.ParameterIteratingOperatorChain.executeSubprocess(ParameterIteratingOperatorChain.java:262)
com.rapidminer.operator.meta.ParameterIteratingOperatorChain.getPerformanceVector(ParameterIteratingOperatorChain.java:305)
com.rapidminer.operator.meta.GridSearchParameterOptimizationOperator.computeCurrentPerformance(GridSearchParameterOptimizationOperator.java:113)
com.rapidminer.operator.meta.GridSearchParameterOptimizationOperator.doWork(GridSearchParameterOptimizationOperator.java:177)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:76)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:812)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:807)
java.security.AccessController.doPrivileged(Native Method)
com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:807)
com.rapidminer.operator.OperatorChain.doWork(OperatorChain.java:428)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.Process.run(Process.java:1195)
com.rapidminer.Process.run(Process.java:1091)
com.rapidminer.Process.run(Process.java:1044)
com.rapidminer.Process.run(Process.java:1039)
com.rapidminer.Process.run(Process.java:1029)
com.rapidminer.gui.ProcessThread.run(ProcessThread.java:65)
Answers
@stevefarr please move this thread to the Studio forum. This should not be in the Building Block forum.
Thanks.
Hi,
I checked out your process and see a few things that are not quite correct. You vary the C by decimal increments: 1.1, 1.2, 1.3; that's not very efficient and you won't get much of a bump in performance. May I suggest using ten steps and varying from 0 to 10,000?
In addition, with time series you have to use the Sliding Window Validation and nest it with the Forecast Performance operator, then you can optimize trend accuracy.
Update: See XML below as an example.