The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
CROSS VALIDATION
fedayncarica
Member Posts: 30 Contributor I
Good morning. How can I insert the CROSS VALIDATION block into my trial process?
0
Best Answer
-
Thomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn
No need to go crazy. I think you want to do something like this.
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
<parameter key="logfile" value="C:\Users\Damiano\Downloads\log1Damiano.csv"/>
<process expanded="true">
<!-- Loop Files is switched off (activated="false"); it and everything nested inside it
     are kept in the process but carry no connection to the active operator chain.
     Points to check before re-enabling it:
       * the only connection in its subprocess is Loop Parameters "result 1" to "out 1";
         nothing is wired into Loop Parameters "input 1", the port that feeds Clustering;
       * Set Role (4), Set Role (3) and Set Role (2) all give the role "label" to the
         attribute "id", not to a cluster attribute;
       * Write CSV inside the testing subprocess writes to a fixed path under
         C:\Users\Damiano\Documents. -->
<operator activated="false" class="loop_files" compatibility="7.3.001" expanded="true" height="82" name="Loop Files" width="90" x="112" y="289">
  <parameter key="directory" value="C:\Users\Damiano\Desktop\prova"/>
  <process expanded="true">
    <!-- Varies the parameter k of the operator named "Clustering".
         NOTE(review): the value looks like min;max;steps;scale; confirm against the Loop Parameters docs. -->
    <operator activated="true" class="loop_parameters" compatibility="7.3.001" expanded="true" height="103" name="Loop Parameters" width="90" x="581" y="34">
      <list key="parameters">
        <parameter key="Clustering.k" value="[2.0;5;5;linear]"/>
      </list>
      <process expanded="true">
        <operator activated="true" class="k_means" compatibility="7.3.001" expanded="true" height="82" name="Clustering" width="90" x="112" y="34">
          <parameter key="max_runs" value="1"/>
        </operator>
        <!-- Receives the clustered set and marks the attribute "id" as label. -->
        <operator activated="true" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role (4)" width="90" x="313" y="136">
          <parameter key="attribute_name" value="id"/>
          <parameter key="target_role" value="label"/>
          <list key="set_additional_roles"/>
        </operator>
        <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="447" y="136">
          <!-- First subprocess: training set in, model out. -->
          <process expanded="true">
            <operator activated="true" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role (3)" width="90" x="179" y="34">
              <parameter key="attribute_name" value="id"/>
              <parameter key="target_role" value="label"/>
              <list key="set_additional_roles"/>
            </operator>
            <operator activated="true" class="parallel_decision_tree" compatibility="7.3.001" expanded="true" height="82" name="Decision Tree" width="90" x="380" y="34">
              <parameter key="criterion" value="gini_index"/>
            </operator>
            <connect from_port="training set" to_op="Set Role (3)" to_port="example set input"/>
            <connect from_op="Set Role (3)" from_port="example set output" to_op="Decision Tree" to_port="training set"/>
            <connect from_op="Decision Tree" from_port="model" to_port="model"/>
            <portSpacing port="source_training set" spacing="0"/>
            <portSpacing port="sink_model" spacing="0"/>
            <portSpacing port="sink_through 1" spacing="0"/>
          </process>
          <!-- Second subprocess: model and test set in, performance out. -->
          <process expanded="true">
            <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
              <list key="application_parameters"/>
            </operator>
            <operator activated="true" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role (2)" width="90" x="179" y="136">
              <parameter key="attribute_name" value="id"/>
              <parameter key="target_role" value="label"/>
              <list key="set_additional_roles"/>
            </operator>
            <operator activated="true" class="performance" compatibility="7.3.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34"/>
            <operator activated="true" class="performance_to_data" compatibility="7.3.001" expanded="true" height="82" name="Performance to Data" width="90" x="447" y="34"/>
            <operator activated="true" class="write_csv" compatibility="7.3.001" expanded="true" height="82" name="Write CSV" width="90" x="648" y="34">
              <parameter key="csv_file" value="C:\Users\Damiano\Documents\prova risultati.csv"/>
            </operator>
            <connect from_port="model" to_op="Apply Model" to_port="model"/>
            <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
            <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
            <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
            <connect from_op="Set Role (2)" from_port="original" to_port="test set results"/>
            <connect from_op="Performance" from_port="performance" to_op="Performance to Data" to_port="performance vector"/>
            <connect from_op="Performance to Data" from_port="example set" to_op="Write CSV" to_port="input"/>
            <connect from_op="Performance to Data" from_port="performance vector" to_port="performance 1"/>
            <portSpacing port="source_model" spacing="0"/>
            <portSpacing port="source_test set" spacing="0"/>
            <portSpacing port="source_through 1" spacing="0"/>
            <portSpacing port="sink_test set results" spacing="0"/>
            <portSpacing port="sink_performance 1" spacing="0"/>
            <portSpacing port="sink_performance 2" spacing="0"/>
          </process>
        </operator>
        <connect from_port="input 1" to_op="Clustering" to_port="example set"/>
        <connect from_op="Clustering" from_port="cluster model" to_port="result 1"/>
        <connect from_op="Clustering" from_port="clustered set" to_op="Set Role (4)" to_port="example set input"/>
        <connect from_op="Set Role (4)" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
        <connect from_op="Cross Validation" from_port="example set" to_port="result 2"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="source_input 2" spacing="0"/>
        <portSpacing port="sink_performance" spacing="0"/>
        <portSpacing port="sink_result 1" spacing="0"/>
        <portSpacing port="sink_result 2" spacing="0"/>
        <portSpacing port="sink_result 3" spacing="0"/>
      </process>
    </operator>
    <!-- Also switched off and not connected to anything: would give "traceId" the role "id". -->
    <operator activated="false" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role" width="90" x="45" y="391">
      <parameter key="attribute_name" value="traceId"/>
      <parameter key="target_role" value="id"/>
      <list key="set_additional_roles"/>
    </operator>
    <connect from_op="Loop Parameters" from_port="result 1" to_port="out 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
  </process>
</operator>
<!-- Read CSV: entry point of the active chain. Reads a machine-specific absolute path,
     so csv_file must be changed before the process can run elsewhere.
     first_row_as_names is off; the annotations list marks row 0 as "Name" instead.
     The metadata list declares 20 columns: traceId (polynominal) followed by
     19 integer columns.
     NOTE(review): the root operator's logfile parameter points at a file named
     log1Damiano.csv, very close to this input file's name; confirm that the process
     log is not being written over the data. -->
<operator activated="true" class="read_csv" compatibility="7.3.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
  <parameter key="csv_file" value="C:\Users\ThomasOtt\Downloads\log1Damiano (1).csv"/>
  <parameter key="first_row_as_names" value="false"/>
  <list key="annotations">
    <parameter key="0" value="Name"/>
  </list>
  <parameter key="encoding" value="windows-1252"/>
  <list key="data_set_meta_data_information">
    <parameter key="0" value="traceId.true.polynominal.attribute"/>
    <parameter key="1" value="padiglione cina.true.integer.attribute"/>
    <parameter key="2" value="albero della vita.true.integer.attribute"/>
    <parameter key="3" value="padiglione giappone.true.integer.attribute"/>
    <parameter key="4" value="padiglione brasile.true.integer.attribute"/>
    <parameter key="5" value="padiglione russia.true.integer.attribute"/>
    <parameter key="6" value="padiglione ecuador.true.integer.attribute"/>
    <parameter key="7" value="padiglione italia.true.integer.attribute"/>
    <parameter key="8" value="padiglione austria.true.integer.attribute"/>
    <parameter key="9" value="padiglione oman.true.integer.attribute"/>
    <parameter key="10" value="padiglione nepal.true.integer.attribute"/>
    <parameter key="11" value="padiglione belgio.true.integer.attribute"/>
    <parameter key="12" value="padiglione slovacchia.true.integer.attribute"/>
    <parameter key="13" value="padiglione estonia.true.integer.attribute"/>
    <parameter key="14" value="padiglione zero.true.integer.attribute"/>
    <parameter key="15" value="padiglione usa.true.integer.attribute"/>
    <parameter key="16" value="padiglione polonia.true.integer.attribute"/>
    <parameter key="17" value="padiglione costa d'avorio.true.integer.attribute"/>
    <parameter key="18" value="padiglione olanda.true.integer.attribute"/>
    <parameter key="19" value="padiglione germania.true.integer.attribute"/>
  </list>
</operator>
<!-- Select Attributes: filters on value type "nominal" with invert_selection switched on,
     i.e. the selection is the complement of the nominal attributes. -->
<operator activated="true" class="select_attributes" compatibility="7.3.001" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="34">
  <parameter key="attribute_filter_type" value="value_type"/>
  <parameter key="value_type" value="nominal"/>
  <parameter key="invert_selection" value="true"/>
</operator>
<!-- Generate ID: placed between Select Attributes and Loop; configured to create nominal ids. -->
<operator activated="true" class="generate_id" compatibility="7.3.001" expanded="true" height="82" name="Generate ID" width="90" x="380" y="34">
  <parameter key="create_nominal_ids" value="true"/>
</operator>
<!-- Loop: runs its subprocess 5 times with the iteration macro enabled and starting at 2.
     The macro %{iteration} is used both as k for the clustering and in the name of the
     stored result (presumably k = 2 through 6; confirm how the macro is incremented).
     Per iteration: cluster, make "cluster" the label, cross-validate a decision tree on
     that label, convert the performance to data and store it. -->
<operator activated="true" class="loop" compatibility="7.3.001" expanded="true" height="82" name="Loop" width="90" x="514" y="34">
  <parameter key="set_iteration_macro" value="true"/>
  <parameter key="macro_start_value" value="2"/>
  <parameter key="iterations" value="5"/>
  <process expanded="true">
    <operator activated="true" class="k_means" compatibility="7.3.001" expanded="true" height="82" name="Clustering (2)" width="90" x="45" y="34">
      <parameter key="k" value="%{iteration}"/>
      <parameter key="max_runs" value="1"/>
    </operator>
    <!-- The only Set Role in this loop: it sits before the Cross Validation and turns
         the "cluster" attribute into the label. -->
    <operator activated="true" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role (5)" width="90" x="179" y="34">
      <parameter key="attribute_name" value="cluster"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation (2)" width="90" x="313" y="34">
      <!-- First subprocess: training set in, decision tree model out. -->
      <process expanded="true">
        <operator activated="true" class="parallel_decision_tree" compatibility="7.3.001" expanded="true" height="82" name="Decision Tree (2)" width="90" x="112" y="34">
          <parameter key="criterion" value="gini_index"/>
        </operator>
        <connect from_port="training set" to_op="Decision Tree (2)" to_port="training set"/>
        <connect from_op="Decision Tree (2)" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>
      <!-- Second subprocess: apply the model to the test set and emit the performance
           on "performance 1". -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="performance" compatibility="7.3.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="34"/>
        <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
        <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
        <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_test set results" spacing="0"/>
        <portSpacing port="sink_performance 1" spacing="0"/>
        <portSpacing port="sink_performance 2" spacing="0"/>
      </process>
    </operator>
    <operator activated="true" class="performance_to_data" compatibility="7.3.001" expanded="true" height="82" name="Performance to Data (2)" width="90" x="447" y="34"/>
    <!-- One repository entry per iteration; the path is relative to the process location. -->
    <operator activated="true" class="store" compatibility="7.3.001" expanded="true" height="68" name="Store" width="90" x="581" y="34">
      <parameter key="repository_entry" value="../data/processo_kvalue_%{iteration}"/>
    </operator>
    <connect from_port="input 1" to_op="Clustering (2)" to_port="example set"/>
    <connect from_op="Clustering (2)" from_port="clustered set" to_op="Set Role (5)" to_port="example set input"/>
    <connect from_op="Set Role (5)" from_port="example set output" to_op="Cross Validation (2)" to_port="example set"/>
    <connect from_op="Cross Validation (2)" from_port="performance 1" to_op="Performance to Data (2)" to_port="performance vector"/>
    <connect from_op="Performance to Data (2)" from_port="example set" to_op="Store" to_port="input"/>
    <connect from_op="Store" from_port="through" to_port="output 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
</operator>
<connect from_op="Read CSV" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Loop" to_port="input 1"/>
<connect from_op="Loop" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0
Answers
Did you watch this video on how to do Cross Validation: https://youtu.be/86EbKAtvYoI?list=PLssWC2d9JhOZLbQNZ80uOxLypglgWqbJA first?
I'm not sure what you are doing in this process. What is the goal here? You have embedded a clustering algorithm, which is typically not used with cross validation.
I watched another video, but I still cannot do it.
Best regards,
Are you trying to use Cross Validation with the Clustering algorithm? What is this process trying to do?
Yes, I added the Validation block to get not only the results of the clustering but also to see their performance!
Your data must have class labels to use Cross Validation. You can do your clustering, feed the results with labels into a cross validation, use another algorithm to train on the data, and then measure the classification performance.
There are clustering performance measures too, like Cluster Density or Cluster Distances.
Hi Thomas, I tried to do what you said. I am sending you the attached process; can you tell me if it's okay?
thank you very much for your availability
It looks OK, but it is a bit hard to troubleshoot without data. The only question I have is: why do you have Set Role operators on the testing and training sides? The Set Role operator before the Cross Validation will take care of that.
I removed the Set Role operators. Why can I not see the performance with the attached data set? I am going crazy.