Clustering and Performance
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process export (version 7.3.001).
  Outline: Loop Files iterates over the files of one directory; each file is read as CSV,
  filtered by attribute value type, given a generated id, and handed to Loop Parameters,
  which runs a Cross Validation around a k-means operator once per value of Clustering.k.
-->
<process version="7.3.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <!-- NOTE(review): absolute, user-specific Windows path; adjust before running on another machine. -->
    <parameter key="logfile" value="C:\Users\Damiano\Downloads\log1Damiano.csv"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Iterates over the files (not sub-directories, not recursive) of the configured directory
           and publishes the macros file_name, file_path and parent_path. -->
      <operator activated="true" class="loop_files" compatibility="7.3.001" expanded="true" height="82" name="Loop Files" width="90" x="179" y="34">
        <!-- NOTE(review): absolute, user-specific Windows path. -->
        <parameter key="directory" value="C:\Users\Damiano\Desktop\Loop Process"/>
        <parameter key="filtered_string" value="file name (last part of the path)"/>
        <parameter key="file_name_macro" value="file_name"/>
        <parameter key="file_path_macro" value="file_path"/>
        <parameter key="parent_path_macro" value="parent_path"/>
        <parameter key="recursive" value="false"/>
        <parameter key="iterate_over_files" value="true"/>
        <parameter key="iterate_over_subdirs" value="false"/>
        <process expanded="true">
          <!-- Reads the current file object as semicolon-separated CSV; the first row holds the attribute names. -->
          <operator activated="true" class="read_csv" compatibility="7.3.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
            <parameter key="column_separators" value=";"/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="use_quotes" value="true"/>
            <!-- The double-quote character has to be written as an entity: a raw double quote would
                 end the attribute value and make the document not well-formed. -->
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="skip_comments" value="false"/>
            <parameter key="comment_characters" value="#"/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="date_format" value=""/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="encoding" value="SYSTEM"/>
            <list key="data_set_meta_data_information"/>
            <parameter key="read_not_matching_values_as_missings" value="true"/>
            <parameter key="datamanagement" value="double_array"/>
          </operator>
          <!-- Filters by value type "nominal" with invert_selection="true"; presumably this keeps
               only the non-nominal attributes (confirm against the operator documentation). -->
          <operator activated="true" class="select_attributes" compatibility="7.3.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="136">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Adds a nominal id attribute, starting at offset 0. -->
          <operator activated="true" class="generate_id" compatibility="7.3.001" expanded="true" height="82" name="Generate ID" width="90" x="380" y="85">
            <parameter key="create_nominal_ids" value="true"/>
            <parameter key="offset" value="0"/>
          </operator>
          <!-- Runs the inner process once per grid value of parameter "k" of the operator named "Clustering". -->
          <operator activated="true" class="loop_parameters" compatibility="7.3.001" expanded="true" height="103" name="Loop Parameters" width="90" x="514" y="136">
            <list key="parameters">
              <!-- Presumably [min;max;steps;scale], i.e. k from 2 to 100 in 10 linear steps; verify. -->
              <parameter key="Clustering.k" value="[2.0;100.0;10;linear]"/>
            </list>
            <parameter key="error_handling" value="fail on error"/>
            <parameter key="synchronize" value="false"/>
            <process expanded="true">
              <!-- Assigns the role "label" to the attribute "id".
                   NOTE(review): "id" is presumably the attribute created by Generate ID, which would give
                   every example its own label value; confirm this is intended for the validation below. -->
              <operator activated="true" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role (4)" width="90" x="179" y="34">
                <parameter key="attribute_name" value="id"/>
                <parameter key="target_role" value="label"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- 10-fold cross validation with parallel execution enabled. The first inner process
                   receives "training set" and delivers "model"; the second receives "model" and "test set". -->
              <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="380" y="34">
                <parameter key="split_on_batch_attribute" value="false"/>
                <parameter key="leave_one_out" value="false"/>
                <parameter key="number_of_folds" value="10"/>
                <parameter key="sampling_type" value="automatic"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
                <parameter key="enable_parallel_execution" value="true"/>
                <process expanded="true">
                  <!-- k-means clustering. The k written here (2) is the stored value; the enclosing
                       Loop Parameters operator lists "Clustering.k" in its parameter grid. -->
                  <operator activated="true" class="k_means" compatibility="7.3.001" expanded="true" height="82" name="Clustering" width="90" x="112" y="34">
                    <parameter key="add_cluster_attribute" value="true"/>
                    <parameter key="add_as_label" value="false"/>
                    <parameter key="remove_unlabeled" value="false"/>
                    <parameter key="k" value="2"/>
                    <parameter key="max_runs" value="10"/>
                    <parameter key="determine_good_start_values" value="false"/>
                    <parameter key="measure_types" value="BregmanDivergences"/>
                    <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
                    <parameter key="nominal_measure" value="NominalDistance"/>
                    <parameter key="numerical_measure" value="EuclideanDistance"/>
                    <parameter key="divergence" value="SquaredEuclideanDistance"/>
                    <parameter key="kernel_type" value="radial"/>
                    <parameter key="kernel_gamma" value="1.0"/>
                    <parameter key="kernel_sigma1" value="1.0"/>
                    <parameter key="kernel_sigma2" value="0.0"/>
                    <parameter key="kernel_sigma3" value="2.0"/>
                    <parameter key="kernel_degree" value="3.0"/>
                    <parameter key="kernel_shift" value="1.0"/>
                    <parameter key="kernel_a" value="1.0"/>
                    <parameter key="kernel_b" value="0.0"/>
                    <parameter key="max_optimization_steps" value="100"/>
                    <parameter key="use_local_random_seed" value="false"/>
                    <parameter key="local_random_seed" value="1992"/>
                  </operator>
                  <connect from_port="training set" to_op="Clustering" to_port="example set"/>
                  <connect from_op="Clustering" from_port="cluster model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
                    <list key="application_parameters"/>
                    <parameter key="create_view" value="false"/>
                  </operator>
                  <!-- NOTE(review): the data arriving here comes from applying a cluster model; confirm that
                       this performance operator can evaluate it against the "id" label set above. -->
                  <operator activated="true" class="performance" compatibility="7.3.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="34">
                    <parameter key="use_example_weights" value="true"/>
                  </operator>
                  <operator activated="true" class="performance_to_data" compatibility="7.3.001" expanded="true" height="82" name="Performance to Data" width="90" x="112" y="391"/>
                  <!-- NOTE(review): append_to_file is "false" and this operator sits inside the validation
                       sub-process, so the file is presumably rewritten on every execution; confirm that
                       keeping only the last result is intended. The path is also user-specific. -->
                  <operator activated="true" class="write_csv" compatibility="7.3.001" expanded="true" height="82" name="Write CSV" width="90" x="179" y="238">
                    <parameter key="csv_file" value="C:\Users\Damiano\Documents\risultati.csv"/>
                    <parameter key="column_separator" value=";"/>
                    <parameter key="write_attribute_names" value="true"/>
                    <parameter key="quote_nominal_values" value="true"/>
                    <parameter key="format_date_attributes" value="true"/>
                    <parameter key="append_to_file" value="false"/>
                    <parameter key="encoding" value="SYSTEM"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
                  <connect from_op="Performance (2)" from_port="performance" to_op="Performance to Data" to_port="performance vector"/>
                  <connect from_op="Performance to Data" from_port="example set" to_op="Write CSV" to_port="input"/>
                  <connect from_op="Performance to Data" from_port="performance vector" to_port="performance 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_test set results" spacing="0"/>
                  <portSpacing port="sink_performance 1" spacing="0"/>
                  <portSpacing port="sink_performance 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="input 1" to_op="Set Role (4)" to_port="example set input"/>
              <connect from_op="Set Role (4)" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
              <!-- NOTE(review): the only output delivered here goes to "result 2", while the enclosing
                   process forwards "result 1" of Loop Parameters; confirm which port is meant. -->
              <connect from_op="Cross Validation" from_port="example set" to_port="result 2"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
              <portSpacing port="sink_result 2" spacing="0"/>
              <portSpacing port="sink_result 3" spacing="0"/>
            </process>
          </operator>
          <!-- Deactivated (activated="false") and not referenced by any connection below. -->
          <operator activated="false" class="set_role" compatibility="7.3.001" expanded="true" height="82" name="Set Role" width="90" x="45" y="391">
            <parameter key="attribute_name" value="traceId"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <connect from_port="file object" to_op="Read CSV" to_port="file"/>
          <connect from_op="Read CSV" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Loop Parameters" to_port="input 1"/>
          <connect from_op="Loop Parameters" from_port="result 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Loop Files" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Answers
Good morning, everyone. I need your help.
I cannot run this process with clustering and performance.
Attached is the file to be read by the "Loop Files" operator.
Why are you using Loop Files when you only have 1 CSV file?
Was what I posted here: http://community.rapidminer.com/t5/RapidMiner-Studio/CROSS-VALIDATION/m-p/36389#M25306 not useful?
Hi Thomas, sorry, I have 2 CSV files. My professor told me to use "Loop Files" and to set up first the clustering and its performance, and then the decision tree and its performance. If I understand correctly, these values must be set in "Loop Parameters".
I have attached the second CSV file.
I hope for your help!