The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Dimensionality Reduction
Hey Guys,
We have applied the PCA and Chi-Squared methods for dimensionality reduction. This reduces the data volume considerably. My question is: can I remove the Principal Component Analysis (PCA) operator and keep only the Weight by PCA operator?
I would like to thank you for answering my question.
I would like to thank you for answering my question.
<!-- Process context: no input bindings, no output bindings and no macros are defined. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!--
  Root operator of the process.
  Main chain: Retrieve reut2, Select Attributes, Generate ID, Filter Examples,
  Set Role, Remove Correlated Attributes, Subprocess.
  The Subprocess fans the prepared example set out into three independent
  reduction branches and delivers one result per branch.
-->
<operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="UTF-8"/>
  <process expanded="true">

    <!-- Load the example set stored under the repository entry "reut2". -->
    <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve reut2" width="90" x="45" y="187">
      <parameter key="repository_entry" value="reut2"/>
    </operator>

    <!-- Subset filter with invert_selection=true: the attributes listed in "attributes" are the ones excluded. -->
    <operator activated="true" class="select_attributes" compatibility="9.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="187">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value="exchanges|orgs|people|places|text_orig|topics|zahlen"/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="true"/>
      <parameter key="include_special_attributes" value="true"/>
    </operator>

    <!-- Add an ID attribute (non-nominal IDs, offset 0). -->
    <operator activated="true" class="generate_id" compatibility="9.2.001" expanded="true" height="82" name="Generate ID" width="90" x="313" y="187">
      <parameter key="create_nominal_ids" value="false"/>
      <parameter key="offset" value="0"/>
    </operator>

    <!-- Custom filter on "category" using the does_not_equal condition against "?". -->
    <operator activated="true" class="filter_examples" compatibility="9.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="45" y="289">
      <parameter key="parameter_expression" value=""/>
      <parameter key="condition_class" value="custom_filters"/>
      <parameter key="invert_filter" value="false"/>
      <list key="filters_list">
        <parameter key="filters_entry_key" value="category.does_not_equal.?"/>
      </list>
      <parameter key="filters_logic_and" value="true"/>
      <parameter key="filters_check_metadata" value="true"/>
    </operator>

    <!-- Give the "category" attribute the label role. -->
    <operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role" width="90" x="179" y="289">
      <parameter key="attribute_name" value="category"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>

    <!-- Correlation filter: threshold 0.8, relation "greater", absolute correlation, random attribute order. -->
    <operator activated="true" class="remove_correlated_attributes" compatibility="9.2.001" expanded="true" height="82" name="Remove Correlated Attributes" width="90" x="313" y="289">
      <parameter key="correlation" value="0.8"/>
      <parameter key="filter_relation" value="greater"/>
      <parameter key="attribute_order" value="random"/>
      <parameter key="use_absolute_correlation" value="true"/>
      <parameter key="use_local_random_seed" value="false"/>
      <parameter key="local_random_seed" value="1992"/>
    </operator>

    <!--
      Three parallel dimensionality-reduction branches, all fed by "Multiply (2)":
        output 1: Weight by Chi Squared Statistic, Select by Weights (ChiSq), Store
        output 2: PCA, Store (2)
        output 3: Weight by PCA, Select by Weights (PCA), Store (3)
      "Weight by PCA" takes its input straight from the Multiply operator, not from
      the "PCA" operator, so branches 2 and 3 are wired independently of each other.
    -->
    <operator activated="true" class="subprocess" compatibility="9.2.001" expanded="true" height="124" name="Subprocess" width="90" x="581" y="187">
      <process expanded="true">

        <!-- Copies the incoming example set to each branch. -->
        <operator activated="true" class="multiply" compatibility="9.2.001" expanded="true" height="124" name="Multiply (2)" width="90" x="380" y="442"/>

        <!-- Branch 1: chi-squared attribute weights, then keep the top k=45 attributes and store the result. -->
        <operator activated="true" class="weight_by_chi_squared_statistic" compatibility="9.2.001" expanded="true" height="82" name="Weight by Chi Squared Statistic" width="90" x="648" y="289">
          <parameter key="normalize_weights" value="false"/>
          <parameter key="sort_weights" value="true"/>
          <parameter key="sort_direction" value="descending"/>
          <parameter key="number_of_bins" value="10"/>
        </operator>
        <operator activated="true" class="select_by_weights" compatibility="9.2.001" expanded="true" height="103" name="Select by Weights (ChiSq)" width="90" x="849" y="289">
          <parameter key="weight_relation" value="top k"/>
          <parameter key="weight" value="10.0"/>
          <parameter key="k" value="45"/>
          <parameter key="p" value="0.1"/>
          <parameter key="deselect_unknown" value="true"/>
          <parameter key="use_absolute_weights" value="false"/>
        </operator>
        <operator activated="true" class="store" compatibility="9.2.001" expanded="true" height="68" name="Store" width="90" x="1050" y="289">
          <parameter key="repository_entry" value="data/data_out_select_by_chisq_weights"/>
        </operator>

        <!-- Branch 2: PCA in "keep variance" mode with a variance threshold of 0.8; result is stored by "Store (2)". -->
        <operator activated="true" class="principal_component_analysis" compatibility="9.2.001" expanded="true" height="103" name="PCA" width="90" x="648" y="442">
          <parameter key="dimensionality_reduction" value="keep variance"/>
          <parameter key="variance_threshold" value="0.8"/>
          <parameter key="number_of_components" value="1"/>
        </operator>

        <!-- Branch 3: attribute weights from PCA component 1, then keep the top k=45 attributes by absolute weight and store. -->
        <operator activated="true" class="weight_by_pca" compatibility="9.2.001" expanded="true" height="82" name="Weight by PCA" width="90" x="648" y="595">
          <parameter key="normalize_weights" value="false"/>
          <parameter key="sort_weights" value="true"/>
          <parameter key="sort_direction" value="ascending"/>
          <parameter key="component_number" value="1"/>
        </operator>
        <operator activated="true" class="select_by_weights" compatibility="9.2.001" expanded="true" height="103" name="Select by Weights (PCA)" width="90" x="849" y="595">
          <parameter key="weight_relation" value="top k"/>
          <parameter key="weight" value="10.0"/>
          <parameter key="k" value="45"/>
          <parameter key="p" value="0.1"/>
          <parameter key="deselect_unknown" value="true"/>
          <parameter key="use_absolute_weights" value="true"/>
        </operator>
        <operator activated="true" class="store" compatibility="9.2.001" expanded="true" height="68" name="Store (3)" width="90" x="1050" y="595">
          <parameter key="repository_entry" value="data/data_out_select_by_pca_weights"/>
        </operator>

        <!-- Storage target for the PCA branch (branch 2). -->
        <operator activated="true" class="store" compatibility="9.2.001" expanded="true" height="68" name="Store (2)" width="90" x="1050" y="442">
          <parameter key="repository_entry" value="data/data_out_pca"/>
        </operator>

        <!-- Wiring inside the subprocess. -->
        <connect from_port="in 1" to_op="Multiply (2)" to_port="input"/>
        <connect from_op="Multiply (2)" from_port="output 1" to_op="Weight by Chi Squared Statistic" to_port="example set"/>
        <connect from_op="Multiply (2)" from_port="output 2" to_op="PCA" to_port="example set input"/>
        <connect from_op="Multiply (2)" from_port="output 3" to_op="Weight by PCA" to_port="example set"/>
        <connect from_op="Weight by Chi Squared Statistic" from_port="weights" to_op="Select by Weights (ChiSq)" to_port="weights"/>
        <connect from_op="Weight by Chi Squared Statistic" from_port="example set" to_op="Select by Weights (ChiSq)" to_port="example set input"/>
        <connect from_op="Select by Weights (ChiSq)" from_port="example set output" to_op="Store" to_port="input"/>
        <connect from_op="Store" from_port="through" to_port="out 1"/>
        <connect from_op="PCA" from_port="example set output" to_op="Store (2)" to_port="input"/>
        <connect from_op="Weight by PCA" from_port="weights" to_op="Select by Weights (PCA)" to_port="weights"/>
        <connect from_op="Weight by PCA" from_port="example set" to_op="Select by Weights (PCA)" to_port="example set input"/>
        <connect from_op="Select by Weights (PCA)" from_port="example set output" to_op="Store (3)" to_port="input"/>
        <connect from_op="Store (3)" from_port="through" to_port="out 3"/>
        <connect from_op="Store (2)" from_port="through" to_port="out 2"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
        <portSpacing port="sink_out 3" spacing="0"/>
        <portSpacing port="sink_out 4" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Subprozess</description>
    </operator>

    <!-- Wiring of the main chain; the three subprocess outputs become the process results 1 to 3. -->
    <connect from_op="Retrieve reut2" from_port="output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Remove Correlated Attributes" to_port="example set input"/>
    <connect from_op="Remove Correlated Attributes" from_port="example set output" to_op="Subprocess" to_port="in 1"/>
    <connect from_op="Subprocess" from_port="out 1" to_port="result 1"/>
    <connect from_op="Subprocess" from_port="out 2" to_port="result 2"/>
    <connect from_op="Subprocess" from_port="out 3" to_port="result 3"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
  </process>
</operator>
</process>
0
Best Answer
-
varunm1 Member Posts: 1,207 Unicorn
If you are not using the output of the PCA operator, you can remove it. If you just need the weights from PCA, you can keep the current Weight by PCA operator and remove the PCA operator. You can tailor the process to your needs.
Regards,
Varun
https://www.varunmandalapu.com/
Be Safe. Follow precautions and Maintain Social Distancing
5
Answers
Sorry, can you explain your question? I saw your process and it looks fine. Why are you asking to remove PCA?
Varun
https://www.varunmandalapu.com/
Be Safe. Follow precautions and Maintain Social Distancing