The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
How to perform aggregation on year and half year data of customer
sgnarkhede2016
Member Posts: 152 Contributor II
Hello , |
form the attached file I want to generate the below table but from generation of data I have a Configuration file that contains one column "BIN" contains month, quarter,half-year and year, another column "ENABLE" that contain Yes or No. I used the filter operator to calculate values only on Yes attribute and store in the respective columns.
Customer Daily-AvgAmt | Daily-STDAmt | Daily-AvgVolume | Daily-STDVolume |
above for daily same column added for Monthly/Yearly
I want to calculate the "number of transaction", "avg amount"," std devation", "Daily Limit " of the customer as per selected value in the config(Year/Half year)
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Config_ProfileOnBins" width="90" x="45" y="34">
<parameter key="repository_entry" value="../Data/Config_ProfileOnBins"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="9.5.001" expanded="true" height="103" name="Filter Examples (22)" width="90" x="179" y="34">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="custom_filters"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list">
<parameter key="filters_entry_key" value="ENABLE.equals.Yes"/>
</list>
<parameter key="filters_logic_and" value="true"/>
<parameter key="filters_check_metadata" value="true"/>
</operator>
<operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
<parameter key="macro" value="number_of_bins"/>
<parameter key="macro_type" value="number_of_examples"/>
<parameter key="statistics" value="average"/>
<parameter key="attribute_name" value=""/>
<list key="additional_macros"/>
</operator>
<operator activated="false" class="utility:create_exampleset" compatibility="9.5.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="595">
<parameter key="generator_type" value="numeric series"/>
<parameter key="number_of_examples" value="1000"/>
<parameter key="use_stepsize" value="true"/>
<list key="function_descriptions"/>
<parameter key="add_id_attribute" value="false"/>
<list key="numeric_series_configuration">
<parameter key="ProfileID" value="linear.1\.0.1\.0"/>
</list>
<list key="date_series_configuration"/>
<list key="date_series_configuration (interval)"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="column_separator" value=","/>
<parameter key="parse_all_as_nominal" value="false"/>
<parameter key="decimal_point_character" value="."/>
<parameter key="trim_attribute_names" value="true"/>
</operator>
<operator activated="false" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes (5)" width="90" x="246" y="595">
<list key="function_descriptions">
<parameter key="NEW_ID" value="concat("CBP",suffix(concat("0000",str(ProfileID)),5))"/>
</list>
<parameter key="keep_all" value="true"/>
</operator>
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve dummy Data" width="90" x="45" y="238">
<parameter key="repository_entry" value="../Data/dummy Data"/>
</operator>
<operator activated="true" breakpoints="after" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="238">
<process expanded="true">
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="year"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="era"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (2)" width="90" x="179" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="quarter"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Exract Quarter of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (3)" width="90" x="313" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="month"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Month of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (4)" width="90" x="447" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="week"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Week of Month</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (6)" width="90" x="581" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="half year"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Half of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (15)" width="90" x="715" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="day"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="week"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Day of Month</description>
</operator>
<operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename Extracted Features" width="90" x="849" y="34">
<parameter key="old_name" value="InputDateTime_year"/>
<parameter key="new_name" value="YEAR"/>
<list key="rename_additional_attributes">
<parameter key="InputDateTime_quarter" value="QUARTER"/>
<parameter key="InputDateTime_month" value="MONTH"/>
<parameter key="InputDateTime_week" value="WEEK"/>
<parameter key="InputDateTime_half year" value="HALF YEAR"/>
<parameter key="InputDateTime_day" value="DAY"/>
</list>
</operator>
<connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
<connect from_op="Date to Numerical" from_port="example set output" to_op="Date to Numerical (2)" to_port="example set input"/>
<connect from_op="Date to Numerical (2)" from_port="example set output" to_op="Date to Numerical (3)" to_port="example set input"/>
<connect from_op="Date to Numerical (3)" from_port="example set output" to_op="Date to Numerical (4)" to_port="example set input"/>
<connect from_op="Date to Numerical (4)" from_port="example set output" to_op="Date to Numerical (6)" to_port="example set input"/>
<connect from_op="Date to Numerical (6)" from_port="example set output" to_op="Date to Numerical (15)" to_port="example set input"/>
<connect from_op="Date to Numerical (15)" from_port="example set output" to_op="Rename Extracted Features" to_port="example set input"/>
<connect from_op="Rename Extracted Features" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
<description align="center" color="transparent" colored="false" width="126">Data Preparation</description>
</operator>
<operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename" width="90" x="313" y="238">
<parameter key="old_name" value="Amount"/>
<parameter key="new_name" value="AMOUNT"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="concurrency:loop_values" compatibility="9.5.001" expanded="true" height="103" name="Loop Values" width="90" x="514" y="187">
<parameter key="attribute" value="%{number_of_bins}"/>
<parameter key="iteration_macro" value="loop_bin"/>
<parameter key="reuse_results" value="false"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (3)" width="90" x="246" y="238">
<parameter key="use_default_aggregation" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default_aggregation_function" value="average"/>
<list key="aggregation_attributes">
<parameter key="AMOUNT" value="average"/>
<parameter key="AMOUNT" value="minimum"/>
<parameter key="AMOUNT" value="maximum"/>
<parameter key="AMOUNT" value="standard_deviation"/>
<parameter key="AMOUNT" value="median"/>
<parameter key="AMOUNT" value="sum"/>
</list>
<parameter key="group_by_attributes" value="Customer|%{loop_bin}"/>
<parameter key="count_all_combinations" value="false"/>
<parameter key="only_distinct" value="false"/>
<parameter key="ignore_missings" value="true"/>
</operator>
<operator activated="true" breakpoints="after" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (256)" width="90" x="380" y="238">
<parameter key="use_default_aggregation" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default_aggregation_function" value="average"/>
<list key="aggregation_attributes">
<parameter key="FREQUENCY" value="average"/>
<parameter key="FREQUENCY" value="minimum"/>
<parameter key="FREQUENCY" value="maximum"/>
<parameter key="FREQUENCY" value="standard_deviation"/>
<parameter key="FREQUENCY" value="median"/>
<parameter key="FREQUENCY" value="sum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="average"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="minimum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="maximum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="standard_deviation"/>
</list>
<parameter key="group_by_attributes" value="%{loop_bin}|Customer"/>
<parameter key="count_all_combinations" value="false"/>
<parameter key="only_distinct" value="false"/>
<parameter key="ignore_missings" value="true"/>
</operator>
<operator activated="true" class="filter_example_range" compatibility="9.5.001" expanded="true" height="82" name="Filter Example Range (2)" width="90" x="112" y="34">
<parameter key="first_example" value="%{iteration}"/>
<parameter key="last_example" value="%{iteration}"/>
<parameter key="invert_filter" value="false"/>
</operator>
<operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro (42)" width="90" x="246" y="34">
<parameter key="macro" value="key_attribute_type1"/>
<parameter key="macro_type" value="data_value"/>
<parameter key="statistics" value="average"/>
<parameter key="attribute_name" value="BIN"/>
<parameter key="example_index" value="1"/>
<list key="additional_macros"/>
</operator>
<connect from_port="input 1" to_op="Filter Example Range (2)" to_port="example set input"/>
<connect from_port="input 2" to_op="Aggregate (3)" to_port="example set input"/>
<connect from_op="Aggregate (3)" from_port="example set output" to_op="Aggregate (256)" to_port="example set input"/>
<connect from_op="Filter Example Range (2)" from_port="example set output" to_op="Extract Macro (42)" to_port="example set"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="source_input 3" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Config_ProfileOnBins" from_port="output" to_op="Filter Examples (22)" to_port="example set input"/>
<connect from_op="Filter Examples (22)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="Loop Values" to_port="input 1"/>
<connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes (5)" to_port="example set input"/>
<connect from_op="Retrieve dummy Data" from_port="output" to_op="Subprocess (2)" to_port="in 1"/>
<connect from_op="Subprocess (2)" from_port="out 1" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Loop Values" to_port="input 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Best Answer
-
hbajpai Member Posts: 102 Unicornsgnarkhede2016,
Thanks for sharing the process and xlsx files with us. I think I understood your requirements and have created a process which aggregates half yearly and yearly values.
Take a look, and let me know what you think.<?xml version="1.0" encoding="UTF-8"?><process version="9.6.000"> <context> <input/> <output/> <macros/> </context> <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process"> <parameter key="logverbosity" value="init"/> <parameter key="random_seed" value="2001"/> <parameter key="send_mail" value="never"/> <parameter key="notification_email" value=""/> <parameter key="process_duration_for_mail" value="30"/> <parameter key="encoding" value="SYSTEM"/> <process expanded="true"> <operator activated="true" class="read_excel" compatibility="9.6.000" expanded="true" height="68" name="Read Excel (2)" width="90" x="45" y="85"> <parameter key="excel_file" value="C:\Users\harsh\Downloads\dummy Data.xlsx"/> <parameter key="sheet_selection" value="sheet number"/> <parameter key="sheet_number" value="1"/> <parameter key="imported_cell_range" value="A1"/> <parameter key="encoding" value="SYSTEM"/> <parameter key="first_row_as_names" value="true"/> <list key="annotations"/> <parameter key="date_format" value=""/> <parameter key="time_zone" value="SYSTEM"/> <parameter key="locale" value="English (United States)"/> <parameter key="read_all_values_as_polynominal" value="false"/> <list key="data_set_meta_data_information"> <parameter key="0" value="Customer.true.polynominal.attribute"/> <parameter key="1" value="InputDateTime.true.date_time.attribute"/> <parameter key="2" value="Amount.true.integer.attribute"/> </list> <parameter key="read_not_matching_values_as_missings" value="false"/> <parameter key="datamanagement" value="double_array"/> <parameter key="data_management" value="auto"/> <description align="center" color="transparent" colored="false" width="126">Dummy Data file</description> </operator> <operator activated="true" class="rename" compatibility="9.6.000" expanded="true" height="82" name="Rename" width="90" x="179" y="85"> <parameter key="old_name" value="Amount"/> <parameter key="new_name" value="AMOUNT"/> <list key="rename_additional_attributes"/> </operator> <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="85"> <list key="function_descriptions"> <parameter key="Date" value="date_str_custom(InputDateTime,"yyyy-MM-dd")"/> </list> <parameter key="keep_all" value="true"/> </operator> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Pivot" width="90" x="447" y="85"> <parameter key="group_by_attributes" value="Customer|Date"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer" value="count"/> <parameter key="AMOUNT" value="sum"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <operator activated="true" class="rename" compatibility="9.6.000" expanded="true" height="82" name="Rename (2)" width="90" x="581" y="85"> <parameter key="old_name" value="sum(AMOUNT)"/> <parameter key="new_name" value="Customer Daily Amt"/> <list key="rename_additional_attributes"> <parameter key="count(Customer)" value="Daily Volume"/> </list> </operator> <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="715" y="85"> <list key="function_descriptions"> <parameter key="Year_Group" value="str(date_get(date_parse_custom(Date,"yyyy-MM-dd"),DATE_UNIT_YEAR))"/> <parameter key="Half_Year" value="if ( date_get(date_parse_custom(Date,"yyyy-MM-dd"),DATE_UNIT_MONTH) <= 6, str(1), str(2))"/> <parameter key="Half_Year_Group" value="concat(Year_Group,"-",Half_Year)"/> </list> <parameter key="keep_all" value="true"/> </operator> <operator activated="true" class="multiply" compatibility="9.6.000" expanded="true" height="103" name="Multiply" width="90" x="849" y="85"/> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Half Yearly" width="90" x="983" y="34"> <parameter key="group_by_attributes" value="Customer|Half_Year_Group"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer Daily Amt" value="average"/> <parameter key="Customer Daily Amt" value="standard deviation"/> <parameter key="Daily Volume" value="average"/> <parameter key="Daily Volume" value="standard deviation"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Yearly" width="90" x="983" y="187"> <parameter key="group_by_attributes" value="Customer|Year_Group"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer Daily Amt" value="average"/> <parameter key="Customer Daily Amt" value="standard deviation"/> <parameter key="Daily Volume" value="average"/> <parameter key="Daily Volume" value="standard deviation"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <connect from_op="Read Excel (2)" from_port="output" to_op="Rename" to_port="example set input"/> <connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/> <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="input"/> <connect from_op="Pivot" from_port="output" to_op="Rename (2)" to_port="example set input"/> <connect from_op="Rename (2)" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/> <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Multiply" to_port="input"/> <connect from_op="Multiply" from_port="output 1" to_op="Half Yearly" to_port="input"/> <connect from_op="Multiply" from_port="output 2" to_op="Yearly" to_port="input"/> <connect from_op="Half Yearly" from_port="output" to_port="result 1"/> <connect from_op="Yearly" from_port="output" to_port="result 2"/> <portSpacing port="source_input 1" spacing="0"/> <portSpacing port="sink_result 1" spacing="0"/> <portSpacing port="sink_result 2" spacing="147"/> <portSpacing port="sink_result 3" spacing="0"/> </process> </operator> </process>
Best,
Harshit6