Normalize data per column

Hyper parameters

  • keepFullyDefinedRows: Boolean. When set to true, every row that contains an undefined feature is removed.
  • preprocessingStrategy: JSON array of objects, each containing the following fields:
    • columnName: String naming the numerical feature (column) to which the preprocessing is applied.
    • preprocessingMethod: String naming the normalization method, one of:
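      • percentil_0.01_0.99: (X - P1) / (P99 - P1), where P1 and P99 are the 1st and 99th percentiles of the column (same as percentil with the default values PercMin = 0.01 and PercMax = 0.99).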
      • percentil: (X - PercMin) / (PercMax - PercMin).
      • log: log(X + Constant).
      • log10: log10(X + Constant)
      • min_max: (X - Ai) / Bi.
      • standardization: ( X - mean(X)) / (sd(X)).
      • shift_and_divide: (X - A) / B
    • parameters: JSON array of objects, each containing two fields:
      • name: Name of the parameter, e.g. ‘PercMin’.
      • value: The parameter value.

Example JSON payload template:

{
  "processingKeyword": "normalizeDataPerColumn",
  "customer": "Safran",
  "name": "SOMProcess",
  "status": 1,
  "dataLocations": [{
    "role": "som_model_mongo",
    "dataLocationId": "62fd06a5c50007ba8f560aec"
    }
  ],
  "processingContext": {
    "processingName": "user define name, ex SOM",
    "editionContext": "user",
    "callingContext": "hephIA-solution",
    "dataset": {
      "name": "safran"
    },
    "project": {
      "id": 2,
      "name": "normalize_per_column"
    }
  },
  "hyperParameters": {
    "removeRowsWithUndefinedValues": true,
    "preprocessingStrategy": [
        {
            "columnName": "pt_100_c", "preprocessingMethod": "percentile_001_099", "parameters":[{"name": "", "value": 0.0}]
        },
        {
            "columnName": "cp_a_1_bar", "preprocessingMethod": "shift_and_divide", "parameters":[{"name": "A", "value": 10.0}, {"name": "B", "value": 20.0}]
        },
        {
            "columnName": "cp_a_2_bar", "preprocessingMethod": "standardization", "parameters":[{"name": "", "value": 0.0}]
        },
        {
            "columnName": "cp_r_bar", "preprocessingMethod": "log10", "parameters":[{"name": "base", "value": 10}, {"name": "Constant", "value": 10}]
        }        
    ]
  }
}