Principal Component Analysis

Representation

The keys _id, name, stepId, domainInformation, dataset, project, processingInfo, creationTS, and latestUpdateTS are unchanged and follow the classic representation format; only the dataSpecification keys are changed, and they are described below:

Currently, only numerical vectors can be saved during raw data loading, so there is a single template at this time.

JSON template for numerical vector processing data representation.

{
  "_id": "637cee7171541371d7181883",
  "name": "hephia",
  "stepId": 1,
  "domainInformation": {
    "customer": "hephia"
  },
  "dataset": {
    "name": "pca_007"
  },
  "project": {
    "id": 190242736,
    "name": "datasets"
  },
  "processingInfo": {
    "processingId": "637cee5206c5b60b125933e3",
    "processingName": "pca_1669131888941",
    "editionContext": "ds-lab"
  },
  "creationTS": 1669131859,
  "latestUpdateTS": 1669131859,
  "dataSpecification": {
    "keyword": "pcaDataMongo",
    "valueType": {
      "dataType": "numerical",
      "structureType": "vector"
    },
    "meaning": "Vector of pca numerical features",
    "view": {
      "name": "str123456",
      "id": "my_name"
    },
    "dataLocationId": "637cee7171541371d7181886",
    "hyperParameters": {
      "dimension": 3
    }
  }
}

Observation

{
  "_id": "ObjectId('637cee7171541371d7181889')",
  "dimensions": {
    "dimensionName": "PC0",
    "explainedVariance": 0.9999993582418356,
    "principalComponents": [
      {
        "name": "ObservationId",
        "value": -0.999999993628101
      },
      {
        "name": "cp_a_1_bar",
        "value": 0.00010170583200002157
      },
      {
        "name": "cp_a_2_bar",
        "value": 0.00004898695782813702
      }
    ]
  },
  "representationId": "ObjectId('637cee7171541371d7181883')"
}