Skip to main content
Version: Next

DataProcess

Aspects

dataProcessKey

Key for a Data Process

Schema
{
"type": "record",
"Aspect": {
"name": "dataProcessKey"
},
"name": "DataProcessKey",
"namespace": "com.linkedin.metadata.key",
"fields": [
{
"Searchable": {
"boostScore": 4.0,
"enableAutocomplete": true,
"fieldType": "WORD_GRAM"
},
"type": "string",
"name": "name",
"doc": "Process name, e.g. an ETL job name"
},
{
"Searchable": {
"enableAutocomplete": true,
"fieldType": "TEXT_PARTIAL"
},
"type": "string",
"name": "orchestrator",
"doc": "Standardized Orchestrator where data process is defined.\nTODO: Migrate towards something that can be validated like DataPlatform urn"
},
{
"Searchable": {
"fieldType": "TEXT_PARTIAL",
"queryByDefault": false
},
"type": {
"type": "enum",
"symbolDocs": {
"CORP": "Designates corporation fabrics",
"DEV": "Designates development fabrics",
"EI": "Designates early-integration fabrics",
"NON_PROD": "Designates non-production fabrics",
"PRE": "Designates pre-production fabrics",
"PROD": "Designates production fabrics",
"QA": "Designates quality assurance fabrics",
"STG": "Designates staging fabrics",
"TEST": "Designates testing fabrics",
"UAT": "Designates user acceptance testing fabrics"
},
"name": "FabricType",
"namespace": "com.linkedin.common",
"symbols": [
"DEV",
"TEST",
"QA",
"UAT",
"EI",
"PRE",
"STG",
"NON_PROD",
"PROD",
"CORP"
],
"doc": "Fabric group type"
},
"name": "origin",
"doc": "Fabric type that the dataset belongs to or where it was generated."
}
],
"doc": "Key for a Data Process"
}

ownership

Ownership information of an entity.

Schema
{
"type": "record",
"Aspect": {
"name": "ownership"
},
"name": "Ownership",
"namespace": "com.linkedin.common",
"fields": [
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "Owner",
"namespace": "com.linkedin.common",
"fields": [
{
"Relationship": {
"entityTypes": [
"corpuser",
"corpGroup"
],
"name": "OwnedBy"
},
"Searchable": {
"addToFilters": true,
"fieldName": "owners",
"fieldType": "URN",
"filterNameOverride": "Owned By",
"hasValuesFieldName": "hasOwners",
"queryByDefault": false
},
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "owner",
"doc": "Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name\n(Caveat: only corpuser is currently supported in the frontend.)"
},
{
"deprecated": true,
"type": {
"type": "enum",
"symbolDocs": {
"BUSINESS_OWNER": "A person or group who is responsible for logical, or business related, aspects of the asset.",
"CONSUMER": "A person, group, or service that consumes the data\nDeprecated! Use TECHNICAL_OWNER or BUSINESS_OWNER instead.",
"CUSTOM": "Set when ownership type is unknown or when a new one is specified as an ownership type entity for which we have no\nenum value. This is used for backwards compatibility",
"DATAOWNER": "A person or group that is owning the data\nDeprecated! Use TECHNICAL_OWNER instead.",
"DATA_STEWARD": "A steward, expert, or delegate responsible for the asset.",
"DELEGATE": "A person or a group that oversees the operation, e.g. a DBA or SRE.\nDeprecated! Use TECHNICAL_OWNER instead.",
"DEVELOPER": "A person or group that is in charge of developing the code\nDeprecated! Use TECHNICAL_OWNER instead.",
"NONE": "No specific type associated to the owner.",
"PRODUCER": "A person, group, or service that produces/generates the data\nDeprecated! Use TECHNICAL_OWNER instead.",
"STAKEHOLDER": "A person or a group that has direct business interest\nDeprecated! Use TECHNICAL_OWNER, BUSINESS_OWNER, or DATA_STEWARD instead.",
"TECHNICAL_OWNER": "A person or group who is responsible for technical aspects of the asset."
},
"deprecatedSymbols": {
"CONSUMER": true,
"DATAOWNER": true,
"DELEGATE": true,
"DEVELOPER": true,
"PRODUCER": true,
"STAKEHOLDER": true
},
"name": "OwnershipType",
"namespace": "com.linkedin.common",
"symbols": [
"CUSTOM",
"TECHNICAL_OWNER",
"BUSINESS_OWNER",
"DATA_STEWARD",
"NONE",
"DEVELOPER",
"DATAOWNER",
"DELEGATE",
"PRODUCER",
"CONSUMER",
"STAKEHOLDER"
],
"doc": "Asset owner types"
},
"name": "type",
"doc": "The type of the ownership"
},
{
"Relationship": {
"entityTypes": [
"ownershipType"
],
"name": "ownershipType"
},
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": [
"null",
"string"
],
"name": "typeUrn",
"default": null,
"doc": "The type of the ownership\nUrn of type OwnershipType"
},
{
"type": [
"null",
{
"type": "record",
"name": "OwnershipSource",
"namespace": "com.linkedin.common",
"fields": [
{
"type": {
"type": "enum",
"symbolDocs": {
"AUDIT": "Auditing system or audit logs",
"DATABASE": "Database, e.g. GRANTS table",
"FILE_SYSTEM": "File system, e.g. file/directory owner",
"ISSUE_TRACKING_SYSTEM": "Issue tracking system, e.g. Jira",
"MANUAL": "Manually provided by a user",
"OTHER": "Other sources",
"SERVICE": "Other ownership-like service, e.g. Nuage, ACL service etc",
"SOURCE_CONTROL": "SCM system, e.g. GIT, SVN"
},
"name": "OwnershipSourceType",
"namespace": "com.linkedin.common",
"symbols": [
"AUDIT",
"DATABASE",
"FILE_SYSTEM",
"ISSUE_TRACKING_SYSTEM",
"MANUAL",
"SERVICE",
"SOURCE_CONTROL",
"OTHER"
]
},
"name": "type",
"doc": "The type of the source"
},
{
"type": [
"null",
"string"
],
"name": "url",
"default": null,
"doc": "A reference URL for the source"
}
],
"doc": "Source/provider of the ownership information"
}
],
"name": "source",
"default": null,
"doc": "Source information for the ownership"
}
],
"doc": "Ownership information"
}
},
"name": "owners",
"doc": "List of owners of the entity."
},
{
"type": {
"type": "record",
"name": "AuditStamp",
"namespace": "com.linkedin.common",
"fields": [
{
"type": "long",
"name": "time",
"doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
},
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": "string",
"name": "actor",
"doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change."
},
{
"java": {
"class": "com.linkedin.common.urn.Urn"
},
"type": [
"null",
"string"
],
"name": "impersonator",
"default": null,
"doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor."
},
{
"type": [
"null",
"string"
],
"name": "message",
"default": null,
"doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
}
],
"doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
},
"name": "lastModified",
"default": {
"actor": "urn:li:corpuser:unknown",
"impersonator": null,
"time": 0,
"message": null
},
"doc": "Audit stamp containing who last modified the record and when. A value of 0 in the time field indicates missing data."
}
],
"doc": "Ownership information of an entity."
}

dataProcessInfo

The inputs and outputs of this data process

Schema
{
"type": "record",
"Aspect": {
"name": "dataProcessInfo"
},
"name": "DataProcessInfo",
"namespace": "com.linkedin.dataprocess",
"fields": [
{
"Relationship": {
"/*": {
"entityTypes": [
"dataset"
],
"isLineage": true,
"name": "Consumes"
}
},
"Searchable": {
"/*": {
"fieldName": "inputs",
"fieldType": "URN",
"numValuesFieldName": "numInputDatasets",
"queryByDefault": false
}
},
"type": [
"null",
{
"type": "array",
"items": "string"
}
],
"name": "inputs",
"default": null,
"doc": "the inputs of the data process"
},
{
"Relationship": {
"/*": {
"entityTypes": [
"dataset"
],
"isLineage": true,
"name": "Consumes"
}
},
"Searchable": {
"/*": {
"fieldName": "outputs",
"fieldType": "URN",
"numValuesFieldName": "numOutputDatasets",
"queryByDefault": false
}
},
"type": [
"null",
{
"type": "array",
"items": "string"
}
],
"name": "outputs",
"default": null,
"doc": "the outputs of the data process"
}
],
"doc": "The inputs and outputs of this data process"
}

status

The lifecycle status metadata of an entity, e.g. dataset, metric, feature, etc. This aspect is used to represent soft deletes conventionally.

Schema
{
"type": "record",
"Aspect": {
"name": "status"
},
"name": "Status",
"namespace": "com.linkedin.common",
"fields": [
{
"Searchable": {
"fieldType": "BOOLEAN"
},
"type": "boolean",
"name": "removed",
"default": false,
"doc": "Whether the entity has been removed (soft-deleted)."
}
],
"doc": "The lifecycle status metadata of an entity, e.g. dataset, metric, feature, etc.\nThis aspect is used to represent soft deletes conventionally."
}

Relationships

Outgoing

These are the relationships stored in this entity's aspects

  • OwnedBy

    • Corpuser via ownership.owners.owner
    • CorpGroup via ownership.owners.owner
  • ownershipType

    • OwnershipType via ownership.owners.typeUrn
  • Consumes

    • Dataset via dataProcessInfo.inputs
    • Dataset via dataProcessInfo.outputs

Global Metadata Model

Global Graph