import React from 'react'
import { graphql } from 'gatsby'

import WorkContainer from '../../components/WorkContainer'
import Layout from '../../components/Layout'
import { ProjectTitle, EnglishOnly } from '../../components/Text'
import Metadata from '../../components/Metadata'
import CustomerLogo from '../../components/CustomerLogo'
import ProjectImage from '../../components/ProjectImage'
import { useTranslation } from 'gatsby-plugin-react-i18next'

export default function Mondrian({ data }) {
  const { t } = useTranslation()

  const project = data.projectsJson

  return (
    <Layout>
      <Metadata title={t(project.title)} isArticle />
      <WorkContainer>
        <CustomerLogo project={project} />
        <ProjectTitle title={t(project.title)} />
        <EnglishOnly />
        <ProjectImage data={project.gallery.main} top />
        <h2>Challenge</h2>
        <p>
          Our customer has a dataset with detailed information about emissions
          that everyone in the organization generates. In particular, the
          dataset contains valuable personal data, such as age, gender or team,
          which are important to analyze if the customer should be able to
          identify and communicate effective policies for reducing emissions.
        </p>
        <p>
          However, the dataset also needs to be anonymized to protect the
          privacy of employees, in compliance with the strict European Privacy
          Regulation.
        </p>
        <blockquote>
          Anonymize data without sacrificing important detail
        </blockquote>
        <p>
          Therefore, the challenge is to design and implement a data
          anonymization method while also maintaining as many data details as
          possible.
        </p>
        <h2>Approach</h2>
        <p>
          To achieve a robust anonymization, we need a method that is
          established in the scientific literature as well as in practice.
        </p>
        <blockquote>Scientific literature research</blockquote>
        <p>
          Based on the system requirements, the goal is to create an anonymized
          dataset that in the literature falls under the definition "Privacy in
          non-interactive databases", and also the definition "non-sensitive
          data".
        </p>
        <p>
          <em>Non-interactive database</em> refers the publication of anonymized
          data in a single public dataset. (The opposite of this is the
          "interactive" setting where a protected database contains original
          data, and anonymization happens "live" at the moment when a user
          requests parts of the data.)
        </p>
        <p>
          <em>Non-sensitive</em> refers to the definition "Special categories of
          personal data" from the{' '}
          <a href="https://eur-lex.europa.eu/legal-content/EN/ALL/?uri=celex%3A32016R0679">
            EU General Data Protection Law
          </a>
          . This is concerning data whose processing could create significant
          risks to the fundamental rights and freedoms of affected persons, such
          as health related data.
        </p>
        <p>
          <blockquote>
            Mondrian: anonymization for non-sensitive data
          </blockquote>
          We settled on <em>k-anonymization</em> as a suitable anonymization
          method, and after further research decided to use an algorithm called
          "Basic Mondrian", a robust algorithm that minimizes data loss. It does
          so by allowing us to define incremental levels of anonymization for
          each data attribute separately, and by grouping datapoints into "k"
          groups of data anonymized together. (The groups, if drawn onto a
          canvas, look similar to the well-known paintings by Piet Mondrian,
          hence the name.)
        </p>
        <blockquote>Implementing the anonymization software</blockquote>
        <p>
          A search for existing Mondrian software algorithms yielded no
          practical results. The one open-source implementation we found had key
          weaknesses, such as not being able to handle missing datapoints. We
          therefore implemented our own algorithm. In addition, we applied a few
          additional sandard anonymizing techniques such as moving flight dates
          by a randomized number of days.
        </p>
        <h2>Result</h2>
        <p>
          The result is an anonymization algorithm that can easily be added to
          existing data analysis software.
          <ul>
            <li>Dataset anonymization is fully automated</li>
            <li>
              Every data attribute has an incremental anonymization approach
            </li>
            <li>
              It's possible to set relative priorities for which data attributes
              to maintain as much as possible detail for.
            </li>
          </ul>
        </p>
        <p>
          We have written a whitepaper which details the anonymization process,
          including Privacy attack scenarios that it averts. This was used in
          getting the green light from the customer's Privacy Office.
        </p>
        <p>
          We are planning to adapt the algorithm's codebase so it will be ready
          for publishing as open-source software. We expect to gain an
          additional layer of security and trust from exposing its
          functionality.
        </p>
        <p>Contact us if you are interested in this.</p>
      </WorkContainer>
    </Layout>
  )
}

/**
 * Page query for the Mondrian case-study page.
 *
 * - `$language` is a required String variable. It is presumably injected into
 *   the page context by gatsby-plugin-react-i18next — TODO confirm.
 * - `locales` selects every `allLocale` node whose `language` equals
 *   `$language`, returning its `ns`, `data` and `language` fields.
 * - `projectsJson` selects the project record whose `name` is "mondrian",
 *   including the customer logo and gallery images as `gatsbyImageData`.
 *
 * NOTE(review): `gallery.mondrian_1` is fetched here, but the component in
 * this file only reads `gallery.main` directly — verify whether it is still
 * needed.
 */
export const query = graphql`
  query ($language: String!) {
    locales: allLocale(filter: { language: { eq: $language } }) {
      edges {
        node {
          ns
          data
          language
        }
      }
    }
    projectsJson(name: { eq: "mondrian" }) {
      name
      title
      customer
      customerLogo {
        childImageSharp {
          gatsbyImageData
        }
      }
      tags
      link
      gallery {
        main {
          alt
          src {
            childImageSharp {
              gatsbyImageData
            }
          }
        }
        mondrian_1 {
          alt
          src {
            childImageSharp {
              gatsbyImageData
            }
          }
        }
      }
    }
  }
`
