import React from 'react'
import { graphql } from 'gatsby'

import WorkContainer from '../../components/WorkContainer'
import Layout from '../../components/Layout'
import { ProjectTitle, EnglishOnly } from '../../components/Text'
import Metadata from '../../components/Metadata'
import CustomerLogo from '../../components/CustomerLogo'
import ProjectImage from '../../components/ProjectImage'
import { useTranslation } from 'gatsby-plugin-react-i18next'

export default function DicomAnon({ data }) {
  const { t } = useTranslation()

  const project = data.projectsJson

  return (
    <Layout>
      <Metadata title={t(project.title)} isArticle />
      <WorkContainer>
        <CustomerLogo project={project} />
        <ProjectTitle title={t(project.title)} />
        <EnglishOnly />
        <ProjectImage data={project.gallery.main} top />
        <h2>Challenge</h2>
        <p>
          The task is to anonymize medical images produced by MRI and PET
          scanners.
        </p>
        <blockquote>Sort and anonymize 695'799 brain images</blockquote>
        <p>
          The data is stored in{' '}
          <a
            href="https://www.dicomstandard.org/"
            target="_blank"
            rel="noreferrer"
          >
            DICOM
          </a>{' '}
          format, the industry standard for storing and transporting medical
          images. The images are from a multi-hospital clinical trial. When
          stored as files, they consist of 695'799 files with meaningless names.
          The files are from a time when scanner vendors had stored DICOM files
          in their own DICOM "dialects" that deviated from the evolving standard
          in various ways.
        </p>
        <blockquote>
          Each file has hundreds of "header fields" potentially containing
          personal data
        </blockquote>
        <p>
          Additionally, each file contains hundreds of so-called "headers" with
          metadata about the image acquisition, many of them referring to the
          patient, to hospital names or other identifiable information. A part
          of the headers is important for analysis, and a part needs to be
          modified during anonymization.
        </p>
        <blockquote>US and EU privacy regulation</blockquote>
        <p>
          Anonymization needs to be done in accordance with HIPAA, but also the
          more stringent EU regulation. Since results are not used for an FDA
          submission, process validation according to 21 CFR part 11 is not
          required. However, the anonymization process needs to be reproducible,
          and the customer needs to be able to run their own quality checks.
        </p>
        <h2>Approach</h2>
        <p>
          In this project, we develop our own anonymization software and build
          it into a web-based and open-source DICOM toolkit. The DICOM files
          never leave the user's computer and are anonymized locally.
        </p>
        <blockquote>A website to anonymize DICOM files</blockquote>
        <p>
          A lot of effort is spent on writing the page in a memory-efficient,
          fast way so that a 2014 laptop will not crash while reading,
          anonymizing, sorting and writing back close to a million files one by
          one.
        </p>
        <blockquote>Our contribution to the DICOM Standard</blockquote>
        <p>
          The DICOM standard contains its own section about how to anonymize
          images, and our CEO Stefan in fact has authored its first draft. (For
          the insiders: obviously{' '}
          <a href="https://www.dclunie.com/" target="_blank" rel="noreferrer">
            David Clunie
          </a>{' '}
          has authored the next and final version.) That standard served as the
          guideline to this project.
        </p>
        <h2>Result</h2>
        <p>
          An anonymization pipeline has been developed which is fully
          characterized by a configuration page. This means, if any aspect of
          anonymization should change, we just edit the configuration and re-run
          the process on all original files.
        </p>
        <blockquote>
          A repeatable image anonymization process to ensure quality
        </blockquote>
        <p>
          We thus avoid doing incremental changes to the data files, which is
          very important for verifying the quality and reproducibility of the
          final deliverable.
        </p>
        <p>
          An external hard drive with 30 GB of anonymized imaging files has been
          delivered to the customer.
        </p>
        <p>
          Since this project, there have been great developments for working
          with DICOM on the web. Our old site and anonymizer is still up though.
          It's located at{' '}
          <a
            href="http://dcmjs.org/organize/index.html"
            target="_blank"
            rel="noreferrer"
          >
            dcmjs.org
          </a>
        </p>
        <p>
          In case you're curious why we're not mentioning the customer by name:
          Our General Services Agreement with them prohibits us from doing so.
        </p>
      </WorkContainer>
    </Layout>
  )
}

/**
 * Page query for the DICOM anonymization case study.
 *
 * Takes a required `$language` string variable and selects two things:
 *  - `locales`: every `allLocale` node whose `language` equals `$language`,
 *    with its `ns`, `data` and `language` fields (presumably consumed by
 *    gatsby-plugin-react-i18next to back `useTranslation` — confirm against
 *    the plugin configuration).
 *  - `projectsJson`: the single project record whose `name` is "dicomAnon",
 *    including customer/logo information, tags, link, and the `main` and
 *    `config_screenshot` gallery entries with their image data.
 *
 * The template literal is left exactly as written; only this comment is new.
 */
export const query = graphql`
  query ($language: String!) {
    locales: allLocale(filter: { language: { eq: $language } }) {
      edges {
        node {
          ns
          data
          language
        }
      }
    }
    projectsJson(name: { eq: "dicomAnon" }) {
      name
      title
      customer
      customerLogo {
        childImageSharp {
          gatsbyImageData
        }
      }
      tags
      link
      gallery {
        main {
          alt
          src {
            childImageSharp {
              gatsbyImageData
            }
          }
        }
        config_screenshot {
          alt
          src {
            childImageSharp {
              gatsbyImageData
            }
          }
        }
      }
    }
  }
`
