683 lines
34 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en" data-content_root="./" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Keyword-Service &#8212; CV Analysis Service 2.5.1 documentation</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
<link rel="stylesheet" type="text/css" href="https://assets.readthedocs.org/static/css/badge_only.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/documentation_options.js?v=983e91d6"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'README';</script>
<script async="async" src="https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="cv_analysis package" href="modules/cv_analysis.html" />
<link rel="prev" title="Welcome to Keyword Extraction Service documentation!" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- RTD Extra Head -->
<link rel="stylesheet" href="https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css" type="text/css" />
<script type="application/json" id="READTHEDOCS_DATA">{"ad_free": "", "api_host": "", "builder": "sphinx", "canonical_url": "", "docroot": "", "features": {"docsearch_disabled": false}, "global_analytics_code": null, "language": "", "page": "README", "programming_language": "", "project": "", "source_suffix": ".md", "subprojects": {}, "theme": "", "user_analytics_code": null, "version": ""}</script>
<!--
Using this variable directly instead of using `JSON.parse` is deprecated.
The READTHEDOCS_DATA global variable will be removed in the future.
-->
<script type="text/javascript">
READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
</script>
<script type="text/javascript" src="https://assets.readthedocs.org/static/javascript/readthedocs-analytics.js" async="async"></script>
<!-- end RTD <extrahead> -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="index.html">
<img src="_static/logo.png" class="logo__image only-light" alt="CV Analysis Service 2.5.1 documentation - Home"/>
<script>document.write(`<img src="_static/logo.png" class="logo__image only-dark" alt="CV Analysis Service 2.5.1 documentation - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
Keyword-Service
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
Keyword-Service
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"></div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page">Keyword-Service</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="keyword-service">
<h1>Keyword-Service<a class="headerlink" href="#keyword-service" title="Link to this heading">#</a></h1>
<p>Service to get keywords of a paragraph or whole document.</p>
<!-- TOC --><ul class="simple">
<li><p><a class="reference external" href="#keyword-service">Keyword-Service</a></p>
<ul>
<li><p><a class="reference external" href="#api">API</a></p>
<ul>
<li><p><a class="reference external" href="#rest">REST</a></p></li>
<li><p><a class="reference external" href="#rabbitmq">RabbitMQ</a></p></li>
</ul>
</li>
<li><p><a class="reference external" href="#service-configuration">Service Configuration</a></p></li>
<li><p><a class="reference external" href="#language">Language</a></p></li>
<li><p><a class="reference external" href="#usage">Usage</a></p>
<ul>
<li><p><a class="reference external" href="#run-docker-commands">Run Docker Commands</a></p></li>
<li><p><a class="reference external" href="#run-locally">Run locally</a></p></li>
</ul>
</li>
</ul>
</li>
<li><p><a class="reference external" href="#upload-models-to-ml-flow">Upload models to ML Flow</a></p></li>
</ul>
<!-- TOC --><section id="api">
<h2>API<a class="headerlink" href="#api" title="Link to this heading">#</a></h2>
<section id="rest">
<h3>REST<a class="headerlink" href="#rest" title="Link to this heading">#</a></h3>
<p>The service provides endpoints to extract keywords from a text and to embed a text. For details, download the
<a class="reference external" href="docs/openapi_redoc.html">OpenAPI schema</a> and view it in a browser.</p>
</section>
<section id="rabbitmq">
<h3>RabbitMQ<a class="headerlink" href="#rabbitmq" title="Link to this heading">#</a></h3>
<p>The service listens to a queue and processes the messages. This method is meant to be used for extracting keywords from
whole documents. All RabbitMQ parameters, including the queue names, are set in environment variables; refer to the
service's respective Helm chart for more information.</p>
<p>The input message should be a JSON object with the following structure:</p>
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;targetFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="err">s</span><span class="kc">tr</span><span class="err">i</span><span class="kc">n</span><span class="err">g</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;responseFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="err">s</span><span class="kc">tr</span><span class="err">i</span><span class="kc">n</span><span class="err">g</span>
<span class="p">}</span>
</pre></div>
</div>
<p>The service downloads the file specified in <code class="docutils literal notranslate"><span class="pre">targetFilePath</span></code>. Supported data structures for the target file are:</p>
<ul class="simple">
<li><p>simplified text data (signifier key: <code class="docutils literal notranslate"><span class="pre">paragraphs</span></code>)</p></li>
<li><p>structure object data (signifier key: <code class="docutils literal notranslate"><span class="pre">structureObjects</span></code>)</p></li>
</ul>
<p>As a response, the service uploads a JSON-structured file (as defined in <code class="docutils literal notranslate"><span class="pre">responseFilePath</span></code>) with the result under the
<code class="docutils literal notranslate"><span class="pre">data</span></code> key. The structure of the response file is as follows:</p>
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="s2">&quot;targetFilePath&quot;</span>
<span class="o">:</span>
<span class="w"> </span><span class="nx">string</span><span class="p">,</span>
<span class="w"> </span><span class="s2">&quot;responseFilePath&quot;</span>
<span class="o">:</span>
<span class="w"> </span><span class="nx">string</span><span class="p">,</span>
<span class="w"> </span><span class="c1">// and eventually further fields if present in the input message </span>
<span class="w"> </span><span class="s2">&quot;data&quot;</span>
<span class="o">:</span>
<span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="s2">&quot;keywords&quot;</span><span class="o">:</span><span class="w"> </span><span class="nb">Array</span><span class="p">[</span><span class="nx">string</span><span class="p">],</span>
<span class="w"> </span><span class="s2">&quot;paragraphId&quot;</span><span class="o">:</span><span class="w"> </span><span class="kr">int</span><span class="p">,</span>
<span class="w"> </span><span class="s2">&quot;embedding&quot;</span><span class="o">:</span><span class="w"> </span><span class="nb">Array</span><span class="p">[</span><span class="kr">float</span><span class="p">]</span><span class="w"> </span><span class="c1">// 384 dimensions</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p><strong>Note</strong> that</p>
<ul class="simple">
<li><p>the <code class="docutils literal notranslate"><span class="pre">embedding</span></code> key is optional and can be omitted. The service will not calculate the embedding if the environment
variable <code class="docutils literal notranslate"><span class="pre">MODEL__COMPUTE_EMBEDDINGS</span></code> is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>.</p></li>
<li><p>the service also computes the keywords for the whole document. In this case, the <code class="docutils literal notranslate"><span class="pre">paragraphId</span></code> is set to <code class="docutils literal notranslate"><span class="pre">-1</span></code>.</p></li>
</ul>
</section>
</section>
<section id="service-configuration">
<h2>Service Configuration<a class="headerlink" href="#service-configuration" title="Link to this heading">#</a></h2>
<p>The service is configured via environment variables. The following variables are available:</p>
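<table class="table">
<thead>
<tr><th scope="col">Variable</th><th scope="col">Description</th><th scope="col">Default</th></tr>
</thead>
<tbody>
<tr><td><code class="docutils literal notranslate"><span class="pre">LOGGING__LEVEL</span></code></td><td>Logging level</td><td>INFO</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__MAX_KEYWORDS_PER_PARAGRAPH</span></code></td><td>Maximum number of keywords per paragraph</td><td>5</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__MAX_KEYWORDS_PER_DOCUMENT</span></code></td><td>Maximum number of keywords per document; when set to 0, no keywords are extracted</td><td>0</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__COMPUTE_EMBEDDINGS</span></code></td><td>Whether to compute keyword embeddings or not</td><td>true</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__PREPROCESSING__MIN_PARAGRAPH_LENGTH</span></code></td><td>Minimum number of characters in a paragraph to be considered for keyword extraction</td><td>1</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__POSTPROCESSING__FILTER_SUBWORDS</span></code></td><td>Whether to filter out subwords from the keywords or not</td><td>true</td></tr>
</tbody>
</table>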
<p><strong>NOTE</strong> that these variables are subject to change. For the most recent configuration, refer to the service's respective
Helm chart.</p>
</section>
<section id="language">
<h2>Language<a class="headerlink" href="#language" title="Link to this heading">#</a></h2>
<p>Currently, there are an English, a German and a multi-language model for keyword extraction. The models are uploaded to
MLflow and can
be set in the Dockerfile when building the container:</p>
<p>Example for the German model:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ENV</span> <span class="n">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="s2">&quot;mriedl&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="s2">&quot;azureml-ws&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="s2">&quot;keyword-extraction-de&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="s2">&quot;1&quot;</span>
</pre></div>
</div>
<p>Example for the English model:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ENV</span> <span class="n">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="s2">&quot;mriedl&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="s2">&quot;azureml-ws&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="s2">&quot;keyword-extraction-en&quot;</span>
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="s2">&quot;1&quot;</span>
</pre></div>
</div>
</section>
<section id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Link to this heading">#</a></h2>
<p><strong>Two Options:</strong></p>
<ol class="simple">
<li><p>REST: Send text per request to endpoint, endpoint returns keywords</p></li>
<li><p>Queue: Service gets text from queue, model calculates keywords, saves keywords in queue</p></li>
</ol>
<p>To test the REST endpoint, you have to set up an environment and run <code class="docutils literal notranslate"><span class="pre">poetry install</span></code>
(see https://gitlab.knecon.com/knecon/research/template-python-project for details on setting up Poetry).
Then run:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="o">./</span><span class="n">src</span><span class="o">/</span><span class="n">serve</span><span class="o">.</span><span class="n">py</span>
</pre></div>
</div>
<p>You don't need to start a queue for that; just ignore the AMQP error.
Port and host are set in settings.toml.
You can use the FastAPI docs page under 127.0.0.1:8001/docs to send requests to the endpoint.</p>
<p>You can also test the service with docker:</p>
<section id="run-docker-commands">
<h3>Run Docker Commands<a class="headerlink" href="#run-docker-commands" title="Link to this heading">#</a></h3>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-t<span class="w"> </span><span class="si">${</span><span class="nv">IMAGE_NAME</span><span class="si">}</span><span class="w"> </span>-f<span class="w"> </span>Dockerfile<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">GITLAB_USER</span><span class="o">=</span><span class="si">${</span><span class="nv">GITLAB_USER</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">GITLAB_ACCESS_TOKEN</span><span class="o">=</span><span class="si">${</span><span class="nv">GITLAB_ACCESS_TOKEN</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_TENANT_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_TENANT_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_SUBSCRIPTION_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_SUBSCRIPTION_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_CLIENT_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_CLIENT_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_CLIENT_SECRET</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_CLIENT_SECRET</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_MODEL_VERSION</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_MODEL_NAME</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_RESOURCE_GROUP</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_WORKSPACE</span><span class="si">}</span>
</pre></div>
</div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>--net<span class="o">=</span>host<span class="w"> </span>-it<span class="w"> </span>--rm<span class="w"> </span>--name<span class="w"> </span><span class="si">${</span><span class="nv">CONTAINER_NAME</span><span class="si">}</span><span class="w"> </span><span class="si">${</span><span class="nv">IMAGE_NAME</span><span class="si">}</span>
</pre></div>
</div>
</section>
<section id="run-locally">
<h3>Run locally<a class="headerlink" href="#run-locally" title="Link to this heading">#</a></h3>
<p>First you need to download the model from MLflow. This can be done with the <em>“src/ml_flow/download_model.py”</em> script.
This script downloads a model and copies config and model data to the specific locations, such that the model can
be loaded.</p>
<p>For running/testing the keyword extraction locally you can use the <em>src/tests/test_process.py</em> script.</p>
<p>The model is stored and loaded via DVC; you need the connection string, which can be found under
https://portal.azure.com/#&#64;knecon.com/resource/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourceGroups/taas-rg/providers/Microsoft.Storage/storageAccounts/taassaracer/keys</p>
</section>
</section>
</section>
<section id="upload-models-to-ml-flow">
<h1>Upload models to ML Flow<a class="headerlink" href="#upload-models-to-ml-flow" title="Link to this heading">#</a></h1>
<p>To upload the models to MLflow, you can use the following script: <em>src/mlflow/upload_model.py</em>.
For authentication, the following environment variables need to be set:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1">#AZURE_TENANT_ID=&quot;&quot;</span>
<span class="c1">#AZURE_SUBSCRIPTION_ID=&quot;&quot;</span>
<span class="c1">#AZURE_CLIENT_ID=&quot;&quot;</span>
<span class="c1">#AZURE_CLIENT_SECRET=&quot;&quot;</span>
</pre></div>
</div>
<p>Additional settings (resource group, experiment name, etc.) can be specified in the config
(<em>./src/mlflow/config/azure_config.toml</em>).
The <em>upload_model.py</em> script has the following parameters:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">options</span><span class="p">:</span>
<span class="o">-</span><span class="n">h</span><span class="p">,</span> <span class="o">--</span><span class="n">help</span> <span class="n">show</span> <span class="n">this</span> <span class="n">help</span> <span class="n">message</span> <span class="ow">and</span> <span class="n">exit</span>
<span class="o">-</span><span class="n">a</span> <span class="n">AZURE_CONFIG</span><span class="p">,</span> <span class="o">--</span><span class="n">azure_config</span> <span class="n">AZURE_CONFIG</span>
<span class="n">Location</span> <span class="n">of</span> <span class="n">the</span> <span class="n">configuration</span> <span class="n">file</span> <span class="k">for</span> <span class="n">Azure</span> <span class="p">(</span><span class="n">default</span><span class="p">:</span> <span class="n">src</span><span class="o">/</span><span class="n">mlflow</span><span class="o">/</span><span class="n">config</span><span class="o">/</span><span class="n">azure_config</span><span class="o">.</span><span class="n">toml</span><span class="p">)</span>
<span class="o">-</span><span class="n">b</span> <span class="n">BASE_CONFIG</span><span class="p">,</span> <span class="o">--</span><span class="n">base_config</span> <span class="n">BASE_CONFIG</span>
<span class="n">Location</span> <span class="n">of</span> <span class="n">the</span> <span class="n">basic</span> <span class="n">training</span> <span class="n">configuration</span> <span class="p">(</span><span class="n">default</span><span class="p">:</span> <span class="n">src</span><span class="o">/</span><span class="n">mlflow</span><span class="o">/</span><span class="n">config</span><span class="o">/</span><span class="n">settings_de</span><span class="o">.</span><span class="n">toml</span><span class="p">)</span>
</pre></div>
</div>
<p>The base config contains all information for the models used. Examples for German and
English are placed in <em>/src/mlflow/config/</em>.</p>
<p>Note: Multi-language model tracking does not work for now. After the upload script reports an error, you have to
manually track the
model <a class="reference external" href="https://ml.azure.com/experiments?wsid=/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourcegroups/fforesight-rg/providers/Microsoft.MachineLearningServices/workspaces/ff-aml-main&amp;tid=b44be368-e4f2-4ade-a089-cd2825458048">here</a>
where you can find the run. Adhere to the naming conventions for the model name and versions,
see <a class="reference external" href="https://ml.azure.com/model/list?wsid=/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourcegroups/fforesight-rg/providers/Microsoft.MachineLearningServices/workspaces/ff-aml-main&amp;tid=b44be368-e4f2-4ade-a089-cd2825458048">here</a></p>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="index.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Welcome to Keyword Extraction Service documentation!</p>
</div>
</a>
<a class="right-next"
href="modules/cv_analysis.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">cv_analysis package</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Keyword-Service</a><ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#api">API</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rest">REST</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rabbitmq">RabbitMQ</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#service-configuration">Service Configuration</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#language">Language</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#usage">Usage</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#run-docker-commands">Run Docker Commands</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#run-locally">Run locally</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#upload-models-to-ml-flow">Upload models to ML Flow</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection sourcelink">
<a href="_sources/README.md.txt">
<i class="fa-solid fa-file-lines"></i> Show Source
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright All rights reserved.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.3.7.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>