683 lines
34 KiB
HTML
683 lines
34 KiB
HTML
|
||
<!DOCTYPE html>
|
||
|
||
|
||
<html lang="en" data-content_root="./" >
|
||
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<title>Keyword-Service — CV Analysis Service 2.5.1 documentation</title>
|
||
|
||
|
||
|
||
<script data-cfasync="false">
|
||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
|
||
</script>
|
||
|
||
<!-- Loaded before other Sphinx assets -->
|
||
<link href="_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||
<link href="_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||
<link href="_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||
|
||
|
||
<link href="_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
|
||
|
||
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
|
||
<link rel="stylesheet" type="text/css" href="https://assets.readthedocs.org/static/css/badge_only.css" />
|
||
|
||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
|
||
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
|
||
<script src="_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
|
||
|
||
<script src="_static/documentation_options.js?v=983e91d6"></script>
|
||
<script src="_static/doctools.js?v=9a2dae69"></script>
|
||
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script>DOCUMENTATION_OPTIONS.pagename = 'README';</script>
|
||
<script async="async" src="https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js"></script>
|
||
<link rel="index" title="Index" href="genindex.html" />
|
||
<link rel="search" title="Search" href="search.html" />
|
||
<link rel="next" title="cv_analysis package" href="modules/cv_analysis.html" />
|
||
<link rel="prev" title="Welcome to Keyword Extraction Service documentation!" href="index.html" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||
<meta name="docsearch:language" content="en"/>
|
||
|
||
<!-- RTD Extra Head -->
|
||
|
||
<link rel="stylesheet" href="https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css" type="text/css" />
|
||
|
||
<script type="application/json" id="READTHEDOCS_DATA">{"ad_free": "", "api_host": "", "builder": "sphinx", "canonical_url": "", "docroot": "", "features": {"docsearch_disabled": false}, "global_analytics_code": null, "language": "", "page": "README", "programming_language": "", "project": "", "source_suffix": ".md", "subprojects": {}, "theme": "", "user_analytics_code": null, "version": ""}</script>
|
||
|
||
<!--
|
||
Using this variable directly instead of using `JSON.parse` is deprecated.
|
||
The READTHEDOCS_DATA global variable will be removed in the future.
|
||
-->
|
||
<script type="text/javascript">
|
||
READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
|
||
</script>
|
||
|
||
<script type="text/javascript" src="https://assets.readthedocs.org/static/javascript/readthedocs-analytics.js" async="async"></script>
|
||
|
||
<!-- end RTD <extrahead> -->
|
||
</head>
|
||
|
||
|
||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||
|
||
|
||
|
||
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
|
||
|
||
<div id="pst-scroll-pixel-helper"></div>
|
||
|
||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||
<i class="fa-solid fa-arrow-up"></i>
|
||
Back to top
|
||
</button>
|
||
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
name="__primary"
|
||
id="__primary"/>
|
||
<label class="overlay overlay-primary" for="__primary"></label>
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
name="__secondary"
|
||
id="__secondary"/>
|
||
<label class="overlay overlay-secondary" for="__secondary"></label>
|
||
|
||
<div class="search-button__wrapper">
|
||
<div class="search-button__overlay"></div>
|
||
<div class="search-button__search-container">
|
||
<form class="bd-search d-flex align-items-center"
|
||
action="search.html"
|
||
method="get">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<input type="search"
|
||
class="form-control"
|
||
name="q"
|
||
id="search-input"
|
||
placeholder="Search the docs ..."
|
||
aria-label="Search the docs ..."
|
||
autocomplete="off"
|
||
autocorrect="off"
|
||
autocapitalize="off"
|
||
spellcheck="false"/>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||
</form></div>
|
||
</div>
|
||
|
||
<header class="bd-header navbar navbar-expand-lg bd-navbar">
|
||
<div class="bd-header__inner bd-page-width">
|
||
<label class="sidebar-toggle primary-toggle" for="__primary">
|
||
<span class="fa-solid fa-bars"></span>
|
||
</label>
|
||
|
||
|
||
<div class="col-lg-3 navbar-header-items__start">
|
||
|
||
<div class="navbar-item">
|
||
|
||
|
||
|
||
<a class="navbar-brand logo" href="index.html">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<img src="_static/logo.png" class="logo__image only-light" alt="CV Analysis Service 2.5.1 documentation - Home"/>
|
||
<script>document.write(`<img src="_static/logo.png" class="logo__image only-dark" alt="CV Analysis Service 2.5.1 documentation - Home"/>`);</script>
|
||
|
||
|
||
</a></div>
|
||
|
||
</div>
|
||
|
||
<div class="col-lg-9 navbar-header-items">
|
||
|
||
<div class="me-auto navbar-header-items__center">
|
||
|
||
<div class="navbar-item">
|
||
<nav class="navbar-nav">
|
||
<ul class="bd-navbar-elements navbar-nav">
|
||
|
||
<li class="nav-item current active">
|
||
<a class="nav-link nav-internal" href="#">
|
||
Keyword-Service
|
||
</a>
|
||
</li>
|
||
|
||
|
||
<li class="nav-item">
|
||
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
|
||
cv_analysis package
|
||
</a>
|
||
</li>
|
||
|
||
|
||
<li class="nav-item">
|
||
<a class="nav-link nav-internal" href="modules/serve.html">
|
||
serve module
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</nav></div>
|
||
|
||
</div>
|
||
|
||
|
||
<div class="navbar-header-items__end">
|
||
|
||
<div class="navbar-item navbar-persistent--container">
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<span class="search-button__default-text">Search</span>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||
</button>
|
||
`);
|
||
</script>
|
||
</div>
|
||
|
||
|
||
<div class="navbar-item">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
|
||
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
|
||
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
|
||
</button>
|
||
`);
|
||
</script></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
<div class="navbar-persistent--mobile">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<span class="search-button__default-text">Search</span>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||
</button>
|
||
`);
|
||
</script>
|
||
</div>
|
||
|
||
|
||
|
||
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
|
||
<span class="fa-solid fa-outdent"></span>
|
||
</label>
|
||
|
||
</div>
|
||
|
||
</header>
|
||
|
||
|
||
<div class="bd-container">
|
||
<div class="bd-container__inner bd-page-width">
|
||
|
||
|
||
|
||
<div class="bd-sidebar-primary bd-sidebar">
|
||
|
||
|
||
|
||
<div class="sidebar-header-items sidebar-primary__section">
|
||
|
||
|
||
<div class="sidebar-header-items__center">
|
||
|
||
<div class="navbar-item">
|
||
<nav class="navbar-nav">
|
||
<ul class="bd-navbar-elements navbar-nav">
|
||
|
||
<li class="nav-item current active">
|
||
<a class="nav-link nav-internal" href="#">
|
||
Keyword-Service
|
||
</a>
|
||
</li>
|
||
|
||
|
||
<li class="nav-item">
|
||
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
|
||
cv_analysis package
|
||
</a>
|
||
</li>
|
||
|
||
|
||
<li class="nav-item">
|
||
<a class="nav-link nav-internal" href="modules/serve.html">
|
||
serve module
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</nav></div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="sidebar-header-items__end">
|
||
|
||
<div class="navbar-item">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
|
||
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
|
||
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
|
||
</button>
|
||
`);
|
||
</script></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||
<div class="sidebar-primary-item">
|
||
<nav class="bd-docs-nav bd-links"
|
||
aria-label="Section Navigation">
|
||
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
|
||
<div class="bd-toc-item navbar-nav"></div>
|
||
</nav></div>
|
||
</div>
|
||
|
||
|
||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||
</div>
|
||
|
||
<div id="rtd-footer-container"></div>
|
||
|
||
|
||
</div>
|
||
|
||
<main id="main-content" class="bd-main">
|
||
|
||
|
||
<div class="bd-content">
|
||
<div class="bd-article-container">
|
||
|
||
<div class="bd-header-article">
|
||
<div class="header-article-items header-article__inner">
|
||
|
||
<div class="header-article-items__start">
|
||
|
||
<div class="header-article-item">
|
||
|
||
|
||
|
||
<nav aria-label="Breadcrumb">
|
||
<ul class="bd-breadcrumbs">
|
||
|
||
<li class="breadcrumb-item breadcrumb-home">
|
||
<a href="index.html" class="nav-link" aria-label="Home">
|
||
<i class="fa-solid fa-home"></i>
|
||
</a>
|
||
</li>
|
||
<li class="breadcrumb-item active" aria-current="page">Keyword-Service</li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<div id="searchbox"></div>
|
||
<article class="bd-article">
|
||
|
||
<section id="keyword-service">
|
||
<h1>Keyword-Service<a class="headerlink" href="#keyword-service" title="Link to this heading">#</a></h1>
|
||
<p>Service to get keywords of a paragraph or whole document.</p>
|
||
<!-- TOC --><ul class="simple">
|
||
<li><p><a class="reference external" href="#keyword-service">Keyword-Service</a></p>
|
||
<ul>
|
||
<li><p><a class="reference external" href="#api">API</a></p>
|
||
<ul>
|
||
<li><p><a class="reference external" href="#rest">REST</a></p></li>
|
||
<li><p><a class="reference external" href="#rabbitmq">RabbitMQ</a></p></li>
|
||
</ul>
|
||
</li>
|
||
<li><p><a class="reference external" href="#service-configuration">Service Configuration</a></p></li>
|
||
<li><p><a class="reference external" href="#language">Language</a></p></li>
|
||
<li><p><a class="reference external" href="#usage">Usage</a></p>
|
||
<ul>
|
||
<li><p><a class="reference external" href="#run-docker-commands">Run Docker Commands</a></p></li>
|
||
<li><p><a class="reference external" href="#run-locally">Run locally</a></p></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><p><a class="reference external" href="#upload-models-to-ml-flow">Upload models to ML Flow</a></p></li>
|
||
</ul>
|
||
<!-- TOC --><section id="api">
|
||
<h2>API<a class="headerlink" href="#api" title="Link to this heading">#</a></h2>
|
||
<section id="rest">
|
||
<h3>REST<a class="headerlink" href="#rest" title="Link to this heading">#</a></h3>
|
||
<p>The service provides endpoints to extract keywords from a text and to embed a text. For details, download the
|
||
<a class="reference external" href="docs/openapi_redoc.html">OpenAPI schema</a> and view it in a browser.</p>
|
||
</section>
|
||
<section id="rabbitmq">
|
||
<h3>RabbitMQ<a class="headerlink" href="#rabbitmq" title="Link to this heading">#</a></h3>
|
||
<p>The service listens to a queue and processes the messages. This method is meant to be used for extracting keywords from
|
||
whole documents. All RabbitMQ parameters including the queue names are set in environment variables, refer to the
|
||
service's respective Helm chart for more information.</p>
|
||
<p>The input message should be a JSON object with the following structure:</p>
|
||
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"targetFilePath"</span><span class="p">:</span><span class="w"> </span><span class="err">s</span><span class="kc">tr</span><span class="err">i</span><span class="kc">n</span><span class="err">g</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"responseFilePath"</span><span class="p">:</span><span class="w"> </span><span class="err">s</span><span class="kc">tr</span><span class="err">i</span><span class="kc">n</span><span class="err">g</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The service downloads the file specified in <code class="docutils literal notranslate"><span class="pre">targetFilePath</span></code>. Supported data structures for the target file are:</p>
|
||
<ul class="simple">
|
||
<li><p>simplified text data (signifier key: <code class="docutils literal notranslate"><span class="pre">paragraphs</span></code>)</p></li>
|
||
<li><p>structure object data (signifier key: <code class="docutils literal notranslate"><span class="pre">structureObjects</span></code>)</p></li>
|
||
</ul>
|
||
<p>As a response, the service uploads a JSON-structured file (as defined in <code class="docutils literal notranslate"><span class="pre">responseFilePath</span></code>) with the result under the
|
||
<code class="docutils literal notranslate"><span class="pre">data</span></code> key. The structure of the response file is as follows:</p>
|
||
<div class="highlight-javascript notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
|
||
<span class="w"> </span><span class="s2">"targetFilePath"</span>
|
||
<span class="o">:</span>
|
||
<span class="w"> </span><span class="nx">string</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="s2">"responseFilePath"</span>
|
||
<span class="o">:</span>
|
||
<span class="w"> </span><span class="nx">string</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="c1">// and eventually further fields if present in the input message </span>
|
||
<span class="w"> </span><span class="s2">"data"</span>
|
||
<span class="o">:</span>
|
||
<span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="s2">"keywords"</span><span class="o">:</span><span class="w"> </span><span class="nb">Array</span><span class="p">[</span><span class="nx">string</span><span class="p">],</span>
|
||
<span class="w"> </span><span class="s2">"paragraphId"</span><span class="o">:</span><span class="w"> </span><span class="kr">int</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="s2">"embedding"</span><span class="o">:</span><span class="w"> </span><span class="nb">Array</span><span class="p">[</span><span class="kr">float</span><span class="p">]</span><span class="w"> </span><span class="c1">// 384 dimensions</span>
|
||
<span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Note</strong> that</p>
|
||
<ul class="simple">
|
||
<li><p>the <code class="docutils literal notranslate"><span class="pre">embedding</span></code> key is optional and can be omitted. The service will not calculate the embedding if the environment
|
||
variable <code class="docutils literal notranslate"><span class="pre">MODEL__COMPUTE_EMBEDDINGS</span></code> is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>.</p></li>
|
||
<li><p>the service also computes the keywords for the whole document. In this case, the <code class="docutils literal notranslate"><span class="pre">paragraphId</span></code> is set to <code class="docutils literal notranslate"><span class="pre">-1</span></code>.</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="service-configuration">
|
||
<h2>Service Configuration<a class="headerlink" href="#service-configuration" title="Link to this heading">#</a></h2>
|
||
<p>The service is configured via environment variables. The following variables are available:</p>
|
||
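<table class="table">
<thead>
<tr><th scope="col">Variable</th><th scope="col">Description</th><th scope="col">Default</th></tr>
</thead>
<tbody>
<tr><td><code class="docutils literal notranslate"><span class="pre">LOGGING__LEVEL</span></code></td><td>Logging level</td><td>INFO</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__MAX_KEYWORDS_PER_PARAGRAPH</span></code></td><td>Maximum number of keywords per paragraph</td><td>5</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__MAX_KEYWORDS_PER_DOCUMENT</span></code></td><td>Maximum number of keywords per document; when set to 0, no keywords are extracted</td><td>0</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__COMPUTE_EMBEDDINGS</span></code></td><td>Whether to compute keyword embeddings or not</td><td>true</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__PREPROCESSING__MIN_PARAGRAPH_LENGTH</span></code></td><td>Minimum number of characters in a paragraph to be considered for keyword extraction</td><td>1</td></tr>
<tr><td><code class="docutils literal notranslate"><span class="pre">MODEL__POSTPROCESSING__FILTER_SUBWORDS</span></code></td><td>Whether to filter out subwords from the keywords or not</td><td>true</td></tr>
</tbody>
</table>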
|
||
<p><strong>NOTE</strong> that these variables are subject to change. For the most recent configuration, refer to the service's respective
|
||
Helm chart.</p>
|
||
</section>
|
||
<section id="language">
|
||
<h2>Language<a class="headerlink" href="#language" title="Link to this heading">#</a></h2>
|
||
<p>Currently, there are an English, a German and a multi-language model for keyword extraction. The models are uploaded to
|
||
mlflow and can
|
||
be set in the Dockerfile when building the container:</p>
|
||
<p>Example for the German model:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ENV</span> <span class="n">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="s2">"mriedl"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="s2">"azureml-ws"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="s2">"keyword-extraction-de"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="s2">"1"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>And an example for the English model:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ENV</span> <span class="n">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="s2">"mriedl"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="s2">"azureml-ws"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="s2">"keyword-extraction-en"</span>
|
||
<span class="n">ENV</span> <span class="n">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="s2">"1"</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="usage">
|
||
<h2>Usage<a class="headerlink" href="#usage" title="Link to this heading">#</a></h2>
|
||
<p><strong>Two Options:</strong></p>
|
||
<ol class="simple">
|
||
<li><p>REST: Send text per request to endpoint, endpoint returns keywords</p></li>
|
||
<li><p>Queue: Service gets text from queue, model calculates keywords, saves keywords in queue</p></li>
|
||
</ol>
|
||
<p>To test the REST endpoint you have to set up an environment and do poetry install (
|
||
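see <a class="reference external" href="https://gitlab.knecon.com/knecon/research/template-python-project">https://gitlab.knecon.com/knecon/research/template-python-project</a> for details on setting up poetry).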
|
||
Then run</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="o">./</span><span class="n">src</span><span class="o">/</span><span class="n">serve</span><span class="o">.</span><span class="n">py</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>You don’t need to start a queue for that, just ignore the AMQP Error.
|
||
Port and host are set in settings.toml .
|
||
You can use the FastAPI interface at 127.0.0.1:8001/docs to send requests to the endpoint.</p>
|
||
<p>You can also test the service with docker:</p>
|
||
<section id="run-docker-commands">
|
||
<h3>Run Docker Commands<a class="headerlink" href="#run-docker-commands" title="Link to this heading">#</a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-t<span class="w"> </span><span class="si">${</span><span class="nv">IMAGE_NAME</span><span class="si">}</span><span class="w"> </span>-f<span class="w"> </span>Dockerfile<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">GITLAB_USER</span><span class="o">=</span><span class="si">${</span><span class="nv">GITLAB_USER</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">GITLAB_ACCESS_TOKEN</span><span class="o">=</span><span class="si">${</span><span class="nv">GITLAB_ACCESS_TOKEN</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_TENANT_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_TENANT_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_SUBSCRIPTION_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_SUBSCRIPTION_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_CLIENT_ID</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_CLIENT_ID</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_CLIENT_SECRET</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_CLIENT_SECRET</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_MODEL_VERSION</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_MODEL_VERSION</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_MODEL_NAME</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_MODEL_NAME</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_RESOURCE_GROUP</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_RESOURCE_GROUP</span><span class="si">}</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">AZURE_AML_WORKSPACE</span><span class="o">=</span><span class="si">${</span><span class="nv">AZURE_AML_WORKSPACE</span><span class="si">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>--net<span class="o">=</span>host<span class="w"> </span>-it<span class="w"> </span>--rm<span class="w"> </span>--name<span class="w"> </span><span class="si">${</span><span class="nv">CONTAINER_NAME</span><span class="si">}</span><span class="w"> </span><span class="si">${</span><span class="nv">IMAGE_NAME</span><span class="si">}</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="run-locally">
|
||
<h3>Run locally<a class="headerlink" href="#run-locally" title="Link to this heading">#</a></h3>
|
||
<p>First you need to download the model from mlflow. This can be done with the <em>“src/ml_flow/download_model.py”</em> script.
|
||
This script downloads a model and copies config and model data to the specific locations, such that the model can
|
||
be loaded.</p>
|
||
<p>For running/testing the keyword extraction locally you can use the <em>src/tests/test_process.py</em> script.</p>
|
||
<p>The model is stored and loaded via DVC; you need the connection string under
|
||
https://portal.azure.com/#@knecon.com/resource/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourceGroups/taas-rg/providers/Microsoft.Storage/storageAccounts/taassaracer/keys</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="upload-models-to-ml-flow">
|
||
<h1>Upload models to ML Flow<a class="headerlink" href="#upload-models-to-ml-flow" title="Link to this heading">#</a></h1>
|
||
<p>To upload the models to mlflow, you can use the following script: src/mlflow/upload_model.py.
|
||
For authentication, the following environment variables need to be set:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1">#AZURE_TENANT_ID=""</span>
|
||
<span class="c1">#AZURE_SUBSCRIPTION_ID=""</span>
|
||
<span class="c1">#AZURE_CLIENT_ID=""</span>
|
||
<span class="c1">#AZURE_CLIENT_SECRET=""</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Additional settings (resource group, experiment name, etc.) can be specified in the config (
|
||
<em>./src/mlflow/config/azure_config.toml</em>).
|
||
The <em>upload_model.py</em> has the following parameters:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">options</span><span class="p">:</span>
|
||
<span class="o">-</span><span class="n">h</span><span class="p">,</span> <span class="o">--</span><span class="n">help</span> <span class="n">show</span> <span class="n">this</span> <span class="n">help</span> <span class="n">message</span> <span class="ow">and</span> <span class="n">exit</span>
|
||
<span class="o">-</span><span class="n">a</span> <span class="n">AZURE_CONFIG</span><span class="p">,</span> <span class="o">--</span><span class="n">azure_config</span> <span class="n">AZURE_CONFIG</span>
|
||
<span class="n">Location</span> <span class="n">of</span> <span class="n">the</span> <span class="n">configuration</span> <span class="n">file</span> <span class="k">for</span> <span class="n">Azure</span> <span class="p">(</span><span class="n">default</span><span class="p">:</span> <span class="n">src</span><span class="o">/</span><span class="n">mlflow</span><span class="o">/</span><span class="n">config</span><span class="o">/</span><span class="n">azure_config</span><span class="o">.</span><span class="n">toml</span><span class="p">)</span>
|
||
<span class="o">-</span><span class="n">b</span> <span class="n">BASE_CONFIG</span><span class="p">,</span> <span class="o">--</span><span class="n">base_config</span> <span class="n">BASE_CONFIG</span>
|
||
<span class="n">Location</span> <span class="n">of</span> <span class="n">the</span> <span class="n">basic</span> <span class="n">training</span> <span class="n">configuration</span> <span class="p">(</span><span class="n">default</span><span class="p">:</span> <span class="n">src</span><span class="o">/</span><span class="n">mlflow</span><span class="o">/</span><span class="n">config</span><span class="o">/</span><span class="n">settings_de</span><span class="o">.</span><span class="n">toml</span><span class="p">)</span>
|
||
|
||
</pre></div>
|
||
</div>
|
||
<p>The base config contains all information for the models used. Examples for German and
|
||
English are placed in <em>/src/mlflow/config/</em>.</p>
|
||
<p>Note: Multi-language model tracking does not work for now. After the upload script reports an error, you have to
|
||
manually track the
|
||
model <a class="reference external" href="https://ml.azure.com/experiments?wsid=/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourcegroups/fforesight-rg/providers/Microsoft.MachineLearningServices/workspaces/ff-aml-main&tid=b44be368-e4f2-4ade-a089-cd2825458048">here</a>
|
||
where you can find the run. Adhere to the naming conventions for the model name and versions,
|
||
see <a class="reference external" href="https://ml.azure.com/model/list?wsid=/subscriptions/4b9531fc-c5e4-4b11-8492-0cc173c1f97d/resourcegroups/fforesight-rg/providers/Microsoft.MachineLearningServices/workspaces/ff-aml-main&tid=b44be368-e4f2-4ade-a089-cd2825458048">here</a></p>
|
||
</section>
|
||
|
||
|
||
</article>
|
||
|
||
|
||
|
||
|
||
|
||
<footer class="prev-next-footer">
|
||
|
||
<div class="prev-next-area">
|
||
<a class="left-prev"
|
||
href="index.html"
|
||
title="previous page">
|
||
<i class="fa-solid fa-angle-left"></i>
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">previous</p>
|
||
<p class="prev-next-title">Welcome to Keyword Extraction Service documentation!</p>
|
||
</div>
|
||
</a>
|
||
<a class="right-next"
|
||
href="modules/cv_analysis.html"
|
||
title="next page">
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">next</p>
|
||
<p class="prev-next-title">cv_analysis package</p>
|
||
</div>
|
||
<i class="fa-solid fa-angle-right"></i>
|
||
</a>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||
|
||
|
||
<div class="sidebar-secondary-item">
|
||
<div
|
||
id="pst-page-navigation-heading-2"
|
||
class="page-toc tocsection onthispage">
|
||
<i class="fa-solid fa-list"></i> On this page
|
||
</div>
|
||
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
|
||
<ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#">Keyword-Service</a><ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#api">API</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rest">REST</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#rabbitmq">RabbitMQ</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#service-configuration">Service Configuration</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#language">Language</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#usage">Usage</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#run-docker-commands">Run Docker Commands</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#run-locally">Run locally</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h1 nav-item toc-entry"><a class="reference internal nav-link" href="#upload-models-to-ml-flow">Upload models to ML Flow</a></li>
|
||
</ul>
|
||
|
||
</nav></div>
|
||
|
||
<div class="sidebar-secondary-item">
|
||
|
||
<div class="tocsection sourcelink">
|
||
<a href="_sources/README.md.txt">
|
||
<i class="fa-solid fa-file-lines"></i> Show Source
|
||
</a>
|
||
</div>
|
||
</div>
|
||
|
||
</div></div>
|
||
|
||
|
||
</div>
|
||
<footer class="bd-footer-content">
|
||
|
||
</footer>
|
||
|
||
</main>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||
<script src="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
|
||
<script src="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
|
||
|
||
<footer class="bd-footer">
|
||
<div class="bd-footer__inner bd-page-width">
|
||
|
||
<div class="footer-items__start">
|
||
|
||
<div class="footer-item">
|
||
|
||
<p class="copyright">
|
||
|
||
© Copyright All rights reserved.
|
||
<br/>
|
||
|
||
</p>
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
<p class="sphinx-version">
|
||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.3.7.
|
||
<br/>
|
||
</p>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="footer-items__end">
|
||
|
||
<div class="footer-item">
|
||
<p class="theme-version">
|
||
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
|
||
</p></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</footer>
|
||
</body>
|
||
</html> |