-
Notifications
You must be signed in to change notification settings - Fork 31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Validateur NeTEx avec enRoute's Chouette #4140
Changes from 3 commits
616c46a
32188a6
16bdd48
cb4adb6
26e78dd
0d4e751
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,311 @@ | ||||
defmodule Transport.Validators.NeTEx do | ||||
import TransportWeb.Gettext, only: [dgettext: 2] | ||||
|
||||
@moduledoc """ | ||||
Validator for NeTEx files calling enRoute Chouette Valid API. | ||||
""" | ||||
require Logger | ||||
@behaviour Transport.Validators.Validator | ||||
|
||||
@impl Transport.Validators.Validator | ||||
def validator_name, do: "enroute-chouette-validator" | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
@impl Transport.Validators.Validator | ||||
def validate_and_save(%DB.Resource{format: "NeTEx", id: resource_id}) do | ||||
Logger.info("Validating #{resource_id} with enRoute Chouette Valid") | ||||
|
||||
resource_history = DB.ResourceHistory.latest_resource_history(resource_id) | ||||
with_resource_file(resource_history, &validate_resource_history(resource_history, &1)) | ||||
end | ||||
|
||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pour les 2 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dans les metadata ? |
||||
def validate_resource_history(resource_history, filepath) do | ||||
case validate_with_enroute(filepath) do | ||||
{:ok, result_url} -> | ||||
insert_validation_results(resource_history.id, result_url) | ||||
:ok | ||||
|
||||
{:error, {result_url, errors}} -> | ||||
insert_validation_results(resource_history.id, result_url, errors) | ||||
:ok | ||||
|
||||
{:error, :unexpected_validation_status} -> | ||||
Logger.error("Invalid API call to enRoute Chouette Valid") | ||||
{:error, "enRoute Chouette Valid: Unexpected validation status"} | ||||
end | ||||
end | ||||
|
||||
@type validate_options :: [{:graceful_retry, boolean()}] | ||||
|
||||
@doc """ | ||||
Validate the resource from the given URL. | ||||
""" | ||||
@spec validate(binary(), validate_options()) :: {:ok, map()} | {:error, binary()} | ||||
def validate(url, opts \\ []) do | ||||
with_url(url, fn filepath -> | ||||
case validate_with_enroute(filepath, opts) do | ||||
{:ok, result_url} -> | ||||
# result_url in metadata? | ||||
Logger.info("Result URL: #{result_url}") | ||||
{:ok, %{"validations" => index_messages([]), "metadata" => %{}}} | ||||
|
||||
{:error, {result_url, errors}} -> | ||||
Logger.info("Result URL: #{result_url}") | ||||
# result_url in metadata? | ||||
{:ok, %{"validations" => index_messages(errors), "metadata" => %{}}} | ||||
|
||||
{:error, :unexpected_validation_status} -> | ||||
Logger.error("Invalid API call to enRoute Chouette Valid") | ||||
{:error, "enRoute Chouette Valid: Unexpected validation status"} | ||||
end | ||||
end) | ||||
end | ||||
|
||||
@spec with_resource_file(DB.ResourceHistory.t(), (Path.t() -> any())) :: any() | ||||
def with_resource_file(resource_history, closure) do | ||||
%DB.ResourceHistory{payload: %{"permanent_url" => permanent_url}} = resource_history | ||||
filepath = tmp_path(resource_history) | ||||
|
||||
with_temp_file(permanent_url, filepath, closure) | ||||
end | ||||
|
||||
@spec with_url(binary(), (Path.t() -> any())) :: any() | ||||
def with_url(url, closure) do | ||||
with_temp_file(url, tmp_path(url), closure) | ||||
end | ||||
|
||||
@spec with_temp_file(binary(), Path.t(), (Path.t() -> any())) :: any() | ||||
def with_temp_file(url, filepath, closure) do | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
http_client().get!(url, compressed: false, into: File.stream!(filepath)) | ||||
ptitfred marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
closure.(filepath) | ||||
after | ||||
File.rm(filepath) | ||||
end | ||||
|
||||
defp http_client, do: Transport.Req.impl() | ||||
|
||||
defp tmp_path(%DB.ResourceHistory{id: resource_history_id}) do | ||||
System.tmp_dir!() |> Path.join("enroute_validation_netex_#{resource_history_id}") | ||||
end | ||||
|
||||
defp tmp_path(_other) do | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
System.tmp_dir!() |> Path.join("enroute_validation_netex_#{Ecto.UUID.generate()}") | ||||
end | ||||
|
||||
def insert_validation_results(resource_history_id, result_url, errors \\ []) do | ||||
result = index_messages(errors) | ||||
|
||||
%DB.MultiValidation{ | ||||
validation_timestamp: DateTime.utc_now(), | ||||
validator: validator_name(), | ||||
result: result, | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Se pose la question d'uniformiser ce qui est stocké dans Tu as comparé ce qui était fait entre ceux-ci et GTFS-RT et GTFS ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initialement je pensais pouvoir faire ça et j'avais quelque chose équivalent à JSONSchema TableSchema et GBFS puis finalement ça s'est rapproché de GTFS (notamment en implémentant la visualisation) et j'ai découvert au passage que ce n'était pas standardisé. Les erreurs elle-même ne seront pas standardisées si j'en crois les différents templates de rendu, et la présence de helpers spécifiques à chaque format pour interpréter ces résultats ne simplifie pas la tâche. On pourrait avoir une description plus générale des résultats de validation avec toujours une gestion des spécificités, ce n'est pas un problème, mais je ne suis pas à l'aise pour faire un tel changement actuellement. On aurait quoi qu'il en soit des templates spécifiques à GTFS et NeTEx et déjà du code générique pour les autres formats. Mon avis est donc en résumé que chercher à généraliser les validateurs est peu bénéfique en l'état. Et si on décidait de le faire je serais bien peu à l'aise pour le faire par le manque de spécification des structures de données que l'on manipule dans la validation de ressources. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Merci pour tes explications, je partage ton point de vue. J'aimerais que ce soit plus simple et évident mais ce n'est pas le cas je crois. |
||||
resource_history_id: resource_history_id, | ||||
validator_version: validator_version(), | ||||
command: result_url, | ||||
max_error: get_max_severity_error(result) | ||||
} | ||||
|> DB.Repo.insert!() | ||||
end | ||||
|
||||
@doc """ | ||||
Returns the maximum issue severity found | ||||
|
||||
iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "error"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]} | ||||
iex> get_max_severity_error(validation_result) | ||||
"error" | ||||
|
||||
iex> get_max_severity_error(%{}) | ||||
"NoError" | ||||
""" | ||||
@spec get_max_severity_error(map()) :: binary() | nil | ||||
def get_max_severity_error(validation_result) do | ||||
{severity, _} = validation_result |> count_max_severity() | ||||
severity | ||||
end | ||||
|
||||
@no_error "NoError" | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
@doc """ | ||||
Returns the maximum severity, with the issues count | ||||
|
||||
iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "error"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]} | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ajouter un cas There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. |
||||
iex> count_max_severity(validation_result) | ||||
{"error", 3} | ||||
iex> count_max_severity(%{}) | ||||
{"NoError", 0} | ||||
""" | ||||
@spec count_max_severity(map()) :: {binary(), integer()} | ||||
def count_max_severity(validation_result) when validation_result == %{} do | ||||
{@no_error, 0} | ||||
end | ||||
|
||||
def count_max_severity(%{} = validation_result) do | ||||
validation_result | ||||
|> count_by_severity() | ||||
|> Enum.min_by(fn {severity, _count} -> severity |> severities() |> Map.get(:level) end) | ||||
end | ||||
|
||||
@spec severities_map() :: map() | ||||
def severities_map, | ||||
do: %{ | ||||
"error" => %{level: 1, text: dgettext("netex-validator", "errors")}, | ||||
"warning" => %{level: 2, text: dgettext("netex-validator", "warnings")}, | ||||
"information" => %{level: 3, text: dgettext("netex-validator", "informations")} | ||||
} | ||||
|
||||
@spec severities(binary()) :: %{level: integer(), text: binary()} | ||||
def severities(key), do: severities_map()[key] | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
@doc """ | ||||
Returns the number of issues by severity level | ||||
|
||||
iex> validation_result = %{"uic-operating-period" => [%{"criticity" => "warning"}], "valid-day-bits" => [%{"criticity" => "error"}], "frame-arret-resources" => [%{"criticity" => "error"}]} | ||||
iex> count_by_severity(validation_result) | ||||
%{"warning" => 1, "error" => 2} | ||||
|
||||
iex> count_by_severity(%{}) | ||||
%{} | ||||
""" | ||||
@spec count_by_severity(map()) :: map() | ||||
def count_by_severity(%{} = validation_result) do | ||||
validation_result | ||||
|> Enum.flat_map(fn {_, v} -> v end) | ||||
|> Enum.reduce(%{}, fn v, acc -> Map.update(acc, v["criticity"], 1, &(&1 + 1)) end) | ||||
end | ||||
|
||||
def count_by_severity(_), do: %{} | ||||
|
||||
defp validate_with_enroute(filepath, opts \\ []) do | ||||
client().create_a_validation(filepath) |> fetch_validation_results(0, opts) | ||||
end | ||||
|
||||
defp fetch_validation_results(validation_id, retries, opts) do | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On a un max retry ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pas encore, je comptais le faire dans un second temps. En l'état ce code espère que le validateur implémentera ce timeout, mais l'expérience a montré que c'était pas le cas. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. D'acc, à voir plus tard alors ! |
||||
case client().get_a_validation(validation_id) do | ||||
:pending -> | ||||
if Keyword.get(opts, :graceful_retry, true) do | ||||
retries |> poll_interval() |> :timer.sleep() | ||||
end | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
fetch_validation_results(validation_id, retries + 1, opts) | ||||
|
||||
{:successful, url} -> | ||||
{:ok, url} | ||||
|
||||
value when value in [:warning, :failed] -> | ||||
{:error, client().get_messages(validation_id)} | ||||
|
||||
:unexpected_validation_status -> | ||||
{:error, :unexpected_validation_status} | ||||
end | ||||
end | ||||
|
||||
@doc """ | ||||
iex> index_messages([]) | ||||
%{} | ||||
|
||||
iex> index_messages([%{"code"=>"a", "id"=> 1}, %{"code"=>"a", "id"=> 2}, %{"code"=>"b", "id"=> 3}]) | ||||
%{"a"=>[%{"code"=>"a", "id"=> 1}, %{"code"=>"a", "id"=> 2}], "b"=>[%{"code"=>"b", "id"=> 3}]} | ||||
""" | ||||
def index_messages(messages) do | ||||
messages |> Enum.group_by(fn %{"code" => code} -> code end) | ||||
end | ||||
|
||||
@doc """ | ||||
Poll interval to play nice with the tier. | ||||
|
||||
iex> 0..9 |> Enum.map(&poll_interval(&1)) | ||||
[10000, 10000, 10000, 10000, 10000, 10000, 20000, 20000, 20000, 20000] | ||||
ptitfred marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
""" | ||||
def poll_interval(nb_tries) when nb_tries < 6, do: 10_000 | ||||
def poll_interval(_), do: 20_000 | ||||
AntoineAugusti marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
# This will change with an actual versioning of the validator | ||||
def validator_version, do: "saas-production" | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Déplacer à côté du nom du validateur ? Ce n'est pas dans le behaviour ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Non, c'est un champs de There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Peut-être à ajouter dans
|
||||
|
||||
@doc """ | ||||
iex> validation_result = %{"uic-operating-period" => [%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}], "valid-day-bits" => [%{"code" => "valid-day-bits", "message" => "Mandatory attribute valid_day_bits not found", "criticity" => "error"}], "frame-arret-resources" => [%{"code" => "frame-arret-resources", "message" => "Tag frame_id doesn't match ''", "criticity" => "warning"}]} | ||||
iex> summary(validation_result) | ||||
[ | ||||
{"error", [ | ||||
{"uic-operating-period", %{count: 1, criticity: "error", title: "UIC operating period"}}, | ||||
{"valid-day-bits", %{count: 1, criticity: "error", title: "Valid day bits"}} | ||||
]}, | ||||
{"warning", [{"frame-arret-resources", %{count: 1, criticity: "warning", title: "Frame arret resources"}}]} | ||||
] | ||||
iex> summary(%{}) | ||||
[] | ||||
""" | ||||
@spec summary(map()) :: list() | ||||
def summary(%{} = validation_result) do | ||||
validation_result | ||||
|> Enum.map(fn {code, errors} -> | ||||
{code, | ||||
%{ | ||||
count: length(errors), | ||||
criticity: Map.get(hd(errors), "criticity"), | ||||
title: Map.get(issues_short_translation(), code, code) | ||||
}} | ||||
end) | ||||
|> Enum.group_by(fn {_, details} -> details.criticity end) | ||||
|> Enum.sort_by(fn {criticity, _} -> severities(criticity).level end) | ||||
end | ||||
|
||||
@spec issues_short_translation() :: %{binary() => binary()} | ||||
def issues_short_translation, | ||||
do: %{ | ||||
"composite-frame-ligne-mandatory" => dgettext("netex-validator", "Composite frame ligne mandatory"), | ||||
"frame-arret-resources" => dgettext("netex-validator", "Frame arret resources"), | ||||
"frame-calendrier-resources" => dgettext("netex-validator", "Frame calendrier resources"), | ||||
"frame-horaire-resources" => dgettext("netex-validator", "Frame horaire resources"), | ||||
"frame-ligne-resources" => dgettext("netex-validator", "Frame ligne resources"), | ||||
"frame-reseau-resources" => dgettext("netex-validator", "Frame reseau resources"), | ||||
"latitude-mandatory" => dgettext("netex-validator", "Latitude mandatory"), | ||||
"longitude-mandatory" => dgettext("netex-validator", "Longitude mandatory"), | ||||
"uic-operating-period" => dgettext("netex-validator", "UIC operating period"), | ||||
"valid-day-bits" => dgettext("netex-validator", "Valid day bits"), | ||||
"version-any" => dgettext("netex-validator", "Version any") | ||||
} | ||||
|
||||
@doc """ | ||||
Get issues from validation results. For a specific issue type if specified, or the most severe. | ||||
|
||||
iex> validation_result = %{"uic-operating-period" => [%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}], "valid-day-bits" => [%{"code" => "valid-day-bits", "message" => "Mandatory attribute valid_day_bits not found", "criticity" => "error"}], "frame-arret-resources" => [%{"code" => "frame-arret-resources", "message" => "Tag frame_id doesn't match ''", "criticity" => "warning"}]} | ||||
iex> get_issues(validation_result, %{"issue_type" => "uic-operating-period"}) | ||||
[%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}] | ||||
iex> get_issues(validation_result, %{"issue_type" => "broken-file"}) | ||||
[] | ||||
iex> get_issues(validation_result, nil) | ||||
[%{"code" => "uic-operating-period", "message" => "Resource 23504000009 hasn't expected class but Netex::OperatingPeriod", "criticity" => "error"}] | ||||
iex> get_issues(%{}, nil) | ||||
[] | ||||
iex> get_issues([], nil) | ||||
[] | ||||
""" | ||||
def get_issues(%{} = validation_result, %{"issue_type" => issue_type}) do | ||||
Map.get(validation_result, issue_type, []) |> order_issues_by_location() | ||||
end | ||||
|
||||
def get_issues(%{} = validation_result, _) do | ||||
validation_result | ||||
|> Map.values() | ||||
|> Enum.sort_by(fn [%{"criticity" => severity} | _] -> severities(severity).level end) | ||||
|> List.first([]) | ||||
|> order_issues_by_location() | ||||
end | ||||
|
||||
def get_issues(_, _), do: [] | ||||
|
||||
def order_issues_by_location(issues) do | ||||
issues | ||||
|> Enum.sort_by(fn issue -> | ||||
message = Map.get(issue, "message", "") | ||||
resource = Map.get(issue, "resource", %{}) | ||||
filename = Map.get(resource, "filename", "") | ||||
line = Map.get(resource, "line", "") | ||||
{filename, line, message} | ||||
end) | ||||
end | ||||
|
||||
defp client do | ||||
Transport.EnRouteChouetteValidClient.Wrapper.impl() | ||||
end | ||||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
## "msgid"s in this file come from POT (.pot) files. | ||
### | ||
### Do not add, change, or remove "msgid"s manually here as | ||
### they're tied to the ones in the corresponding POT file | ||
### (with the same domain). | ||
### | ||
### Use "mix gettext.extract --merge" or "mix gettext.merge" | ||
### to merge POT files into PO files. | ||
msgid "" | ||
msgstr "" | ||
"Language: en\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Composite frame ligne mandatory" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Frame arret resources" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Frame calendrier resources" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Frame horaire resources" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Frame ligne resources" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Frame reseau resources" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Latitude mandatory" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Longitude mandatory" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "UIC operating period" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Valid day bits" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "Version any" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "errors" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "informations" | ||
msgstr "" | ||
|
||
#, elixir-autogen, elixir-format | ||
msgid "warnings" | ||
msgstr "" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Documenter que ce validateur effectue la phase de polling d'une API distante et que les appels à
validate_
vont prendre plusieurs minutes ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Je peux.
Pour ma gouverne, n'est-ce pas commun à tous les validateurs d'être bloquants et potentiellement longs ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Si, tout à fait. La différence est que les autres validateurs ont des temps de traitement bien plus rapides, de l'ordre de 1-5s la plupart du temps.
Avoir un temps classique de plusieurs minutes a donc un impact si on veut proposer ça à la demande ou dans une chaine de validations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
En effet pour enchainer les validations ça peut surprendre. (Pour le on demand le pulling est implémenté via un job oban.)