-
Notifications
You must be signed in to change notification settings - Fork 12
Ticket30406 refactor header constants #357
base: master
Are you sure you want to change the base?
Changes from all commits
8a5efb1
ebbb9ff
c2dfe26
2d92fbd
9214877
c3769b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
"""Stem's Bandwidth file HEADER_ATTR part as it is in stem's commit | ||
658dd5281604eb9c63a91e529501947ecc65ef6b, which will be included in the next | ||
Stem's release, 1.8.0, except ``_date`` because depends on other stem's module. | ||
""" | ||
# XXX: Remove this file when stem releases 1.8.0. | ||
from ..util.timestamp import isostr_to_dt_obj | ||
|
||
# Converts header attributes to a given type. Malformed fields should be | ||
# ignored according to the spec. | ||
|
||
def _str(val): | ||
return val # already a str | ||
|
||
|
||
def _int(val): | ||
return int(val) if (val and val.isdigit()) else None | ||
|
||
|
||
def _date(val): | ||
try: | ||
return isostr_to_dt_obj(val) | ||
except ValueError: | ||
return None # not an iso formatted date | ||
|
||
|
||
def _csv(val): | ||
return map(lambda v: v.strip(), val.split(',')) if val is not None else None | ||
|
||
|
||
# mapping of attributes => (header, type) | ||
|
||
HEADER_ATTR = { | ||
# version 1.1.0 introduced headers | ||
|
||
'version': ('version', _str), | ||
|
||
'software': ('software', _str), | ||
'software_version': ('software_version', _str), | ||
|
||
'earliest_bandwidth': ('earliest_bandwidth', _date), | ||
'latest_bandwidth': ('latest_bandwidth', _date), | ||
'created_at': ('file_created', _date), | ||
'generated_at': ('generator_started', _date), | ||
|
||
# version 1.2.0 additions | ||
|
||
'consensus_size': ('number_consensus_relays', _int), | ||
'eligible_count': ('number_eligible_relays', _int), | ||
'eligible_percent': ('percent_eligible_relays', _int), | ||
'min_count': ('minimum_number_eligible_relays', _int), | ||
'min_percent': ('minimum_percent_eligible_relays', _int), | ||
|
||
# version 1.3.0 additions | ||
|
||
'scanner_country': ('scanner_country', _str), | ||
'destinations_countries': ('destinations_countries', _csv), | ||
|
||
# version 1.4.0 additions | ||
|
||
'time_to_report_half_network': ('time_to_report_half_network', _int), | ||
|
||
'recent_stats.consensus_count': ('recent_consensus_count', _int), | ||
'recent_stats.prioritized_relay_lists': ('recent_priority_list_count', _int), | ||
'recent_stats.prioritized_relays': ('recent_priority_relay_count', _int), | ||
'recent_stats.measurement_attempts': ('recent_measurement_attempt_count', _int), | ||
'recent_stats.measurement_failures': ('recent_measurement_failure_count', _int), | ||
'recent_stats.relay_failures.no_measurement': ('recent_measurements_excluded_error_count', _int), | ||
'recent_stats.relay_failures.insuffient_period': ('recent_measurements_excluded_near_count', _int), | ||
'recent_stats.relay_failures.insufficient_measurements': ('recent_measurements_excluded_few_count', _int), | ||
'recent_stats.relay_failures.stale': ('recent_measurements_excluded_old_count', _int), | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,6 +1,10 @@ | ||||||||||||||||
# -*- coding: utf-8 -*- | ||||||||||||||||
"""Classes and functions that create the bandwidth measurements document | ||||||||||||||||
(v3bw) used by bandwidth authorities.""" | ||||||||||||||||
(v3bw) used by bandwidth authorities. | ||||||||||||||||
|
||||||||||||||||
See https://gitweb.torproject.org/torspec.git/tree/bandwidth-file-spec.txt for | ||||||||||||||||
more information on the meaning of the ``KeyValues`` | ||||||||||||||||
""" | ||||||||||||||||
|
||||||||||||||||
import copy | ||||||||||||||||
import logging | ||||||||||||||||
|
@@ -15,6 +19,11 @@ | |||||||||||||||
TORFLOW_SCALING, SBWS_SCALING, TORFLOW_BW_MARGIN, | ||||||||||||||||
TORFLOW_OBS_LAST, TORFLOW_OBS_MEAN, | ||||||||||||||||
PROP276_ROUND_DIG, MIN_REPORT, MAX_BW_DIFF_PERC) | ||||||||||||||||
|
||||||||||||||||
# XXX: replace this line by: | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't write a comment, write code:
Suggested change
|
||||||||||||||||
# from stem.descriptor.bandwidth_file import HEADER_ATTR, _int | ||||||||||||||||
# when stem releases 1.8.0. More information in stem_bandwidth_file.py | ||||||||||||||||
from .stem_bandwidth_file import HEADER_ATTR, _int | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please write CI tests for sbws that use stem 1.7.0 and stem master. |
||||||||||||||||
from sbws.lib.resultdump import ResultSuccess, _ResultType | ||||||||||||||||
from sbws.util.filelock import DirectoryLock | ||||||||||||||||
from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str, | ||||||||||||||||
|
@@ -27,91 +36,34 @@ | |||||||||||||||
KEYVALUE_SEP_V1 = '=' | ||||||||||||||||
KEYVALUE_SEP_V2 = ' ' | ||||||||||||||||
|
||||||||||||||||
# NOTE: in a future refactor make make all the KeyValues be a dictionary | ||||||||||||||||
# with their type, so that it's more similar to stem parser. | ||||||||||||||||
|
||||||||||||||||
# Header KeyValues | ||||||||||||||||
# ================= | ||||||||||||||||
# List of the extra KeyValues accepted by the class | ||||||||||||||||
EXTRA_ARG_KEYVALUES = ['software', 'software_version', 'file_created', | ||||||||||||||||
'earliest_bandwidth', 'generator_started', | ||||||||||||||||
'scanner_country', 'destinations_countries'] | ||||||||||||||||
# number_eligible_relays is the number that ends in the bandwidth file | ||||||||||||||||
# ie, have not been excluded by one of the filters in 4. below | ||||||||||||||||
# They should be call recent_measurement_included_count to be congruent | ||||||||||||||||
# with the other KeyValues. | ||||||||||||||||
|
||||||||||||||||
# XXX: These constants need to be kept even if using Stem's header one. | ||||||||||||||||
# Unless Stem's will add a property type to the attributes. | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this comment. What does stem need to do? |
||||||||||||||||
STATS_KEYVALUES = ['number_eligible_relays', 'minimum_number_eligible_relays', | ||||||||||||||||
'number_consensus_relays', 'percent_eligible_relays', | ||||||||||||||||
'minimum_percent_eligible_relays'] | ||||||||||||||||
|
||||||||||||||||
# KeyValues that count the number of relays that are in the bandwidth file, | ||||||||||||||||
# but ignored by Tor when voting, because they do not have a | ||||||||||||||||
# measured bandwidth. | ||||||||||||||||
BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED = [ | ||||||||||||||||
# Number of relays that were measured but all the measurements failed | ||||||||||||||||
# because of network failures or it was | ||||||||||||||||
# not found a suitable helper relay | ||||||||||||||||
'recent_measurements_excluded_error_count', | ||||||||||||||||
# Number of relays that have successful measurements but the measurements | ||||||||||||||||
# were not away from each other in X time (by default 1 day). | ||||||||||||||||
'recent_measurements_excluded_near_count', | ||||||||||||||||
# Number of relays that have successful measurements and they are away from | ||||||||||||||||
# each other but they are not X time recent. | ||||||||||||||||
# By default this is 5 days, which is the same time the older | ||||||||||||||||
# the measurements can be by default. | ||||||||||||||||
'recent_measurements_excluded_old_count', | ||||||||||||||||
# Number of relays that have successful measurements and they are away from | ||||||||||||||||
# each other and recent | ||||||||||||||||
# but the number of measurements are less than X (by default 2). | ||||||||||||||||
'recent_measurements_excluded_few_count', | ||||||||||||||||
] | ||||||||||||||||
# Added in #29591 | ||||||||||||||||
# NOTE: recent_consensus_count, recent_priority_list_count, | ||||||||||||||||
# recent_measurement_attempt_count and recent_priority_relay_count | ||||||||||||||||
# are not reset when the scanner is stop. | ||||||||||||||||
# They will accumulate the values since the scanner was ever started. | ||||||||||||||||
BW_HEADER_KEYVALUES_MONITOR = [ | ||||||||||||||||
# 1.1 header: the number of different consensuses, that sbws has seen, | ||||||||||||||||
# since the last 5 days | ||||||||||||||||
'recent_consensus_count', | ||||||||||||||||
# 2.4 Number of times a priority list has been created | ||||||||||||||||
'recent_priority_list_count', | ||||||||||||||||
# 2.5 Number of relays that there were in a priority list | ||||||||||||||||
# [50, number of relays in the network * 0.05] | ||||||||||||||||
'recent_priority_relay_count', | ||||||||||||||||
# 3.6 header: the number of times that sbws has tried to measure any relay, | ||||||||||||||||
# since the last 5 days | ||||||||||||||||
# This would be the number of times a relays were in a priority list | ||||||||||||||||
'recent_measurement_attempt_count', | ||||||||||||||||
# 3.7 header: the number of times that sbws has tried to measure any relay, | ||||||||||||||||
# since the last 5 days, but it didn't work | ||||||||||||||||
# This should be the number of attempts - number of ResultSuccess - | ||||||||||||||||
# something else we don't know yet | ||||||||||||||||
# So far is the number of ResultError | ||||||||||||||||
'recent_measurement_failure_count', | ||||||||||||||||
# The time it took to report about half of the network. | ||||||||||||||||
'time_to_report_half_network', | ||||||||||||||||
] + BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED | ||||||||||||||||
BANDWIDTH_HEADER_KEY_VALUES_INIT = \ | ||||||||||||||||
['earliest_bandwidth', 'generator_started', | ||||||||||||||||
'scanner_country', 'destinations_countries']\ | ||||||||||||||||
+ STATS_KEYVALUES \ | ||||||||||||||||
+ BW_HEADER_KEYVALUES_MONITOR | ||||||||||||||||
|
||||||||||||||||
KEYVALUES_INT = STATS_KEYVALUES + BW_HEADER_KEYVALUES_MONITOR | ||||||||||||||||
# List of all unordered KeyValues currently being used to generate the file | ||||||||||||||||
UNORDERED_KEYVALUES = EXTRA_ARG_KEYVALUES + STATS_KEYVALUES + \ | ||||||||||||||||
['latest_bandwidth'] + \ | ||||||||||||||||
BW_HEADER_KEYVALUES_MONITOR | ||||||||||||||||
# List of all the KeyValues currently being used to generate the file | ||||||||||||||||
ALL_KEYVALUES = ['version'] + UNORDERED_KEYVALUES | ||||||||||||||||
# The stem's attribute name for these Keys starts with this string. | ||||||||||||||||
BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED = \ | ||||||||||||||||
[k for n, (k, _) in HEADER_ATTR.items() | ||||||||||||||||
if n.startswith("recent_stats.relay_failures.")] | ||||||||||||||||
|
||||||||||||||||
# XXX: The following constants are kept for compatibility with the generator/ | ||||||||||||||||
# parser here. Use stem's parser/generator when possible and refactor this | ||||||||||||||||
# code when stem releases 1.8.0. | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't write comments, write code. Refactor the code now. Use the old code with old stem, and the new code with new stem. Then you can write CI tests for sbws that use stem 1.7.0 and stem master. |
||||||||||||||||
KEYVALUES_INT = [k for k, t in HEADER_ATTR.values() if t is _int] | ||||||||||||||||
# # List of all unordered KeyValues currently being used to generate the file | ||||||||||||||||
UNORDERED_KEYVALUES = [k for k, _ in HEADER_ATTR.values()] | ||||||||||||||||
UNORDERED_KEYVALUES.remove('version') | ||||||||||||||||
|
||||||||||||||||
TERMINATOR = '=====' | ||||||||||||||||
|
||||||||||||||||
# Bandwidth Lines KeyValues | ||||||||||||||||
# ========================= | ||||||||||||||||
# Num header lines in v1.X.X using all the KeyValues | ||||||||||||||||
NUM_LINES_HEADER_V1 = len(ALL_KEYVALUES) + 2 | ||||||||||||||||
NUM_LINES_HEADER_V1 = len(HEADER_ATTR) + 2 | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why 2? |
||||||||||||||||
LINE_TERMINATOR = TERMINATOR + LINE_SEP | ||||||||||||||||
|
||||||||||||||||
# KeyValue separator in Bandwidth Lines | ||||||||||||||||
|
@@ -250,20 +202,28 @@ class V3BWHeader(object): | |||||||||||||||
when the generator started | ||||||||||||||||
""" | ||||||||||||||||
def __init__(self, timestamp, **kwargs): | ||||||||||||||||
# XXX: do not convert all the arguments to string here, convert them | ||||||||||||||||
# only in __str__ and check their actual types. | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment explains what the code is doing. |
||||||||||||||||
assert isinstance(timestamp, str) | ||||||||||||||||
for v in kwargs.values(): | ||||||||||||||||
assert isinstance(v, str) | ||||||||||||||||
self.timestamp = timestamp | ||||||||||||||||
# KeyValues with default value when not given by kwargs | ||||||||||||||||
|
||||||||||||||||
# Initialize all defined attrs, | ||||||||||||||||
# XXX: Since using the stem's name for the attributes will require | ||||||||||||||||
# other refactoring here, use the name in the actual file. | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is "the name in the actual file"? |
||||||||||||||||
header_keys = [k for k, _ in HEADER_ATTR.values()] | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not easy to understand if this code is refactoring or changes the behavior of sbws (adds/removes headers). Could we have a unittest which makes sure that the old code and the new code will output the exact same thing? Perhaps instead of commenting out the code above, you can move it to the tests directory and make a unittest that checks the output of the old with the new code. Would this make sense? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Makes sense but then need to keep this old code in the test. So i came out with other solution that it's something i wanted to do time ago: include real measurements and bandwidth files generated with the test network for the tests. So i run the integration tests to generate those files with master branch, then i created a test that generates the bandwidth file from the previous results and compare that the bandwidth file is the same. I'm not pushing these changes yet until knowing your opinion. If you don't like that solution will open other ticket to include them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Other way of ensuring that i didn't accidentally change functionality is the fact that i didn't have to change any test for this PR. It's true though that i don't think there're unit tests checking all the headers, but the previous idea would solve that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pushed the new test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this new test very much. I think it's a good step. |
||||||||||||||||
[setattr(self, k, v) for k, v in kwargs.items() if k in header_keys] | ||||||||||||||||
|
||||||||||||||||
# Then initialize the ones with defaults, if they weren't already. | ||||||||||||||||
# Ideally these defaults could be defined in the HEADER_ATTR constant. | ||||||||||||||||
self.version = kwargs.get('version', SPEC_VERSION) | ||||||||||||||||
self.software = kwargs.get('software', 'sbws') | ||||||||||||||||
self.software_version = kwargs.get('software_version', __version__) | ||||||||||||||||
self.file_created = kwargs.get('file_created', now_isodt_str()) | ||||||||||||||||
# latest_bandwidth should not be in kwargs, since it MUST be the | ||||||||||||||||
# same as timestamp | ||||||||||||||||
self.latest_bandwidth = unixts_to_isodt_str(timestamp) | ||||||||||||||||
[setattr(self, k, v) for k, v in kwargs.items() | ||||||||||||||||
if k in BANDWIDTH_HEADER_KEY_VALUES_INIT] | ||||||||||||||||
|
||||||||||||||||
def __str__(self): | ||||||||||||||||
if self.version.startswith('1.'): | ||||||||||||||||
|
@@ -337,9 +297,10 @@ def from_lines_v1(cls, lines): | |||||||||||||||
log.warn('Terminator is not in lines') | ||||||||||||||||
return None | ||||||||||||||||
ts = lines[0] | ||||||||||||||||
header_keys = [k for k, _ in HEADER_ATTR.values()] | ||||||||||||||||
kwargs = dict([l.split(KEYVALUE_SEP_V1) | ||||||||||||||||
for l in lines[:index_terminator] | ||||||||||||||||
if l.split(KEYVALUE_SEP_V1)[0] in ALL_KEYVALUES]) | ||||||||||||||||
if l.split(KEYVALUE_SEP_V1)[0] in header_keys]) | ||||||||||||||||
h = cls(ts, **kwargs) | ||||||||||||||||
# last line is new line | ||||||||||||||||
return h, lines[index_terminator + 1:-1] | ||||||||||||||||
|
@@ -1056,6 +1017,8 @@ def is_max_bw_diff_perc_reached(bw_lines, | |||||||||||||||
sum_bw = sum([l.bw for l in bw_lines | ||||||||||||||||
if getattr(l, 'consensus_bandwidth', None) | ||||||||||||||||
and getattr(l, 'unmeasured', 0) == 0]) * 1000 | ||||||||||||||||
# For tests without consensus bandwidth, avoid division by 0 | ||||||||||||||||
sum_bw = sum_bw or 1 | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is weird to see a test workaround in a code file. Please separate the tests and the code. |
||||||||||||||||
# Percentage difference | ||||||||||||||||
diff_perc = ( | ||||||||||||||||
abs(sum_consensus_bw - sum_bw) | ||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
[general] | ||
# Days into the past that measurements are considered valid | ||
data_period = 1 | ||
|
||
[paths] | ||
sbws_home = .sbws | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps we don't need to hide this data in .sbws? I'm not used to files in source code being hidden. Why are we doing it here? |
||
|
||
[scanner] | ||
country = ZZ | ||
|
||
[destinations] | ||
local = on | ||
|
||
[destinations.local] | ||
; url = https://localhost:28888/sbws.bin | ||
url = http://127.0.0.1:28888/sbws.bin | ||
verify = False | ||
country = ZZ | ||
|
||
[tor] | ||
extra_lines = | ||
DirAuthority auth1 orport=2002 no-v2 v3ident=D7DBC517EFD2BA1A5012CF1BD0BB38F17C8160BD 127.10.0.1:2003 AA45C13025C037F056E734169891878ED0880231 | ||
DirAuthority auth2 orport=2002 no-v2 v3ident=4EE103A081F400E6622F5461D51782B876BB5C24 127.10.0.2:2003 E7B3C9A0040D628DAC88B0251AE6334D28E8F531 | ||
DirAuthority auth3 orport=2002 no-v2 v3ident=8B85069C7FC0593801E6491A34100264FCE28980 127.10.0.3:2003 35E3B8BB71C81355649AEC5862ECB7ED7EFDBC5C | ||
TestingTorNetwork 1 | ||
NumCPUs 1 | ||
|
||
[logging] | ||
level = debug | ||
to_stdout_level = ${level} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't remove this file when stem releases 1.8.0.
We can only remove this file when everyone who uses sbws 1.2.0 or later, has access to stem 1.8.0 or later. We will need to check Debian package versions for stem and sbws.