Skip to content

Commit

Permalink
Don't expire iOS devices prematurely (#25436)
Browse files Browse the repository at this point in the history
#25406 

The `last_seen_times` table is only updates when osquery hits one of its
authenticated endpoints, meaning it isn't updated when devices without
osquery, like iphones, are enrolled. I've left a
[comment](#25406 (comment))
on the original issue explaining how this happens. Originally, if there
was no `last_seen_time`, the fallback value would be the `created_at`
value on the `hosts` table, so ios devices would always get deleted once
they were added X number of days ago.

In its place, I've added the `detail_updated_at` column on the `hosts`
table as the fallback value, and only use `created_at` if that is also
empty. `detail_updated_at` is updated every time a full detail refetch
completes. In the case of ios/ipados, [this is done using
MDM](https://github.com/fleetdm/fleet/blob/cd5c0e8aed10664458f597b5d9600dd20bf3fdac/server/service/apple_mdm.go#L3101).
`detail_updated_at` is updated less frequently than `last_seen_times`,
only once every hour or so instead of every 30 seconds, but since
expiration policies are set on the scale of days instead of hours, this
should be fine.

The way I've QA'd this is by adding an iOS device to my fleet instance,
waited 24 hours, and set the expiration policy to 24 hours.
  • Loading branch information
dantecatalfamo authored Jan 16, 2025
1 parent d3ea62a commit 3a2a689
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 2 deletions.
1 change: 1 addition & 0 deletions changes/25406-premature-ios-deletion
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- Fixed bug where iOS devices were being removed prematurely by expiration policy
8 changes: 6 additions & 2 deletions server/datastore/mysql/hosts.go
Original file line number Diff line number Diff line change
Expand Up @@ -3119,11 +3119,15 @@ func (ds *Datastore) CleanupExpiredHosts(ctx context.Context) ([]uint, error) {
// DELETE FROM hosts WHERE id in (SELECT host_id FROM host_seen_times WHERE seen_time < DATE_SUB(NOW(), INTERVAL ? DAY))
// This means a full table scan for hosts, and for big deployments, that's not ideal
// so instead, we get the ids one by one and delete things one by one
// it might take longer, but it should lock only the row we need
// it might take longer, but it should lock only the row we need.
//
// host_seen_time entries are not available for ios/ipados devices, since they're updated on
// osquery check-in. Instead we fall back to detail_updated_at, which is updated every time a
// full detail refetch happens.
findHostsSql := `SELECT h.id FROM hosts h
LEFT JOIN host_seen_times hst
ON h.id = hst.host_id
WHERE COALESCE(hst.seen_time, h.created_at) < DATE_SUB(NOW(), INTERVAL ? DAY)`
WHERE COALESCE(hst.seen_time, h.detail_updated_at, h.created_at) < DATE_SUB(NOW(), INTERVAL ? DAY)`

var allIdsToDelete []uint
// Process hosts using global expiry
Expand Down
87 changes: 87 additions & 0 deletions server/datastore/mysql/hosts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ func TestHosts(t *testing.T) {
{"HostsListByDiskEncryptionStatus", testHostsListMacOSSettingsDiskEncryptionStatus},
{"HostsListFailingPolicies", printReadsInTest(testHostsListFailingPolicies)},
{"HostsExpiration", testHostsExpiration},
{"IOSHostExpiration", testIOSHostsExpiration},
{"TeamHostsExpiration", testTeamHostsExpiration},
{"HostsIncludesScheduledQueriesInPackStats", testHostsIncludesScheduledQueriesInPackStats},
{"HostsAllPackStats", testHostsAllPackStats},
Expand Down Expand Up @@ -4148,6 +4149,92 @@ func testHostsExpiration(t *testing.T, ds *Datastore) {
require.Len(t, hosts, 5)
}

func testIOSHostsExpiration(t *testing.T, ds *Datastore) {
// iOS/iPadOS devices don't have host_seen_times, meaning they
// would previously rely on created_at records for removal,
// and get deleted once the host's age was beyong the expiry
// window. We now check detail_updated_at, something that gets
// updated every time details are refetched, if seen time is
// not present.
hostExpiryWindow := 70

ac, err := ds.AppConfig(context.Background())
require.NoError(t, err)

ac.HostExpirySettings.HostExpiryEnabled = false
ac.HostExpirySettings.HostExpiryWindow = hostExpiryWindow

err = ds.SaveAppConfig(context.Background(), ac)
require.NoError(t, err)

for i := 0; i < 10; i++ {
platform := "ios"
if i%2 == 0 {
platform = "ipados"
}
detailsUpdated := time.Now()
if i >= 5 {
detailsUpdated = detailsUpdated.Add(time.Duration(-1*(hostExpiryWindow+1)*24) * time.Hour)
}

_, err := ds.NewHost(context.Background(), &fleet.Host{
DetailUpdatedAt: detailsUpdated,
LabelUpdatedAt: time.Now(),
PolicyUpdatedAt: time.Now(),
UUID: fmt.Sprintf("%d", i),
Hostname: fmt.Sprintf("foo.local%d", i),
Platform: platform,
})
require.NoError(t, err)
}

ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error {
// There are no host_seen_times for ios/ipados devices
if _, err := q.ExecContext(context.Background(), "DELETE FROM host_seen_times"); err != nil {
return err
}
// Make sure created_at is old enough to always get
// removed, we want to make sure that
// detail_updated_at is the column being checked
if _, err := q.ExecContext(context.Background(), "UPDATE hosts SET created_at = '2020-01-01 00:00:01'"); err != nil {
return err
}
return nil
})

filter := fleet.TeamFilter{User: test.UserAdmin}

hosts := listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 10)
require.Len(t, hosts, 10)

_, err = ds.CleanupExpiredHosts(context.Background())
require.NoError(t, err)

// host expiration is still disabled
hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 10)
require.Len(t, hosts, 10)

// once enabled, it works
ac.HostExpirySettings.HostExpiryEnabled = true
err = ds.SaveAppConfig(context.Background(), ac)
require.NoError(t, err)

deleted, err := ds.CleanupExpiredHosts(context.Background())
require.NoError(t, err)
require.Len(t, deleted, 5)

hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 5)
require.Len(t, hosts, 5)

// And it doesn't remove more than it should
deleted, err = ds.CleanupExpiredHosts(context.Background())
require.NoError(t, err)
require.Len(t, deleted, 0)

hosts = listHostsCheckCount(t, ds, filter, fleet.HostListOptions{}, 5)
require.Len(t, hosts, 5)
}

func testTeamHostsExpiration(t *testing.T, ds *Datastore) {
// Set global host expiry windows
const hostExpiryWindow = 70
Expand Down

0 comments on commit 3a2a689

Please sign in to comment.