From 6908f7be4c7e620ac1ee4db9bdbf4cc5100e9e5a Mon Sep 17 00:00:00 2001 From: Joel Klinghed Date: Thu, 18 Nov 2021 00:22:10 +0100 Subject: travel: Improve trip location estimation by removing outliers Outlier value (> 1.43 Z-score) based on what was needed for a trip I took long ago. Will have to fiddle with the value I assume. --- src/travel.cc | 74 +++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/travel.cc b/src/travel.cc index f8adf50..57d226a 100644 --- a/src/travel.cc +++ b/src/travel.cc @@ -13,6 +13,8 @@ #include "weak_ptr.hh" #include +#include +#include #include #include #include @@ -454,32 +456,74 @@ private: call_when_loaded_.clear(); } + static Location get_mean_location(std::vector locations) { + if (locations.empty()) + return Location(); + + auto it = locations.begin(); + Location mean = *it; + for (++it; it != locations.end(); ++it) { + mean.lat += it->lat; + mean.lng += it->lng; + } + mean.lat /= locations.size(); + mean.lng /= locations.size(); + return mean; + } + + static Location get_center_location(std::vector locations) { + while (true) { + if (locations.empty()) + return Location(); + + Location mean = get_mean_location(locations); + + float variance = 0; + std::vector location_z; + for (auto const& loc : locations) { + float deviation_square = std::pow(loc.lat - mean.lat, 2.f) + + std::pow(loc.lng - mean.lng, 2.f); + variance += deviation_square; + location_z.push_back(std::sqrt(deviation_square)); + } + float standard_deviation = std::sqrt(variance / locations.size()); + + for (auto& z : location_z) + z /= standard_deviation; + + // Remove outliers (if any) + bool removed_any = false; + size_t i = 0; + while (i < locations.size()) { + if (std::abs(location_z[i]) > 1.43f) { + locations.erase(locations.begin() + i); + location_z.erase(location_z.begin() + i); + removed_any = true; + } else { + ++i; + } + } + + if (!removed_any) + return mean; + } + } + void cleanup_trip(TripImpl& trip_impl) { // Sort by date trip_impl.sort_media(); trip_impl.setup_days(); - // TODO: Remove outliers - // TODO: Use some weighted median instead of average - Location loc; - size_t count = 0; + std::vector locations; for (size_t i = 0; i < trip_impl.media_count(); ++i) { auto& media = trip_impl.media(i); if (media.location().empty()) continue; - if (loc.empty()) { - loc = media.location(); - count = 1; - } else { - loc.lat += media.location().lat; - loc.lng += media.location().lng; - ++count; - } + locations.push_back(media.location()); } - loc.lat /= count; - loc.lng /= count; - trip_impl.set_location(loc); + + trip_impl.set_location(get_center_location(std::move(locations))); } static std::optional get_image_info(std::string id, -- cgit v1.2.3-70-g09d2