Class: Prophet::Forecaster
- Inherits:
-
Object
- Object
- Prophet::Forecaster
- Defined in:
- lib/prophet/forecaster.rb
Instance Attribute Summary collapse
-
#changepoint_prior_scale ⇒ Object
readonly
Returns the value of attribute changepoint_prior_scale.
-
#changepoint_range ⇒ Object
readonly
Returns the value of attribute changepoint_range.
-
#changepoints ⇒ Object
readonly
Returns the value of attribute changepoints.
-
#country_holidays ⇒ Object
Returns the value of attribute country_holidays.
-
#extra_regressors ⇒ Object
Returns the value of attribute extra_regressors.
-
#fit_kwargs ⇒ Object
readonly
Returns the value of attribute fit_kwargs.
-
#growth ⇒ Object
readonly
Returns the value of attribute growth.
-
#history ⇒ Object
readonly
Returns the value of attribute history.
-
#holidays ⇒ Object
readonly
Returns the value of attribute holidays.
-
#holidays_prior_scale ⇒ Object
readonly
Returns the value of attribute holidays_prior_scale.
-
#interval_width ⇒ Object
readonly
Returns the value of attribute interval_width.
-
#logger ⇒ Object
readonly
Returns the value of attribute logger.
-
#mcmc_samples ⇒ Object
readonly
Returns the value of attribute mcmc_samples.
-
#n_changepoints ⇒ Object
readonly
Returns the value of attribute n_changepoints.
-
#params ⇒ Object
readonly
Returns the value of attribute params.
-
#seasonalities ⇒ Object
Returns the value of attribute seasonalities.
-
#seasonality_mode ⇒ Object
readonly
Returns the value of attribute seasonality_mode.
-
#seasonality_prior_scale ⇒ Object
readonly
Returns the value of attribute seasonality_prior_scale.
-
#specified_changepoints ⇒ Object
readonly
Returns the value of attribute specified_changepoints.
-
#train_holiday_names ⇒ Object
readonly
Returns the value of attribute train_holiday_names.
-
#uncertainty_samples ⇒ Object
readonly
Returns the value of attribute uncertainty_samples.
Instance Method Summary collapse
- #add_country_holidays(country_name) ⇒ Object
- #add_group_component(components, name, group) ⇒ Object
- #add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil) ⇒ Object
- #add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil) ⇒ Object
- #construct_holiday_dataframe(dates) ⇒ Object
- #fit(df, **kwargs) ⇒ Object
- #flat_growth_init(df) ⇒ Object
- #flat_trend(t, m) ⇒ Object
- #fourier_series(dates, period, series_order) ⇒ Object
-
#initialize(growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000) ⇒ Forecaster
constructor
A new instance of Forecaster.
- #initialize_scales(initialize_scales, df) ⇒ Object
- #linear_growth_init(df) ⇒ Object
- #logistic_growth_init(df) ⇒ Object
- #make_all_seasonality_features(df) ⇒ Object
- #make_future_dataframe(periods:, freq: "D", include_history: true) ⇒ Object
- #make_holiday_features(dates, holidays) ⇒ Object
- #make_seasonality_features(dates, period, series_order, prefix) ⇒ Object
- #parse_seasonality_args(name, arg, auto_disable, default_order) ⇒ Object
- #piecewise_linear(t, deltas, k, m, changepoint_ts) ⇒ Object
- #piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) ⇒ Object
- #predict(df = nil) ⇒ Object
- #predict_seasonal_components(df) ⇒ Object
- #predict_trend(df) ⇒ Object
- #predict_uncertainty(df) ⇒ Object
- #predictive_samples(df) ⇒ Object
- #regressor_column_matrix(seasonal_features, modes) ⇒ Object
- #sample_model(df, seasonal_features, iteration, s_a, s_m) ⇒ Object
- #sample_posterior_predictive(df) ⇒ Object
- #sample_predictive_trend(df, iteration) ⇒ Object
- #set_auto_seasonalities ⇒ Object
- #set_changepoints ⇒ Object
- #setup_dataframe(df, initialize_scales: false) ⇒ Object
- #to_json ⇒ Object
- #validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true) ⇒ Object
- #validate_inputs ⇒ Object
Methods included from Plot
#add_changepoints_to_plot, #plot, #plot_components, plot_cross_validation_metric, plt
Methods included from Holidays
#get_holiday_names, #holidays_df, #make_holidays_df
Constructor Details
#initialize(growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000) ⇒ Forecaster
Returns a new instance of Forecaster.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/prophet/forecaster.rb', line 15
#
# Stores the model configuration and resets all fit-time state.
#
# +changepoints+ is passed through +to_datetime+; when the result is non-nil
# its size replaces +n_changepoints+ and +specified_changepoints+ is true.
# The three prior scales are coerced with +to_f+. +validate_inputs+ runs
# before the logger and Stan backend are created.
#
# @param growth [String] trend type; other methods here branch on
#   "linear", "logistic" and "flat"
# @param changepoints [Object, nil] explicit changepoint dates
# @param n_changepoints [Integer] used only when +changepoints+ is nil
# @param changepoint_range [Numeric]
# @param yearly_seasonality ["auto", Boolean, Integer]
# @param weekly_seasonality ["auto", Boolean, Integer]
# @param daily_seasonality ["auto", Boolean, Integer]
# @param holidays [Object, nil] user-supplied holidays data frame
# @param seasonality_mode [String] "additive" or "multiplicative"
# @param seasonality_prior_scale [Numeric]
# @param holidays_prior_scale [Numeric]
# @param changepoint_prior_scale [Numeric]
# @param mcmc_samples [Integer] > 0 makes #fit use sampling
# @param interval_width [Numeric]
# @param uncertainty_samples [Integer, nil, false]
# @return [Forecaster]
def initialize(
  growth: "linear",
  changepoints: nil,
  n_changepoints: 25,
  changepoint_range: 0.8,
  yearly_seasonality: "auto",
  weekly_seasonality: "auto",
  daily_seasonality: "auto",
  holidays: nil,
  seasonality_mode: "additive",
  seasonality_prior_scale: 10.0,
  holidays_prior_scale: 10.0,
  changepoint_prior_scale: 0.05,
  mcmc_samples: 0,
  interval_width: 0.80,
  uncertainty_samples: 1000
)
  @growth = growth

  @changepoints = to_datetime(changepoints)
  if @changepoints.nil?
    @n_changepoints = n_changepoints
    @specified_changepoints = false
  else
    # Explicit changepoints override the requested count.
    @n_changepoints = @changepoints.size
    @specified_changepoints = true
  end

  @changepoint_range = changepoint_range
  @yearly_seasonality = yearly_seasonality
  @weekly_seasonality = weekly_seasonality
  @daily_seasonality = daily_seasonality
  @holidays = holidays

  @seasonality_mode = seasonality_mode
  @seasonality_prior_scale = seasonality_prior_scale.to_f
  @changepoint_prior_scale = changepoint_prior_scale.to_f
  @holidays_prior_scale = holidays_prior_scale.to_f

  @mcmc_samples = mcmc_samples
  @interval_width = interval_width
  @uncertainty_samples = uncertainty_samples

  # Set during fitting or by other methods
  @start = nil
  @y_scale = nil
  @logistic_floor = false
  @t_scale = nil
  @changepoints_t = nil
  @seasonalities = {}
  @extra_regressors = {}
  @country_holidays = nil
  @stan_fit = nil
  @params = {}
  @history = nil
  @history_dates = nil
  @train_component_cols = nil
  @component_modes = nil
  @train_holiday_names = nil
  @fit_kwargs = {}
  validate_inputs

  # Log to stderr; every message is emitted as "[prophet] <msg>".
  @logger = ::Logger.new($stderr)
  @logger.formatter = proc do |_severity, _datetime, _progname, msg|
    "[prophet] #{msg}\n"
  end
  @stan_backend = StanBackend.new(@logger)
end
Instance Attribute Details
#changepoint_prior_scale ⇒ Object (readonly)
Returns the value of attribute changepoint_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @changepoint_prior_scale (the constructor stores the keyword
# value coerced with +to_f+).
def changepoint_prior_scale
  @changepoint_prior_scale
end
#changepoint_range ⇒ Object (readonly)
Returns the value of attribute changepoint_range.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @changepoint_range, stored as given to the constructor.
def changepoint_range
  @changepoint_range
end
#changepoints ⇒ Object (readonly)
Returns the value of attribute changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @changepoints (the constructor stores the result of
# +to_datetime+ applied to the +changepoints:+ keyword).
def changepoints
  @changepoints
end
#country_holidays ⇒ Object
Returns the value of attribute country_holidays.
13 14 15 |
# File 'lib/prophet/forecaster.rb', line 13
# Reader for @country_holidays: nil after construction, set by
# #add_country_holidays.
def country_holidays
  @country_holidays
end
#extra_regressors ⇒ Object
Returns the value of attribute extra_regressors.
13 14 15 |
# File 'lib/prophet/forecaster.rb', line 13
# Reader for @extra_regressors: a Hash keyed by regressor name, empty after
# construction and populated by #add_regressor.
def extra_regressors
  @extra_regressors
end
#fit_kwargs ⇒ Object (readonly)
Returns the value of attribute fit_kwargs.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @fit_kwargs: empty Hash after construction; #fit stores a
# shallow copy of the keyword arguments it received.
def fit_kwargs
  @fit_kwargs
end
#growth ⇒ Object (readonly)
Returns the value of attribute growth.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @growth, stored as given to the constructor.
def growth
  @growth
end
#history ⇒ Object (readonly)
Returns the value of attribute history.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @history: nil until #fit stores the prepared training frame.
def history
  @history
end
#holidays ⇒ Object (readonly)
Returns the value of attribute holidays.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @holidays, stored as given to the constructor.
def holidays
  @holidays
end
#holidays_prior_scale ⇒ Object (readonly)
Returns the value of attribute holidays_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @holidays_prior_scale (the constructor stores the keyword value
# coerced with +to_f+).
def holidays_prior_scale
  @holidays_prior_scale
end
#interval_width ⇒ Object (readonly)
Returns the value of attribute interval_width.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @interval_width, stored as given to the constructor.
def interval_width
  @interval_width
end
#logger ⇒ Object (readonly)
Returns the value of attribute logger.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @logger (the constructor creates a ::Logger writing to $stderr
# with a "[prophet] " message prefix).
def logger
  @logger
end
#mcmc_samples ⇒ Object (readonly)
Returns the value of attribute mcmc_samples.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @mcmc_samples, stored as given to the constructor.
def mcmc_samples
  @mcmc_samples
end
#n_changepoints ⇒ Object (readonly)
Returns the value of attribute n_changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @n_changepoints: the size of the explicit changepoints when
# they were supplied to the constructor, otherwise the +n_changepoints:+
# keyword value.
def n_changepoints
  @n_changepoints
end
#params ⇒ Object (readonly)
Returns the value of attribute params.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @params: empty Hash after construction, filled in by #fit.
def params
  @params
end
#seasonalities ⇒ Object
Returns the value of attribute seasonalities.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @seasonalities: a Hash keyed by seasonality name, empty after
# construction and populated by #add_seasonality.
def seasonalities
  @seasonalities
end
#seasonality_mode ⇒ Object (readonly)
Returns the value of attribute seasonality_mode.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @seasonality_mode, stored as given to the constructor.
def seasonality_mode
  @seasonality_mode
end
#seasonality_prior_scale ⇒ Object (readonly)
Returns the value of attribute seasonality_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @seasonality_prior_scale (the constructor stores the keyword
# value coerced with +to_f+).
def seasonality_prior_scale
  @seasonality_prior_scale
end
#specified_changepoints ⇒ Object (readonly)
Returns the value of attribute specified_changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @specified_changepoints: true when the constructor received
# explicit changepoints, false otherwise.
def specified_changepoints
  @specified_changepoints
end
#train_holiday_names ⇒ Object (readonly)
Returns the value of attribute train_holiday_names.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @train_holiday_names: nil after construction; assigned by
# #make_holiday_features the first time it runs.
def train_holiday_names
  @train_holiday_names
end
#uncertainty_samples ⇒ Object (readonly)
Returns the value of attribute uncertainty_samples.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6
# Reader for @uncertainty_samples, stored as given to the constructor.
def uncertainty_samples
  @uncertainty_samples
end
Instance Method Details
#add_country_holidays(country_name) ⇒ Object
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 |
# File 'lib/prophet/forecaster.rb', line 401
#
# Registers a country whose holidays should be included in the model.
#
# @param country_name [String, Hash] country identifier; a Hash with a
#   truthy +:country_name+ entry is unwrapped for backward compatibility
# @return [self]
# @raise [Error] if the model has already been fit
def add_country_holidays(country_name)
  raise Error, "Country holidays must be added prior to model fitting." if @history

  # Fix for previously documented keyword argument
  if country_name.is_a?(Hash) && country_name[:country_name]
    country_name = country_name[:country_name]
  end

  # Validate names.
  get_holiday_names(country_name).each do |name|
    # Allow merging with existing holidays
    validate_column_name(name, check_holidays: false)
  end

  # Set the holidays.
  if @country_holidays
    logger.warn "Changing country holidays from #{@country_holidays.inspect} to #{country_name.inspect}."
  end
  @country_holidays = country_name
  self
end
#add_group_component(components, name, group) ⇒ Object
509 510 511 512 513 514 515 516 517 |
# File 'lib/prophet/forecaster.rb', line 509
#
# Adds a roll-up component: every feature column whose component is listed
# in +group+ is additionally tagged with component +name+.
#
# @param components [Rover::DataFrame] frame with "col" and "component" columns
# @param name [String] name of the roll-up component
# @param group [Object] component names to roll up (passed to +in?+)
# @return [Rover::DataFrame] +components+, extended when any column matched
def add_group_component(components, name, group)
  new_comp = components[components["component"].in?(group)].dup
  group_cols = new_comp["col"].uniq
  if group_cols.size > 0
    new_comp = Rover::DataFrame.new({"col" => group_cols, "component" => name})
    components = components.concat(new_comp)
  end
  components
end
#add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil) ⇒ Object
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
# File 'lib/prophet/forecaster.rb', line 353
#
# Registers an extra regressor column for the model.
#
# @param name [String] column name; validated with +validate_column_name+
# @param prior_scale [Numeric, nil] defaults to the holidays prior scale
# @param standardize [Object] stored as given (default "auto")
# @param mode [String, nil] "additive" or "multiplicative"; defaults to the
#   model's seasonality mode
# @return [self]
# @raise [Error] if the model has already been fit
# @raise [ArgumentError] if the prior scale is not positive or +mode+ is invalid
def add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil)
  raise Error, "Regressors must be added prior to model fitting." if @history

  validate_column_name(name, check_regressors: false)
  prior_scale ||= @holidays_prior_scale.to_f
  mode ||= @seasonality_mode
  raise ArgumentError, "Prior scale must be > 0" if prior_scale <= 0
  unless ["additive", "multiplicative"].include?(mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end

  # mu/std start at the identity transform.
  @extra_regressors[name] = {
    prior_scale: prior_scale,
    standardize: standardize,
    mu: 0.0,
    std: 1.0,
    mode: mode
  }
  self
end
#add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil) ⇒ Object
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 |
# File 'lib/prophet/forecaster.rb', line 372
#
# Registers (or overwrites) a seasonality component.
#
# @param name [String] seasonality name; "daily", "weekly" and "yearly"
#   skip column-name validation so built-ins can be overwritten
# @param period [Numeric] stored as given
# @param fourier_order [Integer] must be > 0
# @param prior_scale [Numeric, nil] defaults to the model's seasonality
#   prior scale; coerced with +to_f+ when given
# @param mode [String, nil] "additive" or "multiplicative"; defaults to the
#   model's seasonality mode
# @param condition_name [String, nil] optional condition column name
# @return [self]
# @raise [Error] if the model has already been fit
# @raise [ArgumentError] on a non-positive prior scale or Fourier order, or
#   an invalid +mode+
def add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil)
  raise Error, "Seasonality must be added prior to model fitting." if @history

  unless ["daily", "weekly", "yearly"].include?(name)
    # Allow overwriting built-in seasonalities
    validate_column_name(name, check_seasonalities: false)
  end

  ps = prior_scale.nil? ? @seasonality_prior_scale : prior_scale.to_f
  raise ArgumentError, "Prior scale must be > 0" if ps <= 0
  raise ArgumentError, "Fourier Order must be > 0" if fourier_order <= 0

  mode ||= @seasonality_mode
  unless ["additive", "multiplicative"].include?(mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end
  validate_column_name(condition_name) if condition_name

  @seasonalities[name] = {
    period: period,
    fourier_order: fourier_order,
    prior_scale: ps,
    mode: mode,
    condition_name: condition_name
  }
  self
end
#construct_holiday_dataframe(dates) ⇒ Object
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 |
# File 'lib/prophet/forecaster.rb', line 280
#
# Builds the combined holidays frame from the user-supplied holidays and,
# when a country is set, that country's holidays for the years in +dates+.
# After training, the result is restricted to the holiday names seen during
# training, and training names that are absent are appended as rows that
# carry only the "holiday" column.
#
# @param dates [Object] collection of dates responding to +map+ with
#   elements responding to +year+
# @return [Rover::DataFrame]
def construct_holiday_dataframe(dates)
  all_holidays = Rover::DataFrame.new
  if @holidays
    all_holidays = @holidays.dup
  end
  if @country_holidays
    year_list = dates.map(&:year)
    country_holidays_df = make_holidays_df(year_list, @country_holidays)
    all_holidays = all_holidays.concat(country_holidays_df)
  end
  # Drop future holidays not previously seen in training data
  if @train_holiday_names
    # Remove holiday names that didn't show up in fit
    all_holidays = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]

    # Add holiday names in fit but not in predict with ds as NA
    holidays_to_add = Rover::DataFrame.new({
      "holiday" => @train_holiday_names[!@train_holiday_names.in?(all_holidays["holiday"])]
    })
    all_holidays = all_holidays.concat(holidays_to_add)
  end

  all_holidays
end
#fit(df, **kwargs) ⇒ Object
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 |
# File 'lib/prophet/forecaster.rb', line 631
#
# Fits the model to +df+ and stores the resulting parameters in @params.
#
# Steps, in order: validate the input frame, drop rows with a missing "y",
# prepare the history via +setup_dataframe+, build seasonality features,
# set changepoints, assemble the Stan data and initial values, then either
# short-circuit (constant "y" with linear/flat growth), sample
# (+mcmc_samples+ > 0) or optimize.
#
# @param df [Rover::DataFrame, Daru::DataFrame] must contain "ds" and "y"
# @param kwargs [Hash] forwarded to the Stan backend; a shallow copy is
#   kept in @fit_kwargs
# @return [self]
# @raise [Error] if already fit, or fewer than 2 rows have a non-missing "y"
# @raise [ArgumentError] if +df+ is not a data frame or lacks "ds"/"y"
def fit(df, **kwargs)
  raise Error, "Prophet object can only be fit once" if @history

  if defined?(Daru::DataFrame) && df.is_a?(Daru::DataFrame)
    df = Rover::DataFrame.new(df.to_h)
  end
  raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)

  unless df.include?("ds") && df.include?("y")
    raise ArgumentError, "Data frame must have ds and y columns"
  end

  history = df[!df["y"].missing]
  raise Error, "Data has less than 2 non-nil rows" if history.size < 2

  # History dates come from the full input, including rows with missing y.
  @history_dates = to_datetime(df["ds"]).sort
  history = setup_dataframe(history, initialize_scales: true)
  @history = history
  set_auto_seasonalities
  seasonal_features, prior_scales, component_cols, modes = make_all_seasonality_features(history)
  @train_component_cols = component_cols
  @component_modes = modes
  @fit_kwargs = kwargs.dup # TODO deep dup?

  set_changepoints

  trend_indicator = {"linear" => 0, "logistic" => 1, "flat" => 2}

  dat = {
    "T" => history.shape[0],
    "K" => seasonal_features.shape[1],
    "S" => @changepoints_t.size,
    "y" => history["y_scaled"],
    "t" => history["t"],
    "t_change" => @changepoints_t,
    "X" => seasonal_features,
    "sigmas" => prior_scales,
    "tau" => @changepoint_prior_scale,
    "trend_indicator" => trend_indicator[@growth],
    "s_a" => component_cols["additive_terms"],
    "s_m" => component_cols["multiplicative_terms"]
  }

  if @growth == "linear"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = linear_growth_init(history)
  elsif @growth == "flat"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = flat_growth_init(history)
  else
    dat["cap"] = history["cap_scaled"]
    kinit = logistic_growth_init(history)
  end

  stan_init = {
    "k" => kinit[0],
    "m" => kinit[1],
    "delta" => Numo::DFloat.zeros(@changepoints_t.size),
    "beta" => Numo::DFloat.zeros(seasonal_features.shape[1]),
    "sigma_obs" => 1
  }

  if history["y"].min == history["y"].max && (@growth == "linear" || @growth == "flat")
    # Nothing to fit.
    @params = stan_init
    @params["sigma_obs"] = 1e-9
    # Wrap every initial value in a one-element array.
    @params.each do |par, _|
      @params[par] = Numo::NArray.asarray([@params[par]])
    end
  elsif @mcmc_samples > 0
    @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
  else
    @params = @stan_backend.fit(stan_init, dat, **kwargs)
  end

  # If no changepoints were requested, replace delta with 0s
  if @changepoints.size == 0
    # Fold delta into the base rate k
    # Numo doesn't support -1 with reshape
    negative_one = @params["delta"].shape.inject(&:*)
    @params["k"] = @params["k"] + @params["delta"].reshape(negative_one)
    @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(negative_one, 1)
  end

  self
end
#flat_growth_init(df) ⇒ Object
625 626 627 628 629 |
# File 'lib/prophet/forecaster.rb', line 625
#
# Initial trend parameters for flat growth: zero rate, offset equal to the
# mean of the scaled target.
#
# @param df [Object] frame whose "y_scaled" column responds to +mean+
# @return [Array] +[k, m]+ with +k+ = 0
def flat_growth_init(df)
  [0, df["y_scaled"].mean]
end
#flat_trend(t, m) ⇒ Object
779 780 781 782 |
# File 'lib/prophet/forecaster.rb', line 779
#
# Constant trend: +m+ multiplied by +t.new_ones+.
#
# @param t [Object] time values; only +new_ones+ is called on it
# @param m [Numeric] offset
# @return [Object] result of +m * t.new_ones+
def flat_trend(t, m)
  m * t.new_ones
end
#fourier_series(dates, period, series_order) ⇒ Object
264 265 266 267 268 269 270 271 272 273 |
# File 'lib/prophet/forecaster.rb', line 264
#
# Computes Fourier terms for +dates+: for each order i in 1..series_order
# a sine and a cosine of 2*pi*i*t/period, where t is the date's +to_i+
# value divided by the number of seconds in a day.
#
# @param dates [Object] dates responding to +map+, whose mapped result
#   responds to +to_numo+
# @param period [Numeric] period, in the same (day) units as t
# @param series_order [Integer] number of sine/cosine pairs
# @return [Array] 2 * series_order arrays, ordered sin1, cos1, sin2, cos2, ...
def fourier_series(dates, period, series_order)
  t = dates.map(&:to_i).to_numo / (3600 * 24.0)

  # no need for column_stack
  series_order.times.flat_map do |i|
    [Numo::DFloat::Math.method(:sin), Numo::DFloat::Math.method(:cos)].map do |fun|
      fun.call(2.0 * (i + 1) * Math::PI * t / period)
    end
  end
end
#initialize_scales(initialize_scales, df) ⇒ Object
216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/prophet/forecaster.rb', line 216
#
# Derives the scaling constants (@y_scale, @start, @t_scale) from +df+.
# Does nothing unless the +initialize_scales+ flag is truthy.
#
# For logistic growth with a "floor" column, the floor is subtracted before
# taking the maximum absolute "y", and @logistic_floor is set.
#
# @param initialize_scales [Boolean] whether to (re)compute the scales
# @param df [Object] frame with "y" and "ds" columns (optionally "floor")
# @return [Object, nil] nil when the flag is falsy; otherwise the value of
#   the final assignment (callers should not rely on it)
def initialize_scales(initialize_scales, df)
  return unless initialize_scales

  if @growth == "logistic" && df.include?("floor")
    @logistic_floor = true
    floor = df["floor"]
  else
    floor = 0.0
  end
  @y_scale = (df["y"] - floor).abs.max
  # Avoid a zero scale when every (floor-adjusted) y is 0.
  @y_scale = 1 if @y_scale == 0
  @start = df["ds"].min
  @t_scale = df["ds"].max - @start
end
#linear_growth_init(df) ⇒ Object
588 589 590 591 592 593 594 595 |
# File 'lib/prophet/forecaster.rb', line 588
#
# Initial rate and offset for linear growth: the straight line through the
# first and last rows of +df+ in (t, y_scaled) space.
#
# @param df [Object] frame with "t" and "y_scaled" columns; +size+ is the
#   row count
# @return [Array] +[k, m]+ (slope, intercept)
def linear_growth_init(df)
  i0 = 0
  i1 = df.size - 1
  t = df["t"][i1] - df["t"][i0]
  k = (df["y_scaled"][i1] - df["y_scaled"][i0]) / t
  m = df["y_scaled"][i0] - k * df["t"][i0]
  [k, m]
end
#logistic_growth_init(df) ⇒ Object
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 |
# File 'lib/prophet/forecaster.rb', line 597
#
# Initial rate and offset for logistic growth, derived from the first and
# last rows of +df+.
#
# @param df [Object] frame with "t", "y_scaled" and "cap_scaled" columns;
#   +size+ is the row count
# @return [Array] +[k, m]+ (rate, offset)
def logistic_growth_init(df)
  i0 = 0
  i1 = df.size - 1
  t = df["t"][i1] - df["t"][i0]

  # Force valid values, in case y > cap or y < 0
  c0 = df["cap_scaled"][i0]
  c1 = df["cap_scaled"][i1]
  y0 = [0.01 * c0, [0.99 * c0, df["y_scaled"][i0]].min].max
  y1 = [0.01 * c1, [0.99 * c1, df["y_scaled"][i1]].min].max

  r0 = c0 / y0
  r1 = c1 / y1

  # Nudge r0 when the two ratios are nearly equal, so l0 - l1 is not ~0.
  if (r0 - r1).abs <= 0.01
    r0 = 1.05 * r0
  end

  l0 = Math.log(r0 - 1)
  l1 = Math.log(r1 - 1)

  # Initialize the offset
  m = l0 * t / (l0 - l1)
  # And the rate
  k = (l0 - l1) / t
  [k, m]
end
#make_all_seasonality_features(df) ⇒ Object
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 |
# File 'lib/prophet/forecaster.rb', line 422
#
# Builds the full feature matrix for +df+ from, in order: registered
# seasonalities, holidays, and extra regressors. When none contribute, a
# single all-zero "zeros" column is used.
#
# @param df [Rover::DataFrame] frame with "ds" plus any condition and
#   regressor columns
# @return [Array] +[seasonal_features, prior_scales, component_cols, modes]+
def make_all_seasonality_features(df)
  seasonal_features = []
  prior_scales = []
  modes = {"additive" => [], "multiplicative" => []}

  # Seasonality features
  @seasonalities.each do |name, props|
    features = make_seasonality_features(
      df["ds"],
      props[:period],
      props[:fourier_order],
      name
    )
    if props[:condition_name]
      # Zero the features on rows where the condition does not hold.
      features[!df.where(props[:condition_name])] = 0
    end
    seasonal_features << features
    prior_scales.concat([props[:prior_scale]] * features.shape[1])
    modes[props[:mode]] << name
  end

  # Holiday features
  holidays = construct_holiday_dataframe(df["ds"])
  if holidays.size > 0
    features, holiday_priors, holiday_names = make_holiday_features(df["ds"], holidays)
    seasonal_features << features
    prior_scales.concat(holiday_priors)
    modes[@seasonality_mode].concat(holiday_names)
  end

  # Additional regressors
  @extra_regressors.each do |name, props|
    seasonal_features << Rover::DataFrame.new({name => df[name]})
    prior_scales << props[:prior_scale]
    modes[props[:mode]] << name
  end

  # Dummy to prevent empty X
  if seasonal_features.size == 0
    seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
    prior_scales << 1.0
  end

  seasonal_features = df_concat_axis_one(seasonal_features)

  component_cols, modes = regressor_column_matrix(seasonal_features, modes)

  [seasonal_features, prior_scales, component_cols, modes]
end
#make_future_dataframe(periods:, freq: "D", include_history: true) ⇒ Object
942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 |
# File 'lib/prophet/forecaster.rb', line 942
#
# Builds a frame of +periods+ dates following the last history date,
# optionally preceded by the history dates.
#
# @param periods [Integer] number of future dates
# @param freq [String] "<n>S" (seconds), "H", "D", "W", "MS", "QS" or "YS"
# @param include_history [Boolean] prepend the history dates when true
# @return [Rover::DataFrame] frame with a single "ds" column
# @raise [Error] if no history dates are available (model not fit)
# @raise [ArgumentError] for an unrecognized +freq+
def make_future_dataframe(periods:, freq: "D", include_history: true)
  raise Error, "Model has not been fit" unless @history_dates

  last_date = @history_dates.max
  # Fixed-width steps: last_date plus 0..periods multiples of +seconds+.
  fixed_step = ->(seconds) { (periods + 1).times.map { |i| last_date + i * seconds } }

  # TODO add more freq
  # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
  case freq
  when /\A\d+S\z/
    dates = fixed_step.call(freq.to_i)
  when "H"
    dates = fixed_step.call(3600)
  when "D"
    # days have constant length with UTC (no DST or leap seconds)
    dates = fixed_step.call(24 * 3600)
  when "W"
    dates = fixed_step.call(7 * 24 * 3600)
  when "MS"
    dates = [last_date]
    # TODO reset day from last date, but keep time
    periods.times do
      dates << dates.last.to_datetime.next_month.to_time.utc
    end
  when "QS"
    dates = [last_date]
    # TODO reset day and month from last date, but keep time
    periods.times do
      dates << dates.last.to_datetime.next_month.next_month.next_month.to_time.utc
    end
  when "YS"
    dates = [last_date]
    # TODO reset day and month from last date, but keep time
    periods.times do
      dates << dates.last.to_datetime.next_year.to_time.utc
    end
  else
    raise ArgumentError, "Unknown freq: #{freq}"
  end

  # Every branch starts at last_date itself; keep only strictly later dates.
  dates = dates.select { |d| d > last_date }.last(periods)
  dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
  Rover::DataFrame.new({"ds" => dates})
end
#make_holiday_features(dates, holidays) ⇒ Object
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
# File 'lib/prophet/forecaster.rb', line 305
#
# Builds one indicator column per (holiday, day-offset) pair, named
# "<holiday>_delim_<+|-><offset>", with 1.0 on rows of +dates+ that fall on
# the holiday date shifted by that offset. Offsets run from the row's
# "lower_window" to its "upper_window".
#
# On the first call (when @train_holiday_names is nil) the holiday names
# are remembered in @train_holiday_names.
#
# @param dates [Object] dates for the rows of the feature matrix
# @param holidays [Rover::DataFrame] rows with "ds" and "holiday", and
#   optionally "lower_window" / "upper_window"
# @return [Array] +[holiday_features, prior_scale_list, holiday_names]+
# @raise [ArgumentError] if the holidays prior scale is not positive or is
#   inconsistent for a holiday
def make_holiday_features(dates, holidays)
  # Reading a missing key materializes an all-zero column for it.
  expanded_holidays = Hash.new { |hash, key| hash[key] = Numo::DFloat.zeros(dates.size) }
  prior_scales = {}
  # Makes an index so we can perform `get_loc` below.
  # Strip to just dates.
  row_index = dates.map(&:to_date).to_a

  holidays.each_row do |row|
    dt = row["ds"]
    lw = nil
    uw = nil
    begin
      lw = row["lower_window"].to_i
      uw = row["upper_window"].to_i
    rescue IndexError
      lw = 0
      uw = 0
    end
    ps = @holidays_prior_scale
    if prior_scales[row["holiday"]] && prior_scales[row["holiday"]] != ps
      raise ArgumentError, "Holiday #{row["holiday"].inspect} does not have consistent prior scale specification."
    end
    raise ArgumentError, "Prior scale must be > 0" if ps <= 0
    prior_scales[row["holiday"]] = ps

    lw.upto(uw).each do |offset|
      occurrence = dt ? dt + offset : nil
      loc = occurrence ? row_index.index(occurrence) : nil
      key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
      if loc
        expanded_holidays[key][loc] = 1.0
      else
        expanded_holidays[key] # Access key to generate value
      end
    end
  end
  holiday_features = Rover::DataFrame.new(expanded_holidays)
  # Make sure column order is consistent
  holiday_features = holiday_features[holiday_features.vector_names.sort]
  prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
  holiday_names = prior_scales.keys
  # Store holiday names used in fit
  if @train_holiday_names.nil?
    @train_holiday_names = Rover::Vector.new(holiday_names)
  end
  [holiday_features, prior_scale_list, holiday_names]
end
#make_seasonality_features(dates, period, series_order, prefix) ⇒ Object
275 276 277 278 |
# File 'lib/prophet/forecaster.rb', line 275
#
# Wraps the Fourier terms for +dates+ in a data frame whose columns are
# named "<prefix>_delim_1", "<prefix>_delim_2", ...
#
# @param dates [Object] passed through to +fourier_series+
# @param period [Numeric] passed through to +fourier_series+
# @param series_order [Integer] passed through to +fourier_series+
# @param prefix [String] column-name prefix
# @return [Rover::DataFrame]
def make_seasonality_features(dates, period, series_order, prefix)
  features = fourier_series(dates, period, series_order)
  Rover::DataFrame.new(features.map.with_index { |v, i| ["#{prefix}_delim_#{i + 1}", v] }.to_h)
end
#parse_seasonality_args(name, arg, auto_disable, default_order) ⇒ Object
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 |
# File 'lib/prophet/forecaster.rb', line 519
#
# Resolves a seasonality setting to a Fourier order.
#
# @param name [String] seasonality name ("yearly", "weekly", ...)
# @param arg ["auto", Boolean, #to_i] the user's setting
# @param auto_disable [Boolean] whether "auto" should disable this seasonality
# @param default_order [Integer] order used when enabled without an explicit value
# @return [Integer] 0 when disabled; otherwise the order to use
def parse_seasonality_args(name, arg, auto_disable, default_order)
  case arg
  when "auto"
    fourier_order = 0
    if @seasonalities.include?(name)
      logger.info "Found custom seasonality named #{name.inspect}, disabling built-in #{name.inspect} seasonality."
    elsif auto_disable
      logger.info "Disabling #{name} seasonality. Run prophet with #{name}_seasonality: true to override this."
    else
      fourier_order = default_order
    end
  when true
    fourier_order = default_order
  when false
    fourier_order = 0
  else
    # Anything else is treated as an explicit order.
    fourier_order = arg.to_i
  end
  fourier_order
end
#piecewise_linear(t, deltas, k, m, changepoint_ts) ⇒ Object
746 747 748 749 750 751 752 753 754 755 756 757 758 |
# File 'lib/prophet/forecaster.rb', line 746
#
# Evaluates a piecewise-linear trend at times +t+: at every changepoint the
# slope changes by the matching delta and the intercept changes by
# -changepoint * delta.
#
# @param t [Object] time values (responds to +new_ones+ and supports
#   comparison and mask-based indexing)
# @param deltas [Object] slope change per changepoint
# @param k [Numeric] base slope
# @param m [Numeric] base intercept
# @param changepoint_ts [Object] changepoint times
# @return [Object] trend value for each element of +t+
def piecewise_linear(t, deltas, k, m, changepoint_ts)
  # Intercept changes
  gammas = -changepoint_ts * deltas
  # Get cumulative slope and intercept at each t
  k_t = t.new_ones * k
  m_t = t.new_ones * m
  changepoint_ts.each_with_index do |t_s, s|
    indx = t >= t_s
    k_t[indx] += deltas[s]
    m_t[indx] += gammas[s]
  end
  k_t * t + m_t
end
#piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) ⇒ Object
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 |
# File 'lib/prophet/forecaster.rb', line 760
#
# Evaluates a piecewise-logistic trend at times +t+:
# cap / (1 + exp(-k_t * (t - m_t))), where k_t and m_t accumulate the rate
# and offset adjustments of every changepoint at or before each time.
#
# Each offset adjustment depends on the sum of the previous ones, so
# changepoints are processed in the order given.
#
# @param t [Object] time values
# @param cap [Object] capacities; converted with +to_numo+
# @param deltas [Object] rate change per changepoint
# @param k [Numeric] base rate
# @param m [Numeric] base offset
# @param changepoint_ts [Object] changepoint times
# @return [Object] trend value for each element of +t+
def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
  k_1d = Numo::NArray.asarray(k)
  k_1d = k_1d.reshape(1) if k_1d.ndim < 1
  # Rate in effect before the first changepoint and after each one.
  k_cum = k_1d.concatenate(deltas.cumsum + k)
  gammas = Numo::DFloat.zeros(changepoint_ts.size)
  changepoint_ts.each_with_index do |t_s, i|
    gammas[i] = (t_s - m - gammas.sum) * (1 - k_cum[i] / k_cum[i + 1])
  end
  # Get cumulative rate and offset at each t
  k_t = t.new_ones * k
  m_t = t.new_ones * m
  changepoint_ts.each_with_index do |t_s, s|
    indx = t >= t_s
    k_t[indx] += deltas[s]
    m_t[indx] += gammas[s]
  end
  cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t)))
end
#predict(df = nil) ⇒ Object
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 |
# File 'lib/prophet/forecaster.rb', line 718
#
# Produces a forecast frame containing "ds", "trend", optional "cap" /
# "floor", uncertainty intervals (when enabled), the seasonal components
# and "yhat" = trend * (1 + multiplicative_terms) + additive_terms.
#
# @param df [Rover::DataFrame, nil] dates to predict for; the training
#   history is used when nil
# @return [Rover::DataFrame]
# @raise [Error] if the model has not been fit
# @raise [ArgumentError] if +df+ has no rows
def predict(df = nil)
  raise Error, "Model has not been fit." unless @history

  if df.nil?
    df = @history.dup
  else
    raise ArgumentError, "Dataframe has no rows." if df.shape[0] == 0
    df = setup_dataframe(df.dup)
  end

  df["trend"] = predict_trend(df)
  seasonal_components = predict_seasonal_components(df)
  # NOTE(review): 0 is truthy in Ruby, so only nil/false skips the intervals.
  intervals = @uncertainty_samples ? predict_uncertainty(df) : nil

  # Drop columns except ds, cap, floor, and trend
  cols = ["ds", "trend"]
  cols << "cap" if df.include?("cap")
  cols << "floor" if @logistic_floor
  # Add in forecast components
  df2 = df_concat_axis_one([df[cols], intervals, seasonal_components])
  df2["yhat"] = df2["trend"] * (df2["multiplicative_terms"] + 1) + df2["additive_terms"]
  df2
end
#predict_seasonal_components(df) ⇒ Object
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 |
# File 'lib/prophet/forecaster.rb', line 802
#
# Computes every seasonal component for +df+. For each component the
# feature matrix is multiplied by the masked "beta" parameters; additive
# components are rescaled by @y_scale. The row-wise mean is reported under
# the component name and, when @uncertainty_samples is truthy, percentile
# bounds under "<component>_lower" / "<component>_upper".
#
# @param df [Rover::DataFrame] prepared frame
# @return [Rover::DataFrame]
def predict_seasonal_components(df)
  seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)
  if @uncertainty_samples
    lower_p = 100 * (1.0 - @interval_width) / 2
    upper_p = 100 * (1.0 + @interval_width) / 2
  end

  x = seasonal_features.to_numo
  data = {}
  component_cols.vector_names.each do |component|
    # Zero out the coefficients that do not belong to this component.
    beta_c = @params["beta"] * component_cols[component].to_numo

    comp = x.dot(beta_c.transpose)
    if @component_modes["additive"].include?(component)
      comp *= @y_scale
    end
    data[component] = comp.mean(axis: 1, nan: true)
    if @uncertainty_samples
      data["#{component}_lower"] = comp.percentile(lower_p, axis: 1)
      data["#{component}_upper"] = comp.percentile(upper_p, axis: 1)
    end
  end
  Rover::DataFrame.new(data)
end
#predict_trend(df) ⇒ Object
784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 |
# File 'lib/prophet/forecaster.rb', line 784
#
# Evaluates the fitted trend for +df+ using the mean of the "k", "m" and
# "delta" parameters, then rescales by @y_scale and adds the "floor" column.
#
# @param df [Rover::DataFrame] prepared frame with "t" and "floor" (and
#   "cap_scaled" for logistic growth)
# @return [Object] trend values on the original scale
def predict_trend(df)
  k = @params["k"].mean(nan: true)
  m = @params["m"].mean(nan: true)
  deltas = @params["delta"].mean(axis: 0, nan: true)

  t = Numo::NArray.asarray(df["t"].to_a)
  if @growth == "linear"
    trend = piecewise_linear(t, deltas, k, m, @changepoints_t)
  elsif @growth == "logistic"
    cap = df["cap_scaled"]
    trend = piecewise_logistic(t, cap, deltas, k, m, @changepoints_t)
  elsif @growth == "flat"
    trend = flat_trend(t, m)
  end

  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end
#predict_uncertainty(df) ⇒ Object
866 867 868 869 870 871 872 873 874 875 876 877 878 879 |
# File 'lib/prophet/forecaster.rb', line 866
#
# Computes lower/upper bounds for "yhat" and "trend" as percentiles of the
# posterior predictive samples. The percentiles are centred so that the
# band covers @interval_width of the samples.
#
# @param df [Rover::DataFrame] prepared frame
# @return [Rover::DataFrame] columns "yhat_lower", "yhat_upper",
#   "trend_lower", "trend_upper"
def predict_uncertainty(df)
  sim_values = sample_posterior_predictive(df)

  lower_p = 100 * (1.0 - @interval_width) / 2
  upper_p = 100 * (1.0 + @interval_width) / 2

  series = {}
  ["yhat", "trend"].each do |key|
    series["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
    series["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
  end

  Rover::DataFrame.new(series)
end
#predictive_samples(df) ⇒ Object
860 861 862 863 864 |
# File 'lib/prophet/forecaster.rb', line 860
#
# Draws posterior predictive samples for +df+. The input is duplicated
# before being prepared, so the caller's frame is not modified here.
#
# @param df [Rover::DataFrame] dates to sample for
# @return [Hash] the result of +sample_posterior_predictive+
def predictive_samples(df)
  sample_posterior_predictive(setup_dataframe(df.dup))
end
#regressor_column_matrix(seasonal_features, modes) ⇒ Object
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 |
# File 'lib/prophet/forecaster.rb', line 472
#
# Builds the binary matrix that maps feature columns to components
# (seasonalities, holidays, regressors and their roll-up groups), and
# extends +modes+ with the roll-up component names.
#
# @param seasonal_features [Rover::DataFrame] feature matrix whose column
#   names are "<component>_delim_<suffix>" or a plain component name
# @param modes [Hash] "additive" / "multiplicative" => component names;
#   mutated in place
# @return [Array] +[component_cols, modes]+
def regressor_column_matrix(seasonal_features, modes)
  components = Rover::DataFrame.new(
    "col" => seasonal_features.shape[1].times.to_a,
    "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
  )

  # Add total for holidays
  if @train_holiday_names
    components = add_group_component(components, "holidays", @train_holiday_names.uniq)
  end
  # Add totals additive and multiplicative components, and regressors
  ["additive", "multiplicative"].each do |mode|
    components = add_group_component(components, "#{mode}_terms", modes[mode])
    regressors_by_mode = @extra_regressors.select { |_name, props| props[:mode] == mode }.keys
    components = add_group_component(components, "extra_regressors_#{mode}", regressors_by_mode)

    # Add combination components to modes
    modes[mode] << "#{mode}_terms"
    modes[mode] << "extra_regressors_#{mode}"
  end
  # After all of the additive/multiplicative groups have been added,
  # register the "holidays" group under the model-wide seasonality mode.
  modes[@seasonality_mode] << "holidays"

  # Convert to a binary matrix
  component_cols = components["col"].crosstab(components["component"])
  component_cols["col"] = component_cols.delete("_")

  # Add columns for additive and multiplicative terms, if missing
  ["additive_terms", "multiplicative_terms"].each do |name|
    component_cols[name] = 0 unless component_cols.include?(name)
  end

  # TODO validation

  [component_cols, modes]
end
#sample_model(df, seasonal_features, iteration, s_a, s_m) ⇒ Object
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 |
# File 'lib/prophet/forecaster.rb', line 881
#
# Simulates one draw of the model for the parameter set at +iteration+:
# a sampled trend, the additive and multiplicative seasonal effects, and
# normally distributed observation noise.
#
# @param df [Rover::DataFrame] prepared frame
# @param seasonal_features [Object] feature matrix (responds to +dot+)
# @param iteration [Integer] index into the sampled parameters
# @param s_a [Object] additive-terms column mask
# @param s_m [Object] multiplicative-terms column mask
# @return [Hash] "yhat" and "trend" arrays
def sample_model(df, seasonal_features, iteration, s_a, s_m)
  trend = sample_predictive_trend(df, iteration)

  beta = @params["beta"][iteration, true]
  xb_a = seasonal_features.dot(beta * s_a) * @y_scale
  xb_m = seasonal_features.dot(beta * s_m)

  sigma = @params["sigma_obs"][iteration]
  noise = Numo::DFloat.new(*df.shape[0]).rand_norm(0, sigma) * @y_scale

  # skip data frame for performance
  {
    "yhat" => trend * (1 + xb_m) + xb_a + noise,
    "trend" => trend
  }
end
#sample_posterior_predictive(df) ⇒ Object
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 |
# File 'lib/prophet/forecaster.rb', line 827

# Draws simulated "yhat" and "trend" series for every parameter iteration.
#
# The number of draws per iteration is ceil(@uncertainty_samples / iterations),
# but never less than one. Each draw comes from sample_model; the draws are
# then stacked column-wise with Numo::NArray.column_stack.
#
# Returns a Hash with "yhat" and "trend" entries holding the stacked draws.
def sample_posterior_predictive(df)
  iteration_count = @params["k"].shape[0]
  draws_per_iteration = [1, (@uncertainty_samples / iteration_count.to_f).ceil].max

  # Generate seasonality features once so we can re-use them.
  features, _, component_cols, _ = make_all_seasonality_features(df)

  # convert to Numo for performance
  feature_matrix = features.to_numo
  additive_mask = component_cols["additive_terms"].to_numo
  multiplicative_mask = component_cols["multiplicative_terms"].to_numo

  yhat_draws = []
  trend_draws = []
  iteration_count.times do |iteration|
    draws_per_iteration.times do
      draw = sample_model(df, feature_matrix, iteration, additive_mask, multiplicative_mask)
      yhat_draws << draw["yhat"]
      trend_draws << draw["trend"]
    end
  end

  {
    "yhat" => Numo::NArray.column_stack(yhat_draws),
    "trend" => Numo::NArray.column_stack(trend_draws)
  }
end
#sample_predictive_trend(df, iteration) ⇒ Object
898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 |
# File 'lib/prophet/forecaster.rb', line 898

# Simulates one trend path for the rows of `df` using parameter draw `iteration`.
#
# Reads the rate (k), offset (m) and changepoint deltas for the iteration from
# @params. For times beyond t = 1, additional changepoints are simulated: their
# count is drawn with `poisson`, their positions uniformly on (1, max t], and
# their deltas with `laplace`, scaled by the mean absolute value of the fitted
# deltas. The historical and simulated changepoints are then passed to the
# trend function selected by @growth.
#
# Returns the trend multiplied by @y_scale plus the "floor" column of `df`.
def sample_predictive_trend(df, iteration)
  k = @params["k"][iteration]
  m = @params["m"][iteration]
  deltas = @params["delta"][iteration, true]

  t = Numo::NArray.asarray(df["t"].to_a)
  upper_t = t.max

  # New changepoints from a Poisson process with rate S on [1, T]
  if upper_t > 1
    s = @changepoints_t.size
    n_changes = poisson(s * (upper_t - 1))
  else
    n_changes = 0
  end

  if n_changes > 0
    changepoint_ts_new = 1 + Numo::DFloat.new(n_changes).rand * (upper_t - 1)
    # Numo's #sort returns a sorted copy instead of sorting in place, so the
    # result has to be assigned; otherwise the simulated changepoints stay in
    # random order when handed to the trend functions.
    changepoint_ts_new = changepoint_ts_new.sort
  else
    changepoint_ts_new = []
  end

  # Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
  lambda_ = deltas.abs.mean + 1e-8

  # Sample deltas
  deltas_new = laplace(0, lambda_, n_changes)

  # Prepend the times and deltas from the history
  changepoint_ts = @changepoints_t.concatenate(changepoint_ts_new)
  deltas = deltas.concatenate(deltas_new)

  if @growth == "linear"
    trend = piecewise_linear(t, deltas, k, m, changepoint_ts)
  elsif @growth == "logistic"
    cap = df["cap_scaled"]
    trend = piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
  elsif @growth == "flat"
    trend = flat_trend(t, m)
  end

  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end
#set_auto_seasonalities ⇒ Object
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 |
# File 'lib/prophet/forecaster.rb', line 540

# Registers the built-in yearly, weekly and daily seasonalities.
#
# For each one, an "auto-disable" flag is derived from the time span of
# @history["ds"] and the smallest difference between consecutive timestamps
# (both compared against multiples of 86400, i.e. seconds per day). The flag,
# the user setting and a default Fourier order are handed to
# parse_seasonality_args; a seasonality is stored in @seasonalities only when
# the resulting Fourier order is positive.
def set_auto_seasonalities
  timestamps = @history["ds"]
  span = timestamps.max - timestamps.min
  min_dt = timestamps.diff.min
  day = 86400

  # Resolves the Fourier order for one seasonality and stores it when enabled.
  register = lambda do |name, setting, auto_disable, default_order, period|
    fourier_order = parse_seasonality_args(name, setting, auto_disable, default_order)
    next unless fourier_order > 0

    @seasonalities[name] = {
      period: period,
      fourier_order: fourier_order,
      prior_scale: @seasonality_prior_scale,
      mode: @seasonality_mode,
      condition_name: nil
    }
  end

  # Yearly seasonality
  register.call("yearly", @yearly_seasonality, span < 370 * day, 10, 365.25)

  # Weekly seasonality
  register.call("weekly", @weekly_seasonality, span < 14 * day || min_dt >= 7 * day, 3, 7)

  # Daily seasonality
  register.call("daily", @daily_seasonality, span < 2 * day || min_dt >= 1 * day, 4, 1)
end
#set_changepoints ⇒ Object
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 |
# File 'lib/prophet/forecaster.rb', line 231

# Determines the trend changepoints and their scaled positions.
#
# When @changepoints is already set, it is only checked against the range of
# @history["ds"]. Otherwise @n_changepoints evenly spaced rows are picked from
# the first @changepoint_range share of the history (the first picked row is
# dropped via tail(-1)); @n_changepoints is lowered, with an info log, when it
# does not fit. Finally @changepoints_t is computed from the sorted
# changepoints relative to @start and @t_scale, or set to a single 0 when
# there are no changepoints.
#
# Raises ArgumentError when a supplied changepoint lies outside the history.
def set_changepoints
  if @changepoints
    if @changepoints.size > 0
      history_ds = @history["ds"]
      before_start = @changepoints.min < history_ds.min
      after_end = @changepoints.max > history_ds.max
      raise ArgumentError, "Changepoints must fall within training data." if before_start || after_end
    end
  else
    usable_rows = (@history.shape[0] * @changepoint_range).floor
    if @n_changepoints + 1 > usable_rows
      @n_changepoints = usable_rows - 1
      logger.info "n_changepoints greater than number of observations. Using #{@n_changepoints}"
    end

    @changepoints =
      if @n_changepoints > 0
        spacing = (usable_rows - 1) / @n_changepoints.to_f
        row_indexes = Array.new(@n_changepoints + 1) { |i| (i * spacing).round }
        Rover::Vector.new(@history["ds"].to_a.values_at(*row_indexes)).tail(-1)
      else
        []
      end
  end

  @changepoints_t =
    if @changepoints.size > 0
      sorted_seconds = @changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat)
      (sorted_seconds - @start.to_i) / @t_scale.to_f
    else
      Numo::NArray.asarray([0])
    end
end
#setup_dataframe(df, initialize_scales: false) ⇒ Object
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/prophet/forecaster.rb', line 150

# Validates `df` and adds the derived columns used for fitting and prediction.
#
# Steps, in order:
# * "y" (when present) is converted to floats and must be finite.
# * "ds" is converted with to_datetime and must not contain nil.
# * Every extra regressor column must exist, must not contain missing values,
#   and is converted to floats.
# * Every seasonality condition column must exist and hold only true/false.
# * Rows are sorted by "ds" and initialize_scales is invoked.
# * "floor", "cap_scaled" (logistic growth only), "t" and "y_scaled" are
#   derived, and regressors are standardized with their stored :mu / :std.
#
# Note that the column conversions before the sort assign into the `df`
# argument itself.
#
# Returns the prepared data frame.
# Raises ArgumentError when any of the validations above fails.
def setup_dataframe(df, initialize_scales: false)
  if df.include?("y")
    df["y"] = df["y"].map(&:to_f)
    raise ArgumentError, "Found infinity in column y." unless df["y"].all?(&:finite?)
  end
  # TODO support integers

  df["ds"] = to_datetime(df["ds"])

  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)

  @extra_regressors.each_key do |name|
    unless df.include?(name)
      raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
    end
    # Look for missing values BEFORE converting to floats: once every element
    # has been passed through to_f there is no nil left to find (nil.to_f is
    # 0.0), so a check placed after the conversion can never fire.
    if df[name].any? { |v| v.nil? || (v.respond_to?(:nan?) && v.nan?) }
      raise ArgumentError, "Found NaN in column #{name.inspect}"
    end
    df[name] = df[name].map(&:to_f)
  end

  @seasonalities.each_value do |props|
    condition_name = props[:condition_name]
    if condition_name
      unless df.include?(condition_name)
        raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
      end
      if df.where(!df[condition_name].in([true, false])).any?
        raise ArgumentError, "Found non-boolean in column #{condition_name.inspect}"
      end
    end
  end

  df = df.sort_by { |r| r["ds"] }

  # The keyword argument shadows the method name; the call with arguments
  # below still dispatches to the initialize_scales method.
  initialize_scales(initialize_scales, df)

  if @logistic_floor
    unless df.include?("floor")
      raise ArgumentError, "Expected column \"floor\"."
    end
  else
    df["floor"] = 0
  end

  if @growth == "logistic"
    unless df.include?("cap")
      raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
    end
    if df[df["cap"] <= df["floor"]].size > 0
      raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
    end
    df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
  end

  df["t"] = (df["ds"] - @start) / @t_scale.to_f
  if df.include?("y")
    df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
  end

  @extra_regressors.each do |name, props|
    df[name] = (df[name] - props[:mu]) / props[:std].to_f
  end

  df
end
#to_json ⇒ Object
988 989 990 991 992 |
# File 'lib/prophet/forecaster.rb', line 988

# Serializes the result of as_json to a JSON string.
#
# Optional arguments are accepted because Ruby's JSON generator invokes
# `to_json(state)` on objects it encounters inside arrays and hashes; a
# zero-arity definition would raise ArgumentError in that situation. The
# first argument (generator state or options, if any) is forwarded to
# JSON.generate. Calling it without arguments behaves as before.
#
# Returns a String.
def to_json(*args)
  # Loaded lazily so that json is only required when serialization is used.
  require "json"

  JSON.generate(as_json, args.first)
end
#validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/prophet/forecaster.rb', line 119

# Checks that `name` may be used for a new seasonality, holiday or regressor.
#
# A name is rejected when it contains "_delim_", when it is one of the
# reserved output/input column names (including their "_lower" / "_upper"
# variants), or when it collides with an existing holiday, seasonality or
# regressor. The three check_* keywords switch the collision checks off
# individually.
#
# Returns nil when the name is acceptable.
# Raises ArgumentError otherwise.
def validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true)
  raise ArgumentError, "Name cannot contain \"_delim_\"" if name.include?("_delim_")

  component_names = %w[
    trend additive_terms daily weekly yearly
    holidays zeros extra_regressors_additive yhat
    extra_regressors_multiplicative multiplicative_terms
  ]
  lower_bounds = component_names.map { |component| "#{component}_lower" }
  upper_bounds = component_names.map { |component| "#{component}_upper" }
  input_columns = %w[ds y cap floor y_scaled cap_scaled]
  reserved = component_names + lower_bounds + upper_bounds + input_columns

  raise ArgumentError, "Name #{name.inspect} is reserved." if reserved.include?(name)

  if check_holidays
    if @holidays && @holidays["holiday"].uniq.include?(name)
      raise ArgumentError, "Name #{name.inspect} already used for a holiday."
    end
    if @country_holidays && get_holiday_names(@country_holidays).include?(name)
      raise ArgumentError, "Name #{name.inspect} is a holiday name in #{@country_holidays.inspect}."
    end
  end

  if check_seasonalities && @seasonalities[name]
    raise ArgumentError, "Name #{name.inspect} already used for a seasonality."
  end

  raise ArgumentError, "Name #{name.inspect} already used for an added regressor." if check_regressors && @extra_regressors[name]
end
#validate_inputs ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/prophet/forecaster.rb', line 84

# Validates the constructor settings held in instance variables.
#
# Checks @growth, @changepoint_range, the structure of @holidays (when set)
# and @seasonality_mode. As part of the holidays check, the "ds" column of
# @holidays is replaced by its to_datetime conversion and every distinct
# holiday name is run through validate_column_name.
#
# Returns nil when everything is valid.
# Raises ArgumentError on the first violation found.
def validate_inputs
  unless ["linear", "logistic", "flat"].include?(@growth)
    raise ArgumentError, "Parameter \"growth\" should be \"linear\", \"logistic\", or \"flat\"."
  end

  range_invalid = @changepoint_range < 0 || @changepoint_range > 1
  raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]" if range_invalid

  if @holidays
    well_formed = @holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
    raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns." unless well_formed

    @holidays["ds"] = to_datetime(@holidays["ds"])

    lower_given = @holidays.include?("lower_window")
    upper_given = @holidays.include?("upper_window")
    # xor: exactly one of the two window columns is present
    if lower_given ^ upper_given
      raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
    end

    if lower_given
      raise ArgumentError, "Holiday lower_window should be <= 0" if @holidays["lower_window"].max > 0
      raise ArgumentError, "Holiday upper_window should be >= 0" if @holidays["upper_window"].min < 0
    end

    @holidays["holiday"].uniq.each do |holiday_name|
      validate_column_name(holiday_name, check_holidays: false)
    end
  end

  unless ["additive", "multiplicative"].include?(@seasonality_mode)
    raise ArgumentError, "seasonality_mode must be \"additive\" or \"multiplicative\""
  end
end