Module: JsonScanner

Defined in:
lib/json_scanner.rb,
lib/json_scanner/version.rb,
ext/json_scanner/json_scanner.c

Overview

Extract values from JSON without full parsing. This gem uses the yajl library to scan a JSON string and allows you to parse pieces of it.

Defined Under Namespace

Classes: Error, Options, ParseError, Selector

Constant Summary collapse

VERSION =
"1.0.0"
ANY_INDEX =
rb_range_new(INT2FIX(0), INT2FIX(-1), false)
ANY_KEY =
rb_range_new(any_key_sym, any_key_sym, false)

Class Method Summary collapse

Class Method Details

.parse(json_str, config_or_path_ary, **opts) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/json_scanner.rb', line 23

# Scans +json_str+ for the given paths and returns the post-processed matches.
#
# @param json_str [String] JSON text, forwarded unchanged to +scan+
# @param config_or_path_ary [Object] selector or path array, forwarded unchanged to +scan+
# @param opts [Hash] keyword options; every key must be listed in ALLOWED_OPTS
# @return [Object] the whole +process_results+ output when +allow_multiple_values+
#   is truthy, otherwise only its first element
# @raise [ArgumentError] when a keyword outside ALLOWED_OPTS is passed
def self.parse(json_str, config_or_path_ary, **opts)
  unknown_keys = opts.keys - ALLOWED_OPTS
  unless unknown_keys.empty?
    plural_suffix = unknown_keys.size > 1 ? "s" : ""
    listed_keys = unknown_keys.map(&:inspect).join(", ")
    raise ArgumentError, "unknown keyword#{plural_suffix}: #{listed_keys}"
  end

  # +symbolize_names+ is forwarded to the scanner under the name +symbolize_path_keys+.
  opts[:symbolize_path_keys] = opts.delete(:symbolize_names) if opts.key?(:symbolize_names)

  # with_path and with_roots_info are set here (via SCAN_OPTIONS / SCAN_OPTS).
  scanned =
    if opts.empty?
      scan(json_str, config_or_path_ary, SCAN_OPTIONS)
    else
      scan(json_str, config_or_path_ary, **opts, **SCAN_OPTS)
    end
  results, roots = scanned

  processed = process_results(json_str, results, roots, opts[:symbolize_path_keys])
  return processed if opts[:allow_multiple_values]

  processed.first
end

.scan(*args) ⇒ Object

Recognized yajl parser options: allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values



873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
# File 'ext/json_scanner/json_scanner.c', line 873

// Ruby-callable entry point: runs the yajl parser over a JSON string and
// collects matches for the requested paths.
//
// Ruby-level arguments (see rb_scan_args below, 2 required + 1 optional):
//   json_str   - must be a String (checked with rb_check_type).
//   path_ary   - either a Selector instance (its scan_ctx is borrowed) or a
//                value handed to scan_ctx_init to build a temporary scan_ctx.
//   rb_options - optional; a Hash, nil, or an Options instance.
//
// Returns an Array holding one sub-array per path in the context; when the
// with_roots_info option is set, returns [that_array, roots_info_array].
//
// Raises TypeError when rb_options is of an unsupported type, re-raises the
// exception object returned by scan_ctx_init, and raises ParseError (with the
// rb_iv_bytes_consumed ivar set) when yajl reports a non-ok status.
static VALUE scan(int argc, VALUE *argv, VALUE self)
{
  VALUE json_str, path_ary, rb_options;
  scan_options options;

  char *json_text;
  size_t json_text_len;
  yajl_handle handle;
  yajl_status stat;
  scan_ctx *ctx;
  // Whether ctx was allocated here (and must be released here) or borrowed from a Selector.
  int free_ctx = true;
  // roots_info_result stays Qundef unless the with_roots_info option is enabled.
  VALUE err_msg = Qnil, bytes_consumed = Qnil, result, roots_info_result = Qundef;
  // Turned out callbacks can't raise exceptions
  // VALUE callback_err;
  rb_scan_args(argc, argv, "21", &json_str, &path_ary, &rb_options);
  rb_check_type(json_str, T_STRING);
  // rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
  // Resolve options: a Hash/nil is parsed on the spot, an Options object is copied by value,
  // anything else is a TypeError. Nothing has been allocated yet, so raising here is safe.
  switch (TYPE(rb_options))
  {
  case T_HASH:
  case T_NIL:
    scan_options_init(&options, rb_options);
    break;
  case T_DATA:
    if (rb_obj_is_kind_of(rb_options, rb_cJsonScannerOptions))
    {
      scan_options *ptr;
      TypedData_Get_Struct(rb_options, scan_options, &options_type, ptr);
      options = *ptr;
    }
    else
    {
      rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
    }
    break;
  default:
    rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
    break;
  }
  if (SCAN_OPTION(&options, with_roots_info))
    roots_info_result = rb_ary_new();
  json_text = RSTRING_PTR(json_str);
  // NOTE(review): RSTRING_LENINT is chosen when long is wider than size_t,
  // presumably to avoid a silently truncating conversion - confirm.
#if LONG_MAX > SIZE_MAX
  json_text_len = RSTRING_LENINT(json_str);
#else
  json_text_len = RSTRING_LEN(json_str);
#endif
  if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerSelector))
  {
    // Borrow the context wrapped by the Selector; it must not be freed here.
    free_ctx = false;
    TypedData_Get_Struct(path_ary, scan_ctx, &selector_type, ctx);
  }
  else
  {
    // Build a temporary context from path_ary. scan_ctx_init signals failure by
    // returning something other than Qundef, which is then raised as an exception
    // after the freshly allocated ctx has been released.
    VALUE scan_ctx_init_err;
    ctx = ruby_xmalloc(sizeof(scan_ctx));
    scan_ctx_init_err = scan_ctx_init(ctx, path_ary, Qundef);
    if (scan_ctx_init_err != Qundef)
    {
      ruby_xfree(ctx);
      rb_exc_raise(scan_ctx_init_err);
    }
  }
  // Need to keep a ref to result array on the stack to prevent it from being GC-ed
  // One empty sub-array is pre-created per path (ctx->paths_len of them).
  result = rb_ary_new_capa(ctx->paths_len);
  for (int i = 0; i < ctx->paths_len; i++)
  {
    rb_ary_push(result, rb_ary_new());
  }
  scan_ctx_reset(ctx, result, roots_info_result, SCAN_OPTION(&options, with_path), SCAN_OPTION(&options, symbolize_path_keys));
  // scan_ctx_debug(ctx);

  // ctx is passed to yajl as the callback context pointer.
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
  // Forward only the yajl options that were explicitly set.
  if (SCAN_OPTION_IS_SET(&options, allow_comments))
    yajl_config(handle, yajl_allow_comments, SCAN_OPTION(&options, allow_comments));
  if (SCAN_OPTION_IS_SET(&options, dont_validate_strings))
    yajl_config(handle, yajl_dont_validate_strings, SCAN_OPTION(&options, dont_validate_strings));
  if (SCAN_OPTION_IS_SET(&options, allow_trailing_garbage))
    yajl_config(handle, yajl_allow_trailing_garbage, SCAN_OPTION(&options, allow_trailing_garbage));
  if (SCAN_OPTION_IS_SET(&options, allow_multiple_values))
    yajl_config(handle, yajl_allow_multiple_values, SCAN_OPTION(&options, allow_multiple_values));
  if (SCAN_OPTION_IS_SET(&options, allow_partial_values))
    yajl_config(handle, yajl_allow_partial_values, SCAN_OPTION(&options, allow_partial_values));
  ctx->handle = handle;
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
  if (stat == yajl_status_ok)
  {
    // Record the consumed byte count before finishing the parse, so it is
    // still available if yajl_complete_parse fails.
    scan_ctx_save_bytes_consumed(ctx);
    stat = yajl_complete_parse(handle);
  }

  if (stat != yajl_status_ok)
  {
    // Copy the yajl error text into a Ruby string, then release yajl's buffer.
    // The exception itself is raised later, after native resources are freed.
    char *str = (char *)yajl_get_error(handle, SCAN_OPTION(&options, verbose_error), (unsigned char *)json_text, json_text_len);
    err_msg = rb_utf8_str_new_cstr(str);
    bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
    yajl_free_error(handle, (unsigned char *)str);
  }
  // // Needed when yajl_allow_partial_values is set
  // if (ctx->current_path_len > 0)
  // {
  //   if (ctx->current_path_len > ctx->max_path_len)
  //     ctx->current_path_len = ctx->max_path_len;
  //   for (int i = ctx->current_path_len - 1; i > 0; i--)
  //   {
  //     switch (ctx->current_path[i].type)
  //     {
  //     case PATH_KEY:
  //       scan_on_end_object(ctx);
  //       break;
  //     case PATH_INDEX:
  //       scan_on_end_array(ctx);
  //       break;
  //     }
  //   }
  // }
  // callback_err = ctx->rb_err;
  // Release the temporary context (never a Selector-owned one) and the yajl handle
  // before any exception is raised below.
  if (free_ctx)
  {
    // fprintf(stderr, "free_ctx\n");
    scan_ctx_free(ctx);
    ruby_xfree(ctx);
  }
  yajl_free(handle);
  if (err_msg != Qnil)
  {
    // Attach the consumed byte count to the ParseError before raising it.
    VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
    rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
    rb_exc_raise(err);
  }
  // if (callback_err != Qnil)
  //   rb_exc_raise(callback_err);
  // With roots info requested, the return value becomes a [result, roots_info] pair.
  if (roots_info_result != Qundef)
  {
    result = rb_ary_new_from_args(2, result, roots_info_result);
  }
  return result;
}