Class: GEDCOM_DATE_PARSER::DateParser

Inherits:
Object
  • Object
show all
Defined in:
lib/gedcom_ruby/date_parser.rb

Constant Summary collapse

GEDFNONE =
0
GEDFBETWEEN =
1
GEDFFROM =
2
GEDFINTERP =
4
GEDFNUMBER =
8
GEDFMONTH =
16
GEDFSLASH =
32

Class Method Summary collapse

Class Method Details

.build_gedcom_date_part_string(date) ⇒ Object



938
939
940
941
942
943
944
945
# File 'lib/gedcom_ruby/date_parser.rb', line 938

def self.build_gedcom_date_part_string( date )
  # Render a single GEDCOM date part as text (class method)
  # Inputs:  date  -  date part to render (GEDDate)
  # Outputs: a new String holding whatever get_date_text produced for the part
  "" + get_date_text( date )
end

.build_gedcom_date_string(date) ⇒ Object



894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
# File 'lib/gedcom_ruby/date_parser.rb', line 894

def self.build_gedcom_date_string( date )
  # Stringify a GEDCOM date (class method)
  # Inputs:  date      -  date (GEDDateValue); reads flags, date1 and date2
  # Outputs: buffer    -  output string
  #
  # A status value (child, cleared, BIC, ...) is rendered as the bare keyword.
  # Anything else is rendered as "<prefix><date1><joiner><date2>", where a
  # missing date part is simply skipped.

  # Status keywords stand alone, so hand the keyword back right away.
  status = case date.flags
    when GCCHILD     then "child"
    when GCCLEARED   then "cleared"
    when GCCOMPLETED then "completed"
    when GCINFANT    then "infant"
    when GCPRE1970   then "pre-1970"
    when GCQUALIFIED then "qualified"
    when GCSTILLBORN then "stillborn"
    when GCSUBMITTED then "submitted"
    when GCUNCLEARED then "uncleared"
    when GCBIC       then "BIC"
    when GCDNS       then "DNS"
    when GCDNSCAN    then "DNSCAN"
    when GCDEAD      then "dead"
  end
  return status if status

  buffer = case date.flags
    when GCABOUT       then "abt "
    when GCCALCULATED  then "cal "
    when GCESTIMATED   then "est "
    when GCBEFORE      then "bef "
    when GCAFTER       then "aft "
    when GCBETWEEN     then "bet "
    # Ruby's case never falls through, so an open-ended "from" and a
    # "from ... to" period must be listed together to share the prefix.
    when GCFROM, GCFROMTO then "from "
    when GCTO          then "to "
    when GCINTERPRETED then "int "
    else ""
  end

  buffer += get_date_text( date.date1 ) if date.date1

  # Two-part values need a joiner between the first and second date.
  case date.flags
    when GCBETWEEN then buffer += " and "
    when GCFROMTO  then buffer += " to "
  end

  buffer += get_date_text( date.date2 ) if date.date2
  buffer
end

.get_date_text(date) ⇒ Object



516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
# File 'lib/gedcom_ruby/date_parser.rb', line 516

def self.get_date_text( date )
  # Render one GEDCOM date part as text (class method)
  # Inputs:  date  -  Date Part (GEDDate); reads flags, type and data
  # Outputs: the rendered text as a String

  # Phrases and non-standard dates keep their text verbatim in data.
  return "" + date.data if (date.flags & (GFPHRASE | GFNONSTANDARD)) != 0

  # Pick the month-name table that belongs to the part's calendar.
  month_names = case date.type
    when GCTHEBREW then Hebrew_Months
    when GCTFRENCH then French_Months
    else                Default_Months
  end

  parts = date.data
  return "" if !parts

  text = ""
  bits = parts.flags
  if bits
    # A component is present when its "missing" flag is clear.
    day_known   = (bits & GFNODAY) == 0
    month_known = (bits & GFNOMONTH) == 0
    year_known  = (bits & GFNOYEAR) == 0

    if day_known
      text << parts.day.to_s
      text << " " if month_known || year_known
    end

    if month_known
      text += month_names[ parts.month - 1 ]
      text << " " if year_known
    end

    if year_known
      text << parts.year.to_s
      # A year span is written as "<year>-<year2>".
      text << "-" << parts.year2.to_s if (bits & GFYEARSPAN) != 0
    end
  end

  # Only Gregorian parts carry an AD/BC marker.
  text << " BC" if date.type == GCTGREGORIAN && parts.adbc != GEDADBCAD
  text
end

.get_token(parser) ⇒ Object



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
# File 'lib/gedcom_ruby/date_parser.rb', line 429

def self.get_token( parser )
  # Get a single token from this parser state (class method)
  # Inputs:  parser    -  parser state  (GEDParserState); reads buffer, pos,
  #                       lastGeneralToken and lastSpecificToken, advances pos
  # Outputs: general   -  general token (TKNUMBER, TKEOF, TKERROR, or the
  #                       general value of the matching TokenTable entry)
  #          specific  -  specific token (the integer value for TKNUMBER, the
  #                       specific value of the matching TokenTable entry,
  #                       otherwise TKNONE)
  # When no token can be recognised, parser.pos is restored to its value on
  # entry and TKERROR is returned.
  startPos = parser.pos

  # if we've got a token saved in the parser, return it
  # (put_token stores one there); the saved slot is cleared and pos is untouched
  if ( parser.lastGeneralToken != TKNONE )
    general = parser.lastGeneralToken
    specific = parser.lastSpecificToken
    parser.lastGeneralToken = TKNONE
    parser.lastSpecificToken = TKNONE
    return general, specific
  end

  #eat leading white-space (only the space character is skipped)
 parser.pos+=1 while ( parser.buffer[ parser.pos, 1 ]==" " )

  #if the buffer is empty, return TKEOF
  if ( parser.buffer[ parser.pos, 1 ] == nil || parser.buffer[parser.pos, 1] == "")
    specific = TKNONE
    general = TKEOF
    return general, specific
  end

  lexeme = ""
  # if it's a number, parse it out and return it
  # (a run of decimal digits; specific carries its integer value)
  if ( parser.buffer[ parser.pos, 1 ] =~ /[0-9]/ )
    while ( parser.buffer[ parser.pos, 1 ] =~ /[0-9]/)
      lexeme << parser.buffer[ parser.pos, 1 ]
      parser.pos+=1
    end
    specific = lexeme.to_i
    general = TKNUMBER
    return general, specific
  end

  currentToken = 0
  lexPos = 0
  # if it is not a number, incrementally look at each token in the table
  # One input character is consumed (upper-cased) per iteration. An entry whose
  # lexeme is 0 marks the end of TokenTable.
  # NOTE(review): the forward-only scan below presumably relies on TokenTable
  # being sorted by lexeme -- confirm against the table definition.
  # NOTE(review): parser.buffer[ pos, 1 ] is nil once pos is past the end of
  # the buffer, in which case .upcase would raise -- confirm this is unreachable.
  while ( TokenTable[ currentToken ].lexeme != 0 )
    lexeme << parser.buffer[ parser.pos, 1 ].upcase
    lexPos+=1
    parser.pos+=1

    # The newest character disagrees with the current entry: move forward to
    # the first entry whose leading lexPos characters are not less than the lexeme.
    if( lexeme[ lexPos-1, 1 ] != TokenTable[ currentToken ].lexeme[ lexPos-1, 1 ] )
      currentToken+=1 while( ( TokenTable[ currentToken ].lexeme != 0 ) &&
             ( (TokenTable[ currentToken ].lexeme[0, lexPos] <=> lexeme[0, lexPos] ) < 0 ) )

      #if the lexeme does not appear in the table, exit with an error
      break if ( TokenTable[ currentToken ].lexeme == 0 || \
          (TokenTable[ currentToken ].lexeme[0, lexPos] <=> lexeme[0, lexPos] ) != 0 )

    end

    #if the lexeme terminates, return the value of the current token
    # A lexeme starting with a letter ends when the next input character is not
    # alphanumeric, so a leading prefix of a table entry is accepted as that
    # entry. Any other lexeme ends once it is as long as the table entry.
    if( ( ( lexeme[0,1] =~ /[a-zA-Z]/) && ( parser.buffer[ parser.pos, 1 ] !~ /[0-9a-zA-Z]/) ) ||
        ( ( lexeme[0,1] !~ /[a-zA-Z]/ ) && ( lexPos >= TokenTable[ currentToken ].lexeme.length ) ) )
      specific = TokenTable[ currentToken ].specific
      general = TokenTable[ currentToken ].general
      return general, specific
    end

    #if the current token terminates before the lexeme, then we have an error
    break if ( TokenTable[ currentToken ].lexeme[ lexPos, 1 ] == nil )

  end

  # No match: rewind so the caller sees the buffer position it started with.
  parser.pos = startPos

  specific = TKNONE
  general = TKERROR

  return general, specific
end

.parse_date_part(parser, datePart, type) ⇒ Object

Raises:

DateParseException



578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
# File 'lib/gedcom_ruby/date_parser.rb', line 578

def self.parse_date_part( parser, datePart, type )
  # Parse out a date part (class method)
  # Inputs:  parser    -  parser state
  #          datePart  -  date part (GEDDate)
  #          type      -  calendar type
  # Outputs: None  (updated date part)
  # Raises:  DateParseException when get_token reports TKERROR or when the
  #          state machine ends in ST_DT_ERROR
  #
  # Tokens are fed through DateStateTable: the first entry whose state and
  # input match the current state and token decides the next state and the
  # numbered action to run.
  state = ST_DT_START
  flags = GEDFNONE

  # Initialize the datePart, in case it contains old data
  # (Gregorian parts get a GEDDateGreg, every other calendar a GEDDateGeneral)
  datePart.type = type
  datePart.flags = GFNONE
  if (type == GCTGREGORIAN)
    datePart.data = GEDDateGreg.new(flags, 0, 0, 0, 0, GEDADBCAD)
  else
    datePart.data = GEDDateGeneral.new(flags, 0, 0, 0)
  end
  # Most recent number token that has not yet been assigned to day or year.
  number = 0

  while ( ( state != ST_DT_END ) && ( state != ST_DT_ERROR ) )
    general, specific = get_token( parser )
    raise DateParseException, "error parsing datepart, pre-transition" if (general == TKERROR)
    transitionFound = 0

    # Tokens that belong to a date part pass straight through; TKTO and TKAND
    # are pushed back for the caller; anything else is pushed back as well.
    # NOTE(review): Ruby's `break` inside `case` leaves the enclosing `while`,
    # not just the `case`, so the else branch ends parsing without running
    # another table transition (the general/specific assignments before it are
    # then never used) -- confirm this is intended.
    # NOTE(review): after the TKTO/TKAND branch `general` still holds TKTO or
    # TKAND when the table is consulted -- confirm DateStateTable has entries
    # for those inputs.
    case ( general )
      when TKNUMBER
      when TKMONTH
      when TKSLASH
      when TKBC
      when TKEOF
      when TKERROR
      when TKTO, TKAND
        put_token( parser, general, specific)
      else
        put_token( parser, general, specific )
        general = TKEOF
        specific = TKNONE
        break
    end

    DateStateTable.each do |dateState|
      # An entry with state < 1 ends the usable part of the table.
      break if dateState.state < 1

      if( ( dateState.state == state ) && ( dateState.input == general ) )
        state = dateState.nextState
        transitionFound = 1

        case dateState.action
          # 0: store number, set NUMBER
          when 0
            number = specific
            flags |= GEDFNUMBER

          # 1: if MONTH, then error, else set number to be day, set month, set MONTH
          when 1
            if ( type == GCTFRENCH )
              # if the token is "JOUR", make sure they also typed at least
              # part of "COMPLIMENTAIRES"

              case specific
                when TKJOUR
                  general, specific = get_token( parser )
                  raise DateParseException, "error parsing datepart, post-JOUR (french calendar)" if (general == TKERROR)
                  if ( general != TKMONTH && specific != TKCOMP )
                    state = ST_DT_ERROR
                    put_token( parser, general, specific )
                  end #fall through
                  # NOTE(review): Ruby's case does not fall through, so the
                  # TKCOMP branch below does not run after this one and
                  # specific keeps the value of the token just read -- confirm.

                when TKCOMP
                  specific = TKJOUR_COMP
              end
            elsif ( type == GCTHEBREW )
              # if the token is "ADAR", see if it is followed by "SHENI",
              # and if it is, change the month to "ADAR SHENI"

              if( specific == TKADAR )
                general, specific = get_token( parser )
                raise DateParseException, "error parsing datepart, post-ADAR" if (general == TKERROR)
                if( general == TKMONTH && specific == TKSHENI )
                  specific = TKADAR_SHENI
                else
                  put_token( parser, general, specific )
                end
              end
            end

            # A second month in the same date part is an error.
            if ( ( flags & GEDFMONTH ) != 0 )
              state = ST_DT_ERROR
            else
              # validate_month_for_type returns the 1-based month, or -1 if
              # the token is not a month of this calendar.
              month = validate_month_for_type( specific, type )
              if ( month < 1 )
                state = ST_DT_ERROR
              else
                datePart.data.day = number
                datePart.data.month = month
              end
              flags |= GEDFMONTH
              number = 0
            end

          # 2: if SLASH, then error, else set SLASH, set number to be year
          # (also an error for any calendar other than Gregorian)
          when 2
            if ( ( ( flags & GEDFSLASH ) != 0 ) || ( type != GCTGREGORIAN ) )
              state = ST_DT_ERROR
            else
              datePart.data.year = number if ( number > 0 )

              datePart.data.flags |= GFYEARSPAN
              number = 0
              flags |= GEDFSLASH
            end

          # 3: if not SLASH set number to be year, set bc
          # 4: if not SLASH set number to be year, terminate
          # 6: terminate
          when 3, 4, 6
            if (dateState.action == 3)
              if( type != GCTGREGORIAN )
                state = ST_DT_ERROR
                # leaves the DateStateTable.each block
                break
              end
              datePart.data.adbc = GEDADBCBC
            end

            if (dateState.action == 3 || dateState.action == 4)
              if( ( number > 0 ) && ( ( flags & GEDFSLASH ) == 0 ) )
                datePart.data.year = number
                number = 0
              end
            end


            # Flag every component that is still unset (less than 1) as missing.
            datePart.data.flags |= GFNODAY if( datePart.data.day < 1 )

            datePart.data.flags |= GFNOMONTH if( datePart.data.month < 1 )

            datePart.data.flags |= GFNOYEAR if( datePart.data.year < 1 )


          # 5: if NUMBER, set number to be day.  set number to be year, store number, set NUMBER
          when 5
            datePart.data.day = number if( ( number > 0 ) && ( ( flags & GEDFNUMBER ) != 0 ) )

            datePart.data.year = specific

            number = 0
            flags |= GEDFNUMBER

          # 7: set number to be year2  (Gregorian Calendar)
          # (only the last two digits of the number are kept)
          when 7
            datePart.data.year2 = ( specific % 100 )
            number = 0
        end

        # Only the first matching table entry is applied.
        break
      end
    end

    # No table entry accepted this token in the current state.
    state = ST_DT_ERROR if( transitionFound == 0 )
  end

  raise DateParseException, "error parsing datepart, general" if( state == ST_DT_ERROR )

end

.parse_gedcom_date(dateString, date, type = GCTDEFAULT) ⇒ Object



744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
# File 'lib/gedcom_ruby/date_parser.rb', line 744

def self.parse_gedcom_date( dateString, date, type = GCTDEFAULT )
  # Parse out a GEDCOM date (class method)
  # Inputs:  dateString    - String containing GEDCOM date
  #          date          -  date  (GEDDateValue)
  #          type          -  calendar type
  # Outputs: None  (updated date)
  # Raises:  DateParseException when get_token reports TKERROR or when the
  #          state machine ends in ST_DV_ERROR. In the latter case the date
  #          part being filled is first marked GFNONSTANDARD and given the
  #          text from the last token read to the end of the buffer.
  #
  # Tokens are fed through DateValueStateTable: the first entry whose state and
  # input match the current state and token decides the next state and the
  # numbered action to run.

  parser = GEDParserState.new( "", 0, 0, 0 )
  parser.buffer = dateString

  # New date 1 if it's nil
  date.date1 = GEDDate.new( type, GFNONE, nil ) if not date.date1
  datePart = date.date1

  state = ST_DV_START
  flags = GEDFNONE
  datesRead = 0

  while ( ( state != ST_DV_END ) && ( state != ST_DV_ERROR ) )
    # Remember where this token started so the error path can rewind to it.
    savePos = parser.pos
    general, specific = get_token( parser )
    raise DateParseException, "error parsing date" if (general == TKERROR)
    transitionFound = 0

    DateValueStateTable.each do |dateValueState|
      # An entry with state < 1 ends the usable part of the table.
      break if dateValueState.state < 1

      if( ( dateValueState.state == state ) && ( dateValueState.input == general ) )

        transitionFound = 1
        state = dateValueState.nextState

        case ( dateValueState.action )
          # 0: inc dates read, parse a date
          # The token is pushed back so parse_date_part sees it again. Any
          # StandardError from parse_date_part becomes ST_DV_ERROR.
          when 0
            put_token( parser, general, specific )
            begin
              if (datesRead != 0)
                # New date 2 if it's nil
                date.date2 = GEDDate.new( type, GFNONE, nil ) if not date.date2
                datePart = date.date2
              end
              parse_date_part( parser, datePart, type )
              datesRead+=1
            rescue
              state = ST_DV_ERROR
            end

          # 1: set the approx type
          # (date.flags becomes nil if specific is none of the listed tokens)
          when 1
            date.flags = case specific
              when TKABOUT      then GCABOUT
              when TKCALCULATED then GCCALCULATED
              when TKESTIMATED  then GCESTIMATED
            end

          # 2: set the range type
          # (TKBETWEEN also records GEDFBETWEEN so actions 6 and 8 can check it)
          when 2
            date.flags = case specific
            when TKBEFORE then GCBEFORE
            when TKAFTER  then GCAFTER
            when TKBETWEEN
              flags |= GEDFBETWEEN
              GCBETWEEN
            end

          # 3: set the period type
          when 3
            if general == TKTO
              date.flags = GCTO
            elsif specific == TKFROM
              date.flags = GCFROM
              flags |= GEDFFROM
            end

          # 4: set interpreted
          when 4
            date.flags = GCINTERPRETED
            flags |= GEDFINTERP

          # 5: get remaining buffer as phrase
          # 7: if 'interpreted', get remaining buffer as phrase
          when 5, 7
            # This is kind of a sucky way to handle this, but the shared functionality
            # between action 5 and 7 doesn't seem like enough to warrant breaking out
            # into its own method.
            if dateValueState.action == 7 && ( flags & GEDFINTERP ) == 0
              state = ST_DV_ERROR
              # leaves the DateValueStateTable.each block
              break
            end

            # Strip off trailing whitespace and closing parenthesis
            # (keeps only the text before the first ')'; the result is nil
            # when nothing remains in the buffer)
            buffer = parser.buffer.slice( parser.pos, parser.buffer.length ).rstrip.split( ')' )[0]
            datePart.data = buffer
            datePart.flags = GFPHRASE
            # The whole remainder has been consumed.
            parser.pos = parser.buffer.length

          # 6: if 'between' and not second date read, error, else terminate
          when 6
            state = ST_DV_ERROR if( ( ( flags & GEDFBETWEEN ) != 0 ) && datesRead < 2 )

          # else -- nextState is ST_DV_END, so we're done!

          # 7: see above 5

          # 8: if 'between', prepare to read next date
          when 8
            state = ST_DV_ERROR if( ( flags & GEDFBETWEEN ) == 0 )

          # 9: if 'from', set FROMTO, prepare to read next date
          when 9
            if( ( flags & GEDFFROM ) == 0 )
              state = ST_DV_ERROR
            else
              date.flags = GCFROMTO
            end

          # 10: set status
          # (date.flags becomes nil if specific is none of the listed tokens)
          when 10
            date.flags = case specific
            when TKCHILD     then GCCHILD
            when TKCLEARED   then GCCLEARED
            when TKCOMPLETED then GCCOMPLETED
            when TKINFANT    then GCINFANT
            when TKPRE1970   then GCPRE1970
            when TKQUALIFIED then GCQUALIFIED
            when TKSTILLBORN then GCSTILLBORN
            when TKSUBMITTED then GCSUBMITTED
            when TKUNCLEARED then GCUNCLEARED
            when TKBIC       then GCBIC
            when TKDNS       then GCDNS
            when TKDNSCAN    then GCDNSCAN
            when TKDEAD      then GCDEAD
            end
        end
        break  # ... Out of the DateValueStateTable.each block
      end
    end

    # No table entry accepted this token in the current state.
    state = ST_DV_ERROR if( transitionFound == 0 )
  end

  # Keep the unparsed text on the current date part before reporting failure.
  if( state == ST_DV_ERROR )
    parser.pos = savePos
    datePart.flags = GFNONSTANDARD
    datePart.data = parser.buffer.slice( parser.pos, parser.buffer.length )
    raise DateParseException, "error parsing date, general"
  end
end

.put_token(parser, general, specific) ⇒ Object



506
507
508
509
510
511
512
513
514
# File 'lib/gedcom_ruby/date_parser.rb', line 506

def self.put_token( parser, general, specific )
  # Push a token back onto the parser state (class method)
  # Inputs:  parser    -  parser state whose saved-token slots are overwritten
  #          general   -  general token to save
  #          specific  -  specific token to save
  # Outputs: None of interest (evaluates to specific)
  parser.lastGeneralToken, parser.lastSpecificToken = general, specific
  specific
end

.validate_month_for_type(month, calType) ⇒ Object



560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
# File 'lib/gedcom_ruby/date_parser.rb', line 560

def self.validate_month_for_type( month, calType )
  # Make sure this is a valid month for this calendar type (class method)
  # Inputs:  month    -  specific month token (TKJANUARY, TKTISHRI, ...)
  #          calType  -  calendar type
  # Outputs: 1-based month number within the calendar, or -1 when the token
  #          is not a month of that calendar (or the calendar is unknown)

  # First and last month token of the calendar. The candidates in a `when`
  # must be separated by a comma: `GCTGREGORIAN || GCTJULIAN` evaluates to
  # GCTGREGORIAN alone and would never match a Julian date.
  first, last = case calType
    when GCTGREGORIAN, GCTJULIAN then [ TKJANUARY, TKDECEMBER ]
    when GCTHEBREW               then [ TKTISHRI, TKELUL ]
    when GCTFRENCH               then [ TKVENDEMIAIRE, TKJOUR_COMP ]
  end

  return -1 unless first && month >= first && month <= last
  month - first + 1
end