Class: Datasets::AozoraBunko::Book

Inherits:
Struct
  • Object
show all
Defined in:
lib/datasets/aozora-bunko.rb,
lib/datasets/aozora-bunko.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Book

Returns a new instance of Book.



75
76
77
78
79
80
# File 'lib/datasets/aozora-bunko.rb', line 75

# Builds a Book, forwarding every argument to the parent initializer and then
# resetting the lazily loaded state (@text, @html) and @cache_path to nil.
#
# @param args [Array] arguments passed through unchanged to the superclass
def initialize(*args)
  super
  # Left-to-right assignment keeps the instance variables created in the
  # order @text, @html, @cache_path.
  @text, @html, @cache_path = nil, nil, nil
end

Instance Attribute Details

#base_of_original_book_first_published_date1 ⇒ Object

Returns the value of attribute base_of_original_book_first_published_date1

Returns:

  • (Object)

    the current value of base_of_original_book_first_published_date1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_first_published_date1+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_first_published_date1
  instance_variable_get(:@base_of_original_book_first_published_date1)
end

#base_of_original_book_first_published_date2 ⇒ Object

Returns the value of attribute base_of_original_book_first_published_date2

Returns:

  • (Object)

    the current value of base_of_original_book_first_published_date2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_first_published_date2+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_first_published_date2
  instance_variable_get(:@base_of_original_book_first_published_date2)
end

#base_of_original_book_name1 ⇒ Object

Returns the value of attribute base_of_original_book_name1

Returns:

  • (Object)

    the current value of base_of_original_book_name1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_name1+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_name1
  instance_variable_get(:@base_of_original_book_name1)
end

#base_of_original_book_name2 ⇒ Object

Returns the value of attribute base_of_original_book_name2

Returns:

  • (Object)

    the current value of base_of_original_book_name2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_name2+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_name2
  instance_variable_get(:@base_of_original_book_name2)
end

#base_of_original_book_publisher_name1 ⇒ Object

Returns the value of attribute base_of_original_book_publisher_name1

Returns:

  • (Object)

    the current value of base_of_original_book_publisher_name1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_publisher_name1+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_publisher_name1
  instance_variable_get(:@base_of_original_book_publisher_name1)
end

#base_of_original_book_publisher_name2 ⇒ Object

Returns the value of attribute base_of_original_book_publisher_name2

Returns:

  • (Object)

    the current value of base_of_original_book_publisher_name2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +base_of_original_book_publisher_name2+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def base_of_original_book_publisher_name2
  instance_variable_get(:@base_of_original_book_publisher_name2)
end

#cache_path=(value) ⇒ Object (writeonly)

Sets the attribute cache_path

Parameters:

  • value

    the value to set the attribute cache_path to.



73
74
75
# File 'lib/datasets/aozora-bunko.rb', line 73

# Stores +value+ in @cache_path.
#
# @param value [Object] the new cache path
# @return [Object] the value that was stored
def cache_path=(value)
  instance_variable_set(:@cache_path, value)
end

#copyrighted ⇒ Object Also known as: copyrighted?

Returns the value of attribute copyrighted

Returns:

  • (Object)

    the current value of copyrighted



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +copyrighted+ attribute.
#
# @return [Object] the value held in @copyrighted (nil if never assigned)
def copyrighted
  instance_variable_get(:@copyrighted)
end

#detail_url ⇒ Object

Returns the value of attribute detail_url

Returns:

  • (Object)

    the current value of detail_url



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +detail_url+ attribute.
#
# @return [Object] the value held in @detail_url (nil if never assigned)
def detail_url
  instance_variable_get(:@detail_url)
end

#first_appearance ⇒ Object

Returns the value of attribute first_appearance

Returns:

  • (Object)

    the current value of first_appearance



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +first_appearance+ attribute.
#
# @return [Object] the value held in @first_appearance (nil if never assigned)
def first_appearance
  instance_variable_get(:@first_appearance)
end

#html_file_character_encoding ⇒ Object

Returns the value of attribute html_file_character_encoding

Returns:

  • (Object)

    the current value of html_file_character_encoding



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +html_file_character_encoding+ attribute.
#
# @return [Object] the value held in @html_file_character_encoding (nil if never assigned)
def html_file_character_encoding
  instance_variable_get(:@html_file_character_encoding)
end

#html_file_character_set ⇒ Object

Returns the value of attribute html_file_character_set

Returns:

  • (Object)

    the current value of html_file_character_set



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +html_file_character_set+ attribute.
#
# @return [Object] the value held in @html_file_character_set (nil if never assigned)
def html_file_character_set
  instance_variable_get(:@html_file_character_set)
end

#html_file_updating_count ⇒ Object

Returns the value of attribute html_file_updating_count

Returns:

  • (Object)

    the current value of html_file_updating_count



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +html_file_updating_count+ attribute.
#
# @return [Object] the value held in @html_file_updating_count (nil if never assigned)
def html_file_updating_count
  instance_variable_get(:@html_file_updating_count)
end

#html_file_url ⇒ Object

Returns the value of attribute html_file_url

Returns:

  • (Object)

    the current value of html_file_url



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +html_file_url+ attribute.
#
# @return [Object] the value held in @html_file_url (nil if never assigned)
def html_file_url
  instance_variable_get(:@html_file_url)
end

#last_html_file_updated_date ⇒ Object

Returns the value of attribute last_html_file_updated_date

Returns:

  • (Object)

    the current value of last_html_file_updated_date



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +last_html_file_updated_date+ attribute.
#
# @return [Object] the value held in @last_html_file_updated_date (nil if never assigned)
def last_html_file_updated_date
  instance_variable_get(:@last_html_file_updated_date)
end

#last_text_file_updated_date ⇒ Object

Returns the value of attribute last_text_file_updated_date

Returns:

  • (Object)

    the current value of last_text_file_updated_date



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +last_text_file_updated_date+ attribute.
#
# @return [Object] the value held in @last_text_file_updated_date (nil if never assigned)
def last_text_file_updated_date
  instance_variable_get(:@last_text_file_updated_date)
end

#last_updated_date ⇒ Object

Returns the value of attribute last_updated_date

Returns:

  • (Object)

    the current value of last_updated_date



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +last_updated_date+ attribute.
#
# @return [Object] the value held in @last_updated_date (nil if never assigned)
def last_updated_date
  instance_variable_get(:@last_updated_date)
end

#ndc_code ⇒ Object

Returns the value of attribute ndc_code

Returns:

  • (Object)

    the current value of ndc_code



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +ndc_code+ attribute.
#
# @return [Object] the value held in @ndc_code (nil if never assigned)
def ndc_code
  instance_variable_get(:@ndc_code)
end

#original_book_first_published_date1 ⇒ Object

Returns the value of attribute original_book_first_published_date1

Returns:

  • (Object)

    the current value of original_book_first_published_date1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_first_published_date1+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def original_book_first_published_date1
  instance_variable_get(:@original_book_first_published_date1)
end

#original_book_first_published_date2 ⇒ Object

Returns the value of attribute original_book_first_published_date2

Returns:

  • (Object)

    the current value of original_book_first_published_date2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_first_published_date2+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def original_book_first_published_date2
  instance_variable_get(:@original_book_first_published_date2)
end

#original_book_name1 ⇒ Object

Returns the value of attribute original_book_name1

Returns:

  • (Object)

    the current value of original_book_name1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_name1+ attribute.
#
# @return [Object] the value held in @original_book_name1 (nil if never assigned)
def original_book_name1
  instance_variable_get(:@original_book_name1)
end

#original_book_name2 ⇒ Object

Returns the value of attribute original_book_name2

Returns:

  • (Object)

    the current value of original_book_name2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_name2+ attribute.
#
# @return [Object] the value held in @original_book_name2 (nil if never assigned)
def original_book_name2
  instance_variable_get(:@original_book_name2)
end

#original_book_publisher_name1 ⇒ Object

Returns the value of attribute original_book_publisher_name1

Returns:

  • (Object)

    the current value of original_book_publisher_name1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_publisher_name1+ attribute.
#
# @return [Object] the value held in @original_book_publisher_name1 (nil if never assigned)
def original_book_publisher_name1
  instance_variable_get(:@original_book_publisher_name1)
end

#original_book_publisher_name2 ⇒ Object

Returns the value of attribute original_book_publisher_name2

Returns:

  • (Object)

    the current value of original_book_publisher_name2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_book_publisher_name2+ attribute.
#
# @return [Object] the value held in @original_book_publisher_name2 (nil if never assigned)
def original_book_publisher_name2
  instance_variable_get(:@original_book_publisher_name2)
end

#original_title ⇒ Object

Returns the value of attribute original_title

Returns:

  • (Object)

    the current value of original_title



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +original_title+ attribute.
#
# @return [Object] the value held in @original_title (nil if never assigned)
def original_title
  instance_variable_get(:@original_title)
end

#person_birthday ⇒ Object

Returns the value of attribute person_birthday

Returns:

  • (Object)

    the current value of person_birthday



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_birthday+ attribute.
#
# @return [Object] the value held in @person_birthday (nil if never assigned)
def person_birthday
  instance_variable_get(:@person_birthday)
end

#person_copyrighted ⇒ Object Also known as: person_copyrighted?

Returns the value of attribute person_copyrighted

Returns:

  • (Object)

    the current value of person_copyrighted



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_copyrighted+ attribute.
#
# @return [Object] the value held in @person_copyrighted (nil if never assigned)
def person_copyrighted
  instance_variable_get(:@person_copyrighted)
end

#person_date_of_death ⇒ Object

Returns the value of attribute person_date_of_death

Returns:

  • (Object)

    the current value of person_date_of_death



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_date_of_death+ attribute.
#
# @return [Object] the value held in @person_date_of_death (nil if never assigned)
def person_date_of_death
  instance_variable_get(:@person_date_of_death)
end

#person_family_name ⇒ Object

Returns the value of attribute person_family_name

Returns:

  • (Object)

    the current value of person_family_name



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_family_name+ attribute.
#
# @return [Object] the value held in @person_family_name (nil if never assigned)
def person_family_name
  instance_variable_get(:@person_family_name)
end

#person_family_name_reading ⇒ Object

Returns the value of attribute person_family_name_reading

Returns:

  • (Object)

    the current value of person_family_name_reading



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_family_name_reading+ attribute.
#
# @return [Object] the value held in @person_family_name_reading (nil if never assigned)
def person_family_name_reading
  instance_variable_get(:@person_family_name_reading)
end

#person_family_name_reading_collation ⇒ Object

Returns the value of attribute person_family_name_reading_collation

Returns:

  • (Object)

    the current value of person_family_name_reading_collation



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_family_name_reading_collation+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def person_family_name_reading_collation
  instance_variable_get(:@person_family_name_reading_collation)
end

#person_family_name_romaji ⇒ Object

Returns the value of attribute person_family_name_romaji

Returns:

  • (Object)

    the current value of person_family_name_romaji



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_family_name_romaji+ attribute.
#
# @return [Object] the value held in @person_family_name_romaji (nil if never assigned)
def person_family_name_romaji
  instance_variable_get(:@person_family_name_romaji)
end

#person_first_name ⇒ Object

Returns the value of attribute person_first_name

Returns:

  • (Object)

    the current value of person_first_name



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_first_name+ attribute.
#
# @return [Object] the value held in @person_first_name (nil if never assigned)
def person_first_name
  instance_variable_get(:@person_first_name)
end

#person_first_name_reading ⇒ Object

Returns the value of attribute person_first_name_reading

Returns:

  • (Object)

    the current value of person_first_name_reading



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_first_name_reading+ attribute.
#
# @return [Object] the value held in @person_first_name_reading (nil if never assigned)
def person_first_name_reading
  instance_variable_get(:@person_first_name_reading)
end

#person_first_name_reading_collation ⇒ Object

Returns the value of attribute person_first_name_reading_collation

Returns:

  • (Object)

    the current value of person_first_name_reading_collation



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_first_name_reading_collation+ attribute.
#
# @return [Object] the value held in the matching instance variable (nil if never assigned)
def person_first_name_reading_collation
  instance_variable_get(:@person_first_name_reading_collation)
end

#person_first_name_romaji ⇒ Object

Returns the value of attribute person_first_name_romaji

Returns:

  • (Object)

    the current value of person_first_name_romaji



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_first_name_romaji+ attribute.
#
# @return [Object] the value held in @person_first_name_romaji (nil if never assigned)
def person_first_name_romaji
  instance_variable_get(:@person_first_name_romaji)
end

#person_id ⇒ Object

Returns the value of attribute person_id

Returns:

  • (Object)

    the current value of person_id



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_id+ attribute.
#
# @return [Object] the value held in @person_id (nil if never assigned)
def person_id
  instance_variable_get(:@person_id)
end

#person_type ⇒ Object

Returns the value of attribute person_type

Returns:

  • (Object)

    the current value of person_type



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +person_type+ attribute.
#
# @return [Object] the value held in @person_type (nil if never assigned)
def person_type
  instance_variable_get(:@person_type)
end

#proofreader_name ⇒ Object

Returns the value of attribute proofreader_name

Returns:

  • (Object)

    the current value of proofreader_name



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +proofreader_name+ attribute.
#
# @return [Object] the value held in @proofreader_name (nil if never assigned)
def proofreader_name
  instance_variable_get(:@proofreader_name)
end

#published_date ⇒ Object

Returns the value of attribute published_date

Returns:

  • (Object)

    the current value of published_date



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +published_date+ attribute.
#
# @return [Object] the value held in @published_date (nil if never assigned)
def published_date
  instance_variable_get(:@published_date)
end

#registered_person_name ⇒ Object

Returns the value of attribute registered_person_name

Returns:

  • (Object)

    the current value of registered_person_name



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +registered_person_name+ attribute.
#
# @return [Object] the value held in @registered_person_name (nil if never assigned)
def registered_person_name
  instance_variable_get(:@registered_person_name)
end

#subtitle ⇒ Object

Returns the value of attribute subtitle

Returns:

  • (Object)

    the current value of subtitle



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +subtitle+ attribute.
#
# @return [Object] the value held in @subtitle (nil if never assigned)
def subtitle
  instance_variable_get(:@subtitle)
end

#subtitle_reading ⇒ Object

Returns the value of attribute subtitle_reading

Returns:

  • (Object)

    the current value of subtitle_reading



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +subtitle_reading+ attribute.
#
# @return [Object] the value held in @subtitle_reading (nil if never assigned)
def subtitle_reading
  instance_variable_get(:@subtitle_reading)
end

#syllabary_spelling_type ⇒ Object

Returns the value of attribute syllabary_spelling_type

Returns:

  • (Object)

    the current value of syllabary_spelling_type



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +syllabary_spelling_type+ attribute.
#
# @return [Object] the value held in @syllabary_spelling_type (nil if never assigned)
def syllabary_spelling_type
  instance_variable_get(:@syllabary_spelling_type)
end

#text_file_character_encoding ⇒ Object

Returns the value of attribute text_file_character_encoding

Returns:

  • (Object)

    the current value of text_file_character_encoding



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +text_file_character_encoding+ attribute.
#
# @return [Object] the value held in @text_file_character_encoding (nil if never assigned)
def text_file_character_encoding
  instance_variable_get(:@text_file_character_encoding)
end

#text_file_character_set ⇒ Object

Returns the value of attribute text_file_character_set

Returns:

  • (Object)

    the current value of text_file_character_set



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +text_file_character_set+ attribute.
#
# @return [Object] the value held in @text_file_character_set (nil if never assigned)
def text_file_character_set
  instance_variable_get(:@text_file_character_set)
end

#text_file_updating_count ⇒ Object

Returns the value of attribute text_file_updating_count

Returns:

  • (Object)

    the current value of text_file_updating_count



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +text_file_updating_count+ attribute.
#
# @return [Object] the value held in @text_file_updating_count (nil if never assigned)
def text_file_updating_count
  instance_variable_get(:@text_file_updating_count)
end

#text_file_url ⇒ Object

Returns the value of attribute text_file_url

Returns:

  • (Object)

    the current value of text_file_url



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +text_file_url+ attribute.
#
# @return [Object] the value held in @text_file_url (nil if never assigned)
def text_file_url
  instance_variable_get(:@text_file_url)
end

#title ⇒ Object

Returns the value of attribute title

Returns:

  • (Object)

    the current value of title



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +title+ attribute.
#
# @return [Object] the value held in @title (nil if never assigned)
def title
  instance_variable_get(:@title)
end

#title_id ⇒ Object

Returns the value of attribute title_id

Returns:

  • (Object)

    the current value of title_id



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +title_id+ attribute.
#
# @return [Object] the value held in @title_id (nil if never assigned)
def title_id
  instance_variable_get(:@title_id)
end

#title_reading ⇒ Object

Returns the value of attribute title_reading

Returns:

  • (Object)

    the current value of title_reading



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +title_reading+ attribute.
#
# @return [Object] the value held in @title_reading (nil if never assigned)
def title_reading
  instance_variable_get(:@title_reading)
end

#title_reading_collation ⇒ Object

Returns the value of attribute title_reading_collation

Returns:

  • (Object)

    the current value of title_reading_collation



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +title_reading_collation+ attribute.
#
# @return [Object] the value held in @title_reading_collation (nil if never assigned)
def title_reading_collation
  instance_variable_get(:@title_reading_collation)
end

#used_version_for_proofreading1 ⇒ Object

Returns the value of attribute used_version_for_proofreading1

Returns:

  • (Object)

    the current value of used_version_for_proofreading1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +used_version_for_proofreading1+ attribute.
#
# @return [Object] the value held in @used_version_for_proofreading1 (nil if never assigned)
def used_version_for_proofreading1
  instance_variable_get(:@used_version_for_proofreading1)
end

#used_version_for_proofreading2 ⇒ Object

Returns the value of attribute used_version_for_proofreading2

Returns:

  • (Object)

    the current value of used_version_for_proofreading2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +used_version_for_proofreading2+ attribute.
#
# @return [Object] the value held in @used_version_for_proofreading2 (nil if never assigned)
def used_version_for_proofreading2
  instance_variable_get(:@used_version_for_proofreading2)
end

#used_version_for_registration1 ⇒ Object

Returns the value of attribute used_version_for_registration1

Returns:

  • (Object)

    the current value of used_version_for_registration1



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +used_version_for_registration1+ attribute.
#
# @return [Object] the value held in @used_version_for_registration1 (nil if never assigned)
def used_version_for_registration1
  instance_variable_get(:@used_version_for_registration1)
end

#used_version_for_registration2 ⇒ Object

Returns the value of attribute used_version_for_registration2

Returns:

  • (Object)

    the current value of used_version_for_registration2



7
8
9
# File 'lib/datasets/aozora-bunko.rb', line 7

# Reads the +used_version_for_registration2+ attribute.
#
# @return [Object] the value held in @used_version_for_registration2 (nil if never assigned)
def used_version_for_registration2
  instance_variable_get(:@used_version_for_registration2)
end

Instance Method Details

#html ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/datasets/aozora-bunko.rb', line 103

# Returns the book's HTML edition as a UTF-8 string, loading it on first use.
#
# A value already held in @html is returned as-is. When html_file_url is nil
# or empty nothing is downloaded and the (still nil) @html is returned.
# Otherwise the file is downloaded to html_file_output_path, read back, and
# transcoded to UTF-8 before being memoized in @html.
#
# @return [String, nil] the transcoded HTML, or nil when no HTML URL is set
def html
  return @html unless @html.nil?

  url = html_file_url
  return @html if url.nil? || url.empty?

  path = html_file_output_path
  Downloader.new(url).download(path)

  # The bytes on disk are interpreted using the encoding reported by
  # html_file_character_encoding (after normalize_encoding) and converted
  # to UTF-8.
  raw = File.read(path)
  @html = raw.encode(Encoding::UTF_8, normalize_encoding(html_file_character_encoding))
end

#text ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/datasets/aozora-bunko.rb', line 85

# Returns the book's plain-text edition as a UTF-8 string, loading it on
# first use.
#
# A value already held in @text is returned as-is. Nothing is downloaded,
# and the (still nil) @text is returned, when text_file_url is nil, empty,
# or does not end with ".zip". Otherwise the archive is downloaded to
# text_file_output_path, its first file is read and transcoded to UTF-8,
# and the result is memoized in @text.
#
# @return [String, nil] the transcoded text, or nil when it cannot be fetched
def text
  return @text unless @text.nil?

  url = text_file_url
  return @text if url.nil? || url.empty?

  # A URL that is not a zip file points at a web page that has to be opened
  # in a browser to download the file, so it cannot be fetched here.
  # e.g. https://mega.nz/file/6tMxgAjZ#PglDDyJL0syRhnULqK0qhTMC7cktsgqwObj5fY_knpE
  return @text unless url.end_with?('.zip')

  archive_path = text_file_output_path
  Downloader.new(url).download(archive_path)

  extractor = ZipExtractor.new(archive_path)
  @text = extractor.extract_first_file do |input|
    input.read.encode(Encoding::UTF_8, normalize_encoding(text_file_character_encoding))
  end
end