Module: Chakin::Downloader

Included in:
Vectors
Defined in:
lib/chakin-rb/chakin.rb

Instance Method Summary collapse

Instance Method Details

#download(number: nil, name: '', save_dir: './') ⇒ Object

Downloads a pre-trained word vector file and returns the path it was saved to.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/chakin-rb/chakin.rb', line 23

# Download a pre-trained word vector file listed in the dataset table.
#
# The entry is chosen by +number+ when it is given; otherwise the first row
# whose 'Name' column equals +name+ is used. The file keeps the last path
# segment of the catalogue URL as its name, even when the server redirects.
#
# @param number [Integer, nil] row of the dataset table; takes precedence over +name+
# @param name [String] value of the 'Name' column to look up when +number+ is nil
# @param save_dir [String] directory to save into (created when missing)
# @return [String] path of the downloaded file
# @raise [RuntimeError] when no entry matches or the entry has no URL
# @raise [Chakin::HttpRedirect] when the server keeps redirecting past the limit
def download(number: nil, name: '', save_dir: './')
  df = load_datasets

  # Resolve a name to its row position so both lookups share one code path and
  # always yield a single row (a filtered frame would yield a column for 'URL').
  # NOTE(review): assumes the table keeps its default 0-based index, so a
  # position within the 'Name' column is also a valid row key - confirm.
  number = df['Name'].to_a.index(name) if number.nil?

  row = number.nil? ? nil : df.row[number]
  url = row && row['URL']
  raise 'The word vector you specified was not found. Please specify correct name.' if url.nil?

  FileUtils.mkdir_p(save_dir)
  save_path = File.join(save_dir, url.split('/').last)

  # Follow redirects, but only a bounded number of times so a redirect loop
  # cannot hang the caller.
  max_redirects = 5
  redirects = 0
  begin
    download_file(save_path, url)
  rescue Chakin::HttpRedirect => e
    redirects += 1
    raise if redirects > max_redirects || e.new_url.nil?

    url = e.new_url
    retry
  end

  save_path
end

#download_file(save_path, url) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/chakin-rb/chakin.rb', line 49

# Fetch +url+ over HTTP(S) and stream the response body into +save_path+,
# advancing a progress bar as segments arrive.
#
# Redirects are not followed here: they are reported by raising HttpRedirect
# carrying the target URL, so the caller can decide whether to retry.
#
# @param save_path [String] file to write (binary mode, overwritten)
# @param url [String] absolute http:// or https:// URL
# @return [Net::HTTPResponse] the response returned by Net::HTTP#request_get
# @raise [HttpRedirect] when the server answers 301, 302, 303, 307 or 308
# @raise [Net::HTTPExceptions] when the server answers with a non-2xx status
def download_file(save_path, url)
  progressbar = ProgressBar.create

  my_uri = URI.parse(url)
  http = Net::HTTP.new(my_uri.host, my_uri.port)
  http.use_ssl = my_uri.scheme == 'https'

  # request_uri keeps the query string and yields "/" for an empty path.
  http.request_get(my_uri.request_uri) do |resp|
    if %w[301 302 303 307 308].include?(resp.code)
      location = resp['Location']
      # Location may be relative; resolve it against the URL just requested.
      raise HttpRedirect.new(location && URI.join(url, location).to_s)
    end

    # Refuse to save an error page as if it were the requested file.
    resp.value

    progressbar.total = resp.content_length

    # Open the file only once a good response is known, so failures and
    # redirects do not leave an empty file behind.
    File.open(save_path, 'wb') do |f|
      resp.read_body do |segment|
        progressbar.progress += segment.size
        f.write(segment)
      end
    end
  end
end

#load_datasets(path = File.join(__dir__, 'datasets.csv')) ⇒ Object



17
18
19
# File 'lib/chakin-rb/chakin.rb', line 17

# Read the dataset table from a CSV file.
#
# @param path [String] CSV file to read; defaults to datasets.csv in this file's directory
# @return [Object] whatever Daru::DataFrame.from_csv builds from the file
def load_datasets(path = File.join(__dir__, 'datasets.csv'))
  csv_path = path
  Daru::DataFrame.from_csv(csv_path)
end

#search(lang = '') ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/chakin-rb/chakin.rb', line 78

# Print the dataset table to standard output.
#
# With the default empty string the whole table is printed. Otherwise only
# rows whose 'Language' column equals +lang+ are printed, limited to the
# descriptive columns listed below.
#
# @param lang [String] language to filter by; '' prints everything
# @return [nil] the result of +puts+
def search(lang = '')
  datasets = load_datasets

  listing =
    if lang == ''
      datasets
    else
      columns = %w[Name Dimension Corpus VocabularySize Method Language Author]
      matching = datasets.where(datasets['Language'].eq(lang))
      matching[*columns]
    end

  puts listing.inspect
end