Module: WikidataDiffAnalyzer

Defined in:
lib/wikidata-diff-analyzer.rb,
lib/wikidata-diff-analyzer/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

VERSION =
"2.0.2"

Class Method Summary collapse

Class Method Details

.analyze(revision_ids) ⇒ Object

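Analyzes a set of revision IDs and returns a hash containing the diff for each revision that could be analyzed, the running totals across all of those diffs, the number of revisions analyzed, and the list of revision IDs that could not be analyzed.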



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/wikidata-diff-analyzer.rb', line 10

# Analyzes a batch of revisions and aggregates the per-revision diffs.
#
# The caller's +revision_ids+ array is left untouched: filtering is done on a
# copy, so frozen arrays are accepted and the input can be reused afterwards.
#
# @param revision_ids [Array<Integer>] revision IDs to analyze. An ID of 0 is
#   never sent for analysis; it is reported once in +:diffs_not_analyzed+.
# @return [Hash] a hash with the keys
#   - +:diffs_analyzed_count+ [Integer] number of revisions that were analyzed
#   - +:diffs_not_analyzed+ [Array] 0 (if it was supplied) followed by every
#     requested ID for which no +:current_content+ was available
#   - +:diffs+ [Hash] revision ID => value returned by RevisionAnalyzer.analyze_diff
#   - +:total+ [Hash{Symbol => Integer}] counters updated by Total.accumulate_totals
def self.analyze(revision_ids)
  diffs = {}
  analyzed_ids = []
  not_analyzed_ids = []
  total = {
    claims_added: 0,
    claims_removed: 0,
    claims_changed: 0,
    references_added: 0,
    references_removed: 0,
    references_changed: 0,
    qualifiers_added: 0,
    qualifiers_removed: 0,
    qualifiers_changed: 0,
    aliases_added: 0,
    aliases_removed: 0,
    aliases_changed: 0,
    labels_added: 0,
    labels_removed: 0,
    labels_changed: 0,
    descriptions_added: 0,
    descriptions_removed: 0,
    descriptions_changed: 0,
    sitelinks_added: 0,
    sitelinks_removed: 0,
    sitelinks_changed: 0,
    lemmas_added: 0,
    lemmas_removed: 0,
    lemmas_changed: 0,
    forms_added: 0,
    forms_removed: 0,
    forms_changed: 0,
    representations_added: 0,
    representations_removed: 0,
    representations_changed: 0,
    formclaims_added: 0,
    formclaims_removed: 0,
    formclaims_changed: 0,
    senses_added: 0,
    senses_removed: 0,
    senses_changed: 0,
    glosses_added: 0,
    glosses_removed: 0,
    glosses_changed: 0,
    senseclaims_added: 0,
    senseclaims_removed: 0,
    senseclaims_changed: 0,
    merge_to: 0,
    merge_from: 0,
    redirect: 0,
    undo: 0,
    restore: 0,
    clear_item: 0,
    create_item: 0,
    create_property: 0,
    create_lexeme: 0
  }

  # Revision ID 0 can never be analyzed, so it is excluded from the batch and
  # reported once as not analyzed. A filtered copy is built instead of calling
  # Array#delete so the caller's array is not mutated.
  requested_ids = revision_ids.reject { |id| id == 0 }
  not_analyzed_ids << 0 if requested_ids.size != revision_ids.size

  # 50 is the batch size handed to the batching helper.
  # NOTE(review): presumably this is the per-request revision limit — confirm
  # against LargeBatchesAnalyzer.
  result = LargeBatchesAnalyzer.handle_large_batches(requested_ids, 50)

  result.each do |revision_id, revision_data|
    # Entries without current content cannot be diffed; they fall through to
    # the not-analyzed list below.
    next unless revision_data[:current_content]

    diff = RevisionAnalyzer.analyze_diff(revision_data)
    diffs[revision_id] = diff
    Total.accumulate_totals(diff, total)
    analyzed_ids << revision_id
  end

  # Every requested ID that produced no diff is reported as not analyzed.
  not_analyzed_ids += requested_ids - analyzed_ids

  {
    diffs_analyzed_count: analyzed_ids.size,
    diffs_not_analyzed: not_analyzed_ids,
    diffs: diffs,
    total: total
  }
end