summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2023-11-07 16:24:54 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2023-11-07 16:24:54 +0900
commitfd2cc80a31a5177d4f2cced99ed2e7235af9d6ee (patch)
treef36ea011da95dfef55020143cf57e42d75c4a0a3
downloadrust-difflib-upstream.tar.gz
rust-difflib-upstream.tar.bz2
rust-difflib-upstream.zip
Import difflib 0.4.0upstream/0.4.0upstream
-rw-r--r--Cargo.toml26
-rw-r--r--Cargo.toml.orig19
-rw-r--r--examples/example.rs63
-rw-r--r--src/differ.rs340
-rw-r--r--src/lib.rs169
-rw-r--r--src/sequencematcher.rs346
-rw-r--r--src/utils.rs47
-rw-r--r--tests/tests.rs194
8 files changed, 1204 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..29a094f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,26 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "difflib"
+version = "0.4.0"
+authors = ["Dima Kudosh <dimakudosh@gmail.com>"]
+include = ["**/*.rs", "Cargo.toml"]
+description = "Port of Python's difflib library to Rust."
+homepage = "https://github.com/DimaKudosh/difflib"
+documentation = "https://github.com/DimaKudosh/difflib/wiki"
+keywords = ["difflib", "text", "diff"]
+license = "MIT"
+repository = "https://github.com/DimaKudosh/difflib"
+
+[[test]]
+name = "tests"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..2edf6df
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,19 @@
+[package]
+name = "difflib"
+version = "0.4.0"
+authors = ["Dima Kudosh <dimakudosh@gmail.com>"]
+description = "Port of Python's difflib library to Rust."
+documentation = "https://github.com/DimaKudosh/difflib/wiki"
+homepage = "https://github.com/DimaKudosh/difflib"
+repository = "https://github.com/DimaKudosh/difflib"
+keywords = ["difflib", "text", "diff"]
+license = "MIT"
+include = [
+ "**/*.rs",
+ "Cargo.toml",
+]
+
+
+[[test]]
+name = "tests"
+
diff --git a/examples/example.rs b/examples/example.rs
new file mode 100644
index 0000000..3f06d0a
--- /dev/null
+++ b/examples/example.rs
@@ -0,0 +1,63 @@
+extern crate difflib;
+
+use difflib::differ::Differ;
+use difflib::sequencematcher::SequenceMatcher;
+
+fn main() {
+ // unified_diff
+ let first_text = "one two three four".split(" ").collect::<Vec<&str>>();
+ let second_text = "zero one tree four".split(" ").collect::<Vec<&str>>();
+ let diff = difflib::unified_diff(
+ &first_text,
+ &second_text,
+ "Original",
+ "Current",
+ "2005-01-26 23:30:50",
+ "2010-04-02 10:20:52",
+ 3,
+ );
+ for line in &diff {
+ println!("{:?}", line);
+ }
+
+ //context_diff
+ let diff = difflib::context_diff(
+ &first_text,
+ &second_text,
+ "Original",
+ "Current",
+ "2005-01-26 23:30:50",
+ "2010-04-02 10:20:52",
+ 3,
+ );
+ for line in &diff {
+ println!("{:?}", line);
+ }
+
+ //get_close_matches
+ let words = vec!["ape", "apple", "peach", "puppy"];
+ let result = difflib::get_close_matches("appel", words, 3, 0.6);
+ println!("{:?}", result);
+
+ //Differ examples
+ let differ = Differ::new();
+ let diff = differ.compare(&first_text, &second_text);
+ for line in &diff {
+ println!("{:?}", line);
+ }
+
+ //SequenceMatcher examples
+ let mut matcher = SequenceMatcher::new("one two three four", "zero one tree four");
+ let m = matcher.find_longest_match(0, 18, 0, 18);
+ println!("{:?}", m);
+ let all_matches = matcher.get_matching_blocks();
+ println!("{:?}", all_matches);
+ let opcode = matcher.get_opcodes();
+ println!("{:?}", opcode);
+ let grouped_opcodes = matcher.get_grouped_opcodes(2);
+ println!("{:?}", grouped_opcodes);
+ let ratio = matcher.ratio();
+ println!("{:?}", ratio);
+ matcher.set_seqs("aaaaa", "aaaab");
+ println!("{:?}", matcher.ratio());
+}
diff --git a/src/differ.rs b/src/differ.rs
new file mode 100644
index 0000000..5caef9a
--- /dev/null
+++ b/src/differ.rs
@@ -0,0 +1,340 @@
+use sequencematcher::SequenceMatcher;
+use std::cmp;
+use utils::{count_leading, str_with_similar_chars};
+
+#[derive(Default)]
+pub struct Differ {
+ pub line_junk: Option<fn(&&str) -> bool>,
+ pub char_junk: Option<fn(&char) -> bool>,
+}
+
+impl Differ {
+ pub fn new() -> Differ {
+ Differ {
+ line_junk: None,
+ char_junk: None,
+ }
+ }
+
+ pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> {
+ let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
+ matcher.set_is_junk(self.line_junk);
+ let mut res = Vec::new();
+ for opcode in matcher.get_opcodes() {
+ let mut gen = Vec::new();
+ match opcode.tag.as_ref() {
+ "replace" => {
+ gen = self.fancy_replace(
+ first_sequence,
+ opcode.first_start,
+ opcode.first_end,
+ second_sequence,
+ opcode.second_start,
+ opcode.second_end,
+ )
+ }
+ "delete" => {
+ gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end)
+ }
+ "insert" => {
+ gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end)
+ }
+ "equal" => {
+ gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end)
+ }
+ _ => {}
+ }
+ for i in gen {
+ res.push(i);
+ }
+ }
+ res
+ }
+
+ fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> {
+ let mut res = Vec::new();
+ for i in start..end {
+ if let Some(s) = sequence.get(i) {
+ res.push(format!("{} {}", tag, s))
+ }
+ }
+ res
+ }
+
+ fn plain_replace(
+ &self,
+ first_sequence: &[&str],
+ first_start: usize,
+ first_end: usize,
+ second_sequence: &[&str],
+ second_start: usize,
+ second_end: usize,
+ ) -> Vec<String> {
+ if !(first_start < first_end && second_start < second_end) {
+ return Vec::new();
+ }
+ let (mut first, second) = if second_end - second_start < first_end - first_start {
+ (
+ self.dump("+", second_sequence, second_start, second_end),
+ self.dump("-", first_sequence, first_start, first_end),
+ )
+ } else {
+ (
+ self.dump("-", first_sequence, first_start, first_end),
+ self.dump("+", second_sequence, second_start, second_end),
+ )
+ };
+ for s in second {
+ first.push(s);
+ }
+ first
+ }
+
+ fn fancy_replace(
+ &self,
+ first_sequence: &[&str],
+ first_start: usize,
+ first_end: usize,
+ second_sequence: &[&str],
+ second_start: usize,
+ second_end: usize,
+ ) -> Vec<String> {
+ let mut res = Vec::new();
+ let (mut best_ratio, cutoff) = (0.74, 0.75);
+ let (mut best_i, mut best_j) = (0, 0);
+ let mut eqi: Option<usize> = None;
+ let mut eqj: Option<usize> = None;
+ for (j, second_sequence_str) in second_sequence
+ .iter()
+ .enumerate()
+ .take(second_end)
+ .skip(second_start)
+ {
+ for (i, first_sequence_str) in first_sequence
+ .iter()
+ .enumerate()
+ .take(second_end)
+ .skip(second_start)
+ {
+ if first_sequence_str == second_sequence_str {
+ if eqi.is_none() {
+ eqi = Some(i);
+ eqj = Some(j);
+ }
+ continue;
+ }
+ let (first_sequence_chars, second_sequence_chars) = (
+ first_sequence_str.chars().collect::<Vec<char>>(),
+ second_sequence_str.chars().collect::<Vec<char>>(),
+ );
+ let mut cruncher =
+ SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars);
+ cruncher.set_is_junk(self.char_junk);
+ if cruncher.ratio() > best_ratio {
+ best_ratio = cruncher.ratio();
+ best_i = i;
+ best_j = j;
+ }
+ }
+ }
+ if best_ratio < cutoff {
+ if eqi.is_none() {
+ res.extend(
+ self.plain_replace(
+ first_sequence,
+ first_start,
+ first_end,
+ second_sequence,
+ second_start,
+ second_end,
+ ).iter()
+ .cloned(),
+ );
+ return res;
+ }
+ best_i = eqi.unwrap();
+ best_j = eqj.unwrap();
+ } else {
+ eqi = None;
+ }
+ res.extend(
+ self.fancy_helper(
+ first_sequence,
+ first_start,
+ best_i,
+ second_sequence,
+ second_start,
+ best_j,
+ ).iter()
+ .cloned(),
+ );
+ let first_element = &first_sequence[best_i];
+ let second_element = &second_sequence[best_j];
+ if eqi.is_none() {
+ let (mut first_tag, mut second_tag) = (String::new(), String::new());
+ let first_element_chars: Vec<char> = first_element.chars().collect();
+ let second_element_chars: Vec<char> = second_element.chars().collect();
+ let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars);
+ cruncher.set_is_junk(self.char_junk);
+ for opcode in &cruncher.get_opcodes() {
+ let (first_length, second_length) = (
+ opcode.first_end - opcode.first_start,
+ opcode.second_end - opcode.second_start,
+ );
+ match opcode.tag.as_ref() {
+ "replace" => {
+ first_tag.push_str(&str_with_similar_chars('^', first_length));
+ second_tag.push_str(&str_with_similar_chars('^', second_length));
+ }
+ "delete" => {
+ first_tag.push_str(&str_with_similar_chars('-', first_length));
+ }
+ "insert" => {
+ second_tag.push_str(&str_with_similar_chars('+', second_length));
+ }
+ "equal" => {
+ first_tag.push_str(&str_with_similar_chars(' ', first_length));
+ second_tag.push_str(&str_with_similar_chars(' ', second_length));
+ }
+ _ => {}
+ }
+ }
+ res.extend(
+ self.qformat(&first_element, &second_element, &first_tag, &second_tag)
+ .iter()
+ .cloned(),
+ );
+ } else {
+ let mut s = String::from(" ");
+ s.push_str(&first_element);
+ res.push(s);
+ }
+ res.extend(
+ self.fancy_helper(
+ first_sequence,
+ best_i + 1,
+ first_end,
+ second_sequence,
+ best_j + 1,
+ second_end,
+ ).iter()
+ .cloned(),
+ );
+ res
+ }
+
+ fn fancy_helper(
+ &self,
+ first_sequence: &[&str],
+ first_start: usize,
+ first_end: usize,
+ second_sequence: &[&str],
+ second_start: usize,
+ second_end: usize,
+ ) -> Vec<String> {
+ let mut res = Vec::new();
+ if first_start < first_end {
+ if second_start < second_end {
+ res = self.fancy_replace(
+ first_sequence,
+ first_start,
+ first_end,
+ second_sequence,
+ second_start,
+ second_end,
+ );
+ } else {
+ res = self.dump("-", first_sequence, first_start, first_end);
+ }
+ } else if second_start < second_end {
+ res = self.dump("+", second_sequence, second_start, second_end);
+ }
+ res
+ }
+
+ fn qformat(
+ &self,
+ first_line: &str,
+ second_line: &str,
+ first_tags: &str,
+ second_tags: &str,
+ ) -> Vec<String> {
+ let mut res = Vec::new();
+ let mut first_tags = first_tags;
+ let mut second_tags = second_tags;
+ let mut common = cmp::min(
+ count_leading(first_line, '\t'),
+ count_leading(second_line, '\t'),
+ );
+ common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
+ common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
+ first_tags = first_tags.split_at(common).1.trim_right();
+ second_tags = second_tags.split_at(common).1.trim_right();
+ let mut s = format!("- {}", first_line);
+ res.push(s);
+ if first_tags != "" {
+ s = format!("? {}{}\n", str_with_similar_chars('\t', common), first_tags);
+ res.push(s);
+ }
+ s = format!("+ {}", second_line);
+ res.push(s);
+ if second_tags != "" {
+ s = format!(
+ "? {}{}\n",
+ str_with_similar_chars('\t', common),
+ second_tags
+ );
+ res.push(s);
+ }
+ res
+ }
+
+ pub fn restore(delta: &[String], which: usize) -> Vec<String> {
+ if !(which == 1 || which == 2) {
+ panic!("Second parameter must be 1 or 2");
+ }
+ let mut res = Vec::new();
+ let tag = if which == 1 { "- " } else { "+ " }.to_string();
+ let prefixes = vec![tag, " ".to_string()];
+ for line in delta {
+ for prefix in &prefixes {
+ if line.starts_with(prefix) {
+ res.push(line.split_at(2).1.to_string());
+ }
+ }
+ }
+ res
+ }
+}
+
+#[test]
+fn test_fancy_replace() {
+ let differ = Differ::new();
+ let result = differ
+ .fancy_replace(&vec!["abcDefghiJkl\n"], 0, 1, &vec!["abcdefGhijkl\n"], 0, 1)
+ .join("");
+ assert_eq!(
+ result,
+ "- abcDefghiJkl\n? ^ ^ ^\n+ abcdefGhijkl\n? ^ ^ ^\n"
+ );
+}
+
+#[test]
+fn test_qformat() {
+ let differ = Differ::new();
+ let result = differ.qformat(
+ "\tabcDefghiJkl\n",
+ "\tabcdefGhijkl\n",
+ " ^ ^ ^ ",
+ " ^ ^ ^ ",
+ );
+ assert_eq!(
+ result,
+ vec![
+ "- \tabcDefghiJkl\n",
+ "? \t ^ ^ ^\n",
+ "+ \tabcdefGhijkl\n",
+ "? \t ^ ^ ^\n",
+ ]
+ );
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ca6b1cc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,169 @@
+pub mod differ;
+pub mod sequencematcher;
+mod utils;
+
+use sequencematcher::{Sequence, SequenceMatcher};
+use std::collections::HashMap;
+use std::fmt::Display;
+use utils::{format_range_context, format_range_unified};
+
+pub fn get_close_matches<'a>(
+ word: &str,
+ possibilities: Vec<&'a str>,
+ n: usize,
+ cutoff: f32,
+) -> Vec<&'a str> {
+ if !(0.0 <= cutoff && cutoff <= 1.0) {
+ panic!("Cutoff must be greater than 0.0 and lower than 1.0");
+ }
+ let mut res: Vec<(f32, &str)> = Vec::new();
+ let mut matcher = SequenceMatcher::new("", word);
+ for i in &possibilities {
+ matcher.set_first_seq(i);
+ let ratio = matcher.ratio();
+ if ratio >= cutoff {
+ res.push((ratio, i));
+ }
+ }
+ res.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
+ res.truncate(n);
+ res.iter().map(|x| x.1).collect()
+}
+
+pub fn unified_diff<T: Sequence + Display>(
+ first_sequence: &[T],
+ second_sequence: &[T],
+ from_file: &str,
+ to_file: &str,
+ from_file_date: &str,
+ to_file_date: &str,
+ n: usize,
+) -> Vec<String> {
+ let mut res = Vec::new();
+ let lineterm = '\n';
+ let mut started = false;
+ let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
+ for group in &matcher.get_grouped_opcodes(n) {
+ if !started {
+ started = true;
+ let from_date = format!("\t{}", from_file_date);
+ let to_date = format!("\t{}", to_file_date);
+ res.push(format!("--- {}{}{}", from_file, from_date, lineterm));
+ res.push(format!("+++ {}{}{}", to_file, to_date, lineterm));
+ }
+ let (first, last) = (group.first().unwrap(), group.last().unwrap());
+ let file1_range = format_range_unified(first.first_start, last.first_end);
+ let file2_range = format_range_unified(first.second_start, last.second_end);
+ res.push(format!(
+ "@@ -{} +{} @@{}",
+ file1_range, file2_range, lineterm
+ ));
+ for code in group {
+ if code.tag == "equal" {
+ for item in first_sequence
+ .iter()
+ .take(code.first_end)
+ .skip(code.first_start)
+ {
+ res.push(format!(" {}", item));
+ }
+ continue;
+ }
+ if code.tag == "replace" || code.tag == "delete" {
+ for item in first_sequence
+ .iter()
+ .take(code.first_end)
+ .skip(code.first_start)
+ {
+ res.push(format!("-{}", item));
+ }
+ }
+ if code.tag == "replace" || code.tag == "insert" {
+ for item in second_sequence
+ .iter()
+ .take(code.second_end)
+ .skip(code.second_start)
+ {
+ res.push(format!("+{}", item));
+ }
+ }
+ }
+ }
+ res
+}
+
+pub fn context_diff<T: Sequence + Display>(
+ first_sequence: &[T],
+ second_sequence: &[T],
+ from_file: &str,
+ to_file: &str,
+ from_file_date: &str,
+ to_file_date: &str,
+ n: usize,
+) -> Vec<String> {
+ let mut res = Vec::new();
+ let lineterm = '\n';
+ let mut prefix: HashMap<String, String> = HashMap::new();
+ prefix.insert(String::from("insert"), String::from("+ "));
+ prefix.insert(String::from("delete"), String::from("- "));
+ prefix.insert(String::from("replace"), String::from("! "));
+ prefix.insert(String::from("equal"), String::from(" "));
+ let mut started = false;
+ let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
+ for group in &matcher.get_grouped_opcodes(n) {
+ if !started {
+ started = true;
+ let from_date = format!("\t{}", from_file_date);
+ let to_date = format!("\t{}", to_file_date);
+ res.push(format!("*** {}{}{}", from_file, from_date, lineterm));
+ res.push(format!("--- {}{}{}", to_file, to_date, lineterm));
+ }
+ let (first, last) = (group.first().unwrap(), group.last().unwrap());
+ res.push(format!("***************{}", lineterm));
+ let file1_range = format_range_context(first.first_start, last.first_end);
+ res.push(format!("*** {} ****{}", file1_range, lineterm));
+ let mut any = false;
+ for opcode in group {
+ if opcode.tag == "replace" || opcode.tag == "delete" {
+ any = true;
+ break;
+ }
+ }
+ if any {
+ for opcode in group {
+ if opcode.tag != "insert" {
+ for item in first_sequence
+ .iter()
+ .take(opcode.first_end)
+ .skip(opcode.first_start)
+ {
+ res.push(format!("{}{}", &prefix[&opcode.tag], item));
+ }
+ }
+ }
+ }
+ let file2_range = format_range_context(first.second_start, last.second_end);
+ res.push(format!("--- {} ----{}", file2_range, lineterm));
+ any = false;
+ for opcode in group {
+ if opcode.tag == "replace" || opcode.tag == "insert" {
+ any = true;
+ break;
+ }
+ }
+ if any {
+ for opcode in group {
+ if opcode.tag != "delete" {
+ for item in second_sequence
+ .iter()
+ .take(opcode.second_end)
+ .skip(opcode.second_start)
+ {
+ res.push(format!("{}{}", prefix[&opcode.tag], item));
+ }
+ }
+ }
+ }
+ }
+ res
+}
diff --git a/src/sequencematcher.rs b/src/sequencematcher.rs
new file mode 100644
index 0000000..4157808
--- /dev/null
+++ b/src/sequencematcher.rs
@@ -0,0 +1,346 @@
+use std::cmp::{max, min};
+use std::collections::HashMap;
+use std::hash::Hash;
+use utils::calculate_ratio;
+
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
+pub struct Match {
+ pub first_start: usize,
+ pub second_start: usize,
+ pub size: usize,
+}
+
+impl Match {
+ fn new(first_start: usize, second_start: usize, size: usize) -> Match {
+ Match {
+ first_start,
+ second_start,
+ size,
+ }
+ }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct Opcode {
+ pub tag: String,
+ pub first_start: usize,
+ pub first_end: usize,
+ pub second_start: usize,
+ pub second_end: usize,
+}
+
+impl Opcode {
+ fn new(
+ tag: String,
+ first_start: usize,
+ first_end: usize,
+ second_start: usize,
+ second_end: usize,
+ ) -> Opcode {
+ Opcode {
+ tag,
+ first_start,
+ first_end,
+ second_start,
+ second_end,
+ }
+ }
+}
+
+pub trait Sequence: Eq + Hash {}
+impl<T: Eq + Hash> Sequence for T {}
+
+pub struct SequenceMatcher<'a, T: 'a + Sequence> {
+ first_sequence: &'a [T],
+ second_sequence: &'a [T],
+ matching_blocks: Option<Vec<Match>>,
+ opcodes: Option<Vec<Opcode>>,
+ is_junk: Option<fn(&T) -> bool>,
+ second_sequence_elements: HashMap<&'a T, Vec<usize>>,
+}
+
+impl<'a, T: Sequence> SequenceMatcher<'a, T> {
+ pub fn new<S>(first_sequence: &'a S, second_sequence: &'a S) -> SequenceMatcher<'a, T>
+ where
+ S: AsRef<[T]> + ?Sized,
+ {
+ let mut matcher = SequenceMatcher {
+ first_sequence: first_sequence.as_ref(),
+ second_sequence: second_sequence.as_ref(),
+ matching_blocks: None,
+ opcodes: None,
+ is_junk: None,
+ second_sequence_elements: HashMap::new(),
+ };
+ matcher.set_seqs(first_sequence, second_sequence);
+ matcher
+ }
+
+ pub fn set_is_junk(&mut self, is_junk: Option<fn(&T) -> bool>) {
+ self.is_junk = is_junk;
+ self.matching_blocks = None;
+ self.opcodes = None;
+ self.chain_second_seq();
+ }
+
+ pub fn set_seqs<S>(&mut self, first_sequence: &'a S, second_sequence: &'a S)
+ where
+ S: AsRef<[T]> + ?Sized,
+ {
+ self.set_first_seq(first_sequence);
+ self.set_second_seq(second_sequence);
+ }
+
+ pub fn set_first_seq<S>(&mut self, sequence: &'a S)
+ where
+ S: AsRef<[T]> + ?Sized,
+ {
+ self.first_sequence = sequence.as_ref();
+ self.matching_blocks = None;
+ self.opcodes = None;
+ }
+
+ pub fn set_second_seq<S>(&mut self, sequence: &'a S)
+ where
+ S: AsRef<[T]> + ?Sized,
+ {
+ self.second_sequence = sequence.as_ref();
+ self.matching_blocks = None;
+ self.opcodes = None;
+ self.chain_second_seq();
+ }
+
+ fn chain_second_seq(&mut self) {
+ let second_sequence = self.second_sequence;
+ let mut second_sequence_elements = HashMap::new();
+ for (i, item) in second_sequence.iter().enumerate() {
+ let mut counter = second_sequence_elements
+ .entry(item)
+ .or_insert_with(Vec::new);
+ counter.push(i);
+ }
+ if let Some(junk_func) = self.is_junk {
+ second_sequence_elements = second_sequence_elements
+ .into_iter()
+ .filter(|&(element, _)| !junk_func(element))
+ .collect();
+ }
+ // Filter out popular elements
+ let len = second_sequence.len();
+ if len >= 200 {
+ let test_len = (len as f32 / 100.0).floor() as usize + 1;
+ second_sequence_elements = second_sequence_elements
+ .into_iter()
+ .filter(|&(_, ref indexes)| indexes.len() > test_len)
+ .collect();
+ }
+ self.second_sequence_elements = second_sequence_elements;
+ }
+
+ pub fn find_longest_match(
+ &self,
+ first_start: usize,
+ first_end: usize,
+ second_start: usize,
+ second_end: usize,
+ ) -> Match {
+ let first_sequence = &self.first_sequence;
+ let second_sequence = &self.second_sequence;
+ let second_sequence_elements = &self.second_sequence_elements;
+ let (mut best_i, mut best_j, mut best_size) = (first_start, second_start, 0);
+ let mut j2len: HashMap<usize, usize> = HashMap::new();
+ for (i, item) in first_sequence
+ .iter()
+ .enumerate()
+ .take(first_end)
+ .skip(first_start)
+ {
+ let mut new_j2len: HashMap<usize, usize> = HashMap::new();
+ if let Some(indexes) = second_sequence_elements.get(item) {
+ for j in indexes {
+ let j = *j;
+ if j < second_start {
+ continue;
+ };
+ if j >= second_end {
+ break;
+ };
+ let mut size = 0;
+ if j > 0 {
+ if let Some(k) = j2len.get(&(j - 1)) {
+ size = *k;
+ }
+ }
+ size += 1;
+ new_j2len.insert(j, size);
+ if size > best_size {
+ best_i = i + 1 - size;
+ best_j = j + 1 - size;
+ best_size = size;
+ }
+ }
+ }
+ j2len = new_j2len;
+ }
+ for _ in 0..2 {
+ while best_i > first_start
+ && best_j > second_start
+ && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1)
+ {
+ best_i -= 1;
+ best_j -= 1;
+ best_size += 1;
+ }
+ while best_i + best_size < first_end
+ && best_j + best_size < second_end
+ && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size)
+ {
+ best_size += 1;
+ }
+ }
+ Match::new(best_i, best_j, best_size)
+ }
+
+ pub fn get_matching_blocks(&mut self) -> Vec<Match> {
+ if self.matching_blocks.as_ref().is_some() {
+ return self.matching_blocks.as_ref().unwrap().clone();
+ }
+ let (first_length, second_length) = (self.first_sequence.len(), self.second_sequence.len());
+ let mut matches = Vec::new();
+ let mut queue = vec![(0, first_length, 0, second_length)];
+ while !queue.is_empty() {
+ let (first_start, first_end, second_start, second_end) = queue.pop().unwrap();
+ let m = self.find_longest_match(first_start, first_end, second_start, second_end);
+ match m.size {
+ 0 => {}
+ _ => {
+ if first_start < m.first_start && second_start < m.second_start {
+ queue.push((first_start, m.first_start, second_start, m.second_start));
+ }
+ if m.first_start + m.size < first_end && m.second_start + m.size < second_end {
+ queue.push((
+ m.first_start + m.size,
+ first_end,
+ m.second_start + m.size,
+ second_end,
+ ));
+ }
+ matches.push(m);
+ }
+ }
+ }
+ matches.sort_by(|a, b| a.cmp(b));
+ let (mut first_start, mut second_start, mut size) = (0, 0, 0);
+ let mut non_adjacent = Vec::new();
+ for m in &matches {
+ if first_start + size == m.first_start && second_start + size == m.second_start {
+ size += m.size
+ } else {
+ if size != 0 {
+ non_adjacent.push(Match::new(first_start, second_start, size));
+ }
+ first_start = m.first_start;
+ second_start = m.second_start;
+ size = m.size;
+ }
+ }
+ if size != 0 {
+ non_adjacent.push(Match::new(first_start, second_start, size));
+ }
+ non_adjacent.push(Match::new(first_length, second_length, 0));
+ self.matching_blocks = Some(non_adjacent);
+ self.matching_blocks.as_ref().unwrap().clone()
+ }
+
+ pub fn get_opcodes(&mut self) -> Vec<Opcode> {
+ if self.opcodes.as_ref().is_some() {
+ return self.opcodes.as_ref().unwrap().clone();
+ }
+ let mut opcodes = Vec::new();
+ let (mut i, mut j) = (0, 0);
+ for m in self.get_matching_blocks() {
+ let mut tag = String::new();
+ if i < m.first_start && j < m.second_start {
+ tag = String::from("replace");
+ } else if i < m.first_start {
+ tag = String::from("delete");
+ } else if j < m.second_start {
+ tag = String::from("insert");
+ }
+ if !tag.is_empty() {
+ opcodes.push(Opcode::new(tag, i, m.first_start, j, m.second_start));
+ }
+ i = m.first_start + m.size;
+ j = m.second_start + m.size;
+ if m.size != 0 {
+ opcodes.push(Opcode::new(
+ String::from("equal"),
+ m.first_start,
+ i,
+ m.second_start,
+ j,
+ ));
+ }
+ }
+ self.opcodes = Some(opcodes);
+ self.opcodes.as_ref().unwrap().clone()
+ }
+
+ pub fn get_grouped_opcodes(&mut self, n: usize) -> Vec<Vec<Opcode>> {
+ let mut res = Vec::new();
+ let mut codes = self.get_opcodes();
+ if codes.is_empty() {
+ codes.push(Opcode::new("equal".to_string(), 0, 1, 0, 1));
+ }
+
+ if codes.first().unwrap().tag == "equal" {
+ let opcode = codes.first_mut().unwrap();
+ opcode.first_start = max(opcode.first_start, opcode.first_end.saturating_sub(n));
+ opcode.second_start = max(opcode.second_start, opcode.second_end.saturating_sub(n));
+ }
+ if codes.last().unwrap().tag == "equal" {
+ let opcode = codes.last_mut().unwrap();
+ opcode.first_end = min(opcode.first_start + n, opcode.first_end);
+ opcode.second_end = min(opcode.second_start + n, opcode.second_end);
+ }
+ let nn = n + n;
+ let mut group = Vec::new();
+ for code in &codes {
+ let (mut first_start, mut second_start) = (code.first_start, code.second_start);
+ if code.tag == "equal" && code.first_end - code.first_start > nn {
+ group.push(Opcode::new(
+ code.tag.clone(),
+ code.first_start,
+ min(code.first_end, code.first_start + n),
+ code.second_start,
+ min(code.second_end, code.second_start + n),
+ ));
+ res.push(group.clone());
+ group.clear();
+ first_start = max(first_start, code.first_end.saturating_sub(n));
+ second_start = max(second_start, code.second_end.saturating_sub(n));
+ }
+ group.push(Opcode::new(
+ code.tag.clone(),
+ first_start,
+ code.first_end,
+ second_start,
+ code.second_end,
+ ));
+ }
+ if !(group.len() == 1 && group.first().unwrap().tag == "equal") || group.is_empty() {
+ res.push(group.clone());
+ }
+ res
+ }
+
+ pub fn ratio(&mut self) -> f32 {
+ let matches = self.get_matching_blocks()
+ .iter()
+ .fold(0, |res, &m| res + m.size);
+ calculate_ratio(
+ matches,
+ self.first_sequence.len() + self.second_sequence.len(),
+ )
+ }
+}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..b711bf8
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,47 @@
+pub fn calculate_ratio(matches: usize, length: usize) -> f32 {
+ if length != 0 {
+ return 2.0 * matches as f32 / length as f32;
+ }
+ 1.0
+}
+
+pub fn str_with_similar_chars(c: char, length: usize) -> String {
+ let mut s = String::new();
+ for _ in 0..length {
+ s.push_str(&c.to_string());
+ }
+ s
+}
+
+pub fn count_leading(line: &str, c: char) -> usize {
+ let (mut i, n) = (0, line.len());
+ let line: Vec<char> = line.chars().collect();
+ while (i < n) && line[i] == c {
+ i += 1;
+ }
+ i
+}
+
+pub fn format_range_unified(start: usize, end: usize) -> String {
+ let mut beginning = start + 1;
+ let length = end - start;
+ if length == 1 {
+ return beginning.to_string();
+ }
+ if length == 0 {
+ beginning -= 1;
+ }
+ format!("{},{}", beginning, length)
+}
+
+pub fn format_range_context(start: usize, end: usize) -> String {
+ let mut beginning = start + 1;
+ let length = end - start;
+ if length == 0 {
+ beginning -= 1
+ }
+ if length <= 1 {
+ return beginning.to_string();
+ }
+ format!("{},{}", beginning, beginning + length - 1)
+}
diff --git a/tests/tests.rs b/tests/tests.rs
new file mode 100644
index 0000000..8f49de9
--- /dev/null
+++ b/tests/tests.rs
@@ -0,0 +1,194 @@
+extern crate difflib;
+
+use difflib::differ::Differ;
+use difflib::sequencematcher::{Match, Opcode, SequenceMatcher};
+
+#[test]
+fn test_longest_match() {
+ let matcher = SequenceMatcher::new(" abcd", "abcd abcd");
+ let m = matcher.find_longest_match(0, 5, 0, 9);
+ assert_eq!(m.first_start, 0);
+ assert_eq!(m.second_start, 4);
+ assert_eq!(m.size, 5);
+}
+
+#[test]
+fn test_all_matches() {
+ let mut matcher = SequenceMatcher::new("abxcd", "abcd");
+ let result = matcher.get_matching_blocks();
+ let mut expected_result = Vec::new();
+ expected_result.push(Match {
+ first_start: 0,
+ second_start: 0,
+ size: 2,
+ });
+ expected_result.push(Match {
+ first_start: 3,
+ second_start: 2,
+ size: 2,
+ });
+ expected_result.push(Match {
+ first_start: 5,
+ second_start: 4,
+ size: 0,
+ });
+ assert_eq!(result, expected_result);
+}
+
+#[test]
+fn test_get_opcodes() {
+ let mut matcher = SequenceMatcher::new("qabxcd", "abycdf");
+ let result = matcher.get_opcodes();
+ let mut expected_result = Vec::new();
+ expected_result.push(Opcode {
+ tag: "delete".to_string(),
+ first_start: 0,
+ first_end: 1,
+ second_start: 0,
+ second_end: 0,
+ });
+ expected_result.push(Opcode {
+ tag: "equal".to_string(),
+ first_start: 1,
+ first_end: 3,
+ second_start: 0,
+ second_end: 2,
+ });
+ expected_result.push(Opcode {
+ tag: "replace".to_string(),
+ first_start: 3,
+ first_end: 4,
+ second_start: 2,
+ second_end: 3,
+ });
+ expected_result.push(Opcode {
+ tag: "equal".to_string(),
+ first_start: 4,
+ first_end: 6,
+ second_start: 3,
+ second_end: 5,
+ });
+ expected_result.push(Opcode {
+ tag: "insert".to_string(),
+ first_start: 6,
+ first_end: 6,
+ second_start: 5,
+ second_end: 6,
+ });
+ assert_eq!(result, expected_result);
+}
+
+#[test]
+fn test_ratio() {
+ let mut matcher = SequenceMatcher::new("abcd", "bcde");
+ assert_eq!(matcher.ratio(), 0.75);
+}
+
+#[test]
+fn test_get_close_matches() {
+ let words = vec!["ape", "apple", "peach", "puppy"];
+ let result = difflib::get_close_matches("appel", words, 3, 0.6);
+ assert_eq!(result, vec!["apple", "ape"]);
+}
+
+#[test]
+fn test_differ_compare() {
+ let first_text = vec!["one\n", "two\n", "three\n"];
+ let second_text = vec!["ore\n", "tree\n", "emu\n"];
+ let differ = Differ::new();
+ let result = differ.compare(&first_text, &second_text).join("");
+ assert_eq!(
+ result,
+ "- one\n? ^\n+ ore\n? ^\n- two\n- three\n? -\n+ tree\n+ emu\n"
+ );
+}
+
+fn is_junk_char(ch: &char) -> bool {
+ if *ch == ' ' || *ch == '\t' {
+ return true;
+ }
+ false
+}
+
+#[test]
+fn test_differ_compare_with_func() {
+ let first_text = vec!["one\n", "two\n", "three\n"];
+ let second_text = vec!["ore\n", "tree\n", "emu\n"];
+ let mut differ = Differ::new();
+ differ.char_junk = Some(is_junk_char);
+ let result = differ.compare(&first_text, &second_text).join("");
+ assert_eq!(
+ result,
+ "- one\n? ^\n+ ore\n? ^\n- two\n- three\n? -\n+ tree\n+ emu\n"
+ );
+}
+
+#[test]
+fn test_differ_restore() {
+ let first_text = vec!["one\n", " two\n", "three\n"];
+ let second_text = vec!["ore\n", "tree\n", "emu\n"];
+ let differ = Differ::new();
+ let diff = differ.compare(&first_text, &second_text);
+ assert_eq!(first_text, Differ::restore(&diff, 1));
+ assert_eq!(second_text, Differ::restore(&diff, 2));
+}
+
+#[test]
+fn test_unified_diff() {
+ let first_text = "one two three four".split(" ").collect::<Vec<&str>>();
+ let second_text = "zero one tree four".split(" ").collect::<Vec<&str>>();
+ let result = difflib::unified_diff(
+ &first_text,
+ &second_text,
+ "Original",
+ "Current",
+ "2005-01-26 23:30:50",
+ "2010-04-02 10:20:52",
+ 3,
+ ).join("");
+ assert_eq!(
+ result,
+ "--- Original\t2005-01-26 23:30:50\n+++ Current\t2010-04-02 10:20:52\n@@ -1,4 \
+ +1,4 @@\n+zero one-two-three+tree four"
+ );
+}
+
+#[test]
+fn test_context_diff() {
+ let first_text = "one two three four".split(" ").collect::<Vec<&str>>();
+ let second_text = "zero one tree four".split(" ").collect::<Vec<&str>>();
+ let result = difflib::context_diff(
+ &first_text,
+ &second_text,
+ "Original",
+ "Current",
+ "2005-01-26 23:30:50",
+ "2010-04-02 10:20:52",
+ 3,
+ ).join("");
+ assert_eq!(
+ result,
+ "*** Original\t2005-01-26 23:30:50\n--- Current\t2010-04-02 \
+ 10:20:52\n***************\n*** 1,4 ****\n one! two! three four--- 1,4 ----\n+ \
+ zero one! tree four"
+ );
+}
+
+#[test]
+fn test_integer_slice() {
+ let s1 = vec![1, 2, 3, 4, 5];
+ let s2 = vec![5, 4, 3, 2, 1];
+ let result = SequenceMatcher::new(&s1, &s2).get_matching_blocks();
+ let mut expected_result = Vec::new();
+ expected_result.push(Match {
+ first_start: 0,
+ second_start: 4,
+ size: 1,
+ });
+ expected_result.push(Match {
+ first_start: 5,
+ second_start: 5,
+ size: 0,
+ });
+ assert_eq!(result, expected_result);
+}