retrained with new data from opensubs. qwerty subs

Files changed (11) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "finetuned-model/",
   "architectures": [
     "T5ForConditionalGeneration"
   ],

 {
+  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],

dataset_dict.json ADDED Viewed

@@ -0,0 +1 @@
+{"splits": ["train", "eval"]}

eval/data-00000-of-00001.arrow ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3914c2653952d5ef669d4dec5cfa59bd7587d0999b3038929d02ef51c3f3f7
+size 187010360

eval/dataset_info.json ADDED Viewed

+{
+  "builder_name": "csv",
+  "citation": "",
+  "config_name": "default",
+  "dataset_name": "csv",
+  "dataset_size": 472235292,
+  "description": "",
+  "download_checksums": {
+    "/home/ubuntu/wwdrive2/14March/clean_train.csv": {
+      "num_bytes": 369392984,
+      "checksum": null
+    },
+    "/home/ubuntu/wwdrive2/14March/clean_eval.csv": {
+      "num_bytes": 71786407,
+      "checksum": null
+    }
+  },
+  "download_size": 441179391,
+  "features": {
+    "input_ids": {
+      "feature": {
+        "dtype": "int32",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "attention_mask": {
+      "feature": {
+        "dtype": "int8",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "labels": {
+      "feature": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    }
+  },
+  "homepage": "",
+  "license": "",
+  "size_in_bytes": 913414683,
+  "splits": {
+    "train": {
+      "name": "train",
+      "num_bytes": 394316099,
+      "num_examples": 4507525,
+      "dataset_name": "csv"
+    },
+    "eval": {
+      "name": "eval",
+      "num_bytes": 77919193,
+      "num_examples": 1127410,
+      "dataset_name": "csv"
+    }
+  },
+  "version": {
+    "version_str": "0.0.0",
+    "major": 0,
+    "minor": 0,
+    "patch": 0
+  }
+}

eval/state.json ADDED Viewed

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "150697196cb85c2e",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "eval"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7570f8c76e695d91c713c25c55aed0bc65343df9f4b9aa20cf3f595ff6114e32
 size 241984552

 version https://git-lfs.github.com/spec/v1
+oid sha256:35a20c7f6786661d16cdff57b5432726582b676eb3c02e5f7a869c58db2dd3de
 size 241984552

tokenizer_config.json CHANGED Viewed

@@ -930,12 +930,8 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
-  "max_length": 512,
   "model_max_length": 512,
   "pad_token": "<pad>",
-  "stride": 0,
   "tokenizer_class": "T5Tokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }

   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
   "model_max_length": 512,
   "pad_token": "<pad>",
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

train/data-00000-of-00002.arrow ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee121506abd0676a5266c0592070a202642789ef9a38fd5876aa403135c44a04
+size 446399184

train/data-00001-of-00002.arrow ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cc62e5c0f5a5137d80e124fb9156e0a60274f953ed6b95347cf7c32df324d25
+size 425352424

train/dataset_info.json ADDED Viewed

+{
+  "builder_name": "csv",
+  "citation": "",
+  "config_name": "default",
+  "dataset_name": "csv",
+  "dataset_size": 472235292,
+  "description": "",
+  "download_checksums": {
+    "/home/ubuntu/wwdrive2/14March/clean_train.csv": {
+      "num_bytes": 369392984,
+      "checksum": null
+    },
+    "/home/ubuntu/wwdrive2/14March/clean_eval.csv": {
+      "num_bytes": 71786407,
+      "checksum": null
+    }
+  },
+  "download_size": 441179391,
+  "features": {
+    "input_ids": {
+      "feature": {
+        "dtype": "int32",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "attention_mask": {
+      "feature": {
+        "dtype": "int8",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "labels": {
+      "feature": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    }
+  },
+  "homepage": "",
+  "license": "",
+  "size_in_bytes": 913414683,
+  "splits": {
+    "train": {
+      "name": "train",
+      "num_bytes": 394316099,
+      "num_examples": 4507525,
+      "dataset_name": "csv"
+    },
+    "eval": {
+      "name": "eval",
+      "num_bytes": 77919193,
+      "num_examples": 1127410,
+      "dataset_name": "csv"
+    }
+  },
+  "version": {
+    "version_str": "0.0.0",
+    "major": 0,
+    "minor": 0,
+    "patch": 0
+  }
+}

train/state.json ADDED Viewed

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00002.arrow"
+    },
+    {
+      "filename": "data-00001-of-00002.arrow"
+    }
+  ],
+  "_fingerprint": "26c810267bb075b4",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": "train"
+}